master/lib/KOReaderSync/ChapterXPathIndexer.cpp

#include "ChapterXPathIndexer.h"

#include <Logging.h>
#include <expat.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <limits>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

namespace {

// Anchor used for both mapping directions.
// textOffset is counted as visible (non-whitespace) bytes from chapter start.
// xpath points to the nearest element path at/near that offset.

struct XPathAnchor {
  size_t textOffset = 0;
  std::string xpath;
  std::string xpathNoIndex;  // precomputed removeIndices(xpath)
};

struct StackNode {
  std::string tag;
  int index = 1;
  bool hasTextAnchor = false;
};

// ParserState is intentionally ephemeral and created per lookup call.
// It holds only one spine parse worth of data to avoid retaining structures
// that would increase long-lived heap usage on the ESP32-C3.
struct ParserState {
  explicit ParserState(const int spineIndex) : spineIndex(spineIndex) { siblingCounters.emplace_back(); }

  int spineIndex = 0;
  int skipDepth = -1;
  size_t totalTextBytes = 0;

  std::vector<StackNode> stack;
  std::vector<std::unordered_map<std::string, int>> siblingCounters;
  std::vector<XPathAnchor> anchors;

  std::string baseXPath() const { return "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; }

  // Canonicalize incoming KOReader XPath before matching:
  // - remove all whitespace
  // - lowercase tags
  // - strip optional trailing /text()
  // - strip trailing slash
  static std::string normalizeXPath(const std::string& input) {
    if (input.empty()) {
      return "";
    }

    std::string out;
    out.reserve(input.size());
    for (char c : input) {
      const unsigned char uc = static_cast<unsigned char>(c);
      if (std::isspace(uc)) {
        continue;
      }
      out.push_back(static_cast<char>(std::tolower(uc)));
    }

    const std::string textSuffix = "/text()";
    const size_t textPos = out.rfind(textSuffix);
    if (textPos != std::string::npos && textPos + textSuffix.size() == out.size()) {
      out.erase(textPos);
    }

    while (!out.empty() && out.back() == '/') {
      out.pop_back();
    }

    return out;
  }

  // Remove bracketed numeric predicates so paths can be compared even when
  // index counters differ between parser implementations.
  static std::string removeIndices(const std::string& xpath) {
    std::string out;
    out.reserve(xpath.size());

    bool inBracket = false;
    for (char c : xpath) {
      if (c == '[') {
        inBracket = true;
        continue;
      }
      if (c == ']') {
        inBracket = false;
        continue;
      }
      if (!inBracket) {
        out.push_back(c);
      }
    }
    return out;
  }

  static int pathDepth(const std::string& xpath) {
    int depth = 0;
    for (char c : xpath) {
      if (c == '/') {
        depth++;
      }
    }
    return depth;
  }

  // Resolve a path to the best anchor offset.
  // If exact node path is not found, progressively trim trailing segments and
  // match ancestors to obtain a stable approximate location.
  bool pickBestAnchorByPath(const std::string& targetPath, const bool ignoreIndices, size_t& outTextOffset,
                            bool& outExact) const {
    if (targetPath.empty() || anchors.empty()) {
      return false;
    }

    const std::string normalizedTarget = ignoreIndices ? removeIndices(targetPath) : targetPath;
    std::string probe = normalizedTarget;
    bool exactProbe = true;

    while (!probe.empty()) {
      int bestDepth = -1;
      size_t bestOffset = 0;
      bool found = false;

      for (const auto& anchor : anchors) {
        const std::string& anchorPath = ignoreIndices ? anchor.xpathNoIndex : anchor.xpath;
        if (anchorPath == probe) {
          const int depth = pathDepth(anchorPath);
          if (!found || depth > bestDepth || (depth == bestDepth && anchor.textOffset < bestOffset)) {
            found = true;
            bestDepth = depth;
            bestOffset = anchor.textOffset;
          }
        }
      }

      if (found) {
        outTextOffset = bestOffset;
        outExact = exactProbe;
        return true;
      }

      const size_t lastSlash = probe.find_last_of('/');
      if (lastSlash == std::string::npos || lastSlash == 0) {
        break;
      }
      probe.erase(lastSlash);
      exactProbe = false;
    }

    return false;
  }

  static std::string toLower(std::string value) {
    for (char& c : value) {
      c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return value;
  }

  // Elements that should not contribute text position anchors.
  static bool isSkippableTag(const std::string& tag) { return tag == "head" || tag == "script" || tag == "style"; }

  static bool isWhitespaceOnly(const XML_Char* text, const int len) {
    for (int i = 0; i < len; i++) {
      if (!std::isspace(static_cast<unsigned char>(text[i]))) {
        return false;
      }
    }
    return true;
  }

  // Count non-whitespace bytes to keep offsets stable against formatting-only
  // differences and indentation in source XHTML.
  static size_t countVisibleBytes(const XML_Char* text, const int len) {
    size_t count = 0;
    for (int i = 0; i < len; i++) {
      if (!std::isspace(static_cast<unsigned char>(text[i]))) {
        count++;
      }
    }
    return count;
  }

  int bodyDepth() const {
    for (int i = static_cast<int>(stack.size()) - 1; i >= 0; i--) {
      if (stack[i].tag == "body") {
        return i;
      }
    }
    return -1;
  }

  bool insideBody() const { return bodyDepth() >= 0; }

  std::string currentXPath() const {
    const int bodyIdx = bodyDepth();
    if (bodyIdx < 0) {
      return baseXPath();
    }

    std::string xpath = baseXPath();
    for (size_t i = static_cast<size_t>(bodyIdx + 1); i < stack.size(); i++) {
      xpath += "/" + stack[i].tag + "[" + std::to_string(stack[i].index) + "]";
    }
    return xpath;
  }

  // Adds first anchor for an element when text begins and periodic anchors in
  // longer runs so matching has sufficient granularity without exploding memory.
  void addAnchorIfNeeded() {
    if (!insideBody() || stack.empty()) {
      return;
    }

    if (!stack.back().hasTextAnchor) {
      const std::string xpath = currentXPath();
      anchors.push_back({totalTextBytes, xpath, removeIndices(xpath)});
      stack.back().hasTextAnchor = true;
    } else if (anchors.empty() || totalTextBytes - anchors.back().textOffset >= 192) {
      const std::string xpath = currentXPath();
      if (anchors.empty() || anchors.back().xpath != xpath) {
        anchors.push_back({totalTextBytes, xpath, removeIndices(xpath)});
      }
    }
  }

  void onStartElement(const XML_Char* rawName) {
    std::string name = toLower(rawName ? rawName : "");
    const size_t depth = stack.size();

    if (siblingCounters.size() <= depth) {
      siblingCounters.resize(depth + 1);
    }
    const int siblingIndex = ++siblingCounters[depth][name];

    stack.push_back({name, siblingIndex, false});
    siblingCounters.emplace_back();

    if (skipDepth < 0 && isSkippableTag(name)) {
      skipDepth = static_cast<int>(stack.size()) - 1;
    }
  }

  void onEndElement() {
    if (stack.empty()) {
      return;
    }

    if (skipDepth == static_cast<int>(stack.size()) - 1) {
      skipDepth = -1;
    }

    stack.pop_back();
    if (!siblingCounters.empty()) {
      siblingCounters.pop_back();
    }
  }

  void onCharacterData(const XML_Char* text, const int len) {
    if (skipDepth >= 0 || len <= 0 || !insideBody() || isWhitespaceOnly(text, len)) {
      return;
    }

    addAnchorIfNeeded();
    totalTextBytes += countVisibleBytes(text, len);
  }

  std::string chooseXPath(const float intraSpineProgress) const {
    if (anchors.empty()) {
      return baseXPath();
    }
    if (totalTextBytes == 0) {
      return anchors.front().xpath;
    }

    const float clampedProgress = std::max(0.0f, std::min(1.0f, intraSpineProgress));
    const size_t target = static_cast<size_t>(clampedProgress * static_cast<float>(totalTextBytes));

    // upper_bound returns the first anchor strictly after target; step back to get
    // the last anchor at-or-before target (the element the user is currently inside).
    auto it = std::upper_bound(anchors.begin(), anchors.end(), target,
                               [](const size_t value, const XPathAnchor& anchor) { return value < anchor.textOffset; });
    if (it != anchors.begin()) {
      --it;
    }
    return it->xpath;
  }

  // Convert path -> progress ratio by matching to nearest available anchor.
  bool chooseProgressForXPath(const std::string& xpath, float& outIntraSpineProgress, bool& outExactMatch) const {
    if (anchors.empty()) {
      return false;
    }

    const std::string normalized = normalizeXPath(xpath);
    if (normalized.empty()) {
      return false;
    }

    size_t matchedOffset = 0;
    bool exact = false;
    const char* matchTier = nullptr;

    bool matched = pickBestAnchorByPath(normalized, false, matchedOffset, exact);
    if (matched) {
      matchTier = exact ? "exact" : "ancestor";
    } else {
      bool exactRaw = false;
      matched = pickBestAnchorByPath(normalized, true, matchedOffset, exactRaw);
      if (matched) {
        exact = false;
        matchTier = exactRaw ? "index-insensitive" : "index-insensitive-ancestor";
      }
    }

    if (!matched) {
      LOG_DBG("KOX", "Reverse: spine=%d no anchor match for '%s' (%zu anchors)", spineIndex, normalized.c_str(),
              anchors.size());
      return false;
    }

    outExactMatch = exact;
    if (totalTextBytes == 0) {
      outIntraSpineProgress = 0.0f;
      LOG_DBG("KOX", "Reverse: spine=%d %s match offset=%zu -> progress=0.0 (no text)", spineIndex, matchTier,
              matchedOffset);
      return true;
    }

    outIntraSpineProgress = static_cast<float>(matchedOffset) / static_cast<float>(totalTextBytes);
    outIntraSpineProgress = std::max(0.0f, std::min(1.0f, outIntraSpineProgress));
    LOG_DBG("KOX", "Reverse: spine=%d %s match offset=%zu/%zu -> progress=%.3f", spineIndex, matchTier, matchedOffset,
            totalTextBytes, outIntraSpineProgress);
    return true;
  }
};

void XMLCALL onStartElement(void* userData, const XML_Char* name, const XML_Char**) {
  auto* state = static_cast<ParserState*>(userData);
  state->onStartElement(name);
}

void XMLCALL onEndElement(void* userData, const XML_Char*) {
  auto* state = static_cast<ParserState*>(userData);
  state->onEndElement();
}

void XMLCALL onCharacterData(void* userData, const XML_Char* text, const int len) {
  auto* state = static_cast<ParserState*>(userData);
  state->onCharacterData(text, len);
}

void XMLCALL onDefaultHandlerExpand(void* userData, const XML_Char* text, const int len) {
  // The default handler fires for comments, PIs, DOCTYPE, and entity references.
  // Only forward entity references (&..;) to avoid skewing text offsets with
  // non-visible markup.
  if (len < 3 || text[0] != '&' || text[len - 1] != ';') {
    return;
  }
  for (int i = 1; i < len - 1; ++i) {
    if (text[i] == '<' || text[i] == '>') {
      return;
    }
  }
  auto* state = static_cast<ParserState*>(userData);
  state->onCharacterData(text, len);
}

// Parse one spine item and return a fully populated ParserState.
// Returns std::nullopt if validation, I/O, or XML parse fails.
static std::optional<ParserState> parseSpineItem(const std::shared_ptr<Epub>& epub, const int spineIndex) {
  if (!epub || spineIndex < 0 || spineIndex >= epub->getSpineItemsCount()) {
    return std::nullopt;
  }

  const auto spineItem = epub->getSpineItem(spineIndex);
  if (spineItem.href.empty()) {
    return std::nullopt;
  }

  size_t chapterSize = 0;
  uint8_t* chapterBytes = epub->readItemContentsToBytes(spineItem.href, &chapterSize, false);
  if (!chapterBytes || chapterSize == 0) {
    free(chapterBytes);
    return std::nullopt;
  }

  ParserState state(spineIndex);

  XML_Parser parser = XML_ParserCreate(nullptr);
  if (!parser) {
    free(chapterBytes);
    LOG_ERR("KOX", "Failed to allocate XML parser for spine=%d", spineIndex);
    return std::nullopt;
  }

  XML_SetUserData(parser, &state);
  XML_SetElementHandler(parser, onStartElement, onEndElement);
  XML_SetCharacterDataHandler(parser, onCharacterData);
  XML_SetDefaultHandlerExpand(parser, onDefaultHandlerExpand);

  const bool parseOk = XML_Parse(parser, reinterpret_cast<const char*>(chapterBytes), static_cast<int>(chapterSize),
                                 XML_TRUE) != XML_STATUS_ERROR;

  if (!parseOk) {
    LOG_ERR("KOX", "XPath parse failed for spine=%d at line %lu: %s", spineIndex, XML_GetCurrentLineNumber(parser),
            XML_ErrorString(XML_GetErrorCode(parser)));
  }

  XML_ParserFree(parser);
  free(chapterBytes);

  if (!parseOk) {
    return std::nullopt;
  }

  return state;
}

}  // namespace

std::string ChapterXPathIndexer::findXPathForProgress(const std::shared_ptr<Epub>& epub, const int spineIndex,
                                                      const float intraSpineProgress) {
  const auto state = parseSpineItem(epub, spineIndex);
  if (!state) {
    return "";
  }

  const std::string result = state->chooseXPath(intraSpineProgress);
  LOG_DBG("KOX", "Forward: spine=%d progress=%.3f anchors=%zu textBytes=%zu -> %s", spineIndex, intraSpineProgress,
          state->anchors.size(), state->totalTextBytes, result.c_str());
  return result;
}

bool ChapterXPathIndexer::findProgressForXPath(const std::shared_ptr<Epub>& epub, const int spineIndex,
                                               const std::string& xpath, float& outIntraSpineProgress,
                                               bool& outExactMatch) {
  outIntraSpineProgress = 0.0f;
  outExactMatch = false;

  if (xpath.empty()) {
    return false;
  }

  const auto state = parseSpineItem(epub, spineIndex);
  if (!state) {
    return false;
  }

  LOG_DBG("KOX", "Reverse: spine=%d anchors=%zu textBytes=%zu for '%s'", spineIndex, state->anchors.size(),
          state->totalTextBytes, xpath.c_str());
  return state->chooseProgressForXPath(xpath, outIntraSpineProgress, outExactMatch);
}

bool ChapterXPathIndexer::tryExtractSpineIndexFromXPath(const std::string& xpath, int& outSpineIndex) {
  outSpineIndex = -1;
  if (xpath.empty()) {
    return false;
  }

  const std::string normalized = ParserState::normalizeXPath(xpath);
  const std::string key = "/docfragment[";
  const size_t pos = normalized.find(key);
  if (pos == std::string::npos) {
    LOG_DBG("KOX", "No DocFragment in xpath: '%s'", xpath.c_str());
    return false;
  }

  const size_t start = pos + key.size();
  size_t end = start;
  while (end < normalized.size() && std::isdigit(static_cast<unsigned char>(normalized[end]))) {
    end++;
  }

  if (end == start || end >= normalized.size() || normalized[end] != ']') {
    return false;
  }

  const std::string value = normalized.substr(start, end - start);
  const long parsed = std::strtol(value.c_str(), nullptr, 10);
  // KOReader uses 1-based DocFragment indices; convert to 0-based spine index.
  if (parsed < 1 || parsed > std::numeric_limits<int>::max()) {
    return false;
  }

  outSpineIndex = static_cast<int>(parsed) - 1;
  return true;
}
mod: Phase 1 - bring forward mod-exclusive files with ActivityManager migration Brings ~55 mod-exclusive files to the upstream-based mod/master-resync branch: Activities (migrated to new ActivityManager pattern): - Clock/Time: SetTimeActivity, SetTimezoneOffsetActivity, NtpSyncActivity - Dictionary: DictionaryDefinitionActivity, DictionarySuggestionsActivity, DictionaryWordSelectActivity, LookedUpWordsActivity - Bookmark: EpubReaderBookmarkSelectionActivity - Book management: BookManageMenuActivity, EndOfBookMenuActivity - OPDS: OpdsServerListActivity, OpdsSettingsActivity - Utility: DirectoryPickerActivity, NumericStepperActivity Utilities (unchanged): - BookManager, BookSettings, BookmarkStore, BootNtpSync - Dictionary, LookupHistory, TimeSync, OpdsServerStore Libraries: PlaceholderCover, TableData, ChapterXPathIndexer Scripts: inject_mod_version, generate_book_icon, preview_placeholder_cover Docs: KOReader sync XPath mapping Migration changes: - ActivityWithSubactivity -> Activity base class - Callback constructors -> finish()/setResult() pattern - enterNewActivity() -> startActivityForResult() - Activity::RenderLock&& -> RenderLock&& These files won't compile yet - they reference mod settings and I18n strings that will be added in subsequent phases. Made-with: Cursor 2026-03-07 15:10:00 -05:00			`#include "ChapterXPathIndexer.h"`

			`#include <Logging.h>`
			`#include <expat.h>`

			`#include <algorithm>`
			`#include <cctype>`
			`#include <cstdlib>`
			`#include <limits>`
			`#include <optional>`
			`#include <string>`
			`#include <unordered_map>`
			`#include <vector>`

			`namespace {`

			`// Anchor used for both mapping directions.`
			`// textOffset is counted as visible (non-whitespace) bytes from chapter start.`
			`// xpath points to the nearest element path at/near that offset.`

			`struct XPathAnchor {`
			`size_t textOffset = 0;`
			`std::string xpath;`
			`std::string xpathNoIndex; // precomputed removeIndices(xpath)`
			`};`

			`struct StackNode {`
			`std::string tag;`
			`int index = 1;`
			`bool hasTextAnchor = false;`
			`};`

			`// ParserState is intentionally ephemeral and created per lookup call.`
			`// It holds only one spine parse worth of data to avoid retaining structures`
			`// that would increase long-lived heap usage on the ESP32-C3.`
			`struct ParserState {`
			`explicit ParserState(const int spineIndex) : spineIndex(spineIndex) { siblingCounters.emplace_back(); }`

			`int spineIndex = 0;`
			`int skipDepth = -1;`
			`size_t totalTextBytes = 0;`

			`std::vector<StackNode> stack;`
			`std::vector<std::unordered_map<std::string, int>> siblingCounters;`
			`std::vector<XPathAnchor> anchors;`

			`std::string baseXPath() const { return "/body/DocFragment[" + std::to_string(spineIndex + 1) + "]/body"; }`

			`// Canonicalize incoming KOReader XPath before matching:`
			`// - remove all whitespace`
			`// - lowercase tags`
			`// - strip optional trailing /text()`
			`// - strip trailing slash`
			`static std::string normalizeXPath(const std::string& input) {`
			`if (input.empty()) {`
			`return "";`
			`}`

			`std::string out;`
			`out.reserve(input.size());`
			`for (char c : input) {`
			`const unsigned char uc = static_cast<unsigned char>(c);`
			`if (std::isspace(uc)) {`
			`continue;`
			`}`
			`out.push_back(static_cast<char>(std::tolower(uc)));`
			`}`

			`const std::string textSuffix = "/text()";`
			`const size_t textPos = out.rfind(textSuffix);`
			`if (textPos != std::string::npos && textPos + textSuffix.size() == out.size()) {`
			`out.erase(textPos);`
			`}`

			`while (!out.empty() && out.back() == '/') {`
			`out.pop_back();`
			`}`

			`return out;`
			`}`

			`// Remove bracketed numeric predicates so paths can be compared even when`
			`// index counters differ between parser implementations.`
			`static std::string removeIndices(const std::string& xpath) {`
			`std::string out;`
			`out.reserve(xpath.size());`

			`bool inBracket = false;`
			`for (char c : xpath) {`
			`if (c == '[') {`
			`inBracket = true;`
			`continue;`
			`}`
			`if (c == ']') {`
			`inBracket = false;`
			`continue;`
			`}`
			`if (!inBracket) {`
			`out.push_back(c);`
			`}`
			`}`
			`return out;`
			`}`

			`static int pathDepth(const std::string& xpath) {`
			`int depth = 0;`
			`for (char c : xpath) {`
			`if (c == '/') {`
			`depth++;`
			`}`
			`}`
			`return depth;`
			`}`

			`// Resolve a path to the best anchor offset.`
			`// If exact node path is not found, progressively trim trailing segments and`
			`// match ancestors to obtain a stable approximate location.`
			`bool pickBestAnchorByPath(const std::string& targetPath, const bool ignoreIndices, size_t& outTextOffset,`
			`bool& outExact) const {`
			`if (targetPath.empty() \|\| anchors.empty()) {`
			`return false;`
			`}`

			`const std::string normalizedTarget = ignoreIndices ? removeIndices(targetPath) : targetPath;`
			`std::string probe = normalizedTarget;`
			`bool exactProbe = true;`

			`while (!probe.empty()) {`
			`int bestDepth = -1;`
			`size_t bestOffset = 0;`
			`bool found = false;`

			`for (const auto& anchor : anchors) {`
			`const std::string& anchorPath = ignoreIndices ? anchor.xpathNoIndex : anchor.xpath;`
			`if (anchorPath == probe) {`
			`const int depth = pathDepth(anchorPath);`
			`if (!found \|\| depth > bestDepth \|\| (depth == bestDepth && anchor.textOffset < bestOffset)) {`
			`found = true;`
			`bestDepth = depth;`
			`bestOffset = anchor.textOffset;`
			`}`
			`}`
			`}`

			`if (found) {`
			`outTextOffset = bestOffset;`
			`outExact = exactProbe;`
			`return true;`
			`}`

			`const size_t lastSlash = probe.find_last_of('/');`
			`if (lastSlash == std::string::npos \|\| lastSlash == 0) {`
			`break;`
			`}`
			`probe.erase(lastSlash);`
			`exactProbe = false;`
			`}`

			`return false;`
			`}`

			`static std::string toLower(std::string value) {`
			`for (char& c : value) {`
			`c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));`
			`}`
			`return value;`
			`}`

			`// Elements that should not contribute text position anchors.`
			`static bool isSkippableTag(const std::string& tag) { return tag == "head" \|\| tag == "script" \|\| tag == "style"; }`

			`static bool isWhitespaceOnly(const XML_Char* text, const int len) {`
			`for (int i = 0; i < len; i++) {`
			`if (!std::isspace(static_cast<unsigned char>(text[i]))) {`
			`return false;`
			`}`
			`}`
			`return true;`
			`}`

			`// Count non-whitespace bytes to keep offsets stable against formatting-only`
			`// differences and indentation in source XHTML.`
			`static size_t countVisibleBytes(const XML_Char* text, const int len) {`
			`size_t count = 0;`
			`for (int i = 0; i < len; i++) {`
			`if (!std::isspace(static_cast<unsigned char>(text[i]))) {`
			`count++;`
			`}`
			`}`
			`return count;`
			`}`

			`int bodyDepth() const {`
			`for (int i = static_cast<int>(stack.size()) - 1; i >= 0; i--) {`
			`if (stack[i].tag == "body") {`
			`return i;`
			`}`
			`}`
			`return -1;`
			`}`

			`bool insideBody() const { return bodyDepth() >= 0; }`

			`std::string currentXPath() const {`
			`const int bodyIdx = bodyDepth();`
			`if (bodyIdx < 0) {`
			`return baseXPath();`
			`}`

			`std::string xpath = baseXPath();`
			`for (size_t i = static_cast<size_t>(bodyIdx + 1); i < stack.size(); i++) {`
			`xpath += "/" + stack[i].tag + "[" + std::to_string(stack[i].index) + "]";`
			`}`
			`return xpath;`
			`}`

			`// Adds first anchor for an element when text begins and periodic anchors in`
			`// longer runs so matching has sufficient granularity without exploding memory.`
			`void addAnchorIfNeeded() {`
			`if (!insideBody() \|\| stack.empty()) {`
			`return;`
			`}`

			`if (!stack.back().hasTextAnchor) {`
			`const std::string xpath = currentXPath();`
			`anchors.push_back({totalTextBytes, xpath, removeIndices(xpath)});`
			`stack.back().hasTextAnchor = true;`
			`} else if (anchors.empty() \|\| totalTextBytes - anchors.back().textOffset >= 192) {`
			`const std::string xpath = currentXPath();`
			`if (anchors.empty() \|\| anchors.back().xpath != xpath) {`
			`anchors.push_back({totalTextBytes, xpath, removeIndices(xpath)});`
			`}`
			`}`
			`}`

			`void onStartElement(const XML_Char* rawName) {`
			`std::string name = toLower(rawName ? rawName : "");`
			`const size_t depth = stack.size();`

			`if (siblingCounters.size() <= depth) {`
			`siblingCounters.resize(depth + 1);`
			`}`
			`const int siblingIndex = ++siblingCounters[depth][name];`

			`stack.push_back({name, siblingIndex, false});`
			`siblingCounters.emplace_back();`

			`if (skipDepth < 0 && isSkippableTag(name)) {`
			`skipDepth = static_cast<int>(stack.size()) - 1;`
			`}`
			`}`

			`void onEndElement() {`
			`if (stack.empty()) {`
			`return;`
			`}`

			`if (skipDepth == static_cast<int>(stack.size()) - 1) {`
			`skipDepth = -1;`
			`}`

			`stack.pop_back();`
			`if (!siblingCounters.empty()) {`
			`siblingCounters.pop_back();`
			`}`
			`}`

			`void onCharacterData(const XML_Char* text, const int len) {`
			`if (skipDepth >= 0 \|\| len <= 0 \|\| !insideBody() \|\| isWhitespaceOnly(text, len)) {`
			`return;`
			`}`

			`addAnchorIfNeeded();`
			`totalTextBytes += countVisibleBytes(text, len);`
			`}`

			`std::string chooseXPath(const float intraSpineProgress) const {`
			`if (anchors.empty()) {`
			`return baseXPath();`
			`}`
			`if (totalTextBytes == 0) {`
			`return anchors.front().xpath;`
			`}`

			`const float clampedProgress = std::max(0.0f, std::min(1.0f, intraSpineProgress));`
			`const size_t target = static_cast<size_t>(clampedProgress * static_cast<float>(totalTextBytes));`

			`// upper_bound returns the first anchor strictly after target; step back to get`
			`// the last anchor at-or-before target (the element the user is currently inside).`
			`auto it = std::upper_bound(anchors.begin(), anchors.end(), target,`
			`[](const size_t value, const XPathAnchor& anchor) { return value < anchor.textOffset; });`
			`if (it != anchors.begin()) {`
			`--it;`
			`}`
			`return it->xpath;`
			`}`

			`// Convert path -> progress ratio by matching to nearest available anchor.`
			`bool chooseProgressForXPath(const std::string& xpath, float& outIntraSpineProgress, bool& outExactMatch) const {`
			`if (anchors.empty()) {`
			`return false;`
			`}`

			`const std::string normalized = normalizeXPath(xpath);`
			`if (normalized.empty()) {`
			`return false;`
			`}`

			`size_t matchedOffset = 0;`
			`bool exact = false;`
			`const char* matchTier = nullptr;`

			`bool matched = pickBestAnchorByPath(normalized, false, matchedOffset, exact);`
			`if (matched) {`
			`matchTier = exact ? "exact" : "ancestor";`
			`} else {`
			`bool exactRaw = false;`
			`matched = pickBestAnchorByPath(normalized, true, matchedOffset, exactRaw);`
			`if (matched) {`
			`exact = false;`
			`matchTier = exactRaw ? "index-insensitive" : "index-insensitive-ancestor";`
			`}`
			`}`

			`if (!matched) {`
			`LOG_DBG("KOX", "Reverse: spine=%d no anchor match for '%s' (%zu anchors)", spineIndex, normalized.c_str(),`
			`anchors.size());`
			`return false;`
			`}`

			`outExactMatch = exact;`
			`if (totalTextBytes == 0) {`
			`outIntraSpineProgress = 0.0f;`
			`LOG_DBG("KOX", "Reverse: spine=%d %s match offset=%zu -> progress=0.0 (no text)", spineIndex, matchTier,`
			`matchedOffset);`
			`return true;`
			`}`

			`outIntraSpineProgress = static_cast<float>(matchedOffset) / static_cast<float>(totalTextBytes);`
			`outIntraSpineProgress = std::max(0.0f, std::min(1.0f, outIntraSpineProgress));`
			`LOG_DBG("KOX", "Reverse: spine=%d %s match offset=%zu/%zu -> progress=%.3f", spineIndex, matchTier, matchedOffset,`
			`totalTextBytes, outIntraSpineProgress);`
			`return true;`
			`}`
			`};`

			`void XMLCALL onStartElement(void* userData, const XML_Char* name, const XML_Char**) {`
			`auto* state = static_cast<ParserState*>(userData);`
			`state->onStartElement(name);`
			`}`

			`void XMLCALL onEndElement(void* userData, const XML_Char*) {`
			`auto* state = static_cast<ParserState*>(userData);`
			`state->onEndElement();`
			`}`

			`void XMLCALL onCharacterData(void* userData, const XML_Char* text, const int len) {`
			`auto* state = static_cast<ParserState*>(userData);`
			`state->onCharacterData(text, len);`
			`}`

			`void XMLCALL onDefaultHandlerExpand(void* userData, const XML_Char* text, const int len) {`
			`// The default handler fires for comments, PIs, DOCTYPE, and entity references.`
			`// Only forward entity references (&..;) to avoid skewing text offsets with`
			`// non-visible markup.`
			`if (len < 3 \|\| text[0] != '&' \|\| text[len - 1] != ';') {`
			`return;`
			`}`
			`for (int i = 1; i < len - 1; ++i) {`
			`if (text[i] == '<' \|\| text[i] == '>') {`
			`return;`
			`}`
			`}`
			`auto* state = static_cast<ParserState*>(userData);`
			`state->onCharacterData(text, len);`
			`}`

			`// Parse one spine item and return a fully populated ParserState.`
			`// Returns std::nullopt if validation, I/O, or XML parse fails.`
			`static std::optional<ParserState> parseSpineItem(const std::shared_ptr<Epub>& epub, const int spineIndex) {`
			`if (!epub \|\| spineIndex < 0 \|\| spineIndex >= epub->getSpineItemsCount()) {`
			`return std::nullopt;`
			`}`

			`const auto spineItem = epub->getSpineItem(spineIndex);`
			`if (spineItem.href.empty()) {`
			`return std::nullopt;`
			`}`

			`size_t chapterSize = 0;`
			`uint8_t* chapterBytes = epub->readItemContentsToBytes(spineItem.href, &chapterSize, false);`
			`if (!chapterBytes \|\| chapterSize == 0) {`
			`free(chapterBytes);`
			`return std::nullopt;`
			`}`

			`ParserState state(spineIndex);`

			`XML_Parser parser = XML_ParserCreate(nullptr);`
			`if (!parser) {`
			`free(chapterBytes);`
			`LOG_ERR("KOX", "Failed to allocate XML parser for spine=%d", spineIndex);`
			`return std::nullopt;`
			`}`

			`XML_SetUserData(parser, &state);`
			`XML_SetElementHandler(parser, onStartElement, onEndElement);`
			`XML_SetCharacterDataHandler(parser, onCharacterData);`
			`XML_SetDefaultHandlerExpand(parser, onDefaultHandlerExpand);`

			`const bool parseOk = XML_Parse(parser, reinterpret_cast<const char*>(chapterBytes), static_cast<int>(chapterSize),`
			`XML_TRUE) != XML_STATUS_ERROR;`

			`if (!parseOk) {`
			`LOG_ERR("KOX", "XPath parse failed for spine=%d at line %lu: %s", spineIndex, XML_GetCurrentLineNumber(parser),`
			`XML_ErrorString(XML_GetErrorCode(parser)));`
			`}`

			`XML_ParserFree(parser);`
			`free(chapterBytes);`

			`if (!parseOk) {`
			`return std::nullopt;`
			`}`

			`return state;`
			`}`

			`} // namespace`

			`std::string ChapterXPathIndexer::findXPathForProgress(const std::shared_ptr<Epub>& epub, const int spineIndex,`
			`const float intraSpineProgress) {`
			`const auto state = parseSpineItem(epub, spineIndex);`
			`if (!state) {`
			`return "";`
			`}`

			`const std::string result = state->chooseXPath(intraSpineProgress);`
			`LOG_DBG("KOX", "Forward: spine=%d progress=%.3f anchors=%zu textBytes=%zu -> %s", spineIndex, intraSpineProgress,`
			`state->anchors.size(), state->totalTextBytes, result.c_str());`
			`return result;`
			`}`

			`bool ChapterXPathIndexer::findProgressForXPath(const std::shared_ptr<Epub>& epub, const int spineIndex,`
			`const std::string& xpath, float& outIntraSpineProgress,`
			`bool& outExactMatch) {`
			`outIntraSpineProgress = 0.0f;`
			`outExactMatch = false;`

			`if (xpath.empty()) {`
			`return false;`
			`}`

			`const auto state = parseSpineItem(epub, spineIndex);`
			`if (!state) {`
			`return false;`
			`}`

			`LOG_DBG("KOX", "Reverse: spine=%d anchors=%zu textBytes=%zu for '%s'", spineIndex, state->anchors.size(),`
			`state->totalTextBytes, xpath.c_str());`
			`return state->chooseProgressForXPath(xpath, outIntraSpineProgress, outExactMatch);`
			`}`

			`bool ChapterXPathIndexer::tryExtractSpineIndexFromXPath(const std::string& xpath, int& outSpineIndex) {`
			`outSpineIndex = -1;`
			`if (xpath.empty()) {`
			`return false;`
			`}`

			`const std::string normalized = ParserState::normalizeXPath(xpath);`
			`const std::string key = "/docfragment[";`
			`const size_t pos = normalized.find(key);`
			`if (pos == std::string::npos) {`
			`LOG_DBG("KOX", "No DocFragment in xpath: '%s'", xpath.c_str());`
			`return false;`
			`}`

			`const size_t start = pos + key.size();`
			`size_t end = start;`
			`while (end < normalized.size() && std::isdigit(static_cast<unsigned char>(normalized[end]))) {`
			`end++;`
			`}`

			`if (end == start \|\| end >= normalized.size() \|\| normalized[end] != ']') {`
			`return false;`
			`}`

			`const std::string value = normalized.substr(start, end - start);`
			`const long parsed = std::strtol(value.c_str(), nullptr, 10);`
			`// KOReader uses 1-based DocFragment indices; convert to 0-based spine index.`
			`if (parsed < 1 \|\| parsed > std::numeric_limits<int>::max()) {`
			`return false;`
			`}`

			`outSpineIndex = static_cast<int>(parsed) - 1;`
			`return true;`
			`}`