feat: parse and display all available EPUB metadata fields

Add parsing for dc:publisher, dc:date, dc:subject, dc:rights,
dc:contributor, dc:identifier (prefers ISBN scheme), and
calibre:rating. All new fields are serialized in BookMetadataCache
(cache version bumped to 7) and displayed in BookInfoActivity, with
the rating shown on an N/5 scale.

Made-with: Cursor
This commit is contained in:
cottongin
2026-03-09 02:43:10 -04:00
parent 1a3e7109e3
commit 8025e6fb0d
10 changed files with 269 additions and 7 deletions

View File

@@ -81,6 +81,13 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
bookMetadata.series = opfParser.series;
bookMetadata.seriesIndex = opfParser.seriesIndex;
bookMetadata.description = opfParser.description;
bookMetadata.publisher = opfParser.publisher;
bookMetadata.date = opfParser.date;
bookMetadata.subjects = opfParser.subjects;
bookMetadata.rights = opfParser.rights;
bookMetadata.contributor = opfParser.contributor;
bookMetadata.identifier = opfParser.identifier;
bookMetadata.rating = opfParser.rating;
// Guide-based cover fallback: if no cover found via metadata/properties,
// try extracting the image reference from the guide's cover page XHTML
@@ -547,6 +554,48 @@ const std::string& Epub::getDescription() const {
return bookMetadataCache->coreMetadata.description;
}
// Returns the publisher string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getPublisher() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.publisher;
  }
  return emptyValue;
}
// Returns the date string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getDate() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.date;
  }
  return emptyValue;
}
// Returns the subjects string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getSubjects() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.subjects;
  }
  return emptyValue;
}
// Returns the rights string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getRights() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.rights;
  }
  return emptyValue;
}
// Returns the contributor string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getContributor() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.contributor;
  }
  return emptyValue;
}
// Returns the identifier string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getIdentifier() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.identifier;
  }
  return emptyValue;
}
// Returns the rating string stored in the metadata cache. When the cache
// pointer is null or the cache has not been loaded, a reference to a
// function-local empty string is returned instead.
const std::string& Epub::getRating() const {
  static std::string emptyValue;
  const bool cacheReady = bookMetadataCache && bookMetadataCache->isLoaded();
  if (cacheReady) {
    return bookMetadataCache->coreMetadata.rating;
  }
  return emptyValue;
}
std::string Epub::getCoverBmpPath(bool cropped) const {
const auto coverFileName = std::string("cover") + (cropped ? "_crop" : "");
return cachePath + "/" + coverFileName + ".bmp";

View File

@@ -54,6 +54,13 @@ class Epub {
const std::string& getSeries() const;
const std::string& getSeriesIndex() const;
const std::string& getDescription() const;
const std::string& getPublisher() const;
const std::string& getDate() const;
const std::string& getSubjects() const;
const std::string& getRights() const;
const std::string& getContributor() const;
const std::string& getIdentifier() const;
const std::string& getRating() const;
std::string getCoverBmpPath(bool cropped = false) const;
bool generateCoverBmp(bool cropped = false) const;
std::string getThumbBmpPath() const;

View File

@@ -9,7 +9,7 @@
#include "FsHelpers.h"
namespace {
constexpr uint8_t BOOK_CACHE_VERSION = 6;
constexpr uint8_t BOOK_CACHE_VERSION = 7;
constexpr char bookBinFile[] = "/book.bin";
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
@@ -118,7 +118,9 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.language.size() +
metadata.coverItemHref.size() + metadata.textReferenceHref.size() +
metadata.series.size() + metadata.seriesIndex.size() + metadata.description.size() +
sizeof(uint32_t) * 8;
metadata.publisher.size() + metadata.date.size() + metadata.subjects.size() +
metadata.rights.size() + metadata.contributor.size() + metadata.identifier.size() +
metadata.rating.size() + sizeof(uint32_t) * 15;
const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount;
const uint32_t lutOffset = headerASize + metadataSize;
@@ -136,6 +138,13 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
serialization::writeString(bookFile, metadata.series);
serialization::writeString(bookFile, metadata.seriesIndex);
serialization::writeString(bookFile, metadata.description);
serialization::writeString(bookFile, metadata.publisher);
serialization::writeString(bookFile, metadata.date);
serialization::writeString(bookFile, metadata.subjects);
serialization::writeString(bookFile, metadata.rights);
serialization::writeString(bookFile, metadata.contributor);
serialization::writeString(bookFile, metadata.identifier);
serialization::writeString(bookFile, metadata.rating);
// Loop through spine entries, writing LUT positions
spineFile.seek(0);
@@ -392,7 +401,14 @@ bool BookMetadataCache::load() {
!serialization::readString(bookFile, coreMetadata.textReferenceHref) ||
!serialization::readString(bookFile, coreMetadata.series) ||
!serialization::readString(bookFile, coreMetadata.seriesIndex) ||
!serialization::readString(bookFile, coreMetadata.description)) {
!serialization::readString(bookFile, coreMetadata.description) ||
!serialization::readString(bookFile, coreMetadata.publisher) ||
!serialization::readString(bookFile, coreMetadata.date) ||
!serialization::readString(bookFile, coreMetadata.subjects) ||
!serialization::readString(bookFile, coreMetadata.rights) ||
!serialization::readString(bookFile, coreMetadata.contributor) ||
!serialization::readString(bookFile, coreMetadata.identifier) ||
!serialization::readString(bookFile, coreMetadata.rating)) {
LOG_ERR("BMC", "Failed to read metadata strings from cache");
bookFile.close();
return false;

View File

@@ -17,6 +17,13 @@ class BookMetadataCache {
std::string series;
std::string seriesIndex;
std::string description;
std::string publisher;
std::string date;
std::string subjects;
std::string rights;
std::string contributor;
std::string identifier;
std::string rating;
};
struct SpineEntry {

View File

@@ -158,6 +158,59 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:publisher") == 0) {
if (self->publisher.empty()) {
self->state = IN_BOOK_PUBLISHER;
}
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:date") == 0) {
if (self->date.empty()) {
self->state = IN_BOOK_DATE;
}
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:subject") == 0) {
if (!self->subjects.empty()) {
self->subjects += ", ";
}
self->state = IN_BOOK_SUBJECT;
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:rights") == 0) {
if (self->rights.empty()) {
self->state = IN_BOOK_RIGHTS;
}
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:contributor") == 0) {
if (!self->contributor.empty()) {
self->contributor += ", ";
}
self->state = IN_BOOK_CONTRIBUTOR;
return;
}
if (self->state == IN_METADATA && strcmp(name, "dc:identifier") == 0) {
self->identifierIsIsbn = false;
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "opf:scheme") == 0 && strcasecmp(atts[i + 1], "ISBN") == 0) {
self->identifierIsIsbn = true;
}
}
if (self->identifier.empty() || self->identifierIsIsbn) {
if (self->identifierIsIsbn) {
self->identifier.clear();
}
self->state = IN_BOOK_IDENTIFIER;
}
return;
}
if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) {
self->state = IN_MANIFEST;
if (!Storage.openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) {
@@ -221,6 +274,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
self->series = trim(std::string(metaContent, std::min(strlen(metaContent), MAX_DESCRIPTION_LENGTH)));
} else if (strcmp(metaName, "calibre:series_index") == 0 && self->seriesIndex.empty()) {
self->seriesIndex = trim(std::string(metaContent, std::min(strlen(metaContent), MAX_DESCRIPTION_LENGTH)));
} else if (strcmp(metaName, "calibre:rating") == 0 && self->rating.empty()) {
self->rating = trim(std::string(metaContent, std::min(strlen(metaContent), static_cast<size_t>(8))));
}
}
@@ -439,6 +494,42 @@ void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s,
}
return;
}
if (self->state == IN_BOOK_PUBLISHER) {
self->publisher.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - self->publisher.size()));
return;
}
if (self->state == IN_BOOK_DATE) {
self->date.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - self->date.size()));
return;
}
// Accumulate text for the current dc:subject element, capped at
// MAX_DESCRIPTION_LENGTH bytes in total.
if (self->state == IN_BOOK_SUBJECT) {
  // startElement appends a ", " separator without checking the cap, so the
  // accumulated size can already be above MAX_DESCRIPTION_LENGTH here.
  // Compare before subtracting: an unsigned subtraction would wrap around to
  // a huge "remaining" value and let the string grow without bound.
  const size_t used = self->subjects.size();
  if (used < MAX_DESCRIPTION_LENGTH) {
    self->subjects.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - used));
  }
  return;
}
if (self->state == IN_BOOK_RIGHTS) {
self->rights.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - self->rights.size()));
return;
}
// Accumulate text for the current dc:contributor element, capped at
// MAX_DESCRIPTION_LENGTH bytes in total.
if (self->state == IN_BOOK_CONTRIBUTOR) {
  // startElement appends a ", " separator without checking the cap, so the
  // accumulated size can already be above MAX_DESCRIPTION_LENGTH here.
  // Compare before subtracting: an unsigned subtraction would wrap around to
  // a huge "remaining" value and let the string grow without bound.
  const size_t used = self->contributor.size();
  if (used < MAX_DESCRIPTION_LENGTH) {
    self->contributor.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - used));
  }
  return;
}
if (self->state == IN_BOOK_IDENTIFIER) {
self->identifier.append(s, std::min(static_cast<size_t>(len), MAX_DESCRIPTION_LENGTH - self->identifier.size()));
return;
}
}
void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name) {
@@ -496,6 +587,40 @@ void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name)
return;
}
if (self->state == IN_BOOK_PUBLISHER && strcmp(name, "dc:publisher") == 0) {
self->publisher = trim(self->publisher);
self->state = IN_METADATA;
return;
}
if (self->state == IN_BOOK_DATE && strcmp(name, "dc:date") == 0) {
self->date = trim(self->date);
self->state = IN_METADATA;
return;
}
if (self->state == IN_BOOK_SUBJECT && strcmp(name, "dc:subject") == 0) {
self->state = IN_METADATA;
return;
}
if (self->state == IN_BOOK_RIGHTS && strcmp(name, "dc:rights") == 0) {
self->rights = trim(self->rights);
self->state = IN_METADATA;
return;
}
if (self->state == IN_BOOK_CONTRIBUTOR && strcmp(name, "dc:contributor") == 0) {
self->state = IN_METADATA;
return;
}
if (self->state == IN_BOOK_IDENTIFIER && strcmp(name, "dc:identifier") == 0) {
self->identifier = trim(self->identifier);
self->state = IN_METADATA;
return;
}
if (self->state == IN_METADATA && (strcmp(name, "metadata") == 0 || strcmp(name, "opf:metadata") == 0)) {
self->state = IN_PACKAGE;
return;

View File

@@ -20,6 +20,12 @@ class ContentOpfParser final : public Print {
IN_BOOK_DESCRIPTION,
IN_BOOK_SERIES,
IN_BOOK_SERIES_INDEX,
IN_BOOK_PUBLISHER,
IN_BOOK_DATE,
IN_BOOK_SUBJECT,
IN_BOOK_RIGHTS,
IN_BOOK_CONTRIBUTOR,
IN_BOOK_IDENTIFIER,
IN_MANIFEST,
IN_SPINE,
IN_GUIDE,
@@ -33,6 +39,7 @@ class ContentOpfParser final : public Print {
BookMetadataCache* cache;
FsFile tempItemStore;
std::string coverItemId;
bool identifierIsIsbn = false;
// Index for fast idref→href lookup (used only for large EPUBs)
struct ItemIndexEntry {
@@ -66,6 +73,13 @@ class ContentOpfParser final : public Print {
std::string series;
std::string seriesIndex;
std::string description;
std::string publisher;
std::string date;
std::string subjects;
std::string rights;
std::string contributor;
std::string identifier;
std::string rating;
std::string tocNcxPath;
std::string tocNavPath; // EPUB 3 nav document path
std::string coverItemHref;

View File

@@ -452,6 +452,13 @@ enum class StrId : uint16_t {
STR_SERIES,
STR_FILE_SIZE,
STR_DESCRIPTION,
STR_PUBLISHER,
STR_DATE,
STR_SUBJECTS,
STR_RATING,
STR_ISBN,
STR_RIGHTS,
STR_CONTRIBUTOR,
STR_MANAGE,
STR_INFO,
STR_ARCHIVE_BOOK,

View File

@@ -399,6 +399,13 @@ STR_AUTHOR: "Author"
STR_SERIES: "Series"
STR_FILE_SIZE: "File Size"
STR_DESCRIPTION: "Description"
STR_PUBLISHER: "Publisher"
STR_DATE: "Date"
STR_SUBJECTS: "Subjects"
STR_RATING: "Rating"
STR_ISBN: "ISBN"
STR_RIGHTS: "Rights"
STR_CONTRIBUTOR: "Contributor"
STR_MANAGE: "Manage"
STR_INFO: "Info"
STR_ARCHIVE_BOOK: "Archive Book"