feat: ePub builder with chapters, images, TOC, cover
Made-with: Cursor
This commit is contained in:
118
src/epub_builder.py
Normal file
118
src/epub_builder.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import date
|
||||
|
||||
from ebooklib import epub
|
||||
|
||||
from src.models import Article, Image
|
||||
|
||||
# Default stylesheet for generated books: serif body text, compact headings,
# muted byline/category lines, and images constrained to the page width.
EPUB_CSS = """
body { font-family: serif; margin: 1em; line-height: 1.5; }
h1 { font-size: 1.4em; margin-bottom: 0.3em; }
.byline { font-size: 0.85em; color: #555; margin-bottom: 0.5em; }
.categories { font-size: 0.8em; color: #777; margin-bottom: 1em; }
img { max-width: 100%; display: block; margin: 0.5em auto; }
figcaption { font-size: 0.8em; text-align: center; color: #555; }
"""
|
||||
|
||||
|
||||
def build_epub(
    week_start: date,
    week_end: date,
    article_ids: list[int],
    cover_path: str,
    output_dir: str,
) -> str:
    """Build the weekly issue as an ePub file and return the path written.

    Articles whose ids are in ``article_ids`` are loaded and ordered by
    ascending ``pub_date``; each becomes one chapter consisting of a title
    heading, a byline, an optional category line and the stored article HTML.
    Images recorded for an article are embedded when their local file exists,
    and references to the local path inside the article HTML are rewritten to
    the in-book image name.

    Args:
        week_start: First day of the issue; used in the title, the book
            identifier and the output filename.
        week_end: Last day of the issue; used in the title only.
        article_ids: Primary keys of the articles to include.
        cover_path: Path of the cover image file, read as raw bytes.
        output_dir: Directory that receives the ``.epub`` (created if absent).

    Returns:
        The path of the written ePub file inside ``output_dir``.

    Raises:
        OSError: If ``cover_path`` cannot be opened (the cover is read
            unconditionally).
    """
    # Function-scope import keeps this block self-contained; escaping is only
    # needed for the values interpolated into chapter markup below.
    from html import escape

    os.makedirs(output_dir, exist_ok=True)

    articles = (
        Article.query
        .filter(Article.id.in_(article_ids))
        .order_by(Article.pub_date.asc())
        .all()
    )

    title = (
        f"Plymouth Independent \u2014 "
        f"Week of {week_start.strftime('%b %d')}\u2013{week_end.strftime('%b %d, %Y')}"
    )

    book = epub.EpubBook()
    book.set_identifier(f"pi-{week_start.isoformat()}")
    book.set_title(title)
    book.set_language("en")
    book.add_author("Plymouth Independent")

    with open(cover_path, "rb") as f:
        book.set_cover("cover.jpg", f.read())

    style = epub.EpubItem(
        uid="style", file_name="style/default.css",
        media_type="text/css", content=EPUB_CSS.encode("utf-8"),
    )
    book.add_item(style)

    chapters = []
    # Counter spans all articles so every embedded image gets a unique name.
    image_counter = 0

    for article in articles:
        # A missing/empty categories column is treated as "no categories"
        # instead of letting json.loads() raise.
        categories = json.loads(article.categories) if article.categories else []
        cat_str = ", ".join(str(c) for c in categories) if categories else ""

        published = article.pub_date.strftime("%B %d, %Y")
        # Omit the author and separator when no author is stored, rather than
        # rendering the literal text "None".
        author = escape(str(article.author)) if article.author else ""
        byline = f"{author} \u00b7 {published}" if author else published

        # Title, author and categories are plain text going into markup, so
        # they are escaped; the article body is already HTML and is not.
        parts = [
            f"<h1>{escape(str(article.title))}</h1>\n",
            f'<p class="byline">{byline}</p>\n',
        ]
        if cat_str:
            parts.append(f'<p class="categories">{escape(cat_str)}</p>\n')

        content = article.content_html or ""
        article_images = Image.query.filter_by(article_id=article.id).all()

        for img_record in article_images:
            # Best effort: images whose local file is gone are skipped.
            if not os.path.exists(img_record.local_path):
                continue

            image_counter += 1
            epub_img_name = f"images/img_{image_counter}.jpg"

            with open(img_record.local_path, "rb") as f:
                img_data = f.read()

            epub_img = epub.EpubItem(
                uid=f"img_{image_counter}",
                file_name=epub_img_name,
                media_type="image/jpeg",
                content=img_data,
            )
            book.add_item(epub_img)
            # Point the article markup at the embedded copy.
            content = content.replace(img_record.local_path, epub_img_name)

        parts.append(content)
        chapter_html = "".join(parts)

        chapter = epub.EpubHtml(
            title=article.title,
            file_name=f"chapter_{article.id}.xhtml",
            lang="en",
        )
        chapter.set_content(
            f'<html><head><link rel="stylesheet" href="style/default.css"/>'
            f"</head><body>{chapter_html}</body></html>"
        )
        chapter.add_item(style)
        chapters.append(chapter)
        book.add_item(chapter)

    book.toc = [(c, []) for c in chapters]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    book.spine = ["nav"] + chapters

    # Use the ISO year that belongs to the ISO week number. Pairing the
    # calendar year with the ISO week mislabels weeks that straddle New Year
    # (e.g. 2024-12-30 is ISO 2025-W01) and can collide with another issue.
    iso_year, iso_week, _ = week_start.isocalendar()
    filename = f"plymouth-independent-{iso_year}-W{iso_week:02d}.epub"
    epub_path = os.path.join(output_dir, filename)
    epub.write_epub(epub_path, book)

    return epub_path
|
||||
102
tests/test_epub_builder.py
Normal file
102
tests/test_epub_builder.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, date
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from src.models import Article, Image
|
||||
from src.epub_builder import build_epub
|
||||
|
||||
|
||||
def _create_test_image(path):
    """Write an 800x450 solid green JPEG to ``path``, creating its parent directory first."""
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)
    PILImage.new("RGB", (800, 450), color="green").save(path, format="JPEG")
|
||||
|
||||
|
||||
def test_build_epub_creates_file(app, db, tmp_path):
    """build_epub writes a non-empty .epub for two articles, one of which has an image."""
    with app.app_context():
        photo_path = str(tmp_path / "images" / "abc123.jpg")
        _create_test_image(photo_path)

        first = Article(
            guid="g1",
            title="First Article",
            author="Author A",
            pub_date=datetime(2026, 4, 6, 10, 0),
            categories=json.dumps(["Government"]),
            link="http://example.com/1",
            content_html=f'<p>Content one.</p><img src="{photo_path}" />',
        )
        second = Article(
            guid="g2",
            title="Second Article",
            author="Author B",
            pub_date=datetime(2026, 4, 7, 10, 0),
            categories=json.dumps(["Culture Calendar"]),
            link="http://example.com/2",
            content_html="<p>Content two.</p>",
        )
        db.session.add_all([first, second])
        # Flush so the articles receive ids before the image row refers to one.
        db.session.flush()

        db.session.add(
            Image(
                article_id=first.id,
                original_url="https://example.com/photo.jpg",
                local_path=photo_path,
                width=800,
                height=450,
            )
        )
        db.session.commit()

        cover_path = str(tmp_path / "cover.jpg")
        PILImage.new("RGB", (800, 480), color="white").save(cover_path, format="JPEG")

        issues_dir = str(tmp_path / "issues")
        result = build_epub(
            week_start=date(2026, 4, 6),
            week_end=date(2026, 4, 12),
            article_ids=[first.id, second.id],
            cover_path=cover_path,
            output_dir=issues_dir,
        )

        assert os.path.exists(result)
        assert result.endswith(".epub")
        assert os.path.getsize(result) > 0
|
||||
|
||||
|
||||
def test_build_epub_respects_article_order(app, db, tmp_path):
    """Chapters appear in the spine by ascending pub_date, not by the order of the ids passed in."""
    from ebooklib import epub as epublib

    with app.app_context():
        later = Article(
            guid="g1", title="Later Article", author="A",
            pub_date=datetime(2026, 4, 8, 10, 0),
            categories="[]", link="http://a", content_html="<p>Later</p>",
        )
        earlier = Article(
            guid="g2", title="Earlier Article", author="B",
            pub_date=datetime(2026, 4, 6, 10, 0),
            categories="[]", link="http://b", content_html="<p>Earlier</p>",
        )
        db.session.add_all([later, earlier])
        db.session.commit()

        cover_path = str(tmp_path / "cover.jpg")
        PILImage.new("RGB", (800, 480)).save(cover_path, format="JPEG")

        # Ids are passed later-first on purpose; the builder must reorder them.
        epub_path = build_epub(
            week_start=date(2026, 4, 6),
            week_end=date(2026, 4, 12),
            article_ids=[later.id, earlier.id],
            cover_path=cover_path,
            output_dir=str(tmp_path / "issues"),
        )

        book = epublib.read_epub(epub_path)

        # Walk the spine in reading order and pull the first <h1> text out of
        # every non-nav document that has one.
        titles = []
        for item_id, _ in book.spine:
            if item_id == "nav":
                continue
            item = book.get_item_with_id(item_id)
            if not item:
                continue
            raw = item.get_content()
            if b"<h1>" not in raw:
                continue
            markup = raw.decode("utf-8")
            heading_start = markup.index("<h1>") + 4
            heading_end = markup.index("</h1>")
            titles.append(markup[heading_start:heading_end])

        assert titles[0] == "Earlier Article"
        assert titles[1] == "Later Article"
|
||||
Reference in New Issue
Block a user