diff --git a/src/epub_builder.py b/src/epub_builder.py
new file mode 100644
index 0000000..237c6e7
--- /dev/null
+++ b/src/epub_builder.py
@@ -0,0 +1,149 @@
+import html
+import json
+import os
+from datetime import date
+
+from ebooklib import epub
+
+from src.models import Article, Image
+
+EPUB_CSS = """
+body { font-family: serif; margin: 1em; line-height: 1.5; }
+h1 { font-size: 1.4em; margin-bottom: 0.3em; }
+.byline { font-size: 0.85em; color: #555; margin-bottom: 0.5em; }
+.categories { font-size: 0.8em; color: #777; margin-bottom: 1em; }
+img { max-width: 100%; display: block; margin: 0.5em auto; }
+figcaption { font-size: 0.8em; text-align: center; color: #555; }
+"""
+
+
+def build_epub(
+    week_start: date,
+    week_end: date,
+    article_ids: list[int],
+    cover_path: str,
+    output_dir: str,
+) -> str:
+    """Build the weekly EPUB issue and return the path of the written file.
+
+    Args:
+        week_start: First day of the issue week; used for the identifier,
+            the title and the output filename.
+        week_end: Last day of the issue week; used in the title only.
+        article_ids: Ids of the articles to include. Chapters are ordered
+            by publication date, not by the order of this list.
+        cover_path: Path of the cover image, embedded as ``cover.jpg``.
+        output_dir: Directory for the EPUB; created if it does not exist.
+
+    Returns:
+        Path of the EPUB file written inside ``output_dir``.
+
+    Raises:
+        OSError: If the cover image cannot be read.
+    """
+    os.makedirs(output_dir, exist_ok=True)
+
+    articles = (
+        Article.query
+        .filter(Article.id.in_(article_ids))
+        .order_by(Article.pub_date.asc())
+        .all()
+    )
+
+    title = (
+        f"Plymouth Independent \u2014 "
+        f"Week of {week_start.strftime('%b %d')}\u2013{week_end.strftime('%b %d, %Y')}"
+    )
+
+    book = epub.EpubBook()
+    book.set_identifier(f"pi-{week_start.isoformat()}")
+    book.set_title(title)
+    book.set_language("en")
+    book.add_author("Plymouth Independent")
+
+    with open(cover_path, "rb") as f:
+        book.set_cover("cover.jpg", f.read())
+
+    style = epub.EpubItem(
+        uid="style", file_name="style/default.css",
+        media_type="text/css", content=EPUB_CSS.encode("utf-8"),
+    )
+    book.add_item(style)
+
+    chapters = []
+    image_counter = 0
+
+    for article in articles:
+        # An empty or missing categories value means "no categories" rather
+        # than a failure inside json.loads.
+        categories = json.loads(article.categories) if article.categories else []
+        cat_str = ", ".join(categories) if categories else ""
+
+        # Title, author and categories are escaped so that characters such
+        # as "&" or "<" cannot break or inject markup; str() keeps the old
+        # rendering for non-string values.
+        # NOTE(review): the byline/categories element (<p>) and the
+        # <html><body> wrapper below are assumed - confirm.
+        chapter_html = f"<h1>{html.escape(str(article.title))}</h1>\n"
+        chapter_html += (
+            f'<p class="byline">{html.escape(str(article.author))} \u00b7 '
+            f'{article.pub_date.strftime("%B %d, %Y")}</p>\n'
+        )
+        if cat_str:
+            chapter_html += f'<p class="categories">{html.escape(cat_str)}</p>\n'
+
+        content = article.content_html
+        article_images = Image.query.filter_by(article_id=article.id).all()
+
+        for img_record in article_images:
+            # Images whose file is missing on disk are skipped; the article
+            # markup that references them is left as it is.
+            if not os.path.exists(img_record.local_path):
+                continue
+
+            image_counter += 1
+            epub_img_name = f"images/img_{image_counter}.jpg"
+
+            with open(img_record.local_path, "rb") as f:
+                img_data = f.read()
+
+            epub_img = epub.EpubItem(
+                uid=f"img_{image_counter}",
+                file_name=epub_img_name,
+                media_type="image/jpeg",
+                content=img_data,
+            )
+            book.add_item(epub_img)
+            # Point the article markup at the copy embedded in the book.
+            content = content.replace(img_record.local_path, epub_img_name)
+
+        chapter_html += content
+
+        chapter = epub.EpubHtml(
+            title=article.title,
+            file_name=f"chapter_{article.id}.xhtml",
+            lang="en",
+        )
+        chapter.set_content(
+            f"<html><body>{chapter_html}</body></html>"
+        )
+        chapter.add_item(style)
+        chapters.append(chapter)
+        book.add_item(chapter)
+
+    book.toc = [(c, []) for c in chapters]
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    book.spine = ["nav"] + chapters
+
+    # Year and week must come from the same ISO calendar triple: near New
+    # Year the ISO year differs from the calendar year, and mixing the two
+    # names distinct weeks alike (2024-01-01 and 2024-12-30 would both be
+    # "2024-W01").
+    iso_year, iso_week, _ = week_start.isocalendar()
+    filename = f"plymouth-independent-{iso_year}-W{iso_week:02d}.epub"
+    epub_path = os.path.join(output_dir, filename)
+    epub.write_epub(epub_path, book)
+
+    return epub_path
diff --git a/tests/test_epub_builder.py b/tests/test_epub_builder.py
new file mode 100644
index 0000000..8a8b5e1
--- /dev/null
+++ b/tests/test_epub_builder.py
@@ -0,0 +1,105 @@
+import json
+import os
+from datetime import datetime, date
+from PIL import Image as PILImage
+
+from src.models import Article, Image
+from src.epub_builder import build_epub
+
+
+def _create_test_image(path):
+    """Write an 800x450 solid green JPEG to path, creating parent dirs."""
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    img = PILImage.new("RGB", (800, 450), color="green")
+    img.save(path, format="JPEG")
+
+
+def test_build_epub_creates_file(app, db, tmp_path):
+    """build_epub writes a non-empty .epub for two articles and one image."""
+    with app.app_context():
+        img_path = str(tmp_path / "images" / "abc123.jpg")
+        _create_test_image(img_path)
+
+        a1 = Article(
+            guid="g1", title="First Article", author="Author A",
+            pub_date=datetime(2026, 4, 6, 10, 0),
+            categories=json.dumps(["Government"]),
+            link="http://example.com/1",
+            content_html=f'<p>Content one.</p><img src="{img_path}"/>',
+        )
+        a2 = Article(
+            guid="g2", title="Second Article", author="Author B",
+            pub_date=datetime(2026, 4, 7, 10, 0),
+            categories=json.dumps(["Culture Calendar"]),
+            link="http://example.com/2",
+            content_html="<p>Content two.</p>",
+        )
+        db.session.add_all([a1, a2])
+        db.session.flush()
+
+        img_record = Image(
+            article_id=a1.id, original_url="https://example.com/photo.jpg",
+            local_path=img_path, width=800, height=450,
+        )
+        db.session.add(img_record)
+        db.session.commit()
+
+        cover_img = PILImage.new("RGB", (800, 480), color="white")
+        cover_path = str(tmp_path / "cover.jpg")
+        cover_img.save(cover_path, format="JPEG")
+
+        output_dir = str(tmp_path / "issues")
+        epub_path = build_epub(
+            week_start=date(2026, 4, 6),
+            week_end=date(2026, 4, 12),
+            article_ids=[a1.id, a2.id],
+            cover_path=cover_path,
+            output_dir=output_dir,
+        )
+
+        assert os.path.exists(epub_path)
+        assert epub_path.endswith(".epub")
+        assert os.path.getsize(epub_path) > 0
+
+
+def test_build_epub_respects_article_order(app, db, tmp_path):
+    """Chapters follow pub_date order even when the later article is listed first."""
+    with app.app_context():
+        a1 = Article(
+            guid="g1", title="Later Article", author="A",
+            pub_date=datetime(2026, 4, 8, 10, 0),
+            categories="[]", link="http://a", content_html="<p>Later</p>",
+        )
+        a2 = Article(
+            guid="g2", title="Earlier Article", author="B",
+            pub_date=datetime(2026, 4, 6, 10, 0),
+            categories="[]", link="http://b", content_html="<p>Earlier</p>",
+        )
+        db.session.add_all([a1, a2])
+        db.session.commit()
+
+        cover_path = str(tmp_path / "cover.jpg")
+        PILImage.new("RGB", (800, 480)).save(cover_path, format="JPEG")
+
+        epub_path = build_epub(
+            week_start=date(2026, 4, 6),
+            week_end=date(2026, 4, 12),
+            article_ids=[a1.id, a2.id],
+            cover_path=cover_path,
+            output_dir=str(tmp_path / "issues"),
+        )
+
+        from ebooklib import epub as epublib
+        book = epublib.read_epub(epub_path)
+        spine_items = [book.get_item_with_id(item_id)
+                       for item_id, _ in book.spine if item_id != "nav"]
+        titles = []
+        for item in spine_items:
+            if item and b"<h1>" in item.get_content():
+                content = item.get_content().decode("utf-8")
+                start = content.index("<h1>") + 4
+                end = content.index("</h1>")
+                titles.append(content[start:end])
+
+        assert titles[0] == "Earlier Article"
+        assert titles[1] == "Later Article"