2026-04-06 15:17:21 -04:00
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
from datetime import datetime, date
|
|
|
|
|
from PIL import Image as PILImage
|
|
|
|
|
|
|
|
|
|
from src.models import Article, Image
|
2026-04-06 17:04:40 -04:00
|
|
|
from src.epub_builder import _cleanup_html, build_epub
|
2026-04-06 15:17:21 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _create_test_image(path):
    """Write an 800x450 solid-green JPEG to *path* for use as a test fixture.

    Any missing parent directories are created first. A bare filename
    (no directory component) is saved as-is without attempting to create
    a directory.

    Args:
        path: Destination file path of the JPEG to create.
    """
    parent_dir = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # when the path actually carries a directory component.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    img = PILImage.new("RGB", (800, 450), color="green")
    img.save(path, format="JPEG")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_epub_creates_file(app, db, tmp_path):
    """build_epub writes a non-empty ``.epub`` file for two stored articles.

    The first article embeds an ``<img>`` pointing at a JPEG on disk and has
    a matching ``Image`` row; the second article is text only.
    """
    with app.app_context():
        # On-disk picture referenced by the first article's HTML.
        photo_path = str(tmp_path / "images" / "abc123.jpg")
        _create_test_image(photo_path)

        first = Article(
            guid="g1",
            title="First Article",
            author="Author A",
            pub_date=datetime(2026, 4, 6, 10, 0),
            categories=json.dumps(["Government"]),
            link="http://example.com/1",
            content_html=f'<p>Content one.</p><img src="{photo_path}" />',
        )
        second = Article(
            guid="g2",
            title="Second Article",
            author="Author B",
            pub_date=datetime(2026, 4, 7, 10, 0),
            categories=json.dumps(["Culture Calendar"]),
            link="http://example.com/2",
            content_html="<p>Content two.</p>",
        )
        db.session.add_all([first, second])
        # Flush so first.id is populated before the Image row references it.
        db.session.flush()

        db.session.add(
            Image(
                article_id=first.id,
                original_url="https://example.com/photo.jpg",
                local_path=photo_path,
                width=800,
                height=450,
            )
        )
        db.session.commit()

        # Plain white cover image handed to the builder.
        cover_path = str(tmp_path / "cover.jpg")
        PILImage.new("RGB", (800, 480), color="white").save(
            cover_path, format="JPEG"
        )

        issues_dir = str(tmp_path / "issues")
        built_path = build_epub(
            week_start=date(2026, 4, 6),
            week_end=date(2026, 4, 12),
            article_ids=[first.id, second.id],
            cover_path=cover_path,
            output_dir=issues_dir,
        )

        assert os.path.exists(built_path)
        assert built_path.endswith(".epub")
        assert os.path.getsize(built_path) > 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_epub_respects_article_order(app, db, tmp_path):
    """The EPUB spine lists the earlier-dated article before the later one.

    The article IDs are handed to ``build_epub`` with the later-dated
    article first; the test expects the first two ``<h1>`` headings found in
    the spine (ignoring the ``nav`` entry) to be "Earlier Article" followed
    by "Later Article".
    """
    with app.app_context():
        later = Article(
            guid="g1", title="Later Article", author="A",
            pub_date=datetime(2026, 4, 8, 10, 0),
            categories="[]", link="http://a", content_html="<p>Later</p>",
        )
        earlier = Article(
            guid="g2", title="Earlier Article", author="B",
            pub_date=datetime(2026, 4, 6, 10, 0),
            categories="[]", link="http://b", content_html="<p>Earlier</p>",
        )
        db.session.add_all([later, earlier])
        db.session.commit()

        cover_path = str(tmp_path / "cover.jpg")
        PILImage.new("RGB", (800, 480)).save(cover_path, format="JPEG")

        epub_path = build_epub(
            week_start=date(2026, 4, 6),
            week_end=date(2026, 4, 12),
            article_ids=[later.id, earlier.id],
            cover_path=cover_path,
            output_dir=str(tmp_path / "issues"),
        )

        from ebooklib import epub as epublib

        book = epublib.read_epub(epub_path)

        # Collect the text of the first <h1> of every spine document that
        # has one, in spine order.
        titles = []
        for item_id, _ in book.spine:
            if item_id == "nav":
                continue
            item = book.get_item_with_id(item_id)
            if not item:
                continue
            # Fetch the content once and reuse it for both the membership
            # check and the decode.
            raw = item.get_content()
            if b"<h1>" not in raw:
                continue
            content = raw.decode("utf-8")
            start = content.index("<h1>") + len("<h1>")
            # Anchor the closing-tag search after the opening tag.
            end = content.index("</h1>", start)
            titles.append(content[start:end])

        # Comparing a slice yields a readable assertion diff (instead of an
        # IndexError) when fewer than two headings were found.
        assert titles[:2] == ["Earlier Article", "Later Article"]
|
2026-04-06 17:04:40 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cleanup_removes_empty_paragraphs():
    """Only the two paragraphs with text survive ``_cleanup_html``.

    The input mixes real paragraphs with empty, whitespace-only and
    ``<br>``-only ones; exactly two ``<p`` openings must remain.
    """
    cleaned = _cleanup_html(
        '<p>Real content.</p><p></p><p> </p><p> </p><p><br></p><p>More.</p>'
    )

    for expected in ("<p>Real content.</p>", "<p>More.</p>"):
        assert expected in cleaned
    assert cleaned.count("<p") == 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cleanup_removes_br_near_images():
    """``<br>`` tags on either side of an image are gone; the image remains."""
    source = '<p>Text</p><br><img src="test.jpg"><br/><p>More</p>'

    cleaned = _cleanup_html(source)

    has_line_break = "<br" in cleaned
    assert not has_line_break
    assert '<img src="test.jpg"' in cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cleanup_collapses_image_wrappers():
    """An image nested in ``<div><figure>`` is kept after cleanup.

    A ``<figure>`` element is tolerated in the output only when a
    ``<figcaption>`` is present as well.
    """
    cleaned = _cleanup_html('<div><figure><img src="test.jpg"></figure></div>')

    assert '<img src="test.jpg"' in cleaned

    figure_present = "<figure" in cleaned
    caption_present = "<figcaption" in cleaned
    assert (not figure_present) or caption_present
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cleanup_preserves_figcaption():
    """Both the image and its caption text are still present after cleanup."""
    source = '<figure><img src="test.jpg"><figcaption>Caption</figcaption></figure>'

    cleaned = _cleanup_html(source)

    for fragment in ('<img src="test.jpg"', "Caption"):
        assert fragment in cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cleanup_removes_empty_divs():
    """No ``<div`` opening remains when every div is empty or whitespace-only."""
    cleaned = _cleanup_html('<p>Content</p><div> </div><div></div><p>More</p>')

    div_total = cleaned.count("<div")
    assert div_total == 0
|