fix: tighten ePub image spacing — CSS margins, HTML cleanup for empty tags and wrappers

Made-with: Cursor
This commit is contained in:
cottongin
2026-04-06 17:04:40 -04:00
parent 49acf09aa1
commit 807ab8610d
2 changed files with 81 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ from datetime import datetime, date
from PIL import Image as PILImage
from src.models import Article, Image
from src.epub_builder import build_epub
from src.epub_builder import _cleanup_html, build_epub
def _create_test_image(path):
@@ -100,3 +100,38 @@ def test_build_epub_respects_article_order(app, db, tmp_path):
assert titles[0] == "Earlier Article"
assert titles[1] == "Later Article"
def test_cleanup_removes_empty_paragraphs():
    """Check that only paragraphs with real text survive ``_cleanup_html``.

    The fixture mixes two meaningful paragraphs with four that are empty,
    whitespace-only, ``&nbsp;``-only, or contain just a ``<br>``.
    """
    markup = '<p>Real content.</p><p></p><p>&nbsp;</p><p> </p><p><br></p><p>More.</p>'
    cleaned = _cleanup_html(markup)
    # Both meaningful paragraphs must come through unchanged.
    for kept in ("<p>Real content.</p>", "<p>More.</p>"):
        assert kept in cleaned
    # ...and nothing else that opens with "<p" may remain.
    assert cleaned.count("<p") == 2
def test_cleanup_removes_br_near_images():
    """Check that ``<br>`` tags around an image are stripped and the image kept.

    The fixture places one ``<br>`` before and one ``<br/>`` after the
    ``<img>``; the test expects neither to appear in the output.
    """
    source_html = '<p>Text</p><br><img src="test.jpg"><br/><p>More</p>'
    output = _cleanup_html(source_html)
    br_present = "<br" in output
    image_present = '<img src="test.jpg"' in output
    assert not br_present
    assert image_present
def test_cleanup_collapses_image_wrappers():
    """Check that a caption-less ``<figure>`` wrapper around an image is removed.

    The fixture wraps a single ``<img>`` in ``<div><figure>…</figure></div>``
    with no ``<figcaption>``. The test expects the image itself to survive
    and the ``<figure>`` element to be gone from the output.
    """
    html = '<div><figure><img src="test.jpg"></figure></div>'
    result = _cleanup_html(html)
    assert '<img src="test.jpg"' in result
    # The input has no <figcaption>, so the old escape hatch
    # (`or "<figcaption" in result`) could only pass if the cleanup invented
    # a caption. Assert the wrapper's removal unconditionally instead.
    assert "<figure" not in result
def test_cleanup_preserves_figcaption():
    """Check that a captioned figure keeps both its image and its caption text."""
    captioned = '<figure><img src="test.jpg"><figcaption>Caption</figcaption></figure>'
    cleaned = _cleanup_html(captioned)
    # Only the caption *text* is checked here, not the <figcaption> tag itself.
    for fragment in ('<img src="test.jpg"', "Caption"):
        assert fragment in cleaned
def test_cleanup_removes_empty_divs():
    """Check that empty and whitespace-only ``<div>`` elements are dropped."""
    markup = '<p>Content</p><div> </div><div></div><p>More</p>'
    cleaned = _cleanup_html(markup)
    # No opening <div tag of any kind should remain in the output.
    assert "<div" not in cleaned