feat: full integration — app.py wiring, scheduler startup, route registration, README

- Wire blueprints and scheduler into create_app()
- Add start_scheduler param to skip scheduler in tests
- Fix Setting.get/set to use modern db.session.get()
- Remove unused imports from conftest and models
- Add README with quick start and usage guide

Made-with: Cursor
cottongin
2026-04-06 15:22:38 -04:00
parent ec9f31f072
commit 5c924f7dba
6 changed files with 3086 additions and 9 deletions

README.md — new file, 42 lines

@@ -0,0 +1,42 @@
# PI Weekly Newspaper
Generates weekly ePub "newspapers" from the [Plymouth Independent](https://www.plymouthindependent.org/) RSS feed, optimized for the Xtreink X4 e-reader (800x480 screen).
## Quick Start
```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python app.py
```
Open http://localhost:5000 in your browser.
## Features
- **Periodic RSS fetching** with configurable interval
- **Automatic image processing** — downloads, resizes to e-reader constraints, converts to baseline JPEG
- **ePub generation** with articles as chapters, table of contents, and embedded images
- **AI-generated covers** via Pollinations.ai (free, no API key) with text fallback
- **Web UI** accessible from any device on your network
- **Scheduled or manual publishing**
## Usage
1. Click **Fetch Now** on the dashboard to pull articles
2. Go to **Publish**, select the target week, toggle articles on/off
3. Choose a cover method (AI or Text) and click **Generate Issue**
4. Download the `.epub` from the **Issues** archive
## Configuration
Settings are editable via the web UI at `/settings`, or in `config.py`:
- `FEED_URL` — RSS feed URL
- `FETCH_INTERVAL_HOURS` — how often to check for new articles
- `IMAGE_MAX_LANDSCAPE` / `IMAGE_MAX_PORTRAIT` — image bounding box dimensions
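For reference, a minimal `config.py` consistent with these names might look like the following (all values, including the feed path, are illustrative assumptions rather than the project's actual settings):

```python
# config.py -- illustrative sketch; actual values and feed path may differ
import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
IMAGES_DIR = os.path.join(DATA_DIR, "images")
ISSUES_DIR = os.path.join(DATA_DIR, "issues")

FEED_URL = "https://www.plymouthindependent.org/feed/"  # assumed feed path
FETCH_INTERVAL_HOURS = 1

# Bounding boxes for the Xtreink X4 screen, as (width, height)
IMAGE_MAX_LANDSCAPE = (800, 480)
IMAGE_MAX_PORTRAIT = (480, 800)

SQLALCHEMY_DATABASE_URI = f"sqlite:///{os.path.join(DATA_DIR, 'newspaper.db')}"
SQLALCHEMY_TRACK_MODIFICATIONS = False
```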
## Access from Other Devices
The app binds to `0.0.0.0:5000`, so access it from any device on your network using your Mac's IP address (e.g., `http://192.168.1.x:5000`).

app.py — 19 lines changed

@@ -1,15 +1,19 @@
 import logging
 import os
 from flask import Flask
 from flask_sqlalchemy import SQLAlchemy
 import config

 db = SQLAlchemy()
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")

-def create_app():
+def create_app(start_scheduler=True):
     app = Flask(__name__)
     app.config.from_object(config)
-    app.config["SECRET_KEY"] = os.urandom(24)
+    app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY", os.urandom(24))
     os.makedirs(config.DATA_DIR, exist_ok=True)
     os.makedirs(config.IMAGES_DIR, exist_ok=True)
@@ -21,9 +25,18 @@ def create_app():
     from src import models  # noqa: F401
     db.create_all()

+    from src.routes import register_blueprints
+    register_blueprints(app)
+
+    if start_scheduler:
+        from src.scheduler import SchedulerManager
+        scheduler_mgr = SchedulerManager(app)
+        scheduler_mgr.start()
+        app.config["SCHEDULER_MANAGER"] = scheduler_mgr
+
     return app

 if __name__ == "__main__":
     app = create_app()
-    app.run(host="0.0.0.0", port=5000, debug=True)
+    app.run(host="0.0.0.0", port=5000, debug=False)

File diff suppressed because it is too large


@@ -0,0 +1,251 @@
# Plymouth Independent Weekly Newspaper — Design Spec
## Goal
Publish a weekly ePub "newspaper" containing articles from the Plymouth Independent RSS feed, optimized for reading on an Xtreink X4 e-reader.
## Requirements Summary
- **Output:** ePub with articles as chapters, chronological order (Monday–Sunday ISO weeks)
- **Offline:** All images downloaded and embedded
- **E-reader formatting:** Images fit within 800x480 (landscape) or 480x800 (portrait) bounding box, aspect ratio preserved, baseline JPEG
- **Interface:** Self-hosted Python web app, accessible via browser from MacBook and Android phone on local network
- **Pipeline:** Periodic RSS fetch/cache, then manual or scheduled compile-and-publish
- **Cover:** AI-generated via Pollinations.ai (primary), programmatic text fallback, selectable at publish time
- **Article selection:** All articles included by default; user can exclude specific ones via UI before publishing
---
## Architecture
### Stack
| Component | Choice | Rationale |
|---|---|---|
| Web framework | Flask + Jinja2 | Lightweight, single-process |
| ORM / DB | Flask-SQLAlchemy + SQLite | Zero-config, single-file DB |
| Scheduler | APScheduler (BackgroundScheduler) | In-process, no external dependencies |
| RSS parsing | feedparser | Standard Python RSS library |
| ePub generation | ebooklib | Mature ePub 3 library |
| Image processing | Pillow | Resize, format conversion, text rendering |
| HTML parsing | beautifulsoup4 | Extract images from article HTML |
| HTTP | requests | Feed + image downloads |
| AI cover | Pollinations.ai | Free, no API key, URL-based |
| Frontend | Plain HTML + Pico CSS + vanilla JS | No build step, mobile-friendly |
### Project Structure
```
pi-weekly-newspaper/
├── app.py # Entry point: Flask app + APScheduler setup
├── config.py # Config (feed URL, check interval, image dims, etc.)
├── requirements.txt
├── src/
│ ├── __init__.py
│ ├── fetcher.py # RSS fetch, parse, cache articles to DB
│ ├── images.py # Download images, resize, baseline JPEG conversion
│ ├── epub_builder.py # Assemble ePub from cached articles + images
│ ├── cover.py # Cover generation (Pollinations.ai + text fallback)
│ ├── models.py # SQLAlchemy models (Article, Image, Issue, Settings)
│ └── scheduler.py # APScheduler config, job management
├── static/ # CSS, JS for web UI
├── templates/ # Jinja2 templates for web UI
├── data/
│ ├── newspaper.db # SQLite database (created at runtime)
│ ├── images/ # Downloaded/processed images (runtime)
│ └── issues/ # Generated ePub files (runtime)
└── README.md
```
### Data Flow
1. **Fetch job** (periodic, default every hour): RSS feed → parse → store new articles + metadata in SQLite → download & process images to `data/images/`
2. **Publish action** (manual via UI, or auto-scheduled): query articles for target week → user reviews/excludes via UI → generate cover → assemble ePub → save to `data/issues/` → download link available
---
## Data Model
### `articles`
| Column | Type | Notes |
|---|---|---|
| `id` | INTEGER PK | Auto-increment |
| `guid` | TEXT UNIQUE | RSS `<guid>`, deduplication key |
| `title` | TEXT | Article title |
| `author` | TEXT | `dc:creator` value |
| `pub_date` | DATETIME | Publication timestamp |
| `categories` | TEXT | JSON array of category strings |
| `link` | TEXT | Original article URL |
| `content_html` | TEXT | Full `content:encoded` HTML with local image refs |
| `fetched_at` | DATETIME | When we cached it |
### `images`
| Column | Type | Notes |
|---|---|---|
| `id` | INTEGER PK | Auto-increment |
| `article_id` | INTEGER FK | References `articles.id` |
| `original_url` | TEXT | Source URL from the article HTML |
| `local_path` | TEXT | Path to processed file in `data/images/` |
| `width` | INTEGER | Final width after resize |
| `height` | INTEGER | Final height after resize |
### `issues`
| Column | Type | Notes |
|---|---|---|
| `id` | INTEGER PK | Auto-increment |
| `week_start` | DATE | Monday of the ISO week |
| `week_end` | DATE | Sunday of the ISO week |
| `cover_method` | TEXT | `"ai"` or `"text"` |
| `cover_path` | TEXT | Path to cover image |
| `epub_path` | TEXT | Path to generated `.epub` |
| `article_ids` | TEXT | JSON array of included article IDs |
| `excluded_article_ids` | TEXT | JSON array of excluded article IDs |
| `created_at` | DATETIME | When the issue was generated |
| `status` | TEXT | `"draft"` / `"published"` |
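The `week_start`/`week_end` columns can be derived from any date inside the target ISO week; a sketch (hypothetical helper, not necessarily the project's code):

```python
from datetime import date, timedelta

def iso_week_bounds(d: date) -> tuple[date, date]:
    """Return (Monday, Sunday) of the ISO week containing d."""
    # isoweekday(): Monday=1 .. Sunday=7
    monday = d - timedelta(days=d.isoweekday() - 1)
    return monday, monday + timedelta(days=6)
```

For example, `iso_week_bounds(date(2026, 4, 8))` returns `(date(2026, 4, 6), date(2026, 4, 12))`.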
### `settings`
| Column | Type | Notes |
|---|---|---|
| `key` | TEXT PK | Setting name |
| `value` | TEXT | JSON-encoded value |
Used for: feed URL, fetch interval, auto-publish config, image constraints. Read on startup to restore scheduler state.
---
## Module Details
### `fetcher.py` — RSS Fetch & Article Caching
1. Fetch RSS feed via `feedparser` + `requests`
2. Deduplicate by `guid` — skip articles already in DB
3. Parse each new `<item>`: title, author, pub_date, categories, link, content_html
4. **Save article record to SQLite first** (to obtain `article_id`)
5. Extract image URLs from `content:encoded` HTML using `BeautifulSoup` with `html.parser`
6. Download & process each image via `images.py` — store to `data/images/{url_hash}.jpg` (deduped by URL hash across all articles)
7. Create `images` DB records linking `article_id` to each processed image
8. Rewrite `<img src>` attributes in stored `content_html` to point to local paths
9. Update the article record with the rewritten `content_html`
**Edge cases:**
- Feed unavailable: log warning, retry next cycle, no crash
- Duplicate images across articles (same URL): download once, reference by URL hash
- Images that 404: log warning, skip image, article still included
- Malformed HTML: `BeautifulSoup` with `html.parser` is tolerant
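Steps 2 and 6 above can be sketched as follows (the hash algorithm is an assumption; the spec only calls for a URL hash, and `filter_new_entries` is a hypothetical helper):

```python
import hashlib

def image_cache_name(url: str) -> str:
    # One processed file per unique source URL, shared across articles
    # (SHA-1 is an assumed choice -- the spec only says "{url_hash}.jpg")
    return hashlib.sha1(url.encode("utf-8")).hexdigest() + ".jpg"

def filter_new_entries(entries, known_guids):
    # Step 2: skip any entry whose guid is already cached in the DB
    return [e for e in entries if e["guid"] not in known_guids]
```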
### `images.py` — Image Processing
1. Download image from URL via `requests`
2. Check if `data/images/{url_hash}.jpg` already exists — if so, return cached path (dedup)
3. Open with Pillow
4. Determine orientation: if width >= height → landscape bounding box (800x480), else portrait (480x800)
5. Resize to fit within bounding box, preserving aspect ratio:
- If image is **larger** than the box: use `Image.thumbnail()` to scale down
- If image is **smaller** than the box: use `Image.resize()` with `LANCZOS` to scale up, so it renders at a reasonable size on the e-reader
6. Save as baseline JPEG (`progressive=False`)
7. Return local path and final dimensions
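The bounding-box math in steps 4–5 is independent of Pillow and can be sketched as a pure function (hypothetical helper; the real code would then call `img.resize(...)` with `LANCZOS` and save with `progressive=False`):

```python
def fit_dimensions(width: int, height: int) -> tuple[int, int]:
    """Target size inside the e-reader bounding box, aspect ratio preserved.

    Landscape sources get the 800x480 box, portrait sources 480x800.
    The scale factor may exceed 1.0: smaller images are scaled up (step 5).
    """
    box_w, box_h = (800, 480) if width >= height else (480, 800)
    scale = min(box_w / width, box_h / height)
    return max(1, round(width * scale)), max(1, round(height * scale))
```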
### `epub_builder.py` — ePub Assembly
1. Query articles for target ISO week (Monday–Sunday), minus excluded ones
2. Sort chronologically by `pub_date`
3. Build ePub structure with `ebooklib`:
- **Metadata:** title ("Plymouth Independent — Week of Apr 7–13, 2026"), language (en)
- **Cover:** generated JPEG as ePub cover image
- **Table of Contents:** article titles linked to chapters
- **Chapters:** one per article, chronological
4. Each chapter:
- `<h1>` article title
- Author/date byline, category tags
- Article HTML with image `src` rewritten to ePub-internal references
- All referenced images embedded as ePub items
5. Stylesheet: minimal CSS for e-ink — no colors, high contrast, images `max-width: 100%; display: block`
6. Output: `data/issues/plymouth-independent-2026-W15.epub`
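The output path in step 6 follows ISO year/week numbering; a sketch of the naming logic (hypothetical helper):

```python
from datetime import date

def issue_filename(week_start: date) -> str:
    # ISO year and week number, matching the spec's example output name
    year, week, _ = week_start.isocalendar()
    return f"plymouth-independent-{year}-W{week:02d}.epub"
```

For example, the week starting Monday 2026-04-06 yields `plymouth-independent-2026-W15.epub`.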
### `cover.py` — Cover Generation
**AI mode (Pollinations.ai):**
1. Build a prompt from the week's top headlines: "Newspaper front page illustration for Plymouth Massachusetts local news, featuring: [top 3 titles], classic newspaper style"
2. Fetch from `https://image.pollinations.ai/prompt/{encoded_prompt}?width=800&height=480`
3. Resize/fit to 800x480 bounding box, baseline JPEG
4. Overlay masthead text ("Plymouth Independent") and date range using Pillow `ImageDraw`
**Text fallback mode:**
1. Create 800x480 Pillow image with white background
2. Draw bold "Plymouth Independent" masthead
3. Date range subtitle
4. List top article headlines
5. Save as baseline JPEG
Both modes produce a single baseline JPEG within e-reader constraints.
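The AI-mode request URL from steps 1–2 can be sketched as (hypothetical helper; only the Pollinations.ai host and query parameters are from the spec):

```python
from urllib.parse import quote

def cover_prompt_url(headlines, width=800, height=480):
    """Build the Pollinations.ai request URL from the week's top headlines."""
    prompt = (
        "Newspaper front page illustration for Plymouth Massachusetts local news, "
        f"featuring: {', '.join(headlines[:3])}, classic newspaper style"
    )
    return f"https://image.pollinations.ai/prompt/{quote(prompt)}?width={width}&height={height}"
```

Fetching the returned URL and falling back to the text cover on any error keeps cover generation from ever blocking an issue.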
### `scheduler.py` — Background Scheduling
- APScheduler `BackgroundScheduler`, started on app launch
- Two jobs:
1. **RSS fetch:** `IntervalTrigger`, default every hour
2. **Auto-publish** (optional): `CronTrigger`, configurable day/time
- Schedule config persisted to `settings` table in SQLite
- On startup: read settings from DB, restore scheduler jobs
- Web UI can pause/resume/reconfigure jobs live
---
## Web UI
Five views, all server-rendered with Jinja2. Responsive layout via Pico CSS.
### Dashboard (`/`)
- Scheduler status (running/paused, next fetch, interval)
- Quick stats: articles this week, total cached, latest issue
- Buttons: "Fetch Now", "New Issue"
### Articles (`/articles`)
- Table of cached articles, filterable by week and category
- Columns: title, author, date, categories, thumbnail
- When preparing an issue: checkboxes for include/exclude
### Publish (`/publish`)
- Select target week (defaults to current ISO week)
- Article list with include/exclude toggles (all on by default)
- Cover method picker: "AI Cover" / "Text Cover"
- "Generate Issue" button
- Progress: synchronous POST request with a CSS spinner overlay; generation typically takes 5–15 seconds (dominated by the Pollinations.ai round-trip if using an AI cover)
- On completion: page reloads with download link and cover preview
### Settings (`/settings`)
- RSS feed URL
- Fetch interval (hours)
- Auto-publish: toggle + day/time + default cover method
- Image resize constraints
### Issues Archive (`/issues`)
- List of past issues: date range, article count, cover thumbnail
- Download link per issue
- "Regenerate" button
---
## Error Handling
| Scenario | Behavior |
|---|---|
| RSS feed down | Log warning, skip cycle, retry next interval |
| Image download fails | Log warning, skip image, include article without it |
| Pollinations.ai fails | Log error, fall back to text cover automatically |
| ePub generation fails | Show error in UI with details, don't save partial issue |
| DB locked (concurrent access) | SQLite WAL mode for better concurrency; scheduler and web requests share the same process |
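The WAL-mode mitigation in the last row can be sketched with the stdlib `sqlite3` module (with Flask-SQLAlchemy you would typically issue the same PRAGMA from an engine connect hook; this standalone version is only illustrative):

```python
import os
import sqlite3
import tempfile

# Enable write-ahead logging so the scheduler's writes and web-request
# reads contend less on the single SQLite file
db_path = os.path.join(tempfile.mkdtemp(), "newspaper.db")
conn = sqlite3.connect(db_path)
mode = conn.execute("PRAGMA journal_mode=WAL").fetchone()[0]
conn.close()
```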
---
## Future Enhancements (Out of Scope for V1)
- Full web scraping of article pages for richer content
- Email delivery of issues
- Multiple RSS feed support
- Reading progress tracking
- Dark mode cover variants


@@ -1,5 +1,5 @@
 import json
-from datetime import datetime, date, timezone
+from datetime import datetime, timezone

 from app import db
@@ -58,14 +58,14 @@ class Setting(db.Model):
     @staticmethod
     def get(key, default=None):
-        row = Setting.query.get(key)
+        row = db.session.get(Setting, key)
         if row is None:
             return default
         return json.loads(row.value)

     @staticmethod
     def set(key, value):
-        row = Setting.query.get(key)
+        row = db.session.get(Setting, key)
         if row is None:
             row = Setting(key=key, value=json.dumps(value))
             db.session.add(row)


@@ -1,5 +1,3 @@
-import os
-import tempfile
 import pytest
 from app import create_app, db as _db
 import config
@@ -12,7 +10,7 @@ def app(tmp_path):
     config.ISSUES_DIR = str(tmp_path / "data" / "issues")
     config.SQLALCHEMY_DATABASE_URI = f"sqlite:///{tmp_path / 'test.db'}"
-    app = create_app()
+    app = create_app(start_scheduler=False)
     app.config["TESTING"] = True
     with app.app_context():