From cb3ae403cf7ac0483c1368bb4528b3807045bcf3 Mon Sep 17 00:00:00 2001 From: cottongin Date: Thu, 12 Mar 2026 02:09:15 -0400 Subject: [PATCH] feat: add historical backfill with --init CLI and episode numbering Adds a --init mode that seeds the database with past shows from a given anchor episode/date forward, batch-fetching likes from SoundCloud and partitioning them into weekly buckets. Episode numbers are tracked in the shows table and auto-incremented by the poller for new shows. Includes full API documentation (docs/api.md) and updated README. Made-with: Cursor --- README.md | 45 ++- chat-summaries/2026-03-12_00-00-summary.md | 39 +++ chat-summaries/2026-03-12_16-30-summary.md | 30 ++ docs/api.md | 310 +++++++++++++++++++++ src/ntr_fetcher/api.py | 3 + src/ntr_fetcher/backfill.py | 82 ++++++ src/ntr_fetcher/db.py | 49 +++- src/ntr_fetcher/main.py | 41 +++ src/ntr_fetcher/models.py | 1 + src/ntr_fetcher/poller.py | 7 + tests/test_api.py | 9 +- tests/test_backfill.py | 198 +++++++++++++ tests/test_db.py | 46 +++ tests/test_poller.py | 83 ++++++ 14 files changed, 922 insertions(+), 21 deletions(-) create mode 100644 chat-summaries/2026-03-12_00-00-summary.md create mode 100644 chat-summaries/2026-03-12_16-30-summary.md create mode 100644 docs/api.md create mode 100644 src/ntr_fetcher/backfill.py create mode 100644 tests/test_backfill.py diff --git a/README.md b/README.md index 38cec4c..b0f9262 100644 --- a/README.md +++ b/README.md @@ -14,16 +14,34 @@ ntr-fetcher The API starts at `http://127.0.0.1:8000`. +## Historical Backfill + +Seed the database with past shows by providing an anchor episode and its air date: + +```bash +NTR_ADMIN_TOKEN=token ntr-fetcher --init --show 521 --aired 2026-01-07 +``` + +This computes every weekly show from the anchor forward to today, batch-fetches +the corresponding likes from SoundCloud, and populates the database. Episode +numbers are assigned automatically (521, 522, ...). 
After backfill completes, +the normal server mode will auto-increment from the latest episode. + ## API -| Endpoint | Description | -|----------|-------------| -| `GET /playlist` | Current week's playlist | -| `GET /playlist/{n}` | Track at position n | -| `GET /shows` | List all shows | -| `GET /shows/{id}` | Specific show's playlist | -| `GET /health` | Service health check | -| `POST /admin/refresh` | Trigger SoundCloud fetch (token required) | +Full documentation: [`docs/api.md`](docs/api.md) + +| Endpoint | Method | Auth | Description | +|----------|--------|------|-------------| +| `/health` | GET | -- | Service health check | +| `/playlist` | GET | -- | Current week's playlist | +| `/playlist/{position}` | GET | -- | Single track by position (1-indexed) | +| `/shows` | GET | -- | List all shows (paginated) | +| `/shows/{show_id}` | GET | -- | Specific show with tracks | +| `/admin/refresh` | POST | Bearer | Trigger immediate SoundCloud fetch | +| `/admin/tracks` | POST | Bearer | Add track to current show | +| `/admin/tracks/{track_id}` | DELETE | Bearer | Remove track from current show | +| `/admin/tracks/{track_id}/position` | PUT | Bearer | Move track to new position | ## Configuration @@ -33,14 +51,17 @@ Environment variables (prefix `NTR_`): |----------|---------|-------------| | `NTR_PORT` | `8000` | API port | | `NTR_HOST` | `127.0.0.1` | Bind address | -| `NTR_DB_PATH` | `./ntr_fetcher.db` | SQLite path | -| `NTR_POLL_INTERVAL_SECONDS` | `3600` | Poll frequency | -| `NTR_ADMIN_TOKEN` | (required) | Admin bearer token | -| `NTR_SOUNDCLOUD_USER` | `nicktherat` | SoundCloud user | +| `NTR_DB_PATH` | `./ntr_fetcher.db` | SQLite database path | +| `NTR_POLL_INTERVAL_SECONDS` | `3600` | How often to check SoundCloud (seconds) | +| `NTR_ADMIN_TOKEN` | *(required)* | Bearer token for admin endpoints | +| `NTR_SOUNDCLOUD_USER` | `nicktherat` | SoundCloud username to track | +| `NTR_SHOW_DAY` | `2` | Day of week for show (0=Mon, 2=Wed) | +| `NTR_SHOW_HOUR` | 
`22` | Hour (Eastern Time) when the show starts | ## Development ```bash pip install -e ".[dev]" pytest +ruff check src/ tests/ ``` diff --git a/chat-summaries/2026-03-12_00-00-summary.md b/chat-summaries/2026-03-12_00-00-summary.md new file mode 100644 index 0000000..a3780de --- /dev/null +++ b/chat-summaries/2026-03-12_00-00-summary.md @@ -0,0 +1,39 @@ +# NtR SoundCloud Fetcher — Full Implementation + +## Task Description + +Designed and implemented a Python service that polls NicktheRat's SoundCloud likes, builds weekly playlists aligned to the Wednesday 22:00 ET show schedule, and serves them via a JSON API for an IRC bot. + +## Changes Made + +### Design Phase +- Brainstormed requirements through 6 clarifying questions +- Evaluated 3 architectural approaches, selected single-process daemon +- Produced design doc covering architecture, data model, API, poller logic +- Produced 13-task TDD implementation plan + +### Implementation (42 tests, all passing, lint clean) + +| Module | File | Purpose | +|--------|------|---------| +| Config | `src/ntr_fetcher/config.py` | Pydantic settings with `NTR_` env prefix | +| Week | `src/ntr_fetcher/week.py` | DST-aware Wednesday 22:00 ET boundary computation | +| Models | `src/ntr_fetcher/models.py` | Track, Show, ShowTrack dataclasses | +| Database | `src/ntr_fetcher/db.py` | SQLite schema, CRUD, track sync with unlike removal | +| SoundCloud | `src/ntr_fetcher/soundcloud.py` | client_id extraction, user resolution, likes fetching | +| Poller | `src/ntr_fetcher/poller.py` | Hourly polling with supervised restart | +| API | `src/ntr_fetcher/api.py` | FastAPI routes for playlist, shows, admin, health | +| Main | `src/ntr_fetcher/main.py` | Entry point wiring everything together | + +### Key Design Decisions +- Tracks removed when Nick unlikes them (positions re-compact) +- Cursor-seeking for efficient SoundCloud API pagination +- Automatic client_id rotation on 401 +- Supervisor restarts poller on failure without affecting API 
+ +## Follow-up Items + +- **Incremental fetching**: Currently fetches full week every poll; could optimize to stop at known tracks +- **Retry/backoff for non-401 errors**: 429, 5xx, timeouts not yet handled with retries +- **`full` parameter**: Accepted but currently equivalent to normal poll (no incremental to differentiate from) +- **`soundcloud_url` in admin add track**: Removed from API; only `track_id` supported diff --git a/chat-summaries/2026-03-12_16-30-summary.md b/chat-summaries/2026-03-12_16-30-summary.md new file mode 100644 index 0000000..fea7d0a --- /dev/null +++ b/chat-summaries/2026-03-12_16-30-summary.md @@ -0,0 +1,30 @@ +# Historical Backfill (--init) Feature + +## Task +Add CLI-based historical show backfill with episode numbering throughout the system. + +## Changes Made + +### New file +- `src/ntr_fetcher/backfill.py` — Computes show weeks from an anchor episode/date, batch-fetches all likes from SoundCloud, partitions them into weekly buckets, and populates the DB. + +### Modified files +- `src/ntr_fetcher/models.py` — Added `episode_number: int | None` to `Show` dataclass. +- `src/ntr_fetcher/db.py` — Added `episode_number` column to schema, ALTER TABLE migration for existing DBs, updated `get_or_create_show` to accept/store episode numbers, added `get_latest_episode_number()` and `update_show_episode_number()`, changed `list_shows` ordering to `week_start DESC`. +- `src/ntr_fetcher/main.py` — Added `argparse` with `--init`, `--show`, `--aired` flags. `--init` runs backfill then exits; default starts the server as before. +- `src/ntr_fetcher/poller.py` — Auto-assigns episode number (latest + 1) when creating a new show if historical data exists. +- `src/ntr_fetcher/api.py` — Added `episode_number` to `/playlist`, `/shows`, `/shows/{show_id}` responses. + +### New/updated tests +- `tests/test_backfill.py` — Week computation, batch partitioning, empty data, idempotency. 
+- `tests/test_db.py` — Episode number creation, update, and `get_latest_episode_number`. +- `tests/test_poller.py` — Auto-numbering when history exists, skips when no history, skips when already assigned. +- `tests/test_api.py` — `episode_number` present in show responses. + +## Results +- 58 tests passing (up from 42), ruff clean. + +## Usage +``` +NTR_ADMIN_TOKEN=token ntr-fetcher --init --show 521 --aired 2026-01-07 +``` diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..31a83f0 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,310 @@ +# NtR SoundCloud Fetcher -- API Reference + +Base URL: `http://127.0.0.1:8000` (configurable via `NTR_HOST` / `NTR_PORT`) + +--- + +## Public Endpoints + +### `GET /health` + +Service health check. + +**Response** + +```json +{ + "status": "ok", + "poller_alive": true, + "last_fetch": "2026-03-12T02:00:00+00:00", + "current_week_track_count": 9 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `status` | string | Always `"ok"` | +| `poller_alive` | boolean | Whether the background poller is running | +| `last_fetch` | string \| null | ISO 8601 timestamp of last successful poll, or `null` if never | +| `current_week_track_count` | integer | Number of tracks in the current week's playlist | + +--- + +### `GET /playlist` + +Returns the current week's full playlist. 
+ +**Response** + +```json +{ + "show_id": 10, + "episode_number": 530, + "week_start": "2026-03-05T03:00:00+00:00", + "week_end": "2026-03-12T02:00:00+00:00", + "tracks": [ + { + "show_id": 10, + "track_id": 12345, + "position": 1, + "title": "Night Drive", + "artist": "SomeArtist", + "permalink_url": "https://soundcloud.com/someartist/night-drive", + "artwork_url": "https://i1.sndcdn.com/artworks-...-large.jpg", + "duration_ms": 245000, + "license": "cc-by", + "liked_at": "2026-03-06T14:23:00+00:00", + "raw_json": "{...}" + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `show_id` | integer | Internal database ID for this show | +| `episode_number` | integer \| null | Episode number (e.g. 530), or `null` if not assigned | +| `week_start` | string | ISO 8601 UTC timestamp -- start of the show's like window | +| `week_end` | string | ISO 8601 UTC timestamp -- end of the show's like window | +| `tracks` | array | Ordered list of tracks (see Track Object below) | + +--- + +### `GET /playlist/{position}` + +Returns a single track by its position in the current week's playlist. Positions are 1-indexed (matching IRC commands `!1`, `!2`, etc.). + +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `position` | integer | 1-based position in the playlist | + +**Response** -- a single Track Object (see below). + +**Errors** + +| Status | Detail | +|--------|--------| +| 404 | `"No track at position {n}"` | + +--- + +### `GET /shows` + +Lists all shows, ordered by week start date (newest first). 
+ +**Query Parameters** + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `limit` | integer | 20 | Max number of shows to return | +| `offset` | integer | 0 | Number of shows to skip | + +**Response** + +```json +[ + { + "id": 10, + "episode_number": 530, + "week_start": "2026-03-05T03:00:00+00:00", + "week_end": "2026-03-12T02:00:00+00:00", + "created_at": "2026-03-05T03:00:00+00:00" + } +] +``` + +--- + +### `GET /shows/{show_id}` + +Returns a specific show with its full track listing. + +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `show_id` | integer | The show's internal database ID | + +**Response** + +```json +{ + "show_id": 10, + "episode_number": 530, + "week_start": "2026-03-05T03:00:00+00:00", + "week_end": "2026-03-12T02:00:00+00:00", + "tracks": [...] +} +``` + +**Errors** + +| Status | Detail | +|--------|--------| +| 404 | `"Show not found"` | + +--- + +## Admin Endpoints + +All admin endpoints require a bearer token via the `Authorization` header: + +``` +Authorization: Bearer <token> +``` + +Returns `401` with `"Missing or invalid token"` if the header is absent or the token doesn't match. + +--- + +### `POST /admin/refresh` + +Triggers an immediate SoundCloud fetch for the current week's show. + +**Request Body** (optional) + +```json +{ + "full": false +} +``` + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `full` | boolean | `false` | Reserved for future use (full vs incremental refresh) | + +**Response** + +```json +{ + "status": "refreshed", + "track_count": 9 +} +``` + +--- + +### `POST /admin/tracks` + +Manually add a track to the current week's show. 
+ +**Request Body** + +```json +{ + "track_id": 12345, + "position": 3 +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `track_id` | integer | yes | SoundCloud track ID (must already exist in the `tracks` table) | +| `position` | integer | no | Insert at this position (shifts others down). Omit to append at end. | + +**Response** + +```json +{ + "status": "added" +} +``` + +--- + +### `DELETE /admin/tracks/{track_id}` + +Remove a track from the current week's show. Remaining positions are re-compacted. + +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `track_id` | integer | SoundCloud track ID to remove | + +**Response** + +```json +{ + "status": "removed" +} +``` + +**Errors** + +| Status | Detail | +|--------|--------| +| 404 | `"Track not in current show"` | + +--- + +### `PUT /admin/tracks/{track_id}/position` + +Move a track to a new position within the current week's show. + +**Path Parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `track_id` | integer | SoundCloud track ID to move | + +**Request Body** + +```json +{ + "position": 1 +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `position` | integer | yes | New 1-based position for the track | + +**Response** + +```json +{ + "status": "moved" +} +``` + +**Errors** + +| Status | Detail | +|--------|--------| +| 404 | `"Track not in current show"` | + +--- + +## Track Object + +Returned inside playlist and show detail responses. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `show_id` | integer | The show this track belongs to | +| `track_id` | integer | SoundCloud track ID | +| `position` | integer | 1-based position in the playlist | +| `title` | string | Track title | +| `artist` | string | Uploader's SoundCloud username | +| `permalink_url` | string | Full URL to the track on SoundCloud | +| `artwork_url` | string \| null | URL to artwork image, or `null` | +| `duration_ms` | integer | Track duration in milliseconds | +| `license` | string | License string (e.g. `"cc-by"`, `"cc-by-sa"`) | +| `liked_at` | string | ISO 8601 timestamp of when the host liked the track | +| `raw_json` | string | Full SoundCloud API response for this track (JSON string) | + +--- + +## Week Boundaries + +Shows follow a weekly cadence aligned to **Wednesday 22:00 Eastern Time** (EST or EDT depending on DST). The like window for a show runs from the previous Wednesday 22:00 ET to the current Wednesday 22:00 ET. + +All timestamps in API responses are UTC. 
The boundary shifts by 1 hour across DST transitions: + +| Period | Eastern | UTC boundary | +|--------|---------|--------------| +| EST (Nov -- Mar) | Wed 22:00 | Thu 03:00 | +| EDT (Mar -- Nov) | Wed 22:00 | Thu 02:00 | diff --git a/src/ntr_fetcher/api.py b/src/ntr_fetcher/api.py index be55c54..2568029 100644 --- a/src/ntr_fetcher/api.py +++ b/src/ntr_fetcher/api.py @@ -61,6 +61,7 @@ def create_app( tracks = db.get_show_tracks(show.id) return { "show_id": show.id, + "episode_number": show.episode_number, "week_start": show.week_start.isoformat(), "week_end": show.week_end.isoformat(), "tracks": tracks, @@ -80,6 +81,7 @@ def create_app( return [ { "id": s.id, + "episode_number": s.episode_number, "week_start": s.week_start.isoformat(), "week_end": s.week_end.isoformat(), "created_at": s.created_at.isoformat(), @@ -96,6 +98,7 @@ def create_app( tracks = db.get_show_tracks(show.id) return { "show_id": show.id, + "episode_number": show.episode_number, "week_start": show.week_start.isoformat(), "week_end": show.week_end.isoformat(), "tracks": tracks, diff --git a/src/ntr_fetcher/backfill.py b/src/ntr_fetcher/backfill.py new file mode 100644 index 0000000..b1f097e --- /dev/null +++ b/src/ntr_fetcher/backfill.py @@ -0,0 +1,82 @@ +import logging +from datetime import date, datetime, timedelta, timezone +from zoneinfo import ZoneInfo + +from ntr_fetcher.db import Database +from ntr_fetcher.soundcloud import SoundCloudClient +from ntr_fetcher.week import get_show_week + +EASTERN = ZoneInfo("America/New_York") + +logger = logging.getLogger(__name__) + + +def _compute_show_weeks( + anchor_aired: date, + anchor_episode: int, + show_day: int, + show_hour: int, +) -> list[tuple[int, datetime, datetime]]: + """Return (episode_number, week_start_utc, week_end_utc) for each show + from the anchor forward through the current date.""" + today = date.today() + weeks: list[tuple[int, datetime, datetime]] = [] + aired = anchor_aired + episode = anchor_episode + + while aired <= today: 
+ noon_et = datetime(aired.year, aired.month, aired.day, 12, 0, 0, tzinfo=EASTERN) + noon_utc = noon_et.astimezone(timezone.utc).replace(tzinfo=timezone.utc) + week_start, week_end = get_show_week(noon_utc, show_day, show_hour) + weeks.append((episode, week_start, week_end)) + aired += timedelta(days=7) + episode += 1 + + return weeks + + +async def run_backfill( + db: Database, + soundcloud: SoundCloudClient, + soundcloud_user: str, + show_day: int, + show_hour: int, + anchor_episode: int, + anchor_aired: date, +) -> None: + weeks = _compute_show_weeks(anchor_aired, anchor_episode, show_day, show_hour) + if not weeks: + logger.warning("No show weeks to backfill") + return + + logger.info( + "Backfilling %d shows: #%d (%s) through #%d (%s)", + len(weeks), + weeks[0][0], anchor_aired.isoformat(), + weeks[-1][0], (anchor_aired + timedelta(days=7 * (len(weeks) - 1))).isoformat(), + ) + + overall_start = weeks[0][1] + overall_end = weeks[-1][2] + + user_id = await soundcloud.resolve_user(soundcloud_user) + all_tracks = await soundcloud.fetch_likes( + user_id=user_id, + since=overall_start, + until=overall_end, + ) + logger.info("Fetched %d total tracks from SoundCloud", len(all_tracks)) + + for track in all_tracks: + db.upsert_track(track) + + for episode, week_start, week_end in weeks: + show = db.get_or_create_show(week_start, week_end, episode_number=episode) + week_tracks = [ + t for t in all_tracks + if week_start <= t.liked_at < week_end + ] + week_tracks.sort(key=lambda t: t.liked_at) + track_ids = [t.id for t in week_tracks] + db.set_show_tracks(show.id, track_ids) + logger.info("Show #%d (%s): %d tracks", episode, week_start.isoformat(), len(week_tracks)) diff --git a/src/ntr_fetcher/db.py b/src/ntr_fetcher/db.py index 87df518..42c0fcd 100644 --- a/src/ntr_fetcher/db.py +++ b/src/ntr_fetcher/db.py @@ -20,7 +20,8 @@ CREATE TABLE IF NOT EXISTS shows ( id INTEGER PRIMARY KEY AUTOINCREMENT, week_start TEXT NOT NULL, week_end TEXT NOT NULL, - created_at TEXT NOT 
NULL + created_at TEXT NOT NULL, + episode_number INTEGER ); CREATE TABLE IF NOT EXISTS show_tracks ( @@ -49,6 +50,11 @@ class Database: def initialize(self) -> None: conn = self._connect() conn.executescript(SCHEMA) + try: + conn.execute("ALTER TABLE shows ADD COLUMN episode_number INTEGER") + conn.commit() + except sqlite3.OperationalError: + pass conn.close() def upsert_track(self, track: Track) -> None: @@ -102,26 +108,36 @@ class Database: ) def get_or_create_show( - self, week_start: datetime, week_end: datetime + self, + week_start: datetime, + week_end: datetime, + episode_number: int | None = None, ) -> Show: conn = self._connect() row = conn.execute( - "SELECT id, week_start, week_end, created_at FROM shows " + "SELECT id, week_start, week_end, created_at, episode_number FROM shows " "WHERE week_start = ? AND week_end = ?", (week_start.isoformat(), week_end.isoformat()), ).fetchone() if row is not None: + if episode_number is not None and row["episode_number"] != episode_number: + conn.execute( + "UPDATE shows SET episode_number = ? 
WHERE id = ?", + (episode_number, row["id"]), + ) + conn.commit() conn.close() return Show( id=row["id"], week_start=datetime.fromisoformat(row["week_start"]), week_end=datetime.fromisoformat(row["week_end"]), created_at=datetime.fromisoformat(row["created_at"]), + episode_number=episode_number if episode_number is not None else row["episode_number"], ) now = datetime.now(timezone.utc).isoformat() cursor = conn.execute( - "INSERT INTO shows (week_start, week_end, created_at) VALUES (?, ?, ?)", - (week_start.isoformat(), week_end.isoformat(), now), + "INSERT INTO shows (week_start, week_end, created_at, episode_number) VALUES (?, ?, ?, ?)", + (week_start.isoformat(), week_end.isoformat(), now, episode_number), ) conn.commit() show_id = cursor.lastrowid @@ -131,6 +147,7 @@ class Database: week_start=week_start, week_end=week_end, created_at=datetime.fromisoformat(now), + episode_number=episode_number, ) def get_show_tracks(self, show_id: int) -> list[dict]: @@ -203,9 +220,9 @@ class Database: conn = self._connect() rows = conn.execute( """ - SELECT id, week_start, week_end, created_at + SELECT id, week_start, week_end, created_at, episode_number FROM shows - ORDER BY created_at DESC + ORDER BY week_start DESC LIMIT ? OFFSET ? """, (limit, offset), @@ -217,10 +234,28 @@ class Database: week_start=datetime.fromisoformat(row["week_start"]), week_end=datetime.fromisoformat(row["week_end"]), created_at=datetime.fromisoformat(row["created_at"]), + episode_number=row["episode_number"], ) for row in rows ] + def get_latest_episode_number(self) -> int | None: + conn = self._connect() + row = conn.execute( + "SELECT MAX(episode_number) as max_ep FROM shows WHERE episode_number IS NOT NULL" + ).fetchone() + conn.close() + return row["max_ep"] if row else None + + def update_show_episode_number(self, show_id: int, episode_number: int) -> None: + conn = self._connect() + conn.execute( + "UPDATE shows SET episode_number = ? 
WHERE id = ?", + (episode_number, show_id), + ) + conn.commit() + conn.close() + def has_track_in_show(self, show_id: int, track_id: int) -> bool: conn = self._connect() row = conn.execute( diff --git a/src/ntr_fetcher/main.py b/src/ntr_fetcher/main.py index 864e4eb..3484926 100644 --- a/src/ntr_fetcher/main.py +++ b/src/ntr_fetcher/main.py @@ -1,9 +1,12 @@ +import argparse import asyncio import logging +from datetime import date import uvicorn from ntr_fetcher.api import create_app +from ntr_fetcher.backfill import run_backfill from ntr_fetcher.config import Settings from ntr_fetcher.db import Database from ntr_fetcher.poller import Poller @@ -16,13 +19,51 @@ logging.basicConfig( logger = logging.getLogger(__name__) +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="NtR SoundCloud Fetcher") + parser.add_argument( + "--init", action="store_true", + help="Run historical backfill instead of starting the server", + ) + parser.add_argument( + "--show", type=int, + help="Anchor episode number (required with --init)", + ) + parser.add_argument( + "--aired", type=date.fromisoformat, + help="Air date of anchor episode as YYYY-MM-DD (required with --init)", + ) + args = parser.parse_args() + if args.init and (args.show is None or args.aired is None): + parser.error("--init requires both --show and --aired") + return args + + def run() -> None: + args = _parse_args() settings = Settings() db = Database(settings.db_path) db.initialize() logger.info("Database initialized at %s", settings.db_path) + if args.init: + sc = SoundCloudClient() + asyncio.run( + run_backfill( + db=db, + soundcloud=sc, + soundcloud_user=settings.soundcloud_user, + show_day=settings.show_day, + show_hour=settings.show_hour, + anchor_episode=args.show, + anchor_aired=args.aired, + ) + ) + asyncio.run(sc.close()) + logger.info("Backfill complete") + return + sc = SoundCloudClient() poller = Poller( db=db, diff --git a/src/ntr_fetcher/models.py 
b/src/ntr_fetcher/models.py index 6de77f3..e74421f 100644 --- a/src/ntr_fetcher/models.py +++ b/src/ntr_fetcher/models.py @@ -21,6 +21,7 @@ class Show: week_start: datetime week_end: datetime created_at: datetime + episode_number: int | None = None @dataclass(frozen=True) diff --git a/src/ntr_fetcher/poller.py b/src/ntr_fetcher/poller.py index dffe43a..e021db2 100644 --- a/src/ntr_fetcher/poller.py +++ b/src/ntr_fetcher/poller.py @@ -40,6 +40,13 @@ class Poller: week_start, week_end = get_show_week(now, self._show_day, self._show_hour) show = self._db.get_or_create_show(week_start, week_end) + if show.episode_number is None: + latest = self._db.get_latest_episode_number() + if latest is not None: + new_ep = latest + 1 + self._db.update_show_episode_number(show.id, new_ep) + logger.info("Auto-assigned episode #%d to show %d", new_ep, show.id) + tracks = await self._sc.fetch_likes( user_id=user_id, since=week_start, diff --git a/tests/test_api.py b/tests/test_api.py index f857aee..dfedd21 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -62,6 +62,7 @@ def test_playlist(client, db): resp = client.get("/playlist") assert resp.status_code == 200 data = resp.json() + assert "episode_number" in data assert len(data["tracks"]) == 2 assert data["tracks"][0]["position"] == 1 assert data["tracks"][0]["title"] == "Song A" @@ -84,14 +85,18 @@ def test_shows_list(client, db): _seed_show(db) resp = client.get("/shows") assert resp.status_code == 200 - assert len(resp.json()) >= 1 + data = resp.json() + assert len(data) >= 1 + assert "episode_number" in data[0] def test_shows_detail(client, db): show = _seed_show(db) resp = client.get(f"/shows/{show.id}") assert resp.status_code == 200 - assert len(resp.json()["tracks"]) == 2 + data = resp.json() + assert "episode_number" in data + assert len(data["tracks"]) == 2 def test_admin_refresh_requires_token(client): diff --git a/tests/test_backfill.py b/tests/test_backfill.py new file mode 100644 index 0000000..6aa2d13 --- 
/dev/null +++ b/tests/test_backfill.py @@ -0,0 +1,198 @@ +from datetime import date, datetime, timezone +from unittest.mock import AsyncMock + +import pytest + +from ntr_fetcher.backfill import _compute_show_weeks, run_backfill +from ntr_fetcher.db import Database +from ntr_fetcher.models import Track + + +@pytest.fixture +def db(tmp_path): + database = Database(str(tmp_path / "test.db")) + database.initialize() + return database + + +def _make_track(id: int, liked_at: str) -> Track: + return Track( + id=id, + title=f"Track {id}", + artist="Artist", + permalink_url=f"https://soundcloud.com/a/t-{id}", + artwork_url=None, + duration_ms=180000, + license="cc-by", + liked_at=datetime.fromisoformat(liked_at), + raw_json="{}", + ) + + +class TestComputeShowWeeks: + def test_single_week_anchor_is_today(self): + today = date.today() + weeks = _compute_show_weeks(today, 100, show_day=2, show_hour=22) + assert len(weeks) >= 1 + assert weeks[0][0] == 100 + + def test_multiple_weeks(self): + weeks = _compute_show_weeks( + anchor_aired=date(2026, 1, 7), + anchor_episode=521, + show_day=2, + show_hour=22, + ) + assert weeks[0][0] == 521 + assert weeks[1][0] == 522 + for i, (ep, start, end) in enumerate(weeks): + assert ep == 521 + i + assert end > start + + def test_week_boundaries_are_utc(self): + weeks = _compute_show_weeks( + anchor_aired=date(2026, 1, 7), + anchor_episode=521, + show_day=2, + show_hour=22, + ) + for _, start, end in weeks: + assert start.tzinfo == timezone.utc + assert end.tzinfo == timezone.utc + + def test_consecutive_weeks_are_contiguous(self): + weeks = _compute_show_weeks( + anchor_aired=date(2026, 1, 7), + anchor_episode=521, + show_day=2, + show_hour=22, + ) + for i in range(len(weeks) - 1): + assert weeks[i][2] == weeks[i + 1][1], ( + f"Week {i} end != week {i+1} start" + ) + + def test_anchor_in_future_returns_empty(self): + future = date(2099, 1, 1) + weeks = _compute_show_weeks(future, 999, show_day=2, show_hour=22) + assert weeks == [] + + 
+@pytest.mark.asyncio +async def test_run_backfill_populates_db(db): + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 12345 + + t1 = _make_track(1, "2026-01-02T05:00:00+00:00") + t2 = _make_track(2, "2026-01-04T15:00:00+00:00") + t3 = _make_track(3, "2026-01-09T10:00:00+00:00") + mock_sc.fetch_likes.return_value = [t1, t2, t3] + + await run_backfill( + db=db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + anchor_episode=521, + anchor_aired=date(2026, 1, 7), + ) + + shows = db.list_shows(limit=100, offset=0) + assert len(shows) >= 1 + + ep_521 = next((s for s in shows if s.episode_number == 521), None) + assert ep_521 is not None + + tracks = db.get_show_tracks(ep_521.id) + track_ids = [t["track_id"] for t in tracks] + assert 1 in track_ids or 2 in track_ids or 3 in track_ids + + +@pytest.mark.asyncio +async def test_run_backfill_partitions_tracks_by_week(db): + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 12345 + + t_week1 = _make_track(10, "2026-01-02T12:00:00+00:00") + t_week2 = _make_track(20, "2026-01-10T12:00:00+00:00") + mock_sc.fetch_likes.return_value = [t_week1, t_week2] + + await run_backfill( + db=db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + anchor_episode=521, + anchor_aired=date(2026, 1, 7), + ) + + shows = db.list_shows(limit=100, offset=0) + ep_521 = next((s for s in shows if s.episode_number == 521), None) + ep_522 = next((s for s in shows if s.episode_number == 522), None) + + if ep_521: + tracks_521 = db.get_show_tracks(ep_521.id) + ids_521 = {t["track_id"] for t in tracks_521} + else: + ids_521 = set() + + if ep_522: + tracks_522 = db.get_show_tracks(ep_522.id) + ids_522 = {t["track_id"] for t in tracks_522} + else: + ids_522 = set() + + assert ids_521 & ids_522 == set(), "Tracks should not appear in multiple weeks" + + +@pytest.mark.asyncio +async def test_run_backfill_no_tracks(db): + mock_sc = AsyncMock() + 
mock_sc.resolve_user.return_value = 12345 + mock_sc.fetch_likes.return_value = [] + + await run_backfill( + db=db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + anchor_episode=521, + anchor_aired=date(2026, 1, 7), + ) + + shows = db.list_shows(limit=100, offset=0) + assert len(shows) >= 1 + for show in shows: + tracks = db.get_show_tracks(show.id) + assert len(tracks) == 0 + + +@pytest.mark.asyncio +async def test_run_backfill_idempotent(db): + """Running backfill twice with the same data shouldn't duplicate shows.""" + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 12345 + mock_sc.fetch_likes.return_value = [ + _make_track(1, "2026-01-05T12:00:00+00:00"), + ] + + kwargs = dict( + db=db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + anchor_episode=521, + anchor_aired=date(2026, 1, 7), + ) + + await run_backfill(**kwargs) + count_first = len(db.list_shows(limit=1000, offset=0)) + + await run_backfill(**kwargs) + count_second = len(db.list_shows(limit=1000, offset=0)) + + assert count_first == count_second diff --git a/tests/test_db.py b/tests/test_db.py index 36137fc..c94b454 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -226,6 +226,52 @@ def test_add_track_to_show_at_position(db): assert tracks[2]["track_id"] == 2 +def test_get_or_create_show_with_episode_number(db): + week_start = datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc) + week_end = datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc) + show = db.get_or_create_show(week_start, week_end, episode_number=521) + assert show.episode_number == 521 + show2 = db.get_or_create_show(week_start, week_end) + assert show2.id == show.id + assert show2.episode_number == 521 + + +def test_get_or_create_show_updates_episode_number(db): + week_start = datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc) + week_end = datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc) + show = db.get_or_create_show(week_start, week_end) + 
assert show.episode_number is None + show2 = db.get_or_create_show(week_start, week_end, episode_number=521) + assert show2.id == show.id + assert show2.episode_number == 521 + + +def test_get_latest_episode_number(db): + assert db.get_latest_episode_number() is None + db.get_or_create_show( + datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc), + datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc), + episode_number=521, + ) + assert db.get_latest_episode_number() == 521 + db.get_or_create_show( + datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc), + datetime(2026, 1, 22, 3, 0, 0, tzinfo=timezone.utc), + episode_number=522, + ) + assert db.get_latest_episode_number() == 522 + + +def test_update_show_episode_number(db): + week_start = datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc) + week_end = datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc) + show = db.get_or_create_show(week_start, week_end) + assert show.episode_number is None + db.update_show_episode_number(show.id, 521) + show2 = db.get_or_create_show(week_start, week_end) + assert show2.episode_number == 521 + + def test_has_track_in_show(db): week_start = datetime(2026, 3, 13, 2, 0, 0, tzinfo=timezone.utc) week_end = datetime(2026, 3, 20, 2, 0, 0, tzinfo=timezone.utc) diff --git a/tests/test_poller.py b/tests/test_poller.py index ca482ce..0e659e2 100644 --- a/tests/test_poller.py +++ b/tests/test_poller.py @@ -93,6 +93,89 @@ async def test_poll_once_removes_unliked_tracks(): assert call_args[0][1] == [1] +@pytest.mark.asyncio +async def test_poll_once_auto_assigns_episode_number(): + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 206979918 + mock_sc.fetch_likes.return_value = [ + _make_track(1, "2026-03-14T01:00:00+00:00"), + ] + + mock_db = MagicMock() + mock_show = MagicMock() + mock_show.id = 5 + mock_show.episode_number = None + mock_db.get_or_create_show.return_value = mock_show + mock_db.get_latest_episode_number.return_value = 530 + + poller = Poller( + db=mock_db, + 
soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + poll_interval=3600, + ) + + await poller.poll_once() + + mock_db.update_show_episode_number.assert_called_once_with(5, 531) + + +@pytest.mark.asyncio +async def test_poll_once_skips_numbering_when_no_history(): + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 206979918 + mock_sc.fetch_likes.return_value = [] + + mock_db = MagicMock() + mock_show = MagicMock() + mock_show.id = 1 + mock_show.episode_number = None + mock_db.get_or_create_show.return_value = mock_show + mock_db.get_latest_episode_number.return_value = None + + poller = Poller( + db=mock_db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + poll_interval=3600, + ) + + await poller.poll_once() + + mock_db.update_show_episode_number.assert_not_called() + + +@pytest.mark.asyncio +async def test_poll_once_skips_numbering_when_already_assigned(): + mock_sc = AsyncMock() + mock_sc.resolve_user.return_value = 206979918 + mock_sc.fetch_likes.return_value = [] + + mock_db = MagicMock() + mock_show = MagicMock() + mock_show.id = 1 + mock_show.episode_number = 530 + mock_db.get_or_create_show.return_value = mock_show + + poller = Poller( + db=mock_db, + soundcloud=mock_sc, + soundcloud_user="nicktherat", + show_day=2, + show_hour=22, + poll_interval=3600, + ) + + await poller.poll_once() + + mock_db.get_latest_episode_number.assert_not_called() + mock_db.update_show_episode_number.assert_not_called() + + @pytest.mark.asyncio async def test_poll_once_full_refresh(): mock_sc = AsyncMock()