feat: add historical backfill with --init CLI and episode numbering
Adds a --init mode that seeds the database with past shows from a given anchor episode/date forward, batch-fetching likes from SoundCloud and partitioning them into weekly buckets. Episode numbers are tracked in the shows table and auto-incremented by the poller for new shows. Includes full API documentation (docs/api.md) and updated README. Made-with: Cursor
This commit is contained in:
@@ -62,6 +62,7 @@ def test_playlist(client, db):
|
||||
resp = client.get("/playlist")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert "episode_number" in data
|
||||
assert len(data["tracks"]) == 2
|
||||
assert data["tracks"][0]["position"] == 1
|
||||
assert data["tracks"][0]["title"] == "Song A"
|
||||
@@ -84,14 +85,18 @@ def test_shows_list(client, db):
|
||||
_seed_show(db)
|
||||
resp = client.get("/shows")
|
||||
assert resp.status_code == 200
|
||||
assert len(resp.json()) >= 1
|
||||
data = resp.json()
|
||||
assert len(data) >= 1
|
||||
assert "episode_number" in data[0]
|
||||
|
||||
|
||||
def test_shows_detail(client, db):
    """GET /shows/{id} answers 200 with an episode number and two tracks."""
    seeded = _seed_show(db)
    response = client.get(f"/shows/{seeded.id}")
    assert response.status_code == 200
    assert len(response.json()["tracks"]) == 2
    payload = response.json()
    assert "episode_number" in payload
    assert len(payload["tracks"]) == 2
|
||||
|
||||
|
||||
def test_admin_refresh_requires_token(client):
|
||||
|
||||
198
tests/test_backfill.py
Normal file
198
tests/test_backfill.py
Normal file
@@ -0,0 +1,198 @@
|
||||
from datetime import date, datetime, timezone
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from ntr_fetcher.backfill import _compute_show_weeks, run_backfill
|
||||
from ntr_fetcher.db import Database
|
||||
from ntr_fetcher.models import Track
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide an initialized ``Database`` stored under pytest's ``tmp_path``."""
    db_file = tmp_path / "test.db"
    instance = Database(str(db_file))
    instance.initialize()
    return instance
|
||||
|
||||
|
||||
def _make_track(id: int, liked_at: str) -> Track:
    """Build a ``Track`` with fixed metadata; only the id and like time vary.

    ``liked_at`` is an ISO-8601 string parsed with ``datetime.fromisoformat``.
    """
    fields = {
        "id": id,
        "title": f"Track {id}",
        "artist": "Artist",
        "permalink_url": f"https://soundcloud.com/a/t-{id}",
        "artwork_url": None,
        "duration_ms": 180000,
        "license": "cc-by",
        "liked_at": datetime.fromisoformat(liked_at),
        "raw_json": "{}",
    }
    return Track(**fields)
|
||||
|
||||
|
||||
class TestComputeShowWeeks:
    """Checks on the (episode, start, end) tuples from ``_compute_show_weeks``."""

    @staticmethod
    def _weeks_from_january_anchor():
        """Weeks for episode 521 aired 2026-01-07 with show_day=2, show_hour=22."""
        return _compute_show_weeks(
            anchor_aired=date(2026, 1, 7),
            anchor_episode=521,
            show_day=2,
            show_hour=22,
        )

    def test_single_week_anchor_is_today(self):
        # Anchoring on today's date must still yield the anchor episode first.
        weeks = _compute_show_weeks(date.today(), 100, show_day=2, show_hour=22)
        assert len(weeks) >= 1
        first_episode = weeks[0][0]
        assert first_episode == 100

    def test_multiple_weeks(self):
        weeks = self._weeks_from_january_anchor()
        assert weeks[0][0] == 521
        assert weeks[1][0] == 522
        # Episode numbers count up by one per week, and each week has positive length.
        for expected_episode, (episode, begins, ends) in enumerate(weeks, 521):
            assert episode == expected_episode
            assert ends > begins

    def test_week_boundaries_are_utc(self):
        weeks = self._weeks_from_january_anchor()
        for _, begins, ends in weeks:
            for boundary in (begins, ends):
                assert boundary.tzinfo == timezone.utc

    def test_consecutive_weeks_are_contiguous(self):
        weeks = self._weeks_from_january_anchor()
        # Each week's end must equal the next week's start.
        for i, (current, following) in enumerate(zip(weeks, weeks[1:])):
            assert current[2] == following[1], (
                f"Week {i} end != week {i+1} start"
            )

    def test_anchor_in_future_returns_empty(self):
        weeks = _compute_show_weeks(date(2099, 1, 1), 999, show_day=2, show_hour=22)
        assert weeks == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_backfill_populates_db(db):
    """Backfill creates episode 521 and attaches at least one fetched like to it."""
    liked = [
        _make_track(1, "2026-01-02T05:00:00+00:00"),
        _make_track(2, "2026-01-04T15:00:00+00:00"),
        _make_track(3, "2026-01-09T10:00:00+00:00"),
    ]
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 12345
    soundcloud.fetch_likes.return_value = liked

    await run_backfill(
        db=db,
        soundcloud=soundcloud,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    shows = db.list_shows(limit=100, offset=0)
    assert len(shows) >= 1

    anchor_show = next((s for s in shows if s.episode_number == 521), None)
    assert anchor_show is not None

    stored_ids = [row["track_id"] for row in db.get_show_tracks(anchor_show.id)]
    assert any(track_id in stored_ids for track_id in (1, 2, 3))
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_backfill_partitions_tracks_by_week(db):
    """Likes from different weeks must land in at most one episode each.

    Missing episodes are treated as failures rather than as empty track
    sets; otherwise the disjointness check passes vacuously when backfill
    creates no shows or stores no tracks at all.
    """
    mock_sc = AsyncMock()
    mock_sc.resolve_user.return_value = 12345

    t_week1 = _make_track(10, "2026-01-02T12:00:00+00:00")
    t_week2 = _make_track(20, "2026-01-10T12:00:00+00:00")
    mock_sc.fetch_likes.return_value = [t_week1, t_week2]

    await run_backfill(
        db=db,
        soundcloud=mock_sc,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    shows = db.list_shows(limit=100, offset=0)

    ids_by_episode = {}
    for episode in (521, 522):
        show = next((s for s in shows if s.episode_number == episode), None)
        # NOTE(review): assumes backfill creates a show for every computed
        # week, including ones without tracks — confirm against run_backfill.
        assert show is not None, f"Episode {episode} was not created by backfill"
        ids_by_episode[episode] = {
            t["track_id"] for t in db.get_show_tracks(show.id)
        }

    ids_521 = ids_by_episode[521]
    ids_522 = ids_by_episode[522]

    assert ids_521 & ids_522 == set(), "Tracks should not appear in multiple weeks"

    # Guard against the vacuous pass: something must have been stored, and
    # only tracks that were actually fetched may appear.
    stored = ids_521 | ids_522
    assert stored, "Backfill stored none of the fetched likes"
    assert stored <= {10, 20}, "Backfill stored tracks that were never fetched"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_backfill_no_tracks(db):
    """With no likes fetched, backfill still creates shows, all without tracks."""
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 12345
    soundcloud.fetch_likes.return_value = []

    await run_backfill(
        db=db,
        soundcloud=soundcloud,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    created = db.list_shows(limit=100, offset=0)
    assert len(created) >= 1
    for show in created:
        assert len(db.get_show_tracks(show.id)) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_backfill_idempotent(db):
    """Running backfill twice with the same data shouldn't duplicate shows."""
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 12345
    soundcloud.fetch_likes.return_value = [
        _make_track(1, "2026-01-05T12:00:00+00:00"),
    ]

    async def backfill_and_count():
        # One backfill pass with identical arguments, then the show count.
        await run_backfill(
            db=db,
            soundcloud=soundcloud,
            soundcloud_user="nicktherat",
            show_day=2,
            show_hour=22,
            anchor_episode=521,
            anchor_aired=date(2026, 1, 7),
        )
        return len(db.list_shows(limit=1000, offset=0))

    count_first = await backfill_and_count()
    count_second = await backfill_and_count()

    assert count_first == count_second
|
||||
@@ -226,6 +226,52 @@ def test_add_track_to_show_at_position(db):
|
||||
assert tracks[2]["track_id"] == 2
|
||||
|
||||
|
||||
def test_get_or_create_show_with_episode_number(db):
    """An episode number given at creation survives a later lookup without one."""
    window = (
        datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc),
        datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc),
    )
    created = db.get_or_create_show(*window, episode_number=521)
    assert created.episode_number == 521

    fetched = db.get_or_create_show(*window)
    assert fetched.id == created.id
    assert fetched.episode_number == 521
|
||||
|
||||
|
||||
def test_get_or_create_show_updates_episode_number(db):
    """Passing an episode number for an existing unnumbered show sets it in place."""
    window = (
        datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc),
        datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc),
    )
    unnumbered = db.get_or_create_show(*window)
    assert unnumbered.episode_number is None

    numbered = db.get_or_create_show(*window, episode_number=521)
    assert numbered.id == unnumbered.id
    assert numbered.episode_number == 521
|
||||
|
||||
|
||||
def test_get_latest_episode_number(db):
    """The latest episode number is None when empty, then follows each new show."""
    assert db.get_latest_episode_number() is None

    weekly_shows = [
        (
            datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc),
            datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc),
            521,
        ),
        (
            datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc),
            datetime(2026, 1, 22, 3, 0, 0, tzinfo=timezone.utc),
            522,
        ),
    ]
    for week_start, week_end, episode in weekly_shows:
        db.get_or_create_show(week_start, week_end, episode_number=episode)
        assert db.get_latest_episode_number() == episode
|
||||
|
||||
|
||||
def test_update_show_episode_number(db):
    """``update_show_episode_number`` persists the number for later lookups."""
    window = (
        datetime(2026, 1, 8, 3, 0, 0, tzinfo=timezone.utc),
        datetime(2026, 1, 15, 3, 0, 0, tzinfo=timezone.utc),
    )
    original = db.get_or_create_show(*window)
    assert original.episode_number is None

    db.update_show_episode_number(original.id, 521)

    reloaded = db.get_or_create_show(*window)
    assert reloaded.episode_number == 521
|
||||
|
||||
|
||||
def test_has_track_in_show(db):
|
||||
week_start = datetime(2026, 3, 13, 2, 0, 0, tzinfo=timezone.utc)
|
||||
week_end = datetime(2026, 3, 20, 2, 0, 0, tzinfo=timezone.utc)
|
||||
|
||||
@@ -93,6 +93,89 @@ async def test_poll_once_removes_unliked_tracks():
|
||||
assert call_args[0][1] == [1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_poll_once_auto_assigns_episode_number():
    """An unnumbered show gets latest + 1 (530 -> 531) during a poll."""
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 206979918
    soundcloud.fetch_likes.return_value = [
        _make_track(1, "2026-03-14T01:00:00+00:00"),
    ]

    # Current show has no episode number; the most recent numbered one is 530.
    current_show = MagicMock(id=5, episode_number=None)
    database = MagicMock()
    database.get_or_create_show.return_value = current_show
    database.get_latest_episode_number.return_value = 530

    poller = Poller(
        db=database,
        soundcloud=soundcloud,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        poll_interval=3600,
    )
    await poller.poll_once()

    database.update_show_episode_number.assert_called_once_with(5, 531)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_poll_once_skips_numbering_when_no_history():
    """With no previously numbered episode, the poller assigns no number."""
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 206979918
    soundcloud.fetch_likes.return_value = []

    # Unnumbered show and no latest episode number to increment from.
    current_show = MagicMock(id=1, episode_number=None)
    database = MagicMock()
    database.get_or_create_show.return_value = current_show
    database.get_latest_episode_number.return_value = None

    poller = Poller(
        db=database,
        soundcloud=soundcloud,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        poll_interval=3600,
    )
    await poller.poll_once()

    database.update_show_episode_number.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_poll_once_skips_numbering_when_already_assigned():
    """A show that already has an episode number is neither looked up nor renumbered."""
    soundcloud = AsyncMock()
    soundcloud.resolve_user.return_value = 206979918
    soundcloud.fetch_likes.return_value = []

    # The current show already carries episode number 530.
    current_show = MagicMock(id=1, episode_number=530)
    database = MagicMock()
    database.get_or_create_show.return_value = current_show

    poller = Poller(
        db=database,
        soundcloud=soundcloud,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        poll_interval=3600,
    )
    await poller.poll_once()

    database.get_latest_episode_number.assert_not_called()
    database.update_show_episode_number.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_poll_once_full_refresh():
|
||||
mock_sc = AsyncMock()
|
||||
|
||||
Reference in New Issue
Block a user