Adds a --init mode that seeds the database with past shows from a given anchor episode/date forward, batch-fetching likes from SoundCloud and partitioning them into weekly buckets. Episode numbers are tracked in the shows table and auto-incremented by the poller for new shows. Includes full API documentation (docs/api.md) and updated README. Made-with: Cursor
199 lines · 5.5 KiB · Python
from datetime import date, datetime, timezone
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from ntr_fetcher.backfill import _compute_show_weeks, run_backfill
|
|
from ntr_fetcher.db import Database
|
|
from ntr_fetcher.models import Track
|
|
|
|
|
|
@pytest.fixture
def db(tmp_path):
    """Provide a schema-initialized Database backed by a file under tmp_path."""
    instance = Database(str(tmp_path / "test.db"))
    instance.initialize()
    return instance
|
|
|
|
|
|
def _make_track(track_id: int, liked_at: str) -> Track:
    """Build a minimal Track fixture.

    Args:
        track_id: numeric SoundCloud track id; also woven into the title
            and permalink so fixtures are distinguishable.
        liked_at: ISO-8601 timestamp string parsed into an aware datetime.

    Note: the parameter was renamed from ``id`` to ``track_id`` to avoid
    shadowing the builtin; all callers in this module pass it positionally.
    """
    return Track(
        id=track_id,
        title=f"Track {track_id}",
        artist="Artist",
        permalink_url=f"https://soundcloud.com/a/t-{track_id}",
        artwork_url=None,
        duration_ms=180000,
        license="cc-by",
        liked_at=datetime.fromisoformat(liked_at),
        raw_json="{}",
    )
|
|
|
|
|
|
class TestComputeShowWeeks:
    """Tests for the weekly-bucket computation used by the --init backfill."""

    def test_single_week_anchor_is_today(self):
        # Anchoring on today's date must still produce the anchor week itself.
        weeks = _compute_show_weeks(date.today(), 100, show_day=2, show_hour=22)
        assert weeks
        first_episode = weeks[0][0]
        assert first_episode == 100

    def test_multiple_weeks(self):
        # Episode numbers increment by one per week, and every window is non-empty.
        weeks = _compute_show_weeks(
            anchor_aired=date(2026, 1, 7),
            anchor_episode=521,
            show_day=2,
            show_hour=22,
        )
        assert [w[0] for w in weeks[:2]] == [521, 522]
        for offset, (episode, window_start, window_end) in enumerate(weeks):
            assert episode == 521 + offset
            assert window_end > window_start

    def test_week_boundaries_are_utc(self):
        # Every boundary datetime must carry the UTC timezone.
        weeks = _compute_show_weeks(
            anchor_aired=date(2026, 1, 7),
            anchor_episode=521,
            show_day=2,
            show_hour=22,
        )
        for _, window_start, window_end in weeks:
            assert window_start.tzinfo == timezone.utc
            assert window_end.tzinfo == timezone.utc

    def test_consecutive_weeks_are_contiguous(self):
        # Each week's end must be exactly the next week's start — no gaps, no overlap.
        weeks = _compute_show_weeks(
            anchor_aired=date(2026, 1, 7),
            anchor_episode=521,
            show_day=2,
            show_hour=22,
        )
        for idx, (current, following) in enumerate(zip(weeks, weeks[1:])):
            assert current[2] == following[1], (
                f"Week {idx} end != week {idx + 1} start"
            )

    def test_anchor_in_future_returns_empty(self):
        # A show that has not aired yet yields no weeks to backfill.
        weeks = _compute_show_weeks(
            date(2099, 1, 1), 999, show_day=2, show_hour=22
        )
        assert weeks == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_backfill_populates_db(db):
    """Backfilling with three liked tracks creates shows and attaches tracks."""
    sc = AsyncMock()
    sc.resolve_user.return_value = 12345
    sc.fetch_likes.return_value = [
        _make_track(1, "2026-01-02T05:00:00+00:00"),
        _make_track(2, "2026-01-04T15:00:00+00:00"),
        _make_track(3, "2026-01-09T10:00:00+00:00"),
    ]

    await run_backfill(
        db=db,
        soundcloud=sc,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    shows = db.list_shows(limit=100, offset=0)
    assert shows

    # The anchor episode itself must have been created...
    anchor_show = next((s for s in shows if s.episode_number == 521), None)
    assert anchor_show is not None

    # ...and at least one of the liked tracks assigned to it.
    assigned_ids = [row["track_id"] for row in db.get_show_tracks(anchor_show.id)]
    assert any(tid in assigned_ids for tid in (1, 2, 3))
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_backfill_partitions_tracks_by_week(db):
    """Tracks liked in different weeks must land in different shows.

    The previous version fell back to empty sets when either show was
    missing, which made the final disjointness assertion vacuously true
    even if backfill created nothing. Both weekly shows are now required
    to exist before comparing their track memberships.
    """
    mock_sc = AsyncMock()
    mock_sc.resolve_user.return_value = 12345

    t_week1 = _make_track(10, "2026-01-02T12:00:00+00:00")  # anchor week
    t_week2 = _make_track(20, "2026-01-10T12:00:00+00:00")  # following week
    mock_sc.fetch_likes.return_value = [t_week1, t_week2]

    await run_backfill(
        db=db,
        soundcloud=mock_sc,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    shows = db.list_shows(limit=100, offset=0)
    ep_521 = next((s for s in shows if s.episode_number == 521), None)
    ep_522 = next((s for s in shows if s.episode_number == 522), None)
    assert ep_521 is not None, "anchor-week show (521) was not created"
    assert ep_522 is not None, "following-week show (522) was not created"

    ids_521 = {t["track_id"] for t in db.get_show_tracks(ep_521.id)}
    ids_522 = {t["track_id"] for t in db.get_show_tracks(ep_522.id)}
    assert ids_521.isdisjoint(ids_522), "Tracks should not appear in multiple weeks"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_backfill_no_tracks(db):
    """With zero likes, backfill still creates shows, each with no tracks."""
    sc = AsyncMock()
    sc.resolve_user.return_value = 12345
    sc.fetch_likes.return_value = []

    await run_backfill(
        db=db,
        soundcloud=sc,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    shows = db.list_shows(limit=100, offset=0)
    assert shows
    # Every created show must be empty — there were no likes to partition.
    for show in shows:
        assert len(db.get_show_tracks(show.id)) == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_backfill_idempotent(db):
    """Running backfill twice with the same data shouldn't duplicate shows."""
    sc = AsyncMock()
    sc.resolve_user.return_value = 12345
    sc.fetch_likes.return_value = [_make_track(1, "2026-01-05T12:00:00+00:00")]

    params = dict(
        db=db,
        soundcloud=sc,
        soundcloud_user="nicktherat",
        show_day=2,
        show_hour=22,
        anchor_episode=521,
        anchor_aired=date(2026, 1, 7),
    )

    await run_backfill(**params)
    count_after_first = len(db.list_shows(limit=1000, offset=0))

    # Second run over identical data must not add any new shows.
    await run_backfill(**params)
    count_after_second = len(db.list_shows(limit=1000, offset=0))

    assert count_after_first == count_after_second
|