From a328684af08c2822de546239aff5851998e8e432 Mon Sep 17 00:00:00 2001 From: cottongin Date: Wed, 25 Mar 2026 08:20:20 -0400 Subject: [PATCH] fix: handle SoundCloud API 5xx errors with client_id refresh, backoff, and cursor fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SoundCloud began rejecting the fabricated pagination cursor with 500 errors. Fixed cursor user_id padding (zfill 22→20) to match the documented format, added 5xx retry with exponential backoff in _api_get, and added a fallback in fetch_likes that drops the fabricated cursor when it causes persistent 500s. Made-with: Cursor --- src/ntr_fetcher/soundcloud.py | 32 +++++++++++---- tests/test_soundcloud.py | 75 +++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/ntr_fetcher/soundcloud.py b/src/ntr_fetcher/soundcloud.py index 038eb1f..39ded9e 100644 --- a/src/ntr_fetcher/soundcloud.py +++ b/src/ntr_fetcher/soundcloud.py @@ -1,3 +1,4 @@ +import asyncio import json import logging import re @@ -17,7 +18,7 @@ HYDRATION_PATTERN = re.compile(r"__sc_hydration\s*=\s*(\[.*?\])\s*;", re.DOTALL) def _build_cursor(until: datetime, user_id: int) -> str: ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z") - padded_user = str(user_id).zfill(22) + padded_user = str(user_id).zfill(20) return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999" @@ -55,19 +56,27 @@ class SoundCloudClient: params = dict(params or {}) params["client_id"] = client_id - for attempt in range(3): + max_attempts = 3 + for attempt in range(max_attempts): resp = await self._http.get(url, params=params) - if resp.status_code == 401: - logger.warning("Got 401 from SoundCloud API, refreshing client_id (attempt %d)", attempt + 1) + + if resp.status_code == 401 or resp.status_code >= 500: + logger.warning( + "Got %d from SoundCloud API, refreshing client_id (attempt %d/%d)", + resp.status_code, attempt + 1, max_attempts, + ) 
self.invalidate_client_id() + if resp.status_code >= 500: + await asyncio.sleep(2 ** attempt) client_id = await self._extract_client_id() params["client_id"] = client_id continue + resp.raise_for_status() return resp raise httpx.HTTPStatusError( - "Failed after 3 attempts (401)", + f"Failed after {max_attempts} attempts (last status: {resp.status_code})", request=resp.request, response=resp, ) @@ -86,15 +95,24 @@ class SoundCloudClient: until: datetime, limit: int = 50, ) -> list[Track]: - cursor = _build_cursor(until, user_id) + cursor: str | None = _build_cursor(until, user_id) collected: list[Track] = [] + used_fabricated_cursor = True while True: params: dict = {"limit": limit} if cursor: params["offset"] = cursor - resp = await self._api_get(f"{API_BASE}/users/{user_id}/likes", params=params) + try: + resp = await self._api_get(f"{API_BASE}/users/{user_id}/likes", params=params) + except httpx.HTTPStatusError as exc: + if used_fabricated_cursor and cursor and exc.response.status_code >= 500: + logger.warning("Fabricated cursor rejected (HTTP %d), retrying without cursor", exc.response.status_code) + cursor = None + used_fabricated_cursor = False + continue + raise data = resp.json() collection = data.get("collection", []) diff --git a/tests/test_soundcloud.py b/tests/test_soundcloud.py index 3514d77..d7f8093 100644 --- a/tests/test_soundcloud.py +++ b/tests/test_soundcloud.py @@ -1,6 +1,8 @@ import re from datetime import datetime, timezone +from unittest.mock import patch +import httpx import pytest from ntr_fetcher.soundcloud import SoundCloudClient @@ -151,3 +153,76 @@ async def test_fetch_likes_retries_on_401(httpx_mock): until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc), ) assert len(tracks) == 1 + + +@pytest.mark.asyncio +@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None) +async def test_fetch_likes_retries_on_500(mock_sleep, httpx_mock): + httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML) + 
httpx_mock.add_response( + url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"), + status_code=500, + ) + httpx_mock.add_response( + url="https://soundcloud.com", + text=FAKE_HTML.replace("test_client_id_abc123", "fresh_client_id_789"), + ) + httpx_mock.add_response( + url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"), + json=FAKE_LIKES_RESPONSE, + ) + client = SoundCloudClient() + tracks = await client.fetch_likes( + user_id=206979918, + since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc), + until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc), + ) + assert len(tracks) == 1 + mock_sleep.assert_called_once_with(1) + + +@pytest.mark.asyncio +@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None) +async def test_fetch_likes_falls_back_to_no_cursor_on_persistent_500(mock_sleep, httpx_mock): + """When the fabricated cursor causes persistent 500s, fall back to no cursor.""" + httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML) + for _ in range(3): + httpx_mock.add_response( + url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"), + status_code=500, + ) + httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML) + httpx_mock.add_response( + url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"), + json=FAKE_LIKES_RESPONSE, + ) + client = SoundCloudClient() + tracks = await client.fetch_likes( + user_id=206979918, + since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc), + until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc), + ) + assert len(tracks) == 1 + assert mock_sleep.call_count == 3 + + +@pytest.mark.asyncio +@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None) +async def test_fetch_likes_raises_when_all_requests_fail_500(mock_sleep, httpx_mock): + """When both fabricated cursor and cursorless fallback fail, the error propagates.""" + httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML) + # 
3 attempts with the fabricated cursor + 3 attempts for the cursorless fallback = 6 API calls + for _ in range(6): + httpx_mock.add_response( + url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"), + status_code=500, + ) + httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML) + client = SoundCloudClient() + with pytest.raises(httpx.HTTPStatusError, match="500"): + await client.fetch_likes( + user_id=206979918, + since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc), + until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc), + ) + assert mock_sleep.call_count == 6