fix: handle SoundCloud API 5xx errors with client_id refresh, backoff, and cursor fallback
SoundCloud began rejecting the fabricated pagination cursor with HTTP 500 errors. This commit fixes the cursor's user_id padding (zfill 22→20) to match the documented format, adds retry with exponential backoff for 5xx responses in _api_get, and adds a fallback in fetch_likes that drops the fabricated cursor when it causes persistent 500s.
Made-with: Cursor
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
@@ -17,7 +18,7 @@ HYDRATION_PATTERN = re.compile(r"__sc_hydration\s*=\s*(\[.*?\])\s*;", re.DOTALL)
|
|||||||
|
|
||||||
def _build_cursor(until: datetime, user_id: int) -> str:
|
def _build_cursor(until: datetime, user_id: int) -> str:
|
||||||
ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||||
padded_user = str(user_id).zfill(22)
|
padded_user = str(user_id).zfill(20)
|
||||||
return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999"
|
return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999"
|
||||||
|
|
||||||
|
|
||||||
@@ -55,19 +56,27 @@ class SoundCloudClient:
|
|||||||
params = dict(params or {})
|
params = dict(params or {})
|
||||||
params["client_id"] = client_id
|
params["client_id"] = client_id
|
||||||
|
|
||||||
for attempt in range(3):
|
max_attempts = 3
|
||||||
|
for attempt in range(max_attempts):
|
||||||
resp = await self._http.get(url, params=params)
|
resp = await self._http.get(url, params=params)
|
||||||
if resp.status_code == 401:
|
|
||||||
logger.warning("Got 401 from SoundCloud API, refreshing client_id (attempt %d)", attempt + 1)
|
if resp.status_code == 401 or resp.status_code >= 500:
|
||||||
|
logger.warning(
|
||||||
|
"Got %d from SoundCloud API, refreshing client_id (attempt %d/%d)",
|
||||||
|
resp.status_code, attempt + 1, max_attempts,
|
||||||
|
)
|
||||||
self.invalidate_client_id()
|
self.invalidate_client_id()
|
||||||
|
if resp.status_code >= 500:
|
||||||
|
await asyncio.sleep(2 ** attempt)
|
||||||
client_id = await self._extract_client_id()
|
client_id = await self._extract_client_id()
|
||||||
params["client_id"] = client_id
|
params["client_id"] = client_id
|
||||||
continue
|
continue
|
||||||
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
raise httpx.HTTPStatusError(
|
raise httpx.HTTPStatusError(
|
||||||
"Failed after 3 attempts (401)",
|
f"Failed after {max_attempts} attempts (last status: {resp.status_code})",
|
||||||
request=resp.request,
|
request=resp.request,
|
||||||
response=resp,
|
response=resp,
|
||||||
)
|
)
|
||||||
@@ -86,15 +95,24 @@ class SoundCloudClient:
|
|||||||
until: datetime,
|
until: datetime,
|
||||||
limit: int = 50,
|
limit: int = 50,
|
||||||
) -> list[Track]:
|
) -> list[Track]:
|
||||||
cursor = _build_cursor(until, user_id)
|
cursor: str | None = _build_cursor(until, user_id)
|
||||||
collected: list[Track] = []
|
collected: list[Track] = []
|
||||||
|
used_fabricated_cursor = True
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
params: dict = {"limit": limit}
|
params: dict = {"limit": limit}
|
||||||
if cursor:
|
if cursor:
|
||||||
params["offset"] = cursor
|
params["offset"] = cursor
|
||||||
|
|
||||||
|
try:
|
||||||
resp = await self._api_get(f"{API_BASE}/users/{user_id}/likes", params=params)
|
resp = await self._api_get(f"{API_BASE}/users/{user_id}/likes", params=params)
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
if used_fabricated_cursor and cursor and exc.response.status_code >= 500:
|
||||||
|
logger.warning("Fabricated cursor rejected (HTTP %d), retrying without cursor", exc.response.status_code)
|
||||||
|
cursor = None
|
||||||
|
used_fabricated_cursor = False
|
||||||
|
continue
|
||||||
|
raise
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
collection = data.get("collection", [])
|
collection = data.get("collection", [])
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ntr_fetcher.soundcloud import SoundCloudClient
|
from ntr_fetcher.soundcloud import SoundCloudClient
|
||||||
@@ -151,3 +153,76 @@ async def test_fetch_likes_retries_on_401(httpx_mock):
|
|||||||
until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
|
until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
)
|
)
|
||||||
assert len(tracks) == 1
|
assert len(tracks) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
|
||||||
|
async def test_fetch_likes_retries_on_500(mock_sleep, httpx_mock):
|
||||||
|
httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"),
|
||||||
|
status_code=500,
|
||||||
|
)
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url="https://soundcloud.com",
|
||||||
|
text=FAKE_HTML.replace("test_client_id_abc123", "fresh_client_id_789"),
|
||||||
|
)
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"),
|
||||||
|
json=FAKE_LIKES_RESPONSE,
|
||||||
|
)
|
||||||
|
client = SoundCloudClient()
|
||||||
|
tracks = await client.fetch_likes(
|
||||||
|
user_id=206979918,
|
||||||
|
since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
)
|
||||||
|
assert len(tracks) == 1
|
||||||
|
mock_sleep.assert_called_once_with(1)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
|
||||||
|
async def test_fetch_likes_falls_back_to_no_cursor_on_persistent_500(mock_sleep, httpx_mock):
|
||||||
|
"""When the fabricated cursor causes persistent 500s, fall back to no cursor."""
|
||||||
|
httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
|
||||||
|
for _ in range(3):
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"),
|
||||||
|
status_code=500,
|
||||||
|
)
|
||||||
|
httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"),
|
||||||
|
json=FAKE_LIKES_RESPONSE,
|
||||||
|
)
|
||||||
|
client = SoundCloudClient()
|
||||||
|
tracks = await client.fetch_likes(
|
||||||
|
user_id=206979918,
|
||||||
|
since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
)
|
||||||
|
assert len(tracks) == 1
|
||||||
|
assert mock_sleep.call_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
|
||||||
|
async def test_fetch_likes_raises_when_all_requests_fail_500(mock_sleep, httpx_mock):
|
||||||
|
"""When both fabricated cursor and cursorless fallback fail, the error propagates."""
|
||||||
|
httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
|
||||||
|
# 3 attempts with the fabricated cursor + 3 attempts for the cursorless fallback = 6 API calls
|
||||||
|
for _ in range(6):
|
||||||
|
httpx_mock.add_response(
|
||||||
|
url=re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*"),
|
||||||
|
status_code=500,
|
||||||
|
)
|
||||||
|
httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
|
||||||
|
client = SoundCloudClient()
|
||||||
|
with pytest.raises(httpx.HTTPStatusError, match="500"):
|
||||||
|
await client.fetch_likes(
|
||||||
|
user_id=206979918,
|
||||||
|
since=datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
until=datetime(2026, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
|
||||||
|
)
|
||||||
|
assert mock_sleep.call_count == 6
|
||||||
|
|||||||
Reference in New Issue
Block a user