Files
NtR-soudcloud-fetcher/tests/test_soundcloud.py
cottongin a328684af0 fix: handle SoundCloud API 5xx errors with client_id refresh, backoff, and cursor fallback
SoundCloud began rejecting the fabricated pagination cursor with 500
errors. Fixed cursor user_id padding (zfill 22→20) to match the
documented format, added 5xx retry with exponential backoff in _api_get,
and added a fallback in fetch_likes that drops the fabricated cursor
when it causes persistent 500s.

Made-with: Cursor
2026-03-25 08:20:20 -04:00

229 lines
7.9 KiB
Python

import re
from datetime import datetime, timezone
from unittest.mock import patch
import httpx
import pytest
from ntr_fetcher.soundcloud import SoundCloudClient
# Minimal stand-in for the page served at https://soundcloud.com: a
# ``window.__sc_hydration`` array whose ``apiClient`` entry carries the
# client id ("test_client_id_abc123") that the extraction tests expect back.
FAKE_HTML = """
<html><head><script>
window.__sc_hydration = [
{"hydratable": "user", "data": {}},
{"hydratable": "apiClient", "data": {"id": "test_client_id_abc123", "isExpiring": false}}
];
</script></head></html>
"""
# Same page shape, but the only hydration entry is an ``apiClient`` whose
# ``isExpiring`` flag is true. Not referenced by the tests visible in this
# part of the file.
FAKE_HTML_EXPIRING = """
<html><head><script>
window.__sc_hydration = [
{"hydratable": "apiClient", "data": {"id": "expiring_id_xyz", "isExpiring": true}}
];
</script></head></html>
"""
@pytest.mark.asyncio
async def test_extract_client_id(httpx_mock):
    """The id found in the mocked homepage's hydration payload is returned."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)

    extracted = await SoundCloudClient()._extract_client_id()

    assert extracted == "test_client_id_abc123"
@pytest.mark.asyncio
async def test_extract_client_id_caches(httpx_mock):
    """Two consecutive extractions agree and cost a single HTTP request."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    sc = SoundCloudClient()

    first = await sc._extract_client_id()
    second = await sc._extract_client_id()

    assert first == second
    # The request log must contain exactly the one homepage fetch.
    recorded = httpx_mock.get_requests()
    assert len(recorded) == 1
@pytest.mark.asyncio
async def test_extract_client_id_bad_html(httpx_mock):
    """A homepage without hydration data raises ValueError mentioning client_id."""
    page_without_hydration = "<html>no hydration here</html>"
    httpx_mock.add_response(url="https://soundcloud.com", text=page_without_hydration)
    sc = SoundCloudClient()

    with pytest.raises(ValueError, match="client_id"):
        await sc._extract_client_id()
# Canned body for the api-v2 ``/resolve`` endpoint mock; ``id`` is the user id
# the likes tests pass to ``fetch_likes``.
FAKE_RESOLVE_RESPONSE = {
    "id": 206979918,
    "kind": "user",
    "username": "NICKtheRAT",
}

# Canned single-page body for the api-v2 ``/users/<id>/likes`` endpoint mock:
# one like, created 2026-03-09, and no ``next_href`` to follow.
FAKE_LIKES_RESPONSE = {
    "collection": [
        {
            "created_at": "2026-03-09T02:25:43Z",
            "kind": "like",
            "track": {
                "id": 12345,
                "title": "Test Track",
                "permalink_url": "https://soundcloud.com/artist/test-track",
                "duration": 180000,
                "full_duration": 180000,
                "genre": "Electronic",
                "tag_list": "",
                "created_at": "2026-03-01T00:00:00Z",
                "description": "",
                "artwork_url": "https://i1.sndcdn.com/artworks-abc-large.jpg",
                "license": "cc-by",
                "user": {
                    "id": 999,
                    "username": "TestArtist",
                    "permalink_url": "https://soundcloud.com/testartist",
                },
                "media": {"transcodings": []},
            },
        },
    ],
    "next_href": None,
}
@pytest.mark.asyncio
async def test_resolve_user(httpx_mock):
    """Resolving a username yields the ``id`` from the mocked resolve payload."""
    resolve_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/resolve.*")
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=resolve_endpoint, json=FAKE_RESOLVE_RESPONSE)

    resolved = await SoundCloudClient().resolve_user("nicktherat")

    assert resolved == 206979918
@pytest.mark.asyncio
async def test_fetch_likes(httpx_mock):
    """A like dated inside the requested window comes back as one track.

    The mocked like was created on 2026-03-09 and the window spans
    2026-03-01 to 2026-03-10 (UTC); title, artist and id are checked against
    the mocked payload.
    """
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=likes_endpoint, json=FAKE_LIKES_RESPONSE)

    window_start = datetime(2026, 3, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 10, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    liked = await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    assert len(liked) == 1
    assert liked[0].title == "Test Track"
    # ``artist`` is expected to match the track's ``user.username`` in the payload.
    assert liked[0].artist == "TestArtist"
    assert liked[0].id == 12345
@pytest.mark.asyncio
async def test_fetch_likes_filters_outside_range(httpx_mock):
    """A like dated before the requested window is dropped.

    The mocked like was created on 2026-03-09; the window here starts on
    2026-03-10 (UTC), so no tracks are expected.
    """
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=likes_endpoint, json=FAKE_LIKES_RESPONSE)

    window_start = datetime(2026, 3, 10, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 12, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    liked = await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    assert len(liked) == 0
@pytest.mark.asyncio
async def test_fetch_likes_retries_on_401(httpx_mock):
    """A 401 from the likes endpoint does not prevent the track from being returned.

    Mock queue, in order: homepage, likes -> 401, homepage carrying a
    different client id, likes -> success.
    """
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")
    refreshed_page = FAKE_HTML.replace("test_client_id_abc123", "new_client_id_456")

    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=likes_endpoint, status_code=401)
    httpx_mock.add_response(url="https://soundcloud.com", text=refreshed_page)
    httpx_mock.add_response(url=likes_endpoint, json=FAKE_LIKES_RESPONSE)

    window_start = datetime(2026, 3, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 10, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    liked = await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    assert len(liked) == 1
@pytest.mark.asyncio
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
async def test_fetch_likes_retries_on_500(mock_sleep, httpx_mock):
    """A single 500 from the likes endpoint is retried after one sleep of 1.

    Mock queue, in order: homepage, likes -> 500, homepage carrying a
    different client id, likes -> success. ``asyncio.sleep`` is patched in
    ``ntr_fetcher.soundcloud`` so the test does not actually wait.
    """
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")
    refreshed_page = FAKE_HTML.replace("test_client_id_abc123", "fresh_client_id_789")

    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=likes_endpoint, status_code=500)
    httpx_mock.add_response(url="https://soundcloud.com", text=refreshed_page)
    httpx_mock.add_response(url=likes_endpoint, json=FAKE_LIKES_RESPONSE)

    window_start = datetime(2026, 3, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 10, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    liked = await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    assert len(liked) == 1
    mock_sleep.assert_called_once_with(1)
@pytest.mark.asyncio
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
async def test_fetch_likes_falls_back_to_no_cursor_on_persistent_500(mock_sleep, httpx_mock):
    """Persistent 500s for the fabricated cursor lead to a cursorless retry.

    Three consecutive 500s are queued for the likes endpoint before a
    successful response; the call must still return the track and must have
    slept three times along the way.
    """
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")

    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    for _attempt in range(3):
        httpx_mock.add_response(url=likes_endpoint, status_code=500)
        # One homepage response per failed call, mirroring the
        # 500 -> homepage sequence used in test_fetch_likes_retries_on_500.
        httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    httpx_mock.add_response(url=likes_endpoint, json=FAKE_LIKES_RESPONSE)

    window_start = datetime(2026, 3, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 10, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    liked = await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    assert len(liked) == 1
    assert mock_sleep.call_count == 3
@pytest.mark.asyncio
@patch("ntr_fetcher.soundcloud.asyncio.sleep", return_value=None)
async def test_fetch_likes_raises_when_all_requests_fail_500(mock_sleep, httpx_mock):
    """If the cursor attempts and the cursorless fallback all get 500, the error propagates."""
    likes_endpoint = re.compile(r"https://api-v2\.soundcloud\.com/users/206979918/likes.*")

    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    # 3 retries for fabricated cursor + 3 retries for cursorless fallback = 6 API calls
    for _attempt in range(6):
        httpx_mock.add_response(url=likes_endpoint, status_code=500)
        # One homepage response per failed call, mirroring the
        # 500 -> homepage sequence used in test_fetch_likes_retries_on_500.
        httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)

    window_start = datetime(2026, 3, 1, tzinfo=timezone.utc)
    window_end = datetime(2026, 3, 10, tzinfo=timezone.utc)
    sc = SoundCloudClient()
    with pytest.raises(httpx.HTTPStatusError, match="500"):
        await sc.fetch_likes(user_id=206979918, since=window_start, until=window_end)

    # Checked after the ``raises`` block so it runs once the error has propagated.
    assert mock_sleep.call_count == 6