# src/ntr_fetcher/soundcloud.py
"""SoundCloud helpers: cursor construction and client_id discovery."""

import json
import logging
import re
from datetime import datetime, timezone
from urllib.parse import parse_qs, urlparse

import httpx

from ntr_fetcher.models import Track

logger = logging.getLogger(__name__)

SOUNDCLOUD_BASE = "https://soundcloud.com"
API_BASE = "https://api-v2.soundcloud.com"
# Captures the JSON array assigned to ``__sc_hydration`` in the page HTML.
# DOTALL lets the array span several lines; the non-greedy group stops at the
# first ``]`` that is followed (after optional whitespace) by ``;``.
HYDRATION_PATTERN = re.compile(r"__sc_hydration\s*=\s*(\[.*?\])\s*;", re.DOTALL)


def _build_cursor(until: datetime, user_id: int) -> str:
    """Build a ``user-track-likes`` cursor string ending at *until*.

    Args:
        until: Timestamp embedded in the cursor. A timezone-aware value is
            converted to UTC first, because the formatted timestamp always
            carries a literal ``Z`` suffix. A naive value is formatted as-is
            and is therefore assumed to already be in UTC.
        user_id: Numeric user id, left-padded with zeros to 22 characters.

    Returns:
        ``"<ts>,user-track-likes,000-<padded user id>-99999999999999999999"``
        where ``<ts>`` has the form ``YYYY-MM-DDTHH:MM:SS.000Z``.
    """
    # Without this conversion an aware, non-UTC datetime would be stamped with
    # "Z" while still holding local wall-clock time.
    if until.tzinfo is not None and until.utcoffset() is not None:
        until = until.astimezone(timezone.utc)
    ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z")
    padded_user = str(user_id).zfill(22)
    return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999"


class SoundCloudClient:
    """Async HTTP wrapper that discovers and caches a SoundCloud client_id."""

    def __init__(self, http_client: httpx.AsyncClient | None = None):
        """Store *http_client*, or create one with a 15 second timeout.

        Args:
            http_client: Optional pre-configured client to use for requests.
        """
        # Explicit ``is None`` so a caller-supplied object is never replaced
        # merely because it evaluates as falsy.
        if http_client is None:
            http_client = httpx.AsyncClient(timeout=15.0)
        self._http = http_client
        self._client_id: str | None = None

    async def _extract_client_id(self) -> str:
        """Return the client_id, scraping it from the SoundCloud page once.

        The value is cached on the instance; later calls return the cached
        value without a request until ``invalidate_client_id`` is called.

        Returns:
            The ``id`` of the ``apiClient`` entry in ``__sc_hydration``.

        Raises:
            httpx.HTTPStatusError: If the page request returns an error
                status (raised by ``raise_for_status``).
            ValueError: If the hydration data is missing, is not valid JSON,
                or contains no usable ``apiClient`` entry.
        """
        if self._client_id is not None:
            return self._client_id

        resp = await self._http.get(SOUNDCLOUD_BASE)
        resp.raise_for_status()
        match = HYDRATION_PATTERN.search(resp.text)
        if not match:
            raise ValueError("Could not find __sc_hydration in SoundCloud HTML — cannot extract client_id")

        try:
            hydration = json.loads(match.group(1))
        except json.JSONDecodeError as err:
            # JSONDecodeError is already a ValueError; re-raise with a message
            # that matches the other extraction failures.
            raise ValueError("__sc_hydration is not valid JSON — cannot extract client_id") from err

        for entry in hydration:
            # Skip anything that is not the apiClient record, including
            # malformed (non-dict) entries, instead of crashing on them.
            if not isinstance(entry, dict) or entry.get("hydratable") != "apiClient":
                continue

            data = entry.get("data")
            client_id = data.get("id") if isinstance(data, dict) else None
            if not client_id:
                raise ValueError("apiClient entry in __sc_hydration has no id — cannot extract client_id")

            if data.get("isExpiring", False):
                logger.warning("SoundCloud client_id is marked as expiring")
            self._client_id = client_id
            return client_id

        raise ValueError("No apiClient entry in __sc_hydration — cannot extract client_id")

    def invalidate_client_id(self) -> None:
        """Drop the cached client_id so the next lookup fetches it again."""
        self._client_id = None

    async def close(self) -> None:
        """Close the underlying HTTP client, including a caller-supplied one."""
        await self._http.aclose()
# tests/test_soundcloud.py
"""Tests for client_id discovery in ``SoundCloudClient``."""

import logging
import re

import pytest
import httpx

from ntr_fetcher.soundcloud import SoundCloudClient


# NOTE(review): the markup inside these fixtures was missing from the source
# (both strings were empty). They are reconstructed from what
# ``_extract_client_id`` parses (a ``__sc_hydration = [...];`` assignment with
# an ``apiClient`` entry) and from the id asserted below. Confirm against the
# intended fixtures.
FAKE_HTML = """
<script>window.__sc_hydration = [{"hydratable": "apiClient", "data": {"id": "test_client_id_abc123", "isExpiring": false}}];</script>
"""

FAKE_HTML_EXPIRING = """
<script>window.__sc_hydration = [{"hydratable": "apiClient", "data": {"id": "expiring_client_id_xyz789", "isExpiring": true}}];</script>
"""


@pytest.mark.asyncio
async def test_extract_client_id(httpx_mock):
    """The id of the apiClient hydration entry is returned."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    client = SoundCloudClient()
    try:
        client_id = await client._extract_client_id()
    finally:
        # Always release the AsyncClient the SoundCloudClient created.
        await client.close()
    assert client_id == "test_client_id_abc123"


@pytest.mark.asyncio
async def test_extract_client_id_caches(httpx_mock):
    """A second lookup reuses the cached id and issues no new request."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    client = SoundCloudClient()
    try:
        id1 = await client._extract_client_id()
        id2 = await client._extract_client_id()
    finally:
        await client.close()
    assert id1 == id2
    assert len(httpx_mock.get_requests()) == 1


@pytest.mark.asyncio
async def test_extract_client_id_expiring_warns(httpx_mock, caplog):
    """An expiring client_id is still returned, with a warning logged."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML_EXPIRING)
    client = SoundCloudClient()
    try:
        with caplog.at_level(logging.WARNING, logger="ntr_fetcher.soundcloud"):
            client_id = await client._extract_client_id()
    finally:
        await client.close()
    assert client_id == "expiring_client_id_xyz789"
    assert "marked as expiring" in caplog.text


@pytest.mark.asyncio
async def test_extract_client_id_bad_html(httpx_mock):
    """A page without hydration data raises a ValueError naming client_id."""
    httpx_mock.add_response(url="https://soundcloud.com", text="no hydration here")
    client = SoundCloudClient()
    try:
        with pytest.raises(ValueError, match="client_id"):
            await client._extract_client_id()
    finally:
        await client.close()