feat: add SoundCloud client with client_id extraction
Made-with: Cursor
This commit is contained in:
54
src/ntr_fetcher/soundcloud.py
Normal file
54
src/ntr_fetcher/soundcloud.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from ntr_fetcher.models import Track
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Base URL of the SoundCloud web site; its HTML is fetched to scrape the client_id.
SOUNDCLOUD_BASE = "https://soundcloud.com"
|
||||
# Base URL of SoundCloud's api-v2 host.
# NOTE(review): not referenced by the code visible here — presumably used by
# request methods defined elsewhere; confirm before removing.
API_BASE = "https://api-v2.soundcloud.com"
|
||||
# Captures (group 1) the JSON array literal assigned to `__sc_hydration` in the
# page HTML. The match is non-greedy with DOTALL, so it may span lines and
# stops at the first `]` that is followed by optional whitespace and `;`.
HYDRATION_PATTERN = re.compile(r"__sc_hydration\s*=\s*(\[.*?\])\s*;", re.DOTALL)
|
||||
|
||||
|
||||
def _build_cursor(until: datetime, user_id: int) -> str:
    """Build a ``user-track-likes`` pagination cursor string.

    The cursor has the form
    ``<timestamp>,user-track-likes,000-<user_id>-99999999999999999999``
    where the timestamp is rendered as ``YYYY-MM-DDTHH:MM:SS.000Z`` and
    ``user_id`` is left-padded with zeros to 22 digits.

    Args:
        until: Point in time encoded in the cursor. Timezone-aware values are
            converted to UTC first, because the rendered timestamp carries a
            literal ``Z`` (UTC) suffix. Naive values are formatted as-is and
            are therefore assumed to already be expressed in UTC.
        user_id: Numeric user id embedded in the cursor.

    Returns:
        The cursor string.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # Without this, an aware non-UTC value would be printed in its local
    # wall-clock time yet labelled "Z", i.e. the cursor would name the wrong
    # instant.
    if until.utcoffset() is not None:
        until = until.astimezone(timezone.utc)

    # The format hard-codes the milliseconds as ".000"; sub-second precision
    # of ``until`` is not carried into the cursor.
    ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z")
    padded_user = str(user_id).zfill(22)
    return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999"
|
||||
|
||||
|
||||
class SoundCloudClient:
    """Async SoundCloud client that scrapes and caches the public ``client_id``.

    The ``client_id`` is read from the ``__sc_hydration`` data embedded in the
    HTML served at ``SOUNDCLOUD_BASE`` and is cached on the instance until
    :meth:`invalidate_client_id` is called.

    The client can be used as an async context manager; leaving the ``async
    with`` block calls :meth:`close`.
    """

    def __init__(self, http_client: httpx.AsyncClient | None = None):
        """Create the client.

        Args:
            http_client: HTTP client to send requests with. When omitted, a
                new ``httpx.AsyncClient`` with a 15 second timeout is created.
                In both cases :meth:`close` closes it.
        """
        self._http = http_client or httpx.AsyncClient(timeout=15.0)
        # Cached client_id; None means "not fetched yet" or "invalidated".
        self._client_id: str | None = None

    async def __aenter__(self) -> "SoundCloudClient":
        """Return the client itself for use in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the underlying HTTP client when the ``async with`` block ends."""
        await self.close()

    async def _extract_client_id(self) -> str:
        """Return the SoundCloud ``client_id``, fetching it on first use.

        On a cache miss the page at ``SOUNDCLOUD_BASE`` is downloaded, the
        ``__sc_hydration`` JSON array is located with ``HYDRATION_PATTERN``
        and the ``id`` of the first entry whose ``hydratable`` is
        ``"apiClient"`` is cached and returned. A warning is logged when that
        entry is flagged ``isExpiring``.

        Returns:
            The client id string.

        Raises:
            ValueError: If the hydration data is missing from the HTML, has no
                ``apiClient`` entry, or that entry carries no usable ``id``.
                Invalid JSON surfaces as ``json.JSONDecodeError``, which is a
                ``ValueError`` subclass.
            httpx.HTTPStatusError: If the page request returns an error status
                (raised by ``raise_for_status``).
        """
        if self._client_id is not None:
            return self._client_id

        resp = await self._http.get(SOUNDCLOUD_BASE)
        resp.raise_for_status()

        match = HYDRATION_PATTERN.search(resp.text)
        if not match:
            raise ValueError("Could not find __sc_hydration in SoundCloud HTML — cannot extract client_id")

        hydration = json.loads(match.group(1))
        for entry in hydration:
            # Skip anything that is not a JSON object instead of failing with
            # AttributeError on ``entry.get``.
            if not isinstance(entry, dict) or entry.get("hydratable") != "apiClient":
                continue

            data = entry.get("data")
            client_id = data.get("id") if isinstance(data, dict) else None
            # A malformed apiClient entry is reported like the other
            # extraction failures (ValueError mentioning client_id) rather
            # than leaking KeyError/TypeError or caching a non-string value.
            if not isinstance(client_id, str) or not client_id:
                raise ValueError("apiClient entry in __sc_hydration has no usable id — cannot extract client_id")

            if data.get("isExpiring", False):
                logger.warning("SoundCloud client_id is marked as expiring")

            self._client_id = client_id
            return client_id

        raise ValueError("No apiClient entry in __sc_hydration — cannot extract client_id")

    def invalidate_client_id(self) -> None:
        """Drop the cached ``client_id`` so the next lookup fetches it again."""
        self._client_id = None

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._http.aclose()
|
||||
50
tests/test_soundcloud.py
Normal file
50
tests/test_soundcloud.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
from ntr_fetcher.soundcloud import SoundCloudClient
|
||||
|
||||
|
||||
# Minimal stand-in for the SoundCloud landing page: a script tag assigning a
# hydration array whose "apiClient" entry carries a non-expiring client id.
FAKE_HTML = """
<html><head><script>
window.__sc_hydration = [
{"hydratable": "user", "data": {}},
{"hydratable": "apiClient", "data": {"id": "test_client_id_abc123", "isExpiring": false}}
];
</script></head></html>
"""
|
||||
|
||||
# Same shape as a SoundCloud landing page, but the only hydration entry is an
# "apiClient" whose client id is flagged as expiring.
# NOTE(review): no test visible in this file uses this fixture yet.
FAKE_HTML_EXPIRING = """
<html><head><script>
window.__sc_hydration = [
{"hydratable": "apiClient", "data": {"id": "expiring_id_xyz", "isExpiring": true}}
];
</script></head></html>
"""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_client_id(httpx_mock):
    """The client id is taken from the ``apiClient`` hydration entry."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    client = SoundCloudClient()
    try:
        client_id = await client._extract_client_id()
    finally:
        # SoundCloudClient() created its own AsyncClient; release it even if
        # the extraction raises.
        await client.close()
    assert client_id == "test_client_id_abc123"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_client_id_caches(httpx_mock):
    """A second lookup reuses the cached id and sends no further request."""
    httpx_mock.add_response(url="https://soundcloud.com", text=FAKE_HTML)
    client = SoundCloudClient()
    try:
        id1 = await client._extract_client_id()
        id2 = await client._extract_client_id()
    finally:
        # SoundCloudClient() created its own AsyncClient; release it even if
        # the extraction raises.
        await client.close()
    assert id1 == id2
    # Only the first call may have reached the (mocked) network.
    assert len(httpx_mock.get_requests()) == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_client_id_bad_html(httpx_mock):
    """HTML without hydration data raises a ValueError mentioning client_id."""
    httpx_mock.add_response(url="https://soundcloud.com", text="<html>no hydration here</html>")
    client = SoundCloudClient()
    try:
        with pytest.raises(ValueError, match="client_id"):
            await client._extract_client_id()
    finally:
        # SoundCloudClient() created its own AsyncClient; release it on every
        # path out of the test.
        await client.close()
|
||||
Reference in New Issue
Block a user