"""SoundCloud client helpers.

Provides cursor construction for ``user-track-likes`` and a small async client
that discovers the ``client_id`` embedded in the SoundCloud home page.
"""

import json
import logging
import re
from datetime import datetime, timezone
from urllib.parse import parse_qs, urlparse

import httpx

from ntr_fetcher.models import Track

logger = logging.getLogger(__name__)

SOUNDCLOUD_BASE = "https://soundcloud.com"
API_BASE = "https://api-v2.soundcloud.com"

# Captures the JSON array assigned to ``__sc_hydration`` in the page source.
# The match is non-greedy: it stops at the first ``]`` that is followed by
# optional whitespace and a ``;``.
HYDRATION_PATTERN = re.compile(r"__sc_hydration\s*=\s*(\[.*?\])\s*;", re.DOTALL)


def _build_cursor(until: datetime, user_id: int) -> str:
    """Build a ``user-track-likes`` cursor string for *until* and *user_id*.

    Args:
        until: Timestamp embedded in the cursor. A timezone-aware value is
            converted to UTC first; a naive value is formatted as-is (i.e. it
            is assumed to already be UTC).
        user_id: Numeric user id, zero-padded to 22 digits in the cursor.

    Returns:
        A string of the form
        ``<ts>,user-track-likes,000-<padded user id>-99999999999999999999``
        where ``<ts>`` is ``YYYY-MM-DDTHH:MM:SS.000Z``.
    """
    # The format string hard-codes a "Z" (UTC) suffix, so an aware datetime in
    # another zone must be shifted to UTC or the cursor names the wrong instant.
    if until.utcoffset() is not None:
        until = until.astimezone(timezone.utc)
    ts = until.strftime("%Y-%m-%dT%H:%M:%S.000Z")
    padded_user = str(user_id).zfill(22)
    return f"{ts},user-track-likes,000-{padded_user}-99999999999999999999"


class SoundCloudClient:
    """Async SoundCloud client that discovers and caches a ``client_id``."""

    def __init__(self, http_client: httpx.AsyncClient | None = None) -> None:
        """Create the client.

        Args:
            http_client: Optional ``httpx.AsyncClient`` to use. When omitted,
                a new client with a 15 second timeout is created.
        """
        if http_client is None:
            http_client = httpx.AsyncClient(timeout=15.0)
        self._http = http_client
        # Cached client_id; None until extracted or after invalidation.
        self._client_id: str | None = None

    async def _extract_client_id(self) -> str:
        """Return the SoundCloud ``client_id``, fetching it on first use.

        The id is read from the ``apiClient`` entry of the ``__sc_hydration``
        array embedded in the HTML served at ``SOUNDCLOUD_BASE`` and is cached
        on the instance until :meth:`invalidate_client_id` is called.

        Returns:
            The extracted (or previously cached) client id.

        Raises:
            httpx.HTTPStatusError: If the page request returns an error status.
            ValueError: If the hydration payload is missing, is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``), has no
                ``apiClient`` entry, or that entry carries no usable id.
        """
        if self._client_id is not None:
            return self._client_id

        resp = await self._http.get(SOUNDCLOUD_BASE)
        resp.raise_for_status()

        match = HYDRATION_PATTERN.search(resp.text)
        if not match:
            raise ValueError("Could not find __sc_hydration in SoundCloud HTML — cannot extract client_id")

        hydration = json.loads(match.group(1))
        for entry in hydration:
            # Skip anything that is not an object so a malformed payload
            # cannot surface as an AttributeError.
            if not isinstance(entry, dict) or entry.get("hydratable") != "apiClient":
                continue

            data = entry.get("data")
            client_id = data.get("id") if isinstance(data, dict) else None
            if not client_id:
                # Report a malformed apiClient entry the same way as the other
                # extraction failures instead of leaking KeyError/TypeError.
                raise ValueError("apiClient entry in __sc_hydration has no usable id — cannot extract client_id")

            self._client_id = client_id
            if data.get("isExpiring", False):
                logger.warning("SoundCloud client_id is marked as expiring")
            return client_id

        raise ValueError("No apiClient entry in __sc_hydration — cannot extract client_id")

    def invalidate_client_id(self) -> None:
        """Drop the cached ``client_id`` so the next extraction refetches it."""
        self._client_id = None

    async def close(self) -> None:
        """Close the underlying HTTP client, including one passed to ``__init__``."""
        await self._http.aclose()