fix: detect actual stream format instead of hardcoding 44100 Hz stereo

AudioEngine previously hardcoded 44100 Hz stereo for AudioTrack and
MediaCodec, causing streams at other sample rates (22050, 48000, etc.)
to play at wrong speed and mono streams to be misinterpreted.

Now defers AudioTrack/MediaCodec creation until the first MP3 frame
arrives, reads the actual sample rate and channel mode from the frame
header, and configures output accordingly. Timing constants (frame
duration, frames-per-second) are computed dynamically. Adds
INFO_OUTPUT_FORMAT_CHANGED handling as a safety net and surfaces
detected format (e.g. "44.1 kHz Stereo") in the Now Playing UI.

Made-with: Cursor
This commit is contained in:
cottongin
2026-04-27 04:28:14 -04:00
parent 26dcc8008e
commit 7795904d93
8 changed files with 327 additions and 63 deletions

View File

@@ -18,7 +18,7 @@ android {
minSdk = 28
targetSdk = 35
versionCode = 1
versionName = "1.0"
versionName = "1.1"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}

View File

@@ -39,6 +39,10 @@ class AudioEngine(
@Volatile
private var timedStream: TimedInputStream? = null
private var presentationTimeUs = 0L
private var frameDurationUs = 0L
private var framesPerSecond = 0
private var configuredSampleRate = 0
private var configuredChannelCount = 0
fun start() {
Log.i(TAG, "start() url=$url")
@@ -89,12 +93,28 @@ class AudioEngine(
val tStream = TimedInputStream(connection.inputStream!!)
timedStream = tStream
val sampleRate = 44100
val channelConfig = AudioFormat.CHANNEL_OUT_STEREO
val encoding = AudioFormat.ENCODING_PCM_16BIT
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
var audioTrack: AudioTrack? = null
var codec: MediaCodec? = null
var ringBuffer: RingBuffer? = null
val audioTrack = AudioTrack.Builder()
_events.tryEmit(AudioEngineEvent.Started)
connection.streamInfo?.let { _events.tryEmit(AudioEngineEvent.StreamInfoReceived(it)) }
fun initAudioOutput(format: Mp3FrameInfo) {
frameDurationUs = format.samplesPerFrame.toLong() * 1_000_000 / format.sampleRate
framesPerSecond = (1_000_000L / frameDurationUs).toInt()
configuredSampleRate = format.sampleRate
configuredChannelCount = format.channelCount
Log.i(TAG, "Detected format: ${format.sampleRate} Hz, ${format.channelCount}ch, " +
"${format.samplesPerFrame} samples/frame, frameDuration=${frameDurationUs}us")
val channelConfig = if (format.channelCount == 1)
AudioFormat.CHANNEL_OUT_MONO else AudioFormat.CHANNEL_OUT_STEREO
val encoding = AudioFormat.ENCODING_PCM_16BIT
val minBuf = AudioTrack.getMinBufferSize(format.sampleRate, channelConfig, encoding)
audioTrack = AudioTrack.Builder()
.setAudioAttributes(
AudioAttributes.Builder()
.setUsage(AudioAttributes.USAGE_MEDIA)
@@ -103,7 +123,7 @@ class AudioEngine(
)
.setAudioFormat(
AudioFormat.Builder()
.setSampleRate(sampleRate)
.setSampleRate(format.sampleRate)
.setChannelMask(channelConfig)
.setEncoding(encoding)
.build()
@@ -113,28 +133,38 @@ class AudioEngine(
.setTransferMode(AudioTrack.MODE_STREAM)
.build()
val codec = MediaCodec.createDecoderByType("audio/mpeg")
val format = MediaFormat.createAudioFormat("audio/mpeg", sampleRate, 2)
codec.configure(format, null, null, 0)
codec.start()
audioTrack.play()
codec = MediaCodec.createDecoderByType("audio/mpeg")
val mediaFormat = MediaFormat.createAudioFormat(
"audio/mpeg", format.sampleRate, format.channelCount
)
codec!!.configure(mediaFormat, null, null, 0)
codec!!.start()
audioTrack!!.play()
currentAudioTrack = audioTrack
currentCodec = codec
_events.tryEmit(AudioEngineEvent.Started)
connection.streamInfo?.let { _events.tryEmit(AudioEngineEvent.StreamInfoReceived(it)) }
val frameDurationMs = (frameDurationUs / 1000).toInt().coerceAtLeast(1)
val bufferFrames = if (bufferMs > 0) (bufferMs / frameDurationMs).coerceAtLeast(1) else 0
try {
val bufferFrames = if (bufferMs > 0) (bufferMs / 26).coerceAtLeast(1) else 0
val ringBuffer = RingBuffer(bufferFrames) { mp3Frame ->
decodeToPcm(codec, mp3Frame, audioTrack)
ringBuffer = RingBuffer(bufferFrames) { mp3Frame ->
decodeToPcm(codec!!, mp3Frame, audioTrack!!)
}
currentRingBuffer = ringBuffer
_events.tryEmit(AudioEngineEvent.AudioFormatDetected(
format.sampleRate, format.channelCount
))
}
try {
val frameSync = Mp3FrameSync { mp3Frame ->
ringBuffer.write(mp3Frame)
if (audioTrack == null) {
val format = Mp3FrameSync.parseFrameInfo(mp3Frame)
?: Mp3FrameInfo(44100, 2, 1152)
initAudioOutput(format)
}
ringBuffer!!.write(mp3Frame)
}
val icyParser = IcyParser(
@@ -146,7 +176,7 @@ class AudioEngine(
icyParser.readAll()
ringBuffer.flush()
ringBuffer?.flush()
frameSync.flush()
_events.tryEmit(AudioEngineEvent.Error(EngineError.StreamEnded))
} finally {
@@ -154,10 +184,10 @@ class AudioEngine(
currentRingBuffer = null
currentCodec = null
currentAudioTrack = null
codec.stop()
codec.release()
audioTrack.stop()
audioTrack.release()
codec?.stop()
codec?.release()
audioTrack?.stop()
audioTrack?.release()
connection.close()
}
}
@@ -166,8 +196,9 @@ class AudioEngine(
if (catchingUp) {
catchupFramesSkipped++
val lastReadMs = timedStream?.lastReadDurationMs ?: 0L
if (lastReadMs >= CATCHUP_THRESHOLD_MS || catchupFramesSkipped >= MAX_CATCHUP_FRAMES) {
if (catchupFramesSkipped >= MAX_CATCHUP_FRAMES) {
val maxCatchupFrames = framesPerSecond * 5
if (lastReadMs >= CATCHUP_THRESHOLD_MS || catchupFramesSkipped >= maxCatchupFrames) {
if (catchupFramesSkipped >= maxCatchupFrames) {
Log.w(TAG, "Catchup cap reached after $catchupFramesSkipped frames, starting playback")
}
catchingUp = false
@@ -178,7 +209,7 @@ class AudioEngine(
val skips = pendingSkips.getAndSet(0)
if (skips > 0) {
val framesToDrop = skips * FRAMES_PER_SECOND
val framesToDrop = skips * framesPerSecond
currentRingBuffer?.drop(framesToDrop)
audioTrack.pause()
audioTrack.flush()
@@ -194,12 +225,24 @@ class AudioEngine(
inBuf.clear()
inBuf.put(mp3Frame)
codec.queueInputBuffer(inIdx, 0, mp3Frame.size, presentationTimeUs, 0)
presentationTimeUs += FRAME_DURATION_US
presentationTimeUs += frameDurationUs
}
val bufferInfo = MediaCodec.BufferInfo()
var outIdx = codec.dequeueOutputBuffer(bufferInfo, 1000)
// MediaCodec reports INFO_OUTPUT_FORMAT_CHANGED as a negative status code, so the loop
// condition has to admit it explicitly. With a plain `outIdx >= 0` test the branch below
// could never run, and draining would stop as soon as the decoder announced its format.
while (outIdx >= 0 || outIdx == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
    if (outIdx == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
        // Safety net: compare what the decoder actually emits with the format the
        // AudioTrack was configured for, and log loudly on a mismatch.
        val outFormat = codec.outputFormat
        val outRate = outFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
        val outChannels = outFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
        Log.i(TAG, "Decoder output format: ${outRate} Hz, ${outChannels}ch")
        if (outRate != configuredSampleRate || outChannels != configuredChannelCount) {
            Log.w(TAG, "Decoder output ($outRate Hz, ${outChannels}ch) differs from " +
                "AudioTrack ($configuredSampleRate Hz, ${configuredChannelCount}ch)")
        }
        // A status code carries no PCM data; poll again without blocking and re-evaluate.
        outIdx = codec.dequeueOutputBuffer(bufferInfo, 0)
        continue
    }
val outBuf = codec.getOutputBuffer(outIdx)!!
outBuf.position(bufferInfo.offset)
outBuf.limit(bufferInfo.offset + bufferInfo.size)
@@ -222,10 +265,7 @@ class AudioEngine(
companion object {
private const val TAG = "AudioEngine"
private const val FRAMES_PER_SECOND = 38
private const val CATCHUP_THRESHOLD_MS = 30L
private const val MAX_CATCHUP_FRAMES = FRAMES_PER_SECOND * 5 // 5 seconds max skip
private const val FRAME_DURATION_US = 26_122L // 1152 samples at 44100 Hz
}
}

View File

@@ -3,6 +3,7 @@ package xyz.cottongin.radio247.audio
sealed interface AudioEngineEvent {
data class MetadataChanged(val metadata: IcyMetadata) : AudioEngineEvent
data class StreamInfoReceived(val streamInfo: StreamInfo) : AudioEngineEvent
data class AudioFormatDetected(val sampleRate: Int, val channelCount: Int) : AudioEngineEvent
data class Error(val cause: EngineError) : AudioEngineEvent
data object Started : AudioEngineEvent
data object Stopped : AudioEngineEvent

View File

@@ -2,8 +2,17 @@ package xyz.cottongin.radio247.audio
import java.io.ByteArrayOutputStream
/**
 * Audio format described by an MPEG audio frame header.
 *
 * @property sampleRate sample rate in Hz.
 * @property channelCount number of audio channels (1 for mono, 2 otherwise).
 * @property samplesPerFrame number of PCM samples one frame decodes to.
 */
data class Mp3FrameInfo(
    val sampleRate: Int,
    val channelCount: Int,
    val samplesPerFrame: Int,
)
private data class ParsedHeader(
val frameSize: Int
val frameSize: Int,
val sampleRate: Int,
val channelCount: Int,
val samplesPerFrame: Int
)
// MPEG1 Layer 3 bitrates (kbps), index 0 and 15 invalid
@@ -22,6 +31,9 @@ class Mp3FrameSync(
) {
private val buffer = ByteArrayOutputStream()
var detectedFormat: Mp3FrameInfo? = null
private set
fun feed(data: ByteArray, offset: Int = 0, length: Int = data.size) {
buffer.write(data, offset, length)
processBuffer()
@@ -60,6 +72,14 @@ class Mp3FrameSync(
}
}
if (detectedFormat == null) {
detectedFormat = Mp3FrameInfo(
sampleRate = header.sampleRate,
channelCount = header.channelCount,
samplesPerFrame = header.samplesPerFrame
)
}
val frame = bytes.copyOfRange(pos, pos + frameSize)
onFrame(frame)
pos += frameSize
@@ -77,6 +97,46 @@ class Mp3FrameSync(
return b0 == 0xFF && (b1 and 0xE0) == 0xE0
}
companion object {
    /**
     * Extracts the audio format named by the 4-byte MPEG audio header at the start of [frame].
     *
     * @param frame bytes beginning with an MPEG audio frame header; only the first four bytes are read.
     * @return the sample rate, channel count and samples per frame from the header, or `null` when
     *   [frame] is shorter than four bytes, does not start with the sync pattern, uses a reserved
     *   version, layer or sample-rate index, or maps to a sample rate of 0.
     */
    fun parseFrameInfo(frame: ByteArray): Mp3FrameInfo? {
        if (frame.size < 4) return null

        // Header bytes as unsigned values.
        fun octet(index: Int): Int = frame[index].toInt() and 0xFF

        val versionLayerByte = octet(1)
        val syncFound = octet(0) == 0xFF && (versionLayerByte and 0xE0) == 0xE0
        if (!syncFound) return null

        // Second header byte: version id in bits 4-3, layer in bits 2-1.
        val versionBits = (versionLayerByte ushr 3) and 0x03
        val layerBits = (versionLayerByte ushr 1) and 0x03
        if (versionBits == 1 || layerBits == 0) return null

        val rateIndex = (octet(2) shr 2) and 0x03
        if (rateIndex == 3) return null
        val sampleRate = when (versionBits) {
            3 -> MPEG1_SAMPLE_RATE[rateIndex]
            2 -> MPEG2_SAMPLE_RATE[rateIndex]
            0 -> MPEG25_SAMPLE_RATE[rateIndex]
            else -> return null
        }
        if (sampleRate == 0) return null

        // Channel mode 3 is mono; every other mode carries two channels.
        val isMono = ((octet(3) shr 6) and 0x03) == 3
        val channelCount = if (isMono) 1 else 2

        // layerBits can only be 1, 2 or 3 here because 0 was rejected above.
        val samplesPerFrame = when {
            layerBits == 3 -> 384 // Layer I
            layerBits == 2 -> 1152 // Layer II
            versionBits == 3 -> 1152 // Layer III, version bits 3
            else -> 576 // Layer III, other versions
        }

        return Mp3FrameInfo(sampleRate, channelCount, samplesPerFrame)
    }
}
private fun parseHeader(bytes: ByteArray, pos: Int): ParsedHeader? {
if (pos + 4 > bytes.size) return null
val header = ((bytes[pos].toInt() and 0xFF) shl 24) or
@@ -84,8 +144,8 @@ class Mp3FrameSync(
((bytes[pos + 2].toInt() and 0xFF) shl 8) or
(bytes[pos + 3].toInt() and 0xFF)
val b1 = bytes[pos + 1].toInt() and 0xFF
val b2 = bytes[pos + 2].toInt() and 0xFF
val b3 = bytes[pos + 3].toInt() and 0xFF
val mpegVersion = (header ushr 19) and 0x03
val layer = (header ushr 17) and 0x03
@@ -114,6 +174,16 @@ class Mp3FrameSync(
}
if (sampleRate == 0) return null
val channelMode = (b3 shr 6) and 0x03
val channelCount = if (channelMode == 3) 1 else 2
val samplesPerFrame = when (layer) {
3 -> 384 // Layer I
2 -> 1152 // Layer II
1 -> if (mpegVersion == 3) 1152 else 576 // Layer III
else -> return null
}
val frameSize = when (layer) {
1 -> { // Layer III
when (mpegVersion) {
@@ -134,6 +204,6 @@ class Mp3FrameSync(
}
if (frameSize <= 0) return null
return ParsedHeader(frameSize)
return ParsedHeader(frameSize, sampleRate, channelCount, samplesPerFrame)
}
}

View File

@@ -13,7 +13,9 @@ class ConnectionFailed(message: String, cause: Throwable? = null) : Exception(me
data class StreamInfo(
val bitrate: Int?,
val ssl: Boolean,
val contentType: String?
val contentType: String?,
val sampleRate: Int? = null,
val channelCount: Int? = null
)
class StreamConnection(private val url: String) {

View File

@@ -31,6 +31,7 @@ import xyz.cottongin.radio247.audio.AudioEngine
import xyz.cottongin.radio247.audio.AudioEngineEvent
import xyz.cottongin.radio247.audio.EngineError
import xyz.cottongin.radio247.audio.IcyMetadata
import xyz.cottongin.radio247.audio.StreamInfo
import xyz.cottongin.radio247.data.db.ConnectionSpanDao
import xyz.cottongin.radio247.data.db.ListeningSessionDao
import xyz.cottongin.radio247.data.db.MetadataSnapshotDao
@@ -577,6 +578,18 @@ class RadioPlaybackService : MediaLibraryService() {
transition(playingState.copy(streamInfo = event.streamInfo))
}
}
is AudioEngineEvent.AudioFormatDetected -> {
val playingState = controller.state.value
if (playingState is PlaybackState.Playing) {
val updated = (playingState.streamInfo ?: StreamInfo(
bitrate = null, ssl = false, contentType = null
)).copy(
sampleRate = event.sampleRate,
channelCount = event.channelCount
)
transition(playingState.copy(streamInfo = updated))
}
}
is AudioEngineEvent.Error -> {
engine?.stop()
engine = null

View File

@@ -668,6 +668,24 @@ private fun QualityBadge(
}
Text(text = codec, style = badgeStyle, color = dim)
}
val hasPrevBadge = streamInfo.bitrate != null || codec != null
if (streamInfo.sampleRate != null) {
if (hasPrevBadge) {
Text(text = " \u00B7 ", style = badgeStyle, color = dim)
}
val rateLabel = when (streamInfo.sampleRate) {
44100 -> "44.1 kHz"
22050 -> "22.05 kHz"
48000 -> "48 kHz"
32000 -> "32 kHz"
24000 -> "24 kHz"
16000 -> "16 kHz"
11025 -> "11.025 kHz"
else -> "${streamInfo.sampleRate} Hz"
}
val channelLabel = if (streamInfo.channelCount == 1) "Mono" else "Stereo"
Text(text = "$rateLabel $channelLabel", style = badgeStyle, color = dim)
}
if (streamInfo.ssl) {
Spacer(modifier = Modifier.width(4.dp))
Icon(

View File

@@ -2,38 +2,43 @@ package xyz.cottongin.radio247.audio
import org.junit.Assert.assertArrayEquals
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Assert.assertNull
import org.junit.Test
class Mp3FrameSyncTest {
/**
* Build MPEG1 Layer3 header: 0xFF, (0xE0 | version<<3 | layer<<1 | crc), byte2, 0x00
* MPEG1=11, Layer3=01, noCRC=1. 0xFA has Layer=01 (III). Bitrate in 15-12, sample in 11-10, padding in bit 9.
* Build MPEG1 Layer3 header: 0xFF, (0xE0 | version<<3 | layer<<1 | crc), byte2, byte3
* MPEG1=11, Layer3=01, noCRC=1. Bitrate in 15-12, sample in 11-10, padding in bit 9.
* channelMode: 0=Stereo, 1=JointStereo, 2=DualChannel, 3=Mono
*/
private fun buildMpeg1Layer3Header(
bitrateIndex: Int,
sampleRateIndex: Int,
padding: Boolean
padding: Boolean,
channelMode: Int = 0
): ByteArray {
// Byte 0: sync. Byte 1: 0xFB = sync + MPEG1(11) + LayerIII(01) + noCRC(1)
val byte1 = 0xFB.toByte()
// Byte 2: bitrate(4) | sample(2) | padding(1) | private(1)
val byte2 = ((bitrateIndex shl 4) or (sampleRateIndex shl 2) or (if (padding) 2 else 0)).toByte()
return byteArrayOf(0xFF.toByte(), byte1, byte2, 0x00)
val byte3 = (channelMode shl 6).toByte()
return byteArrayOf(0xFF.toByte(), byte1, byte2, byte3)
}
/**
* Build MPEG2 Layer3 header. MPEG2 = 10, Layer3 = 01.
* channelMode: 0=Stereo, 1=JointStereo, 2=DualChannel, 3=Mono
*/
private fun buildMpeg2Layer3Header(
bitrateIndex: Int,
sampleRateIndex: Int,
padding: Boolean
padding: Boolean,
channelMode: Int = 0
): ByteArray {
// Byte 1: 0xF3 = sync + MPEG2(10) + LayerIII(01) + noCRC(1)
val byte1 = 0xF3.toByte()
val byte2 = ((bitrateIndex shl 4) or (sampleRateIndex shl 2) or (if (padding) 2 else 0)).toByte()
return byteArrayOf(0xFF.toByte(), byte1, byte2, 0x00)
val byte3 = (channelMode shl 6).toByte()
return byteArrayOf(0xFF.toByte(), byte1, byte2, byte3)
}
private fun buildFrame(header: ByteArray, bodySize: Int): ByteArray {
@@ -207,4 +212,119 @@ class Mp3FrameSyncTest {
assertEquals(2, frames.size)
assertEquals(expectedSize, frames[0].size)
}
@Test
fun detectedFormatForMpeg1Stereo44100() {
    // Two back-to-back 417-byte frames with channelMode=1 and sampleRateIndex=0.
    val header = buildMpeg1Layer3Header(9, 0, false, channelMode = 1)
    val firstFrame = buildFrame(header, 417 - 4)
    val secondFrame = buildFrame(header, 417 - 4)

    val sync = Mp3FrameSync { }
    sync.feed(firstFrame + secondFrame)

    assertEquals(Mp3FrameInfo(44100, 2, 1152), sync.detectedFormat)
}
@Test
fun detectedFormatForMpeg1Mono48000() {
    // sampleRateIndex=1 → 48000 Hz, channelMode=3 → Mono
    val header = buildMpeg1Layer3Header(9, 1, false, channelMode = 3)
    val frameSize = 144 * 128 * 1000 / 48000 // = 384
    val firstFrame = buildFrame(header, frameSize - 4)
    val secondFrame = buildFrame(header, frameSize - 4)

    val sync = Mp3FrameSync { }
    sync.feed(firstFrame + secondFrame)

    assertEquals(Mp3FrameInfo(48000, 1, 1152), sync.detectedFormat)
}
@Test
fun detectedFormatForMpeg2Mono22050() {
    // MPEG2 Layer3, 128kbps (index 12), 22050 Hz (index 0), mono
    val header = buildMpeg2Layer3Header(12, 0, false, channelMode = 3)
    val frameSize = 72 * 128 * 1000 / 22050 // = 417
    val firstFrame = buildFrame(header, frameSize - 4)
    val secondFrame = buildFrame(header, frameSize - 4)

    val sync = Mp3FrameSync { }
    sync.feed(firstFrame + secondFrame)

    assertEquals(Mp3FrameInfo(22050, 1, 576), sync.detectedFormat)
}
@Test
fun detectedFormatIsNullBeforeFirstFrame() {
    // Nothing has been fed yet, so no format can have been detected.
    assertNull(Mp3FrameSync { }.detectedFormat)
}
@Test
fun parseFrameInfoMpeg1Stereo44100() {
    // channelMode=0 (stereo), sampleRateIndex=0 (44100 Hz)
    val frame = buildFrame(
        buildMpeg1Layer3Header(9, 0, false, channelMode = 0),
        417 - 4
    )

    assertEquals(Mp3FrameInfo(44100, 2, 1152), Mp3FrameSync.parseFrameInfo(frame))
}
@Test
fun parseFrameInfoMpeg2Mono22050() {
    // channelMode=3 (mono), sampleRateIndex=0 (22050 Hz for MPEG2)
    val frame = buildFrame(
        buildMpeg2Layer3Header(12, 0, false, channelMode = 3),
        417 - 4
    )

    assertEquals(Mp3FrameInfo(22050, 1, 576), Mp3FrameSync.parseFrameInfo(frame))
}
@Test
fun parseFrameInfoReturnsNullForGarbage() {
    // 100 bytes of 0x42 never contain the 0xFF sync byte.
    val noise = ByteArray(100) { 0x42 }
    val parsed = Mp3FrameSync.parseFrameInfo(noise)
    assertNull(parsed)
}
@Test
fun parseFrameInfoReturnsNullForTooShort() {
    // Two bytes cannot hold a full 4-byte header.
    val truncated = byteArrayOf(0xFF.toByte(), 0xFB.toByte())
    assertNull(Mp3FrameSync.parseFrameInfo(truncated))
}
@Test
fun parseFrameInfoJointStereoIsTwoChannels() {
    // Joint stereo (channelMode=1) should report 2 channels
    val frame = buildFrame(
        buildMpeg1Layer3Header(9, 0, false, channelMode = 1),
        417 - 4
    )

    val parsed = Mp3FrameSync.parseFrameInfo(frame)

    assertNotNull(parsed)
    assertEquals(2, parsed?.channelCount)
}
@Test
fun parseFrameInfoDualChannelIsTwoChannels() {
    // Dual channel (channelMode=2) should report 2 channels
    val frame = buildFrame(
        buildMpeg1Layer3Header(9, 0, false, channelMode = 2),
        417 - 4
    )

    val parsed = Mp3FrameSync.parseFrameInfo(frame)

    assertNotNull(parsed)
    assertEquals(2, parsed?.channelCount)
}
}