feat: replace picojpeg with JPEGDEC for JPEG image decoding (#1136)

## Summary

Replaces the picojpeg library with bitbank2/JPEGDEC for JPEG decoding in the EPUB image pipeline. JPEGDEC provides built-in coarse scaling (1/2, 1/4, 1/8), 8-bit grayscale output, and streaming block-based decoding via callbacks. Includes a pre-build patch script for two JPEGDEC changes affecting progressive JPEG support and EIGHT_BIT_GRAYSCALE mode.

Closes #912

## Additional Context

# Example progressive jpeg

<img src="https://github.com/user-attachments/assets/e63bb4f8-f862-4aa0-a01f-d1ef43a4b27a" width="400" height="800" />

Good performance increase from JPEGDEC over picojpeg

cc @bitbank2 thanks

## Baseline JPEG Decode Performance: picojpeg vs JPEGDEC (float in callback) vs JPEGDEC (fixed-point in callback)

Tested with `test_jpeg_images.epub` on device (ESP32-C3), first decode (no cache).

| Image | Source | Output | picojpeg | JPEGDEC float | JPEGDEC fixed-point | vs picojpeg | vs float |
|-------|--------|--------|----------|---------------|---------------------|-------------|----------|
| jpeg_format.jpg | 350x250 | 350x250 | 313 ms | 256 ms | **104 ms** | **3.0x** | **2.5x** |
| grayscale_test.jpg | 400x600 | 400x600 | 768 ms | 661 ms | **246 ms** | **3.1x** | **2.7x** |
| gradient_test.jpg | 400x500 | 400x500 | 707 ms | 597 ms | **247 ms** | **2.9x** | **2.4x** |
| centering_test.jpg | 350x400 | 350x400 | 502 ms | 412 ms | **169 ms** | **3.0x** | **2.4x** |
| scaling_test.jpg | 1200x1500 | 464x580 | 5487 ms | 1114 ms | **668 ms** | **8.2x** | **1.7x** |
| wide_scaling_test.jpg | 1807x736 | 464x188 | 4237 ms | 642 ms | **497 ms** | **8.5x** | **1.3x** |
| cache_test_1.jpg | 400x300 | 400x300 | 422 ms | 348 ms | **141 ms** | **3.0x** | **2.5x** |
| cache_test_2.jpg | 400x300 | 400x300 | 424 ms | 349 ms | **142 ms** | **3.0x** | **2.5x** |

### Summary

- **1:1 scale (fixed-point vs float)**: ~2.5x faster — eliminating software float on the FPU-less ESP32-C3 is the dominant win
- **1:1 scale (fixed-point vs picojpeg)**: ~3.0x faster overall
- **Downscaled images (vs picojpeg)**: 8-9x faster — JPEGDEC's coarse scaling + fixed-point draw callback
- **Downscaled images (fixed-point vs float)**: 1.3-1.7x — less dramatic since JPEG library decode dominates over the draw callback for fewer output pixels
- The fixed-point optimization alone (vs float JPEGDEC) saved **~60% of render time** on 1:1 images, confirming that software float emulation was the primary bottleneck in the draw callback
- See thread for discussions on quality of progressive images, https://github.com/crosspoint-reader/crosspoint-reader/pull/1136#issuecomment-3952952315
- and the conclusion https://github.com/crosspoint-reader/crosspoint-reader/pull/1136#issuecomment-3959379386
- Proposal to improve quality added at https://github.com/crosspoint-reader/crosspoint-reader/discussions/1179

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers.

Did you use AI tools to help write this code? _**< PARTIALLY >**_

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
2026-03-01 01:24:58 +00:00
parent a57c62f0b4
commit 2b25f4d168
4 changed files with 550 additions and 236 deletions
--- a/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
+++ b/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
@@ -2,44 +2,358 @@

 #include <GfxRenderer.h>
 #include <HalStorage.h>
+#include <JPEGDEC.h>
 #include <Logging.h>
-#include <picojpeg.h>

-#include <cstdio>
-#include <cstring>
+#include <cstdlib>
+#include <new>

 #include "DitherUtils.h"
 #include "PixelCache.h"

+namespace {
+
+// Context struct passed through JPEGDEC callbacks to avoid global mutable state.
+// The draw callback receives this via pDraw->pUser (set by setUserPointer()).
+// The file I/O callbacks receive the FsFile* via pFile->fHandle (set by jpegOpen()).
 struct JpegContext {
-  FsFile& file;
-  uint8_t buffer[512];
-  size_t bufferPos;
-  size_t bufferFilled;
-  JpegContext(FsFile& f) : file(f), bufferPos(0), bufferFilled(0) {}
+  GfxRenderer* renderer;       // draw target; jpegDrawCallback returns 0 when this is null
+  const RenderConfig* config;  // supplies x/y placement and useDithering; callback returns 0 when null
+  int screenWidth;             // screen bounds, used by the callback to pre-clamp the destination range
+  int screenHeight;
+
+  // Source dimensions after JPEGDEC's built-in scaling
+  int scaledSrcWidth;
+  int scaledSrcHeight;
+
+  // Final output dimensions
+  int dstWidth;
+  int dstHeight;
+
+  // Fine scale in 16.16 fixed-point (ESP32-C3 has no FPU)
+  int32_t fineScaleFP;  // src -> dst mapping
+  int32_t invScaleFP;   // dst -> src mapping
+
+  PixelCache cache;  // receives every drawn pixel when caching is true
+  bool caching;      // set by the decoder only after the cache buffer was allocated successfully
+
+  JpegContext()  // defaults: null pointers, zero sizes, 1.0 scale (1 << 16), caching off
+      : renderer(nullptr),
+        config(nullptr),
+        screenWidth(0),
+        screenHeight(0),
+        scaledSrcWidth(0),
+        scaledSrcHeight(0),
+        dstWidth(0),
+        dstHeight(0),
+        fineScaleFP(1 << 16),
+        invScaleFP(1 << 16),
+        caching(false) {}
 };

+// File I/O callbacks use pFile->fHandle to access the FsFile*,
+// avoiding the need for global file state.
+// JPEGDEC open callback: opens `filename` for reading and reports its size.
+// Returns a heap-allocated FsFile* (as void*) that jpegClose() closes and
+// deletes, or nullptr when the arguments are null, the allocation fails, or
+// the file cannot be opened.
+void* jpegOpen(const char* filename, int32_t* size) {
+  // std::string(nullptr) is undefined behaviour and *size is written below.
+  if (!filename || !size) return nullptr;
+
+  // Match the rest of this file: allocate with nothrow and report failure
+  // through the return value instead of relying on a throwing/aborting new.
+  FsFile* f = new (std::nothrow) FsFile();
+  if (!f) return nullptr;
+
+  if (!Storage.openFileForRead("JPG", std::string(filename), *f)) {
+    delete f;
+    return nullptr;
+  }
+  *size = f->size();
+  return f;
+}
+
+// JPEGDEC close callback: closes and frees the FsFile created by jpegOpen().
+// A null handle is ignored.
+void jpegClose(void* handle) {
+  if (handle == nullptr) return;
+
+  FsFile* const file = static_cast<FsFile*>(handle);
+  file->close();
+  delete file;
+}
+
+// JPEGDEC tracks file position via pFile->iPos internally (e.g. JPEGGetMoreData
+// checks iPos < iSize to decide whether more data is available). The callbacks
+// MUST maintain iPos to match the actual file position, otherwise progressive
+// JPEGs with large headers fail during parsing.
+// JPEGDEC read callback: reads up to `len` bytes into pBuf and advances
+// pFile->iPos by the number of bytes actually read. Returns that count, or 0
+// when there is no file handle or the underlying read reports an error.
+int32_t jpegRead(JPEGFILE* pFile, uint8_t* pBuf, int32_t len) {
+  FsFile* const file = reinterpret_cast<FsFile*>(pFile->fHandle);
+  if (file == nullptr) {
+    return 0;
+  }
+
+  const int32_t got = file->read(pBuf, len);
+  if (got < 0) {
+    return 0;  // read error: leave iPos untouched
+  }
+
+  pFile->iPos += got;
+  return got;
+}
+
+// JPEGDEC seek callback: moves the file to absolute offset `pos` and mirrors
+// the new offset into pFile->iPos. Returns `pos` on success, -1 when there is
+// no file handle or the seek fails (iPos is left unchanged in that case).
+int32_t jpegSeek(JPEGFILE* pFile, int32_t pos) {
+  FsFile* const file = reinterpret_cast<FsFile*>(pFile->fHandle);
+  const bool moved = (file != nullptr) && file->seek(pos);
+  if (!moved) {
+    return -1;
+  }
+
+  pFile->iPos = pos;
+  return pos;
+}
+
+// JPEGDEC object is ~17 KB due to internal decode buffers.
+// Heap-allocate on demand so memory is only used during active decode.
+constexpr size_t JPEG_DECODER_APPROX_SIZE = 20 * 1024;  // estimate sits above the ~17 KB figure quoted above
+constexpr size_t MIN_FREE_HEAP_FOR_JPEG = JPEG_DECODER_APPROX_SIZE + 16 * 1024;  // decoder estimate + 16 KB headroom; compared against free heap before allocating
+
+// Choose JPEGDEC's built-in scale factor for coarse downscaling.
+// Returns the scale denominator (1, 2, 4, or 8) and sets jpegScaleOption.
+// Maps the overall target scale onto the coarsest JPEGDEC built-in scale that
+// does not undershoot it. Writes the matching JPEGDEC option flag to
+// jpegScaleOption (0 = no built-in scaling) and returns the denominator
+// (8, 4, 2 or 1).
+int chooseJpegScale(float targetScale, int& jpegScaleOption) {
+  struct ScaleStep {
+    float maxScale;  // inclusive upper bound of targetScale for this step
+    int option;      // JPEGDEC decode option flag
+    int denominator;
+  };
+  // Ordered from strongest to weakest reduction; the first match wins.
+  const ScaleStep steps[] = {
+      {0.125f, JPEG_SCALE_EIGHTH, 8},
+      {0.25f, JPEG_SCALE_QUARTER, 4},
+      {0.5f, JPEG_SCALE_HALF, 2},
+  };
+
+  for (const ScaleStep& step : steps) {
+    if (targetScale <= step.maxScale) {
+      jpegScaleOption = step.option;
+      return step.denominator;
+    }
+  }
+
+  jpegScaleOption = 0;
+  return 1;
+}
+
+// Fixed-point 16.16 arithmetic avoids software float emulation on ESP32-C3 (no FPU).
+constexpr int FP_SHIFT = 16;               // number of fractional bits
+constexpr int32_t FP_ONE = 1 << FP_SHIFT;  // 1.0 in 16.16
+constexpr int32_t FP_MASK = FP_ONE - 1;    // extracts the fractional part of a 16.16 value
+
+// Clamps a block-local row/column index into [0, count - 1]. count must be > 0.
+inline int jpegClampIndex(int idx, int count) {
+  if (idx < 0) return 0;
+  if (idx >= count) return count - 1;
+  return idx;
+}
+
+// Quantizes one 8-bit gray sample to a 2-bit level (Bayer dither or plain
+// division by 85), draws it, and records it in `cache` when one is supplied.
+inline void jpegEmitGray(GfxRenderer& renderer, PixelCache* cache, bool useDithering, int outX, int outY,
+                         uint8_t gray) {
+  uint8_t level;
+  if (useDithering) {
+    level = applyBayerDither4Level(gray, outX, outY);
+  } else {
+    level = gray / 85;
+    if (level > 3) level = 3;
+  }
+  drawPixelWithRenderMode(renderer, outX, outY, level);
+  if (cache) cache->setPixel(outX, outY, level);
+}
+
+// Bilinear blend of the four samples (row0/row1 x lx0/lx1) using 16.16
+// fractional weights fx (horizontal) and fy (vertical).
+inline uint8_t jpegBlendBilinear(const uint8_t* row0, const uint8_t* row1, int lx0, int lx1, int32_t fx,
+                                 int32_t fy) {
+  const int32_t fxInv = FP_ONE - fx;
+  const int32_t fyInv = FP_ONE - fy;
+  const int top = ((int)row0[lx0] * fxInv + (int)row0[lx1] * fx) >> FP_SHIFT;
+  const int bot = ((int)row1[lx0] * fxInv + (int)row1[lx1] * fx) >> FP_SHIFT;
+  return (uint8_t)((top * fyInv + bot * fy) >> FP_SHIFT);
+}
+
+// JPEGDEC draw callback. Maps one decoded block of 8-bit grayscale pixels to
+// the destination rectangle it covers, scales it (1:1 copy, bilinear upscale
+// or nearest-neighbor downscale), quantizes to 2-bit and draws/caches it.
+// Returns 0 when the user context is missing or incomplete, 1 otherwise
+// (including when the block is empty or entirely clipped).
+// NOTE(review): JPEGDEC is expected to abort the decode on 0 - confirm.
+int jpegDrawCallback(JPEGDRAW* pDraw) {
+  JpegContext* ctx = reinterpret_cast<JpegContext*>(pDraw->pUser);
+  if (!ctx || !ctx->config || !ctx->renderer) return 0;
+
+  // In EIGHT_BIT_GRAYSCALE mode, pPixels contains 8-bit grayscale values
+  // Buffer is densely packed: stride = pDraw->iWidth, valid columns = pDraw->iWidthUsed
+  uint8_t* pixels = reinterpret_cast<uint8_t*>(pDraw->pPixels);
+  const int stride = pDraw->iWidth;
+  const int validW = pDraw->iWidthUsed;
+  const int blockH = pDraw->iHeight;
+
+  if (stride <= 0 || blockH <= 0 || validW <= 0) return 1;
+
+  const bool useDithering = ctx->config->useDithering;
+  PixelCache* const cache = ctx->caching ? &ctx->cache : nullptr;
+  const int32_t fineScaleFP = ctx->fineScaleFP;
+  const int32_t invScaleFP = ctx->invScaleFP;
+  GfxRenderer& renderer = *ctx->renderer;
+  const int cfgX = ctx->config->x;
+  const int cfgY = ctx->config->y;
+  const int blockX = pDraw->x;
+  const int blockY = pDraw->y;
+
+  // Determine destination pixel range covered by this source block.
+  // The last block in each direction is stretched to the destination edge.
+  const int srcYEnd = blockY + blockH;
+  const int srcXEnd = blockX + validW;
+
+  int dstYStart = (int)((int64_t)blockY * fineScaleFP >> FP_SHIFT);
+  int dstYEnd = (srcYEnd >= ctx->scaledSrcHeight) ? ctx->dstHeight : (int)((int64_t)srcYEnd * fineScaleFP >> FP_SHIFT);
+  int dstXStart = (int)((int64_t)blockX * fineScaleFP >> FP_SHIFT);
+  int dstXEnd = (srcXEnd >= ctx->scaledSrcWidth) ? ctx->dstWidth : (int)((int64_t)srcXEnd * fineScaleFP >> FP_SHIFT);
+
+  // Pre-clamp destination ranges to screen bounds (eliminates per-pixel screen checks)
+  int clampYMax = ctx->dstHeight;
+  if (ctx->screenHeight - cfgY < clampYMax) clampYMax = ctx->screenHeight - cfgY;
+  if (dstYStart < -cfgY) dstYStart = -cfgY;
+  if (dstYEnd > clampYMax) dstYEnd = clampYMax;
+
+  int clampXMax = ctx->dstWidth;
+  if (ctx->screenWidth - cfgX < clampXMax) clampXMax = ctx->screenWidth - cfgX;
+  if (dstXStart < -cfgX) dstXStart = -cfgX;
+  if (dstXEnd > clampXMax) dstXEnd = clampXMax;
+
+  if (dstYStart >= dstYEnd || dstXStart >= dstXEnd) return 1;
+
+  // === 1:1 fast path: no scaling math ===
+  if (fineScaleFP == FP_ONE) {
+    for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
+      const int outY = cfgY + dstY;
+      // The last block is stretched to dstHeight, which can exceed the source
+      // height when exact output dimensions are requested; repeat the last
+      // row instead of reading past the block.
+      const uint8_t* row = &pixels[jpegClampIndex(dstY - blockY, blockH) * stride];
+      for (int dstX = dstXStart; dstX < dstXEnd; dstX++) {
+        jpegEmitGray(renderer, cache, useDithering, cfgX + dstX, outY, row[dstX - blockX]);
+      }
+    }
+    return 1;
+  }
+
+  // === Bilinear interpolation (upscale: fineScale > 1.0) ===
+  // Smooths block boundaries that would otherwise create visible banding
+  // on progressive JPEG DC-only decode (1/8 resolution upscaled to target).
+  if (fineScaleFP > FP_ONE) {
+    // Pre-compute safe X range where lx0 and lx0+1 are both in [0, validW-1].
+    // Only the left/right edge pixels need clamping.
+    int safeXStart = (int)(((int64_t)blockX * fineScaleFP + FP_MASK) >> FP_SHIFT);
+    int safeXEnd = (int)((int64_t)(blockX + validW - 1) * fineScaleFP >> FP_SHIFT);
+    if (safeXStart < dstXStart) safeXStart = dstXStart;
+    // fineScaleFP and invScaleFP are both truncated, so their product is
+    // slightly below 1.0 and the first "safe" column can still map to source
+    // column blockX - 1. Push such columns into the clamped left-edge loop.
+    while (safeXStart < dstXEnd && ((safeXStart * invScaleFP) >> FP_SHIFT) < blockX) safeXStart++;
+    if (safeXEnd > dstXEnd) safeXEnd = dstXEnd;
+    if (safeXStart > safeXEnd) safeXEnd = safeXStart;
+
+    for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
+      const int outY = cfgY + dstY;
+      const int32_t srcFyFP = dstY * invScaleFP;
+      const int32_t fy = srcFyFP & FP_MASK;
+      // Both rows are clamped to the block; interpolation does not reach
+      // into neighbouring blocks.
+      const int rawLy = (srcFyFP >> FP_SHIFT) - blockY;
+      const uint8_t* row0 = &pixels[jpegClampIndex(rawLy, blockH) * stride];
+      const uint8_t* row1 = &pixels[jpegClampIndex(rawLy + 1, blockH) * stride];
+
+      // Edge columns: both sample indices are clamped into [0, validW-1].
+      auto drawClampedColumn = [&](int dstX) {
+        const int32_t srcFxFP = dstX * invScaleFP;
+        const int rawLx = (srcFxFP >> FP_SHIFT) - blockX;
+        const uint8_t gray = jpegBlendBilinear(row0, row1, jpegClampIndex(rawLx, validW),
+                                               jpegClampIndex(rawLx + 1, validW), srcFxFP & FP_MASK, fy);
+        jpegEmitGray(renderer, cache, useDithering, cfgX + dstX, outY, gray);
+      };
+
+      // Left edge (with X boundary clamping)
+      for (int dstX = dstXStart; dstX < safeXStart; dstX++) {
+        drawClampedColumn(dstX);
+      }
+
+      // Interior (no X boundary checks - lx0 and lx0+1 guaranteed in bounds)
+      for (int dstX = safeXStart; dstX < safeXEnd; dstX++) {
+        const int32_t srcFxFP = dstX * invScaleFP;
+        const int lx0 = (srcFxFP >> FP_SHIFT) - blockX;
+        const uint8_t gray = jpegBlendBilinear(row0, row1, lx0, lx0 + 1, srcFxFP & FP_MASK, fy);
+        jpegEmitGray(renderer, cache, useDithering, cfgX + dstX, outY, gray);
+      }
+
+      // Right edge (with X boundary clamping)
+      for (int dstX = safeXEnd; dstX < dstXEnd; dstX++) {
+        drawClampedColumn(dstX);
+      }
+    }
+    return 1;
+  }
+
+  // === Nearest-neighbor (downscale: fineScale < 1.0) ===
+  for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
+    const int outY = cfgY + dstY;
+    const int32_t srcFyFP = dstY * invScaleFP;
+    const int ly = jpegClampIndex((srcFyFP >> FP_SHIFT) - blockY, blockH);
+    const uint8_t* row = &pixels[ly * stride];
+
+    for (int dstX = dstXStart; dstX < dstXEnd; dstX++) {
+      const int32_t srcFxFP = dstX * invScaleFP;
+      const int lx = jpegClampIndex((srcFxFP >> FP_SHIFT) - blockX, validW);
+      jpegEmitGray(renderer, cache, useDithering, cfgX + dstX, outY, row[lx]);
+    }
+  }
+
+  return 1;
+}
+
+}  // namespace
+
 bool JpegToFramebufferConverter::getDimensionsStatic(const std::string& imagePath, ImageDimensions& out) {  // fills `out` with the JPEG's width/height; returns false on low heap, allocation failure or open failure
-  FsFile file;
-  if (!Storage.openFileForRead("JPG", imagePath, file)) {
-    LOG_ERR("JPG", "Failed to open file for dimensions: %s", imagePath.c_str());
+  size_t freeHeap = ESP.getFreeHeap();
+  if (freeHeap < MIN_FREE_HEAP_FOR_JPEG) {  // refuse to start when free heap is below decoder size + headroom
+    LOG_ERR("JPG", "Not enough heap for JPEG decoder (%u free, need %u)", freeHeap, MIN_FREE_HEAP_FOR_JPEG);
     return false;
   }

-  JpegContext context(file);
-  pjpeg_image_info_t imageInfo;
-
-  int status = pjpeg_decode_init(&imageInfo, jpegReadCallback, &context, 0);
-  file.close();
-
-  if (status != 0) {
-    LOG_ERR("JPG", "Failed to init JPEG for dimensions: %d", status);
+  JPEGDEC* jpeg = new (std::nothrow) JPEGDEC();  // heap-allocated so the decoder only occupies memory during this call
+  if (!jpeg) {
+    LOG_ERR("JPG", "Failed to allocate JPEG decoder for dimensions");
     return false;
   }

-  out.width = imageInfo.m_width;
-  out.height = imageInfo.m_height;
+  int rc = jpeg->open(imagePath.c_str(), jpegOpen, jpegClose, jpegRead, jpegSeek, nullptr);  // no draw callback: nothing is decoded here, only dimensions are read
+  if (rc != 1) {  // anything other than 1 is treated as failure
+    LOG_ERR("JPG", "Failed to open JPEG for dimensions (err=%d): %s", jpeg->getLastError(), imagePath.c_str());
+    delete jpeg;
+    return false;
+  }
+
+  out.width = jpeg->getWidth();
+  out.height = jpeg->getHeight();
   LOG_DBG("JPG", "Image dimensions: %dx%d", out.width, out.height);
+
+  jpeg->close();  // release the file before freeing the decoder
+  delete jpeg;
   return true;
  }

@@ -47,246 +361,130 @@ bool JpegToFramebufferConverter::decodeToFramebuffer(const std::string& imagePat
                                                     const RenderConfig& config) {
  LOG_DBG("JPG", "Decoding JPEG: %s", imagePath.c_str());

-  FsFile file;
-  if (!Storage.openFileForRead("JPG", imagePath, file)) {
-    LOG_ERR("JPG", "Failed to open file: %s", imagePath.c_str());
+  size_t freeHeap = ESP.getFreeHeap();
+  if (freeHeap < MIN_FREE_HEAP_FOR_JPEG) {
+    LOG_ERR("JPG", "Not enough heap for JPEG decoder (%u free, need %u)", freeHeap, MIN_FREE_HEAP_FOR_JPEG);
    return false;
  }

-  JpegContext context(file);
-  pjpeg_image_info_t imageInfo;
-
-  int status = pjpeg_decode_init(&imageInfo, jpegReadCallback, &context, 0);
-  if (status != 0) {
-    LOG_ERR("JPG", "picojpeg init failed: %d", status);
-    file.close();
+  JPEGDEC* jpeg = new (std::nothrow) JPEGDEC();
+  if (!jpeg) {
+    LOG_ERR("JPG", "Failed to allocate JPEG decoder");
    return false;
  }

-  if (!validateImageDimensions(imageInfo.m_width, imageInfo.m_height, "JPEG")) {
-    file.close();
+  JpegContext ctx;
+  ctx.renderer = &renderer;
+  ctx.config = &config;
+  ctx.screenWidth = renderer.getScreenWidth();
+  ctx.screenHeight = renderer.getScreenHeight();
+
+  int rc = jpeg->open(imagePath.c_str(), jpegOpen, jpegClose, jpegRead, jpegSeek, jpegDrawCallback);
+  if (rc != 1) {
+    LOG_ERR("JPG", "Failed to open JPEG (err=%d): %s", jpeg->getLastError(), imagePath.c_str());
+    delete jpeg;
    return false;
  }

-  // Calculate output dimensions
+  int srcWidth = jpeg->getWidth();
+  int srcHeight = jpeg->getHeight();
+
+  if (srcWidth <= 0 || srcHeight <= 0) {
+    LOG_ERR("JPG", "Invalid JPEG dimensions: %dx%d", srcWidth, srcHeight);
+    jpeg->close();
+    delete jpeg;
+    return false;
+  }
+
+  if (!validateImageDimensions(srcWidth, srcHeight, "JPEG")) {
+    jpeg->close();
+    delete jpeg;
+    return false;
+  }
+
+  bool isProgressive = jpeg->getJPEGType() == JPEG_MODE_PROGRESSIVE;
+  if (isProgressive) {
+    LOG_INF("JPG", "Progressive JPEG detected - decoding DC coefficients only (lower quality)");
+  }
+
+  // Calculate overall target scale
+  float targetScale;
  int destWidth, destHeight;
-  float scale;

  if (config.useExactDimensions && config.maxWidth > 0 && config.maxHeight > 0) {
-    // Use exact dimensions as specified (avoids rounding mismatches with pre-calculated sizes)
    destWidth = config.maxWidth;
    destHeight = config.maxHeight;
-    scale = (float)destWidth / imageInfo.m_width;
+    targetScale = (float)destWidth / srcWidth;
  } else {
-    // Calculate scale factor to fit within maxWidth/maxHeight
-    float scaleX = (config.maxWidth > 0 && imageInfo.m_width > config.maxWidth)
-                       ? (float)config.maxWidth / imageInfo.m_width
-                       : 1.0f;
-    float scaleY = (config.maxHeight > 0 && imageInfo.m_height > config.maxHeight)
-                       ? (float)config.maxHeight / imageInfo.m_height
-                       : 1.0f;
-    scale = (scaleX < scaleY) ? scaleX : scaleY;
-    if (scale > 1.0f) scale = 1.0f;
+    float scaleX = (config.maxWidth > 0 && srcWidth > config.maxWidth) ? (float)config.maxWidth / srcWidth : 1.0f;
+    float scaleY = (config.maxHeight > 0 && srcHeight > config.maxHeight) ? (float)config.maxHeight / srcHeight : 1.0f;
+    targetScale = (scaleX < scaleY) ? scaleX : scaleY;
+    if (targetScale > 1.0f) targetScale = 1.0f;

-    destWidth = (int)(imageInfo.m_width * scale);
-    destHeight = (int)(imageInfo.m_height * scale);
+    destWidth = (int)(srcWidth * targetScale);
+    destHeight = (int)(srcHeight * targetScale);
  }

-  LOG_DBG("JPG", "JPEG %dx%d -> %dx%d (scale %.2f), scan type: %d, MCU: %dx%d", imageInfo.m_width, imageInfo.m_height,
-          destWidth, destHeight, scale, imageInfo.m_scanType, imageInfo.m_MCUWidth, imageInfo.m_MCUHeight);
+  // Choose JPEGDEC built-in scaling for coarse downscaling.
+  // Progressive JPEGs: JPEGDEC forces JPEG_SCALE_EIGHTH internally (DC-only
+  // decode produces 1/8 resolution). We must match this to avoid the if/else
+  // priority chain in DecodeJPEG selecting a different scale.
+  int jpegScaleOption;
+  int jpegScaleDenom;
+  if (isProgressive) {
+    jpegScaleOption = JPEG_SCALE_EIGHTH;
+    jpegScaleDenom = 8;
+  } else {
+    jpegScaleDenom = chooseJpegScale(targetScale, jpegScaleOption);
+  }

-  if (!imageInfo.m_pMCUBufR || !imageInfo.m_pMCUBufG || !imageInfo.m_pMCUBufB) {
-    LOG_ERR("JPG", "Null buffer pointers in imageInfo");
-    file.close();
+  ctx.scaledSrcWidth = (srcWidth + jpegScaleDenom - 1) / jpegScaleDenom;
+  ctx.scaledSrcHeight = (srcHeight + jpegScaleDenom - 1) / jpegScaleDenom;
+  ctx.dstWidth = destWidth;
+  ctx.dstHeight = destHeight;
+  ctx.fineScaleFP = (int32_t)((int64_t)destWidth * FP_ONE / ctx.scaledSrcWidth);
+  ctx.invScaleFP = (int32_t)((int64_t)ctx.scaledSrcWidth * FP_ONE / destWidth);
+
+  LOG_DBG("JPG", "JPEG %dx%d -> %dx%d (scale %.2f, jpegScale 1/%d, fineScale %.2f)%s", srcWidth, srcHeight, destWidth,
+          destHeight, targetScale, jpegScaleDenom, (float)destWidth / ctx.scaledSrcWidth,
+          isProgressive ? " [progressive]" : "");
+
+  // Set pixel type to 8-bit grayscale (must be after open())
+  jpeg->setPixelType(EIGHT_BIT_GRAYSCALE);
+  jpeg->setUserPointer(&ctx);
+
+  // Allocate cache buffer using final output dimensions
+  ctx.caching = !config.cachePath.empty();
+  if (ctx.caching) {
+    if (!ctx.cache.allocate(destWidth, destHeight, config.x, config.y)) {
+      LOG_ERR("JPG", "Failed to allocate cache buffer, continuing without caching");
+      ctx.caching = false;
+    }
+  }
+
+  unsigned long decodeStart = millis();
+  rc = jpeg->decode(0, 0, jpegScaleOption);
+  unsigned long decodeTime = millis() - decodeStart;
+
+  if (rc != 1) {
+    LOG_ERR("JPG", "Decode failed (rc=%d, lastError=%d)", rc, jpeg->getLastError());
+    jpeg->close();
+    delete jpeg;
    return false;
  }

-  const int screenWidth = renderer.getScreenWidth();
-  const int screenHeight = renderer.getScreenHeight();
-
-  // Allocate pixel cache if cachePath is provided
-  PixelCache cache;
-  bool caching = !config.cachePath.empty();
-  if (caching) {
-    if (!cache.allocate(destWidth, destHeight, config.x, config.y)) {
-      LOG_ERR("JPG", "Failed to allocate cache buffer, continuing without caching");
-      caching = false;
-    }
-  }
-
-  int mcuX = 0;
-  int mcuY = 0;
-
-  while (mcuY < imageInfo.m_MCUSPerCol) {
-    status = pjpeg_decode_mcu();
-    if (status == PJPG_NO_MORE_BLOCKS) {
-      break;
-    }
-    if (status != 0) {
-      LOG_ERR("JPG", "MCU decode failed: %d", status);
-      file.close();
-      return false;
-    }
-
-    // Source position in image coordinates
-    int srcStartX = mcuX * imageInfo.m_MCUWidth;
-    int srcStartY = mcuY * imageInfo.m_MCUHeight;
-
-    switch (imageInfo.m_scanType) {
-      case PJPG_GRAYSCALE:
-        for (int row = 0; row < 8; row++) {
-          int srcY = srcStartY + row;
-          int destY = config.y + (int)(srcY * scale);
-          if (destY >= screenHeight || destY >= config.y + destHeight) continue;
-          for (int col = 0; col < 8; col++) {
-            int srcX = srcStartX + col;
-            int destX = config.x + (int)(srcX * scale);
-            if (destX >= screenWidth || destX >= config.x + destWidth) continue;
-            uint8_t gray = imageInfo.m_pMCUBufR[row * 8 + col];
-            uint8_t dithered = config.useDithering ? applyBayerDither4Level(gray, destX, destY) : gray / 85;
-            if (dithered > 3) dithered = 3;
-            drawPixelWithRenderMode(renderer, destX, destY, dithered);
-            if (caching) cache.setPixel(destX, destY, dithered);
-          }
-        }
-        break;
-
-      case PJPG_YH1V1:
-        for (int row = 0; row < 8; row++) {
-          int srcY = srcStartY + row;
-          int destY = config.y + (int)(srcY * scale);
-          if (destY >= screenHeight || destY >= config.y + destHeight) continue;
-          for (int col = 0; col < 8; col++) {
-            int srcX = srcStartX + col;
-            int destX = config.x + (int)(srcX * scale);
-            if (destX >= screenWidth || destX >= config.x + destWidth) continue;
-            uint8_t r = imageInfo.m_pMCUBufR[row * 8 + col];
-            uint8_t g = imageInfo.m_pMCUBufG[row * 8 + col];
-            uint8_t b = imageInfo.m_pMCUBufB[row * 8 + col];
-            uint8_t gray = (uint8_t)((r * 77 + g * 150 + b * 29) >> 8);
-            uint8_t dithered = config.useDithering ? applyBayerDither4Level(gray, destX, destY) : gray / 85;
-            if (dithered > 3) dithered = 3;
-            drawPixelWithRenderMode(renderer, destX, destY, dithered);
-            if (caching) cache.setPixel(destX, destY, dithered);
-          }
-        }
-        break;
-
-      case PJPG_YH2V1:
-        for (int row = 0; row < 8; row++) {
-          int srcY = srcStartY + row;
-          int destY = config.y + (int)(srcY * scale);
-          if (destY >= screenHeight || destY >= config.y + destHeight) continue;
-          for (int col = 0; col < 16; col++) {
-            int srcX = srcStartX + col;
-            int destX = config.x + (int)(srcX * scale);
-            if (destX >= screenWidth || destX >= config.x + destWidth) continue;
-            int blockIndex = (col < 8) ? 0 : 1;
-            int pixelIndex = row * 8 + (col % 8);
-            uint8_t r = imageInfo.m_pMCUBufR[blockIndex * 64 + pixelIndex];
-            uint8_t g = imageInfo.m_pMCUBufG[blockIndex * 64 + pixelIndex];
-            uint8_t b = imageInfo.m_pMCUBufB[blockIndex * 64 + pixelIndex];
-            uint8_t gray = (uint8_t)((r * 77 + g * 150 + b * 29) >> 8);
-            uint8_t dithered = config.useDithering ? applyBayerDither4Level(gray, destX, destY) : gray / 85;
-            if (dithered > 3) dithered = 3;
-            drawPixelWithRenderMode(renderer, destX, destY, dithered);
-            if (caching) cache.setPixel(destX, destY, dithered);
-          }
-        }
-        break;
-
-      case PJPG_YH1V2:
-        for (int row = 0; row < 16; row++) {
-          int srcY = srcStartY + row;
-          int destY = config.y + (int)(srcY * scale);
-          if (destY >= screenHeight || destY >= config.y + destHeight) continue;
-          for (int col = 0; col < 8; col++) {
-            int srcX = srcStartX + col;
-            int destX = config.x + (int)(srcX * scale);
-            if (destX >= screenWidth || destX >= config.x + destWidth) continue;
-            int blockIndex = (row < 8) ? 0 : 1;
-            int pixelIndex = (row % 8) * 8 + col;
-            uint8_t r = imageInfo.m_pMCUBufR[blockIndex * 128 + pixelIndex];
-            uint8_t g = imageInfo.m_pMCUBufG[blockIndex * 128 + pixelIndex];
-            uint8_t b = imageInfo.m_pMCUBufB[blockIndex * 128 + pixelIndex];
-            uint8_t gray = (uint8_t)((r * 77 + g * 150 + b * 29) >> 8);
-            uint8_t dithered = config.useDithering ? applyBayerDither4Level(gray, destX, destY) : gray / 85;
-            if (dithered > 3) dithered = 3;
-            drawPixelWithRenderMode(renderer, destX, destY, dithered);
-            if (caching) cache.setPixel(destX, destY, dithered);
-          }
-        }
-        break;
-
-      case PJPG_YH2V2:
-        for (int row = 0; row < 16; row++) {
-          int srcY = srcStartY + row;
-          int destY = config.y + (int)(srcY * scale);
-          if (destY >= screenHeight || destY >= config.y + destHeight) continue;
-          for (int col = 0; col < 16; col++) {
-            int srcX = srcStartX + col;
-            int destX = config.x + (int)(srcX * scale);
-            if (destX >= screenWidth || destX >= config.x + destWidth) continue;
-            int blockX = (col < 8) ? 0 : 1;
-            int blockY = (row < 8) ? 0 : 1;
-            int blockIndex = blockY * 2 + blockX;
-            int pixelIndex = (row % 8) * 8 + (col % 8);
-            int blockOffset = blockIndex * 64;
-            uint8_t r = imageInfo.m_pMCUBufR[blockOffset + pixelIndex];
-            uint8_t g = imageInfo.m_pMCUBufG[blockOffset + pixelIndex];
-            uint8_t b = imageInfo.m_pMCUBufB[blockOffset + pixelIndex];
-            uint8_t gray = (uint8_t)((r * 77 + g * 150 + b * 29) >> 8);
-            uint8_t dithered = config.useDithering ? applyBayerDither4Level(gray, destX, destY) : gray / 85;
-            if (dithered > 3) dithered = 3;
-            drawPixelWithRenderMode(renderer, destX, destY, dithered);
-            if (caching) cache.setPixel(destX, destY, dithered);
-          }
-        }
-        break;
-    }
-
-    mcuX++;
-    if (mcuX >= imageInfo.m_MCUSPerRow) {
-      mcuX = 0;
-      mcuY++;
-    }
-  }
-
-  LOG_DBG("JPG", "Decoding complete");
-  file.close();
+  jpeg->close();
+  delete jpeg;
+  LOG_DBG("JPG", "JPEG decoding complete - render time: %lu ms", decodeTime);

  // Write cache file if caching was enabled
-  if (caching) {
-    cache.writeToFile(config.cachePath);
+  if (ctx.caching) {
+    ctx.cache.writeToFile(config.cachePath);
  }

  return true;
 }

-unsigned char JpegToFramebufferConverter::jpegReadCallback(unsigned char* pBuf, unsigned char buf_size,
-                                                           unsigned char* pBytes_actually_read, void* pCallback_data) {
-  JpegContext* context = reinterpret_cast<JpegContext*>(pCallback_data);
-
-  if (context->bufferPos >= context->bufferFilled) {
-    int readCount = context->file.read(context->buffer, sizeof(context->buffer));
-    if (readCount <= 0) {
-      *pBytes_actually_read = 0;
-      return 0;
-    }
-    context->bufferFilled = readCount;
-    context->bufferPos = 0;
-  }
-
-  unsigned int bytesAvailable = context->bufferFilled - context->bufferPos;
-  unsigned int bytesToCopy = (bytesAvailable < buf_size) ? bytesAvailable : buf_size;
-
-  memcpy(pBuf, &context->buffer[context->bufferPos], bytesToCopy);
-  context->bufferPos += bytesToCopy;
-  *pBytes_actually_read = bytesToCopy;
-
-  return 0;
-}
-
 bool JpegToFramebufferConverter::supportsFormat(const std::string& extension) {
  std::string ext = extension;
  for (auto& c : ext) {
--- a/lib/Epub/Epub/converters/JpegToFramebufferConverter.h
+++ b/lib/Epub/Epub/converters/JpegToFramebufferConverter.h
@@ -1,4 +1,5 @@
 #pragma once
+
 #include <stdint.h>

 #include <string>
@@ -17,8 +18,4 @@ class JpegToFramebufferConverter final : public ImageToFramebufferDecoder {

  static bool supportsFormat(const std::string& extension);
  const char* getFormatName() const override { return "JPEG"; }
-
- private:
-  static unsigned char jpegReadCallback(unsigned char* pBuf, unsigned char buf_size,
-                                        unsigned char* pBytes_actually_read, void* pCallback_data);
 };
--- a/platformio.ini
+++ b/platformio.ini
@@ -46,6 +46,7 @@ board_build.partitions = partitions.csv
 extra_scripts =
  pre:scripts/build_html.py
  pre:scripts/gen_i18n.py
+  pre:scripts/patch_jpegdec.py
  pre:scripts/git_branch.py

 ; Libraries
@@ -57,6 +58,7 @@ lib_deps =
  bblanchon/ArduinoJson @ 7.4.2
  ricmoo/QRCode @ 0.0.1
  bitbank2/PNGdec @ ^1.0.0
+  bitbank2/JPEGDEC @ ^1.8.0
  links2004/WebSockets @ 2.7.3

 [env:default]
--- a/scripts/patch_jpegdec.py
+++ b/scripts/patch_jpegdec.py
@@ -0,0 +1,117 @@
+"""
+PlatformIO pre-build script: patch JPEGDEC library for progressive JPEG support.
+
+Two patches are applied:
+
+1. JPEGMakeHuffTables: Skip AC Huffman table construction for progressive JPEGs.
+   JPEGDEC 1.8.x fails to open progressive JPEGs because JPEGMakeHuffTables()
+   cannot build AC tables with 11+-bit codes (the "slow tables" path is disabled).
+   Since progressive decode only uses DC coefficients, AC tables are not needed.
+
+2. JPEGDecodeMCU_P: Guard pMCU writes against MCU_SKIP (-8).
+   The non-progressive JPEGDecodeMCU checks `iMCU >= 0` before writing to pMCU,
+   but JPEGDecodeMCU_P does not.  When EIGHT_BIT_GRAYSCALE mode skips chroma
+   channels by passing MCU_SKIP, the unguarded write goes to a wild pointer
+   (sMCUs[0xFFFFF8]) and crashes.
+
+Both patches are applied idempotently so it is safe to run on every build.
+"""
+
+Import("env")
+import os
+
+def patch_jpegdec(env):
+    """Patch jpeg.inl in every env's JPEGDEC copy under the libdeps dir."""
+    libdeps_dir = env.subst("$PROJECT_LIBDEPS_DIR")  # honors custom libdeps_dir
+    if not os.path.isdir(libdeps_dir):
+        return
+    for env_dir in os.listdir(libdeps_dir):
+        jpeg_inl = os.path.join(libdeps_dir, env_dir, "JPEGDEC", "src", "jpeg.inl")
+        if os.path.isfile(jpeg_inl):
+            _apply_ac_table_patch(jpeg_inl)
+            _apply_mcu_skip_patch(jpeg_inl)
+
+def _apply_ac_table_patch(filepath):
+    """Add the progressive-JPEG early return ahead of the AC table builder.
+
+    No-op if the marker is already present; warns and leaves the file unchanged
+    if the anchor text is missing.
+    """
+    marker = "// CrossPoint patch: skip AC tables for progressive JPEG"
+    anchor = "    }\n    // now do AC components (up to 4 tables of 16-bit codes)"
+    replacement = "\n".join([
+        "    }",
+        "    " + marker,
+        "    // Progressive JPEG: only DC coefficients are decoded (first scan), so AC",
+        "    // Huffman tables are not needed.  Skip building them to avoid failing on",
+        "    // 11+-bit AC codes that the optimized table builder cannot handle.",
+        "    if (pJPEG->ucMode == 0xc2)",
+        "        return 1;",
+        "    // now do AC components (up to 4 tables of 16-bit codes)",
+    ])
+
+    with open(filepath, "r") as src:
+        text = src.read()
+    if marker in text:
+        return  # already patched
+    if anchor not in text:
+        print("WARNING: JPEGDEC AC table patch target not found in %s — library may have been updated" % filepath)
+        return
+
+    with open(filepath, "w") as dst:
+        dst.write(text.replace(anchor, replacement, 1))
+    print("Patched JPEGDEC: skip AC tables for progressive JPEG: %s" % filepath)
+
+def _apply_mcu_skip_patch(filepath):
+    """Guard the pMCU[0] stores in JPEGDecodeMCU_P against iMCU == MCU_SKIP.
+
+    No-op once the marker is present.  A missing DC anchor aborts with a warning;
+    a missing successive-approximation anchor is reported but does not abort.
+    """
+    MARKER = "// CrossPoint patch: guard pMCU write for MCU_SKIP"
+    with open(filepath, "r") as f:
+        content = f.read()
+    if MARKER in content:
+        return  # already patched
+
+    # Patch 1: Guard the unconditional pMCU[0] write in JPEGDecodeMCU_P.
+    # This is the DC coefficient store that crashes when iMCU = MCU_SKIP (-8).
+    OLD_DC = """\
+        pMCU[0] = (short)*iDCPredictor; // store in MCU[0]
+    }
+    // Now get the other 63 AC coefficients"""
+    NEW_DC = """\
+        """ + MARKER + """
+        if (iMCU >= 0)
+            pMCU[0] = (short)*iDCPredictor; // store in MCU[0]
+    }
+    // Now get the other 63 AC coefficients"""
+    if OLD_DC not in content:
+        print("WARNING: JPEGDEC MCU_SKIP patch target not found in %s — library may have been updated" % filepath)
+        return
+    content = content.replace(OLD_DC, NEW_DC, 1)
+
+    # Patch 2: Guard the successive approximation pMCU[0] write, taken on later
+    # scans (cApproxBitsHigh != 0).  We only decode the first scan; guard anyway.
+    OLD_SA = """\
+                pMCU[0] |= iPositive;
+            }
+            goto mcu_done; // that's it"""
+    NEW_SA = """\
+                if (iMCU >= 0)
+                    pMCU[0] |= iPositive;
+            }
+            goto mcu_done; // that's it"""
+    if OLD_SA in content:
+        content = content.replace(OLD_SA, NEW_SA, 1)
+    else:
+        print("WARNING: JPEGDEC successive-approximation MCU_SKIP patch target not found in %s — only the DC write was guarded" % filepath)
+
+    with open(filepath, "w") as f:
+        f.write(content)
+    print("Patched JPEGDEC: guard pMCU writes for MCU_SKIP in JPEGDecodeMCU_P: %s" % filepath)
+
+# Patch at script load time: a pre: script runs before any source is compiled.
+# Do not move this into env.AddPreAction("buildprog", ...) — that hook fires at
+# the link step, after the library has already been compiled unpatched.
+patch_jpegdec(env)