diff --git a/lib/Epub/Epub/blocks/ImageBlock.cpp b/lib/Epub/Epub/blocks/ImageBlock.cpp
index b95dfb93..9a958d11 100644
--- a/lib/Epub/Epub/blocks/ImageBlock.cpp
+++ b/lib/Epub/Epub/blocks/ImageBlock.cpp
@@ -4,7 +4,7 @@
 #include
 #include
 
-#include "../converters/DitherUtils.h"
+#include "../converters/DirectPixelWriter.h"
 #include "../converters/ImageDecoderFactory.h"
 
 // Cache file format:
@@ -66,6 +66,9 @@ bool renderFromCache(GfxRenderer& renderer, const std::string& cachePath, int x,
     return false;
   }
 
+  DirectPixelWriter pw;
+  pw.init(renderer);
+
   for (int row = 0; row < cachedHeight; row++) {
     if (cacheFile.read(rowBuffer, bytesPerRow) != bytesPerRow) {
       LOG_ERR("IMG", "Cache read error at row %d", row);
@@ -74,13 +77,14 @@ bool renderFromCache(GfxRenderer& renderer, const std::string& cachePath, int x,
       return false;
     }
 
-    int destY = y + row;
+    const int destY = y + row;
+    pw.beginRow(destY);
     for (int col = 0; col < cachedWidth; col++) {
-      int byteIdx = col / 4;
-      int bitShift = 6 - (col % 4) * 2;  // MSB first within byte
+      const int byteIdx = col >> 2;            // col / 4
+      const int bitShift = 6 - (col & 3) * 2;  // MSB first within byte
       uint8_t pixelValue = (rowBuffer[byteIdx] >> bitShift) & 0x03;
 
-      drawPixelWithRenderMode(renderer, x + col, destY, pixelValue);
+      pw.writePixel(x + col, pixelValue);
     }
   }
 
diff --git a/lib/Epub/Epub/converters/DirectPixelWriter.h b/lib/Epub/Epub/converters/DirectPixelWriter.h
new file mode 100644
index 00000000..3c742871
--- /dev/null
+++ b/lib/Epub/Epub/converters/DirectPixelWriter.h
@@ -0,0 +1,156 @@
+#pragma once
+
+#include
+#include
+#include
+
+// Direct framebuffer writer that removes per-pixel call overhead from the image
+// rendering hot path. init() collapses the orientation into linear coefficients
+// and snapshots the render mode; beginRow() folds in the Y terms, so writePixel()
+// is left with a render-mode switch, two multiply-adds and one masked byte update.
+//
+// Caller is responsible for ensuring (outX, outY) are within screen bounds.
+// ImageBlock::render() already validates this before entering the pixel loop,
+// and the JPEG/PNG callbacks pre-clamp destination ranges to screen bounds.
+struct DirectPixelWriter {
+  uint8_t* fb = nullptr;
+  GfxRenderer::RenderMode mode = GfxRenderer::BW;
+
+  // Orientation is collapsed into a linear transform:
+  //   phyX = phyXBase + x * phyXStepX + y * phyXStepY
+  //   phyY = phyYBase + x * phyYStepX + y * phyYStepY
+  int phyXBase = 0, phyYBase = 0;
+  int phyXStepX = 0, phyYStepX = 0;  // per logical-X step
+  int phyXStepY = 0, phyYStepY = 0;  // per logical-Y step
+
+  // Row-precomputed: the Y-dependent portion of the physical coords
+  int rowPhyXBase = 0, rowPhyYBase = 0;
+
+  void init(GfxRenderer& renderer) {
+    fb = renderer.getFrameBuffer();
+    mode = renderer.getRenderMode();
+
+    switch (renderer.getOrientation()) {
+      case GfxRenderer::Portrait:
+        // phyX = y, phyY = (DISPLAY_HEIGHT-1) - x
+        phyXBase = 0;
+        phyYBase = HalDisplay::DISPLAY_HEIGHT - 1;
+        phyXStepX = 0;
+        phyYStepX = -1;
+        phyXStepY = 1;
+        phyYStepY = 0;
+        break;
+      case GfxRenderer::LandscapeClockwise:
+        // phyX = (DISPLAY_WIDTH-1) - x, phyY = (DISPLAY_HEIGHT-1) - y
+        phyXBase = HalDisplay::DISPLAY_WIDTH - 1;
+        phyYBase = HalDisplay::DISPLAY_HEIGHT - 1;
+        phyXStepX = -1;
+        phyYStepX = 0;
+        phyXStepY = 0;
+        phyYStepY = -1;
+        break;
+      case GfxRenderer::PortraitInverted:
+        // phyX = (DISPLAY_WIDTH-1) - y, phyY = x
+        phyXBase = HalDisplay::DISPLAY_WIDTH - 1;
+        phyYBase = 0;
+        phyXStepX = 0;
+        phyYStepX = 1;
+        phyXStepY = -1;
+        phyYStepY = 0;
+        break;
+      case GfxRenderer::LandscapeCounterClockwise:
+        // phyX = x, phyY = y
+        phyXBase = 0;
+        phyYBase = 0;
+        phyXStepX = 1;
+        phyYStepX = 0;
+        phyXStepY = 0;
+        phyYStepY = 1;
+        break;
+      default:
+        // Fallback to LandscapeCounterClockwise (identity transform)
+        phyXBase = 0;
+        phyYBase = 0;
+        phyXStepX = 1;
+        phyYStepX = 0;
+        phyXStepY = 0;
+        phyYStepY = 1;
+        break;
+    }
+  }
+
+  // Call once per row before the column loop.
+  // Pre-computes the Y-dependent portion so writePixel() only needs the X part.
+  inline void beginRow(int logicalY) {
+    rowPhyXBase = phyXBase + logicalY * phyXStepY;
+    rowPhyYBase = phyYBase + logicalY * phyYStepY;
+  }
+
+  // Write a single 2-bit dithered pixel value to the framebuffer.
+  // Must be called after beginRow() for the current row.
+  // No bounds checking — caller guarantees coordinates are valid.
+  inline void writePixel(int logicalX, uint8_t pixelValue) const {
+    // Determine whether to draw based on render mode
+    bool draw;
+    bool state;
+    switch (mode) {
+      case GfxRenderer::BW:
+        draw = (pixelValue < 3);
+        state = true;
+        break;
+      case GfxRenderer::GRAYSCALE_MSB:
+        draw = (pixelValue == 1 || pixelValue == 2);
+        state = false;
+        break;
+      case GfxRenderer::GRAYSCALE_LSB:
+        draw = (pixelValue == 1);
+        state = false;
+        break;
+      default:
+        return;
+    }
+
+    if (!draw) return;
+
+    const int phyX = rowPhyXBase + logicalX * phyXStepX;
+    const int phyY = rowPhyYBase + logicalX * phyYStepX;
+
+    const uint32_t byteIndex = phyY * HalDisplay::DISPLAY_WIDTH_BYTES + (phyX >> 3);
+    const uint8_t bitMask = 1 << (7 - (phyX & 7));
+
+    if (state) {
+      fb[byteIndex] &= ~bitMask;  // Clear bit (draw black)
+    } else {
+      fb[byteIndex] |= bitMask;  // Set bit (draw white)
+    }
+  }
+};
+
+// Direct cache writer that eliminates per-pixel overhead from PixelCache::setPixel().
+// Pre-computes row pointer so the inner loop is just byte index + bit manipulation.
+//
+// Caller guarantees coordinates are within cache bounds.
+struct DirectCacheWriter {
+  uint8_t* buffer = nullptr;
+  int bytesPerRow = 0;
+  int originX = 0;
+  uint8_t* rowPtr = nullptr;  // Pre-computed for current row
+
+  void init(uint8_t* cacheBuffer, int cacheBytesPerRow, int cacheOriginX) {
+    buffer = cacheBuffer;
+    bytesPerRow = cacheBytesPerRow;
+    originX = cacheOriginX;
+    rowPtr = nullptr;
+  }
+
+  // Call once per row before the column loop.
+  inline void beginRow(int screenY, int cacheOriginY) { rowPtr = buffer + (screenY - cacheOriginY) * bytesPerRow; }
+
+  // Write a 2-bit pixel value. No bounds checking.
+  inline void writePixel(int screenX, uint8_t value) const {
+    const int localX = screenX - originX;
+    const int byteIdx = localX >> 2;            // localX / 4
+    const int bitShift = 6 - (localX & 3) * 2;  // MSB first: pixel 0 at bits 6-7
+    rowPtr[byteIdx] = (rowPtr[byteIdx] & ~(0x03 << bitShift)) | ((value & 0x03) << bitShift);
+  }
+};
diff --git a/lib/Epub/Epub/converters/DitherUtils.h b/lib/Epub/Epub/converters/DitherUtils.h
index ec14a332..ec63a768 100644
--- a/lib/Epub/Epub/converters/DitherUtils.h
+++ b/lib/Epub/Epub/converters/DitherUtils.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include
 #include
 
 // 4x4 Bayer matrix for ordered dithering
@@ -26,15 +25,3 @@ inline uint8_t applyBayerDither4Level(uint8_t gray, int x, int y) {
   if (adjusted < 192) return 2;
   return 3;
 }
-
-// Draw a pixel respecting the current render mode for grayscale support
-inline void drawPixelWithRenderMode(GfxRenderer& renderer, int x, int y, uint8_t pixelValue) {
-  GfxRenderer::RenderMode renderMode = renderer.getRenderMode();
-  if (renderMode == GfxRenderer::BW && pixelValue < 3) {
-    renderer.drawPixel(x, y, true);
-  } else if (renderMode == GfxRenderer::GRAYSCALE_MSB && (pixelValue == 1 || pixelValue == 2)) {
-    renderer.drawPixel(x, y, false);
-  } else if (renderMode == GfxRenderer::GRAYSCALE_LSB && pixelValue == 1) {
-    renderer.drawPixel(x, y, false);
-  }
-}
diff --git a/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp b/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
index 52aa6aec..83e6b547 100644
--- a/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
+++ b/lib/Epub/Epub/converters/JpegToFramebufferConverter.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 
+#include "DirectPixelWriter.h"
 #include "DitherUtils.h"
 #include "PixelCache.h"
 
@@ -167,10 +168,21 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
 
   if (dstYStart >= dstYEnd || dstXStart >= dstXEnd) return 1;
 
+  // Pre-compute orientation and render-mode state once per callback invocation
+  DirectPixelWriter pw;
+  pw.init(renderer);
+
+  DirectCacheWriter cw;
+  if (caching) {
+    cw.init(ctx->cache.buffer, ctx->cache.bytesPerRow, ctx->cache.originX);
+  }
+
   // === 1:1 fast path: no scaling math ===
   if (fineScaleFP == FP_ONE) {
     for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
       const int outY = cfgY + dstY;
+      pw.beginRow(outY);
+      if (caching) cw.beginRow(outY, ctx->config->y);
       const uint8_t* row = &pixels[(dstY - blockY) * stride];
       for (int dstX = dstXStart; dstX < dstXEnd; dstX++) {
         const int outX = cfgX + dstX;
@@ -182,8 +194,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
           dithered = gray / 85;
           if (dithered > 3) dithered = 3;
         }
-        drawPixelWithRenderMode(renderer, outX, outY, dithered);
-        if (caching) ctx->cache.setPixel(outX, outY, dithered);
+        pw.writePixel(outX, dithered);
+        if (caching) cw.writePixel(outX, dithered);
       }
     }
     return 1;
@@ -203,6 +215,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
 
     for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
       const int outY = cfgY + dstY;
+      pw.beginRow(outY);
+      if (caching) cw.beginRow(outY, ctx->config->y);
       const int32_t srcFyFP = dstY * invScaleFP;
       const int32_t fy = srcFyFP & FP_MASK;
       const int32_t fyInv = FP_ONE - fy;
@@ -239,8 +253,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
           dithered = gray / 85;
           if (dithered > 3) dithered = 3;
         }
-        drawPixelWithRenderMode(renderer, outX, outY, dithered);
-        if (caching) ctx->cache.setPixel(outX, outY, dithered);
+        pw.writePixel(outX, dithered);
+        if (caching) cw.writePixel(outX, dithered);
       }
 
       // Interior (no X boundary checks — lx0 and lx0+1 guaranteed in bounds)
@@ -262,8 +276,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
           dithered = gray / 85;
           if (dithered > 3) dithered = 3;
         }
-        drawPixelWithRenderMode(renderer, outX, outY, dithered);
-        if (caching) ctx->cache.setPixel(outX, outY, dithered);
+        pw.writePixel(outX, dithered);
+        if (caching) cw.writePixel(outX, dithered);
       }
 
       // Right edge (with X boundary clamping)
@@ -288,8 +302,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
           dithered = gray / 85;
           if (dithered > 3) dithered = 3;
         }
-        drawPixelWithRenderMode(renderer, outX, outY, dithered);
-        if (caching) ctx->cache.setPixel(outX, outY, dithered);
+        pw.writePixel(outX, dithered);
+        if (caching) cw.writePixel(outX, dithered);
       }
     }
     return 1;
@@ -298,6 +312,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
   // === Nearest-neighbor (downscale: fineScale < 1.0) ===
   for (int dstY = dstYStart; dstY < dstYEnd; dstY++) {
     const int outY = cfgY + dstY;
+    pw.beginRow(outY);
+    if (caching) cw.beginRow(outY, ctx->config->y);
     const int32_t srcFyFP = dstY * invScaleFP;
     int ly = (srcFyFP >> FP_SHIFT) - blockY;
     if (ly < 0) ly = 0;
@@ -319,8 +335,8 @@ int jpegDrawCallback(JPEGDRAW* pDraw) {
         dithered = gray / 85;
         if (dithered > 3) dithered = 3;
       }
-      drawPixelWithRenderMode(renderer, outX, outY, dithered);
-      if (caching) ctx->cache.setPixel(outX, outY, dithered);
+      pw.writePixel(outX, dithered);
+      if (caching) cw.writePixel(outX, dithered);
     }
   }
 
diff --git a/lib/Epub/Epub/converters/PngToFramebufferConverter.cpp b/lib/Epub/Epub/converters/PngToFramebufferConverter.cpp
index 6ba1bcdf..c80cb23a 100644
--- a/lib/Epub/Epub/converters/PngToFramebufferConverter.cpp
+++ b/lib/Epub/Epub/converters/PngToFramebufferConverter.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 
+#include "DirectPixelWriter.h"
 #include "DitherUtils.h"
 #include "PixelCache.h"
 
@@ -207,6 +208,17 @@ int pngDrawCallback(PNGDRAW* pDraw) {
   bool useDithering = ctx->config->useDithering;
   bool caching = ctx->caching;
 
+  // Pre-compute orientation and render-mode state once per row
+  DirectPixelWriter pw;
+  pw.init(*ctx->renderer);
+  pw.beginRow(outY);
+
+  DirectCacheWriter cw;
+  if (caching) {
+    cw.init(ctx->cache.buffer, ctx->cache.bytesPerRow, ctx->cache.originX);
+    cw.beginRow(outY, ctx->config->y);
+  }
+
   int srcX = 0;
   int error = 0;
 
@@ -222,8 +234,8 @@ int pngDrawCallback(PNGDRAW* pDraw) {
         ditheredGray = gray / 85;
         if (ditheredGray > 3) ditheredGray = 3;
       }
-      drawPixelWithRenderMode(*ctx->renderer, outX, outY, ditheredGray);
-      if (caching) ctx->cache.setPixel(outX, outY, ditheredGray);
+      pw.writePixel(outX, ditheredGray);
+      if (caching) cw.writePixel(outX, ditheredGray);
     }
 
     // Bresenham-style stepping: advance srcX based on ratio srcWidth/dstWidth
@@ -356,10 +368,18 @@ bool PngToFramebufferConverter::decodeToFramebuffer(const std::string& imagePath
     return false;
   }
 
-  // Allocate cache buffer using SCALED dimensions
+  // Allocate cache buffer using SCALED dimensions.
+  // PNG decode is fast enough (~135ms for 400x600) that caching provides minimal benefit
+  // for larger images, while the cache buffer competes with the 44KB PNG decoder for heap.
+  // Skip caching when the buffer would exceed the framebuffer size (48KB).
+  static constexpr size_t PNG_MAX_CACHE_BYTES = 48000;
   ctx.caching = !config.cachePath.empty();
   if (ctx.caching) {
-    if (!ctx.cache.allocate(ctx.dstWidth, ctx.dstHeight, config.x, config.y)) {
+    const size_t cacheSize = static_cast<size_t>((ctx.dstWidth + 3) / 4) * static_cast<size_t>(ctx.dstHeight);
+    if (cacheSize > PNG_MAX_CACHE_BYTES) {
+      LOG_DBG("PNG", "Skipping cache: %zu bytes exceeds PNG limit (%zu)", cacheSize, PNG_MAX_CACHE_BYTES);
+      ctx.caching = false;
+    } else if (!ctx.cache.allocate(ctx.dstWidth, ctx.dstHeight, config.x, config.y)) {
       LOG_ERR("PNG", "Failed to allocate cache buffer, continuing without caching");
       ctx.caching = false;
     }