perf: optimize drawPixel() (#748)

## Summary

Ref https://github.com/crosspoint-reader/crosspoint-reader/pull/737

This PR further reduce ~25ms from rendering time, testing inside the
Setting screen:

```
master:
[68440] [GFX] Time = 73 ms from clearScreen to displayBuffer

PR:
[97806] [GFX] Time = 47 ms from clearScreen to displayBuffer
```

And in extreme case (fill the entire screen with black or gray color):

```
master:
[1125] [   ] Test fillRectDither drawn in 327 ms
[1347] [   ] Test fillRect drawn in 222 ms

PR:
[1334] [   ] Test fillRectDither drawn in 225 ms
[1455] [   ] Test fillRect drawn in 121 ms
```

Note that
https://github.com/crosspoint-reader/crosspoint-reader/pull/737 is NOT
applied on top of this PR. But with 2 of them combined, it should reduce
from 47ms --> 42ms

## Details

This PR based on the fact that function calls are costly if the function
is small enough. For example, this simple call:

```
  int rotatedX = 0;
  int rotatedY = 0;
  rotateCoordinates(x, y, &rotatedX, &rotatedY);
```

Generated assembly code:

<img width="771" height="215" alt="image"
src="https://github.com/user-attachments/assets/37991659-3304-41c3-a3b2-fb967da53f82"
/>

This adds ~10 instructions just to prepare the registers prior to the
function call, plus some more instructions for the function's
epilogue/prologue. Inlining it removing all of these:

<img width="1471" height="832" alt="image"
src="https://github.com/user-attachments/assets/b67a22ee-93ba-4017-88ed-c973e28ec914"
/>

Of course, this optimization is not magic. It's only beneficial under 3
conditions:
- The function is small, not in size, but in terms of effective
instructions. For example, the `rotateCoordinates` is simply a jump
table, where each branch is just 3-4 inst
- The function has multiple input arguments, which requires some move to
put it onto the correct place
- The function is called very frequently (i.e. critical path)

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? **NO**
This commit is contained in:
Xuan-Son Nguyen
2026-02-08 19:05:42 +01:00
committed by Dave Allie
parent 4f0a3aa4dd
commit 6909f127b4
3 changed files with 52 additions and 55 deletions

View File

@@ -2,61 +2,68 @@
#include <Utf8.h>
void GfxRenderer::begin() {
frameBuffer = display.getFrameBuffer();
if (!frameBuffer) {
Serial.printf("[%lu] [GFX] !! No framebuffer\n", millis());
assert(false);
}
}
void GfxRenderer::insertFont(const int fontId, EpdFontFamily font) { fontMap.insert({fontId, font}); }
void GfxRenderer::rotateCoordinates(const int x, const int y, int* rotatedX, int* rotatedY) const {
// Translate logical (x,y) coordinates to physical panel coordinates based on current orientation
// This should always be inlined for better performance
static inline void rotateCoordinates(const GfxRenderer::Orientation orientation, const int x, const int y, int* phyX,
int* phyY) {
switch (orientation) {
case Portrait: {
case GfxRenderer::Portrait: {
// Logical portrait (480x800) → panel (800x480)
// Rotation: 90 degrees clockwise
*rotatedX = y;
*rotatedY = HalDisplay::DISPLAY_HEIGHT - 1 - x;
*phyX = y;
*phyY = HalDisplay::DISPLAY_HEIGHT - 1 - x;
break;
}
case LandscapeClockwise: {
case GfxRenderer::LandscapeClockwise: {
// Logical landscape (800x480) rotated 180 degrees (swap top/bottom and left/right)
*rotatedX = HalDisplay::DISPLAY_WIDTH - 1 - x;
*rotatedY = HalDisplay::DISPLAY_HEIGHT - 1 - y;
*phyX = HalDisplay::DISPLAY_WIDTH - 1 - x;
*phyY = HalDisplay::DISPLAY_HEIGHT - 1 - y;
break;
}
case PortraitInverted: {
case GfxRenderer::PortraitInverted: {
// Logical portrait (480x800) → panel (800x480)
// Rotation: 90 degrees counter-clockwise
*rotatedX = HalDisplay::DISPLAY_WIDTH - 1 - y;
*rotatedY = x;
*phyX = HalDisplay::DISPLAY_WIDTH - 1 - y;
*phyY = x;
break;
}
case LandscapeCounterClockwise: {
case GfxRenderer::LandscapeCounterClockwise: {
// Logical landscape (800x480) aligned with panel orientation
*rotatedX = x;
*rotatedY = y;
*phyX = x;
*phyY = y;
break;
}
}
}
// IMPORTANT: This function is in critical rendering path and is called for every pixel. Please keep it as simple and
// efficient as possible.
void GfxRenderer::drawPixel(const int x, const int y, const bool state) const {
uint8_t* frameBuffer = display.getFrameBuffer();
int phyX = 0;
int phyY = 0;
// Early return if no framebuffer is set
if (!frameBuffer) {
Serial.printf("[%lu] [GFX] !! No framebuffer\n", millis());
return;
}
int rotatedX = 0;
int rotatedY = 0;
rotateCoordinates(x, y, &rotatedX, &rotatedY);
// Note: this call should be inlined for better performance
rotateCoordinates(orientation, x, y, &phyX, &phyY);
// Bounds checking against physical panel dimensions
if (rotatedX < 0 || rotatedX >= HalDisplay::DISPLAY_WIDTH || rotatedY < 0 || rotatedY >= HalDisplay::DISPLAY_HEIGHT) {
Serial.printf("[%lu] [GFX] !! Outside range (%d, %d) -> (%d, %d)\n", millis(), x, y, rotatedX, rotatedY);
if (phyX < 0 || phyX >= HalDisplay::DISPLAY_WIDTH || phyY < 0 || phyY >= HalDisplay::DISPLAY_HEIGHT) {
Serial.printf("[%lu] [GFX] !! Outside range (%d, %d) -> (%d, %d)\n", millis(), x, y, phyX, phyY);
return;
}
// Calculate byte position and bit position
const uint16_t byteIndex = rotatedY * HalDisplay::DISPLAY_WIDTH_BYTES + (rotatedX / 8);
const uint8_t bitPosition = 7 - (rotatedX % 8); // MSB first
const uint16_t byteIndex = phyY * HalDisplay::DISPLAY_WIDTH_BYTES + (phyX / 8);
const uint8_t bitPosition = 7 - (phyX % 8); // MSB first
if (state) {
frameBuffer[byteIndex] &= ~(1 << bitPosition); // Clear bit
@@ -376,7 +383,7 @@ void GfxRenderer::fillRoundedRect(const int x, const int y, const int width, con
void GfxRenderer::drawImage(const uint8_t bitmap[], const int x, const int y, const int width, const int height) const {
int rotatedX = 0;
int rotatedY = 0;
rotateCoordinates(x, y, &rotatedX, &rotatedY);
rotateCoordinates(orientation, x, y, &rotatedX, &rotatedY);
// Rotate origin corner
switch (orientation) {
case Portrait:
@@ -632,20 +639,23 @@ void GfxRenderer::fillPolygon(const int* xPoints, const int* yPoints, int numPoi
free(nodeX);
}
void GfxRenderer::clearScreen(const uint8_t color) const { display.clearScreen(color); }
// For performance measurement (using static to allow "const" methods)
static unsigned long start_ms = 0;
void GfxRenderer::clearScreen(const uint8_t color) const {
start_ms = millis();
display.clearScreen(color);
}
void GfxRenderer::invertScreen() const {
uint8_t* buffer = display.getFrameBuffer();
if (!buffer) {
Serial.printf("[%lu] [GFX] !! No framebuffer in invertScreen\n", millis());
return;
}
for (int i = 0; i < HalDisplay::BUFFER_SIZE; i++) {
buffer[i] = ~buffer[i];
frameBuffer[i] = ~frameBuffer[i];
}
}
void GfxRenderer::displayBuffer(const HalDisplay::RefreshMode refreshMode) const {
auto elapsed = millis() - start_ms;
Serial.printf("[%lu] [GFX] Time = %lu ms from clearScreen to displayBuffer\n", millis(), elapsed);
display.displayBuffer(refreshMode, fadingFix);
}
@@ -829,16 +839,16 @@ void GfxRenderer::drawTextRotated90CW(const int fontId, const int x, const int y
}
}
uint8_t* GfxRenderer::getFrameBuffer() const { return display.getFrameBuffer(); }
uint8_t* GfxRenderer::getFrameBuffer() const { return frameBuffer; }
size_t GfxRenderer::getBufferSize() { return HalDisplay::BUFFER_SIZE; }
// unused
// void GfxRenderer::grayscaleRevert() const { display.grayscaleRevert(); }
void GfxRenderer::copyGrayscaleLsbBuffers() const { display.copyGrayscaleLsbBuffers(display.getFrameBuffer()); }
void GfxRenderer::copyGrayscaleLsbBuffers() const { display.copyGrayscaleLsbBuffers(frameBuffer); }
void GfxRenderer::copyGrayscaleMsbBuffers() const { display.copyGrayscaleMsbBuffers(display.getFrameBuffer()); }
void GfxRenderer::copyGrayscaleMsbBuffers() const { display.copyGrayscaleMsbBuffers(frameBuffer); }
void GfxRenderer::displayGrayBuffer() const { display.displayGrayBuffer(fadingFix); }
@@ -858,12 +868,6 @@ void GfxRenderer::freeBwBufferChunks() {
* Returns true if buffer was stored successfully, false if allocation failed.
*/
bool GfxRenderer::storeBwBuffer() {
const uint8_t* frameBuffer = display.getFrameBuffer();
if (!frameBuffer) {
Serial.printf("[%lu] [GFX] !! No framebuffer in storeBwBuffer\n", millis());
return false;
}
// Allocate and copy each chunk
for (size_t i = 0; i < BW_BUFFER_NUM_CHUNKS; i++) {
// Check if any chunks are already allocated
@@ -913,13 +917,6 @@ void GfxRenderer::restoreBwBuffer() {
return;
}
uint8_t* frameBuffer = display.getFrameBuffer();
if (!frameBuffer) {
Serial.printf("[%lu] [GFX] !! No framebuffer in restoreBwBuffer\n", millis());
freeBwBufferChunks();
return;
}
for (size_t i = 0; i < BW_BUFFER_NUM_CHUNKS; i++) {
// Check if chunk is missing
if (!bwBufferChunks[i]) {
@@ -943,7 +940,6 @@ void GfxRenderer::restoreBwBuffer() {
* Use this when BW buffer was re-rendered instead of stored/restored.
*/
void GfxRenderer::cleanupGrayscaleWithFrameBuffer() const {
uint8_t* frameBuffer = display.getFrameBuffer();
if (frameBuffer) {
display.cleanupGrayscaleBuffers(frameBuffer);
}

View File

@@ -33,12 +33,12 @@ class GfxRenderer {
RenderMode renderMode;
Orientation orientation;
bool fadingFix;
uint8_t* frameBuffer = nullptr;
uint8_t* bwBufferChunks[BW_BUFFER_NUM_CHUNKS] = {nullptr};
std::map<int, EpdFontFamily> fontMap;
void renderChar(const EpdFontFamily& fontFamily, uint32_t cp, int* x, const int* y, bool pixelState,
EpdFontFamily::Style style) const;
void freeBwBufferChunks();
void rotateCoordinates(int x, int y, int* rotatedX, int* rotatedY) const;
template <Color color>
void drawPixelDither(int x, int y) const;
template <Color color>
@@ -55,6 +55,7 @@ class GfxRenderer {
static constexpr int VIEWABLE_MARGIN_LEFT = 3;
// Setup
void begin(); // must be called right after display.begin()
void insertFont(int fontId, EpdFontFamily font);
// Orientation control (affects logical width/height and coordinate transforms)
@@ -72,6 +73,7 @@ class GfxRenderer {
// void displayWindow(int x, int y, int width, int height) const;
void invertScreen() const;
void clearScreen(uint8_t color = 0xFF) const;
void getOrientedViewableTRBL(int* outTop, int* outRight, int* outBottom, int* outLeft) const;
// Drawing
void drawPixel(int x, int y, bool state = true) const;
@@ -125,6 +127,4 @@ class GfxRenderer {
// Low level functions
uint8_t* getFrameBuffer() const;
static size_t getBufferSize();
void grayscaleRevert() const;
void getOrientedViewableTRBL(int* outTop, int* outRight, int* outBottom, int* outLeft) const;
};

View File

@@ -253,6 +253,7 @@ void onGoHome() {
void setupDisplayAndFonts() {
display.begin();
renderer.begin();
Serial.printf("[%lu] [ ] Display initialized\n", millis());
renderer.insertFont(BOOKERLY_14_FONT_ID, bookerly14FontFamily);
#ifndef OMIT_FONTS