## Summary **What is the goal of this PR?** Compress reader font bitmaps to reduce flash usage by 30.7%. **What changes are included?** - New `EpdFontGroup` struct and extended `EpdFontData` with `groups`/`groupCount` fields - `--compress` flag in `fontconvert.py`: groups glyphs (ASCII base group + groups of 8) and compresses each with raw DEFLATE - `FontDecompressor` class with 4-slot LRU cache for on-demand decompression during rendering - `GfxRenderer` transparently routes bitmap access through `getGlyphBitmap()` (compressed or direct flash) - Uses `uzlib` for decompression with minimal heap overhead. - 48 reader fonts (Bookerly, NotoSans 12-18pt, OpenDyslexic) regenerated with compression; 5 UI fonts unchanged - Round-trip verification script (`verify_compression.py`) runs as part of font generation ## Additional Context ## Flash & RAM | | baseline | font-compression | Difference | |--|--------|-----------------|------------| | Flash (ELF) | 6,302,476 B (96.2%) | 4,365,022 B (66.6%) | -1,937,454 B (-30.7%) | | firmware.bin | 6,468,192 B | 4,531,008 B | -1,937,184 B (-29.9%) | | RAM | 101,700 B (31.0%) | 103,076 B (31.5%) | +1,376 B (+0.5%) | ## Script-Based Grouping (Cold Cache) Comparison of uncompressed baseline vs script-based group compression (4-slot LRU cache, cleared each page). Glyphs are grouped by Unicode block (ASCII, Latin-1, Latin Extended-A, Combining Marks, Cyrillic, General Punctuation, etc.) instead of sequential groups of 8. ### Render Time | | Baseline | Compressed (cold cache) | Difference | |---|---|---|---| | **Median** | 414.9 ms | 431.6 ms | +16.7 ms (+4.0%) | | **Pages** | 37 | 37 | | ### Memory Usage | | Baseline | Compressed (cold cache) | Difference | |---|---|---|---| | **Heap free (median)** | 187.0 KB | 176.3 KB | -10.7 KB | | **Heap free (min)** | 186.0 KB | 166.5 KB | -19.5 KB | | **Largest block (median)** | 148.0 KB | 128.0 KB | -20.0 KB | | **Largest block (min)** | 148.0 KB | 120.0 KB | -28.0 KB | ### Cache Effectiveness | | Misses/page | Hit rate | |---|---|---| | **Compressed (cold cache)** | 2.1 | 99.85% | ------ ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? _**YES**_ Implementation was done by Claude Code (Opus 4.6) based on a plan developed collaboratively. All generated font headers were verified with an automated round-trip decompression test. The firmware was compiled successfully but has not yet been tested on-device. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
420 lines
14 KiB
Python
Executable File
420 lines
14 KiB
Python
Executable File
#!python3
|
||
import freetype
|
||
import zlib
|
||
import sys
|
||
import re
|
||
import math
|
||
import argparse
|
||
from collections import namedtuple
|
||
|
||
# Originally from https://github.com/vroland/epdiy
|
||
|
||
parser = argparse.ArgumentParser(description="Generate a header file from a font to be used with epdiy.")
|
||
parser.add_argument("name", action="store", help="name of the font.")
|
||
parser.add_argument("size", type=int, help="font size to use.")
|
||
parser.add_argument("fontstack", action="store", nargs='+', help="list of font files, ordered by descending priority.")
|
||
parser.add_argument("--2bit", dest="is2Bit", action="store_true", help="generate 2-bit greyscale bitmap instead of 1-bit black and white.")
|
||
parser.add_argument("--additional-intervals", dest="additional_intervals", action="append", help="Additional code point intervals to export as min,max. This argument can be repeated.")
|
||
parser.add_argument("--compress", dest="compress", action="store_true", help="Compress glyph bitmaps using DEFLATE with group-based compression.")
|
||
args = parser.parse_args()
|
||
|
||
GlyphProps = namedtuple("GlyphProps", ["width", "height", "advance_x", "left", "top", "data_length", "data_offset", "code_point"])
|
||
|
||
font_stack = [freetype.Face(f) for f in args.fontstack]
|
||
is2Bit = args.is2Bit
|
||
size = args.size
|
||
font_name = args.name
|
||
|
||
# inclusive unicode code point intervals
|
||
# must not overlap and be in ascending order
|
||
intervals = [
|
||
### Basic Latin ###
|
||
# ASCII letters, digits, punctuation, control characters
|
||
(0x0000, 0x007F),
|
||
### Latin-1 Supplement ###
|
||
# Accented characters for Western European languages
|
||
(0x0080, 0x00FF),
|
||
### Latin Extended-A ###
|
||
# Eastern European and Baltic languages
|
||
(0x0100, 0x017F),
|
||
### General Punctuation (core subset) ###
|
||
# Smart quotes, en dash, em dash, ellipsis, NO-BREAK SPACE
|
||
(0x2000, 0x206F),
|
||
### Basic Symbols From "Latin-1 + Misc" ###
|
||
# dashes, quotes, prime marks
|
||
(0x2010, 0x203A),
|
||
# misc punctuation
|
||
(0x2040, 0x205F),
|
||
# common currency symbols
|
||
(0x20A0, 0x20CF),
|
||
### Combining Diacritical Marks (minimal subset) ###
|
||
# Needed for proper rendering of many extended Latin languages
|
||
(0x0300, 0x036F),
|
||
### Greek & Coptic ###
|
||
# Used in science, maths, philosophy, some academic texts
|
||
# (0x0370, 0x03FF),
|
||
### Cyrillic ###
|
||
# Russian, Ukrainian, Bulgarian, etc.
|
||
(0x0400, 0x04FF),
|
||
### Math Symbols (common subset) ###
|
||
# Superscripts and Subscripts
|
||
(0x2070, 0x209F),
|
||
# General math operators
|
||
(0x2200, 0x22FF),
|
||
# Arrows
|
||
(0x2190, 0x21FF),
|
||
### CJK ###
|
||
# Core Unified Ideographs
|
||
# (0x4E00, 0x9FFF),
|
||
# # Extension A
|
||
# (0x3400, 0x4DBF),
|
||
# # Extension B
|
||
# (0x20000, 0x2A6DF),
|
||
# # Extension C–F
|
||
# (0x2A700, 0x2EBEF),
|
||
# # Extension G
|
||
# (0x30000, 0x3134F),
|
||
# # Hiragana
|
||
# (0x3040, 0x309F),
|
||
# # Katakana
|
||
# (0x30A0, 0x30FF),
|
||
# # Katakana Phonetic Extensions
|
||
# (0x31F0, 0x31FF),
|
||
# # Halfwidth Katakana
|
||
# (0xFF60, 0xFF9F),
|
||
# # Hangul Syllables
|
||
# (0xAC00, 0xD7AF),
|
||
# # Hangul Jamo
|
||
# (0x1100, 0x11FF),
|
||
# # Hangul Compatibility Jamo
|
||
# (0x3130, 0x318F),
|
||
# # Hangul Jamo Extended-A
|
||
# (0xA960, 0xA97F),
|
||
# # Hangul Jamo Extended-B
|
||
# (0xD7B0, 0xD7FF),
|
||
# # CJK Radicals Supplement
|
||
# (0x2E80, 0x2EFF),
|
||
# # Kangxi Radicals
|
||
# (0x2F00, 0x2FDF),
|
||
# # CJK Symbols and Punctuation
|
||
# (0x3000, 0x303F),
|
||
# # CJK Compatibility Forms
|
||
# (0xFE30, 0xFE4F),
|
||
# # CJK Compatibility Ideographs
|
||
# (0xF900, 0xFAFF),
|
||
### Specials
|
||
# Replacement Character
|
||
(0xFFFD, 0xFFFD),
|
||
]
|
||
|
||
add_ints = []
|
||
if args.additional_intervals:
|
||
add_ints = [tuple([int(n, base=0) for n in i.split(",")]) for i in args.additional_intervals]
|
||
|
||
def norm_floor(val):
|
||
return int(math.floor(val / (1 << 6)))
|
||
|
||
def norm_ceil(val):
|
||
return int(math.ceil(val / (1 << 6)))
|
||
|
||
def chunks(l, n):
|
||
for i in range(0, len(l), n):
|
||
yield l[i:i + n]
|
||
|
||
def load_glyph(code_point):
|
||
face_index = 0
|
||
while face_index < len(font_stack):
|
||
face = font_stack[face_index]
|
||
glyph_index = face.get_char_index(code_point)
|
||
if glyph_index > 0:
|
||
face.load_glyph(glyph_index, freetype.FT_LOAD_RENDER)
|
||
return face
|
||
face_index += 1
|
||
print(f"code point {code_point} ({hex(code_point)}) not found in font stack!", file=sys.stderr)
|
||
return None
|
||
|
||
unmerged_intervals = sorted(intervals + add_ints)
|
||
intervals = []
|
||
unvalidated_intervals = []
|
||
for i_start, i_end in unmerged_intervals:
|
||
if len(unvalidated_intervals) > 0 and i_start + 1 <= unvalidated_intervals[-1][1]:
|
||
unvalidated_intervals[-1] = (unvalidated_intervals[-1][0], max(unvalidated_intervals[-1][1], i_end))
|
||
continue
|
||
unvalidated_intervals.append((i_start, i_end))
|
||
|
||
for i_start, i_end in unvalidated_intervals:
|
||
start = i_start
|
||
for code_point in range(i_start, i_end + 1):
|
||
face = load_glyph(code_point)
|
||
if face is None:
|
||
if start < code_point:
|
||
intervals.append((start, code_point - 1))
|
||
start = code_point + 1
|
||
if start != i_end + 1:
|
||
intervals.append((start, i_end))
|
||
|
||
for face in font_stack:
|
||
face.set_char_size(size << 6, size << 6, 150, 150)
|
||
|
||
total_size = 0
|
||
all_glyphs = []
|
||
|
||
for i_start, i_end in intervals:
|
||
for code_point in range(i_start, i_end + 1):
|
||
face = load_glyph(code_point)
|
||
bitmap = face.glyph.bitmap
|
||
|
||
# Build out 4-bit greyscale bitmap
|
||
pixels4g = []
|
||
px = 0
|
||
for i, v in enumerate(bitmap.buffer):
|
||
y = i / bitmap.width
|
||
x = i % bitmap.width
|
||
if x % 2 == 0:
|
||
px = (v >> 4)
|
||
else:
|
||
px = px | (v & 0xF0)
|
||
pixels4g.append(px);
|
||
px = 0
|
||
# eol
|
||
if x == bitmap.width - 1 and bitmap.width % 2 > 0:
|
||
pixels4g.append(px)
|
||
px = 0
|
||
|
||
if is2Bit:
|
||
# 0-3 white, 4-7 light grey, 8-11 dark grey, 12-15 black
|
||
# Downsample to 2-bit bitmap
|
||
pixels2b = []
|
||
px = 0
|
||
pitch = (bitmap.width // 2) + (bitmap.width % 2)
|
||
for y in range(bitmap.rows):
|
||
for x in range(bitmap.width):
|
||
px = px << 2
|
||
bm = pixels4g[y * pitch + (x // 2)]
|
||
bm = (bm >> ((x % 2) * 4)) & 0xF
|
||
|
||
if bm >= 12:
|
||
px += 3
|
||
elif bm >= 8:
|
||
px += 2
|
||
elif bm >= 4:
|
||
px += 1
|
||
|
||
if (y * bitmap.width + x) % 4 == 3:
|
||
pixels2b.append(px)
|
||
px = 0
|
||
if (bitmap.width * bitmap.rows) % 4 != 0:
|
||
px = px << (4 - (bitmap.width * bitmap.rows) % 4) * 2
|
||
pixels2b.append(px)
|
||
|
||
# for y in range(bitmap.rows):
|
||
# line = ''
|
||
# for x in range(bitmap.width):
|
||
# pixelPosition = y * bitmap.width + x
|
||
# byte = pixels2b[pixelPosition // 4]
|
||
# bit_index = (3 - (pixelPosition % 4)) * 2
|
||
# line += '#' if ((byte >> bit_index) & 3) > 0 else '.'
|
||
# print(line)
|
||
# print('')
|
||
else:
|
||
# Downsample to 1-bit bitmap - treat any 2+ as black
|
||
pixelsbw = []
|
||
px = 0
|
||
pitch = (bitmap.width // 2) + (bitmap.width % 2)
|
||
for y in range(bitmap.rows):
|
||
for x in range(bitmap.width):
|
||
px = px << 1
|
||
bm = pixels4g[y * pitch + (x // 2)]
|
||
px += 1 if ((x & 1) == 0 and bm & 0xE > 0) or ((x & 1) == 1 and bm & 0xE0 > 0) else 0
|
||
|
||
if (y * bitmap.width + x) % 8 == 7:
|
||
pixelsbw.append(px)
|
||
px = 0
|
||
if (bitmap.width * bitmap.rows) % 8 != 0:
|
||
px = px << (8 - (bitmap.width * bitmap.rows) % 8)
|
||
pixelsbw.append(px)
|
||
|
||
# for y in range(bitmap.rows):
|
||
# line = ''
|
||
# for x in range(bitmap.width):
|
||
# pixelPosition = y * bitmap.width + x
|
||
# byte = pixelsbw[pixelPosition // 8]
|
||
# bit_index = 7 - (pixelPosition % 8)
|
||
# line += '#' if (byte >> bit_index) & 1 else '.'
|
||
# print(line)
|
||
# print('')
|
||
|
||
pixels = pixels2b if is2Bit else pixelsbw
|
||
|
||
# Build output data
|
||
packed = bytes(pixels)
|
||
glyph = GlyphProps(
|
||
width = bitmap.width,
|
||
height = bitmap.rows,
|
||
advance_x = norm_floor(face.glyph.advance.x),
|
||
left = face.glyph.bitmap_left,
|
||
top = face.glyph.bitmap_top,
|
||
data_length = len(packed),
|
||
data_offset = total_size,
|
||
code_point = code_point,
|
||
)
|
||
total_size += len(packed)
|
||
all_glyphs.append((glyph, packed))
|
||
|
||
# pipe seems to be a good heuristic for the "real" descender
|
||
face = load_glyph(ord('|'))
|
||
|
||
glyph_data = []
|
||
glyph_props = []
|
||
for index, glyph in enumerate(all_glyphs):
|
||
props, packed = glyph
|
||
glyph_data.extend([b for b in packed])
|
||
glyph_props.append(props)
|
||
|
||
compress = args.compress
|
||
|
||
# Build groups for compression
|
||
if compress:
|
||
# Script-based grouping: glyphs that co-occur in typical text rendering
|
||
# are grouped together for efficient LRU caching on the embedded target.
|
||
# Since glyphs are in codepoint order, glyphs in the same Unicode block
|
||
# are contiguous in the array and form natural groups.
|
||
SCRIPT_GROUP_RANGES = [
|
||
(0x0000, 0x007F), # ASCII
|
||
(0x0080, 0x00FF), # Latin-1 Supplement
|
||
(0x0100, 0x017F), # Latin Extended-A
|
||
(0x0300, 0x036F), # Combining Diacritical Marks
|
||
(0x0400, 0x04FF), # Cyrillic
|
||
(0x2000, 0x206F), # General Punctuation
|
||
(0x2070, 0x209F), # Superscripts & Subscripts
|
||
(0x20A0, 0x20CF), # Currency Symbols
|
||
(0x2190, 0x21FF), # Arrows
|
||
(0x2200, 0x22FF), # Math Operators
|
||
(0xFFFD, 0xFFFD), # Replacement Character
|
||
]
|
||
|
||
def get_script_group(code_point):
|
||
for i, (start, end) in enumerate(SCRIPT_GROUP_RANGES):
|
||
if start <= code_point <= end:
|
||
return i
|
||
return -1
|
||
|
||
groups = [] # list of (first_glyph_index, glyph_count)
|
||
current_group_id = None
|
||
group_start = 0
|
||
group_count = 0
|
||
|
||
for i, (props, packed) in enumerate(all_glyphs):
|
||
sg = get_script_group(props.code_point)
|
||
if sg != current_group_id:
|
||
if group_count > 0:
|
||
groups.append((group_start, group_count))
|
||
current_group_id = sg
|
||
group_start = i
|
||
group_count = 1
|
||
else:
|
||
group_count += 1
|
||
|
||
if group_count > 0:
|
||
groups.append((group_start, group_count))
|
||
|
||
# Compress each group
|
||
compressed_groups = [] # list of (compressed_bytes, uncompressed_size, glyph_count, first_glyph_index)
|
||
compressed_bitmap_data = []
|
||
compressed_offset = 0
|
||
|
||
# Also build modified glyph props with within-group offsets
|
||
modified_glyph_props = list(glyph_props)
|
||
|
||
for first_idx, count in groups:
|
||
# Concatenate bitmap data for this group
|
||
group_data = b''
|
||
for gi in range(first_idx, first_idx + count):
|
||
props, packed = all_glyphs[gi]
|
||
# Update glyph's dataOffset to be within-group offset
|
||
within_group_offset = len(group_data)
|
||
old_props = modified_glyph_props[gi]
|
||
modified_glyph_props[gi] = GlyphProps(
|
||
width=old_props.width,
|
||
height=old_props.height,
|
||
advance_x=old_props.advance_x,
|
||
left=old_props.left,
|
||
top=old_props.top,
|
||
data_length=old_props.data_length,
|
||
data_offset=within_group_offset,
|
||
code_point=old_props.code_point,
|
||
)
|
||
group_data += packed
|
||
|
||
# Compress with raw DEFLATE (no zlib/gzip header)
|
||
compressor = zlib.compressobj(level=9, wbits=-15)
|
||
compressed = compressor.compress(group_data) + compressor.flush()
|
||
|
||
compressed_groups.append((compressed, len(group_data), count, first_idx))
|
||
compressed_bitmap_data.extend(compressed)
|
||
compressed_offset += len(compressed)
|
||
|
||
glyph_props = modified_glyph_props
|
||
total_compressed = len(compressed_bitmap_data)
|
||
total_uncompressed = len(glyph_data)
|
||
print(f"// Compression: {total_uncompressed} -> {total_compressed} bytes ({100*total_compressed/total_uncompressed:.1f}%), {len(groups)} groups", file=sys.stderr)
|
||
|
||
print(f"""/**
|
||
* generated by fontconvert.py
|
||
* name: {font_name}
|
||
* size: {size}
|
||
* mode: {'2-bit' if is2Bit else '1-bit'}{' compressed: true' if compress else ''}
|
||
* Command used: {' '.join(sys.argv)}
|
||
*/
|
||
#pragma once
|
||
#include "EpdFontData.h"
|
||
""")
|
||
|
||
if compress:
|
||
print(f"static const uint8_t {font_name}Bitmaps[{len(compressed_bitmap_data)}] = {{")
|
||
for c in chunks(compressed_bitmap_data, 16):
|
||
print (" " + " ".join(f"0x{b:02X}," for b in c))
|
||
print ("};\n");
|
||
else:
|
||
print(f"static const uint8_t {font_name}Bitmaps[{len(glyph_data)}] = {{")
|
||
for c in chunks(glyph_data, 16):
|
||
print (" " + " ".join(f"0x{b:02X}," for b in c))
|
||
print ("};\n");
|
||
|
||
print(f"static const EpdGlyph {font_name}Glyphs[] = {{")
|
||
for i, g in enumerate(glyph_props):
|
||
print (" { " + ", ".join([f"{a}" for a in list(g[:-1])]),"},", f"// {chr(g.code_point) if g.code_point != 92 else '<backslash>'}")
|
||
print ("};\n");
|
||
|
||
print(f"static const EpdUnicodeInterval {font_name}Intervals[] = {{")
|
||
offset = 0
|
||
for i_start, i_end in intervals:
|
||
print (f" {{ 0x{i_start:X}, 0x{i_end:X}, 0x{offset:X} }},")
|
||
offset += i_end - i_start + 1
|
||
print ("};\n");
|
||
|
||
if compress:
|
||
print(f"static const EpdFontGroup {font_name}Groups[] = {{")
|
||
compressed_offset = 0
|
||
for compressed, uncompressed_size, count, first_idx in compressed_groups:
|
||
print(f" {{ {compressed_offset}, {len(compressed)}, {uncompressed_size}, {count}, {first_idx} }},")
|
||
compressed_offset += len(compressed)
|
||
print("};\n")
|
||
|
||
print(f"static const EpdFontData {font_name} = {{")
|
||
print(f" {font_name}Bitmaps,")
|
||
print(f" {font_name}Glyphs,")
|
||
print(f" {font_name}Intervals,")
|
||
print(f" {len(intervals)},")
|
||
print(f" {norm_ceil(face.size.height)},")
|
||
print(f" {norm_ceil(face.size.ascender)},")
|
||
print(f" {norm_floor(face.size.descender)},")
|
||
print(f" {'true' if is2Bit else 'false'},")
|
||
if compress:
|
||
print(f" {font_name}Groups,")
|
||
print(f" {len(compressed_groups)},")
|
||
else:
|
||
print(f" nullptr,")
|
||
print(f" 0,")
|
||
print("};")
|