## Purpose

This PR includes some preparatory changes that are needed for an upcoming performant CJK font feature. The changes have no impact on render time and heap allocation for Latin text. **Despite this, I think these changes stand on their own as a better font compression/decompression implementation.**

## Summary

- Font decompressor rewrite: Replaced the 4-slot LRU group cache with a two-tier system — a page buffer (glyphs prewarmed before rendering begins) and a hot-group fallback (last decompressed group retained for non-prewarmed glyphs).
- Byte-aligned compressed bitmap format: Glyph bitmaps within compressed groups are now stored row-padded rather than tightly packed before DEFLATE compression, improving compression ratios by making identical pixel rows produce identical byte patterns. Glyphs are compacted back to packed format on demand at render time. Reduces flash size by 155 KB.
- Page prewarm system: Added `Page::collectText` and `Page::getDominantStyle` to extract per-style glyph requirements before rendering, and `GfxRenderer::prewarmFontCache` to pre-decompress only the groups needed for the dominant style — eliminating mid-render decompression for the common case.
- UTF-8 robustness fixes: `utf8NextCodepoint` now validates continuation bytes and returns a replacement glyph on malformed input; `ChapterHtmlSlimParser` correctly preserves incomplete multi-byte sequences across word-buffer flush boundaries rather than splitting them.

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers.

Did you use AI tools to help write this code? _**YES**_ Architecture and design were done by me, refined a bit by Claude. Code mostly by Claude, but not entirely.
269 lines
11 KiB
Python
269 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Round-trip verification for compressed font headers.
|
|
|
|
Parses each generated .h file in the given directory, identifies compressed fonts
|
|
(those with a Groups array), decompresses each group (byte-aligned bitmap format),
|
|
compacts to packed format, and verifies the data matches expected glyph sizes.
|
|
|
|
Supports both contiguous-group fonts (Latin) and frequency-grouped fonts (CJK)
|
|
with glyphToGroup mapping arrays.
|
|
"""
|
|
import math
|
|
import os
|
|
import re
|
|
import sys
|
|
import zlib
|
|
|
|
|
|
def parse_hex_array(text):
    """Collect the byte values of a C hex array such as '{ 0xAB, 0xCD, ... }'.

    Every ``0x`` prefix followed by two hex digits contributes one byte, in
    the order the tokens appear in *text*. Returns a ``bytes`` object (empty
    when no such token is present).
    """
    digit_pairs = re.findall(r'0x([0-9A-Fa-f]{2})', text)
    # Each captured pair is exactly two hex digits, so the concatenation is a
    # valid hex string of even length.
    return bytes.fromhex(''.join(digit_pairs))
|
|
|
|
|
|
def parse_uint8_array(text):
    """Read the integer literals of a C array such as '{ 0, 1, 0xFF, ... }'.

    Both ``0x``-prefixed hexadecimal tokens and plain decimal tokens are
    recognised. Returns the values as a list of ints in source order.
    """
    literal_re = re.compile(r'\b0x[0-9A-Fa-f]+\b|\b\d+\b')
    values = []
    for token in literal_re.finditer(text):
        # Base 0 lets int() pick hex or decimal from the literal's prefix.
        values.append(int(token.group(), 0))
    return values
|
|
|
|
|
|
def parse_groups(text):
    """Parse the entries of an EpdFontGroup array.

    Each entry is a brace-enclosed list of five unsigned decimal integers:
    { compressedOffset, compressedSize, uncompressedSize, glyphCount, firstGlyphIndex }

    Returns one dict per entry, keyed by those field names, in source order.
    """
    field_names = (
        'compressedOffset',
        'compressedSize',
        'uncompressedSize',
        'glyphCount',
        'firstGlyphIndex',
    )
    entry_pattern = r'\{\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\}'
    return [
        dict(zip(field_names, (int(raw) for raw in entry.groups())))
        for entry in re.finditer(entry_pattern, text)
    ]
|
|
|
|
|
|
def parse_glyphs(text):
    """Parse the entries of an EpdGlyph array.

    Each entry is a brace-enclosed list of seven (optionally negative)
    decimal integers:
    { width, height, advanceX, left, top, dataLength, dataOffset }

    Returns one dict per entry, keyed by those field names, in source order.
    """
    field_names = (
        'width',
        'height',
        'advanceX',
        'left',
        'top',
        'dataLength',
        'dataOffset',
    )
    entry_pattern = r'\{\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*\}'
    return [
        dict(zip(field_names, (int(raw) for raw in entry.groups())))
        for entry in re.finditer(entry_pattern, text)
    ]
|
|
|
|
|
|
def get_group_glyph_indices(group, group_index, glyphs, glyph_to_group):
    """Return, in ascending order, the glyph indices that belong to a group.

    Without a mapping (``glyph_to_group`` is None) the group is contiguous and
    covers ``glyphCount`` indices starting at ``firstGlyphIndex``. With a
    mapping, every glyph index whose mapping entry equals ``group_index`` is
    selected.
    """
    if glyph_to_group is None:
        # Contiguous layout: the group header alone describes the range.
        start = group['firstGlyphIndex']
        stop = start + group['glyphCount']
        return list(range(start, stop))

    # Frequency-grouped layout: membership comes from the per-glyph mapping.
    members = []
    for glyph_index in range(len(glyphs)):
        if glyph_to_group[glyph_index] == group_index:
            members.append(glyph_index)
    return members
|
|
|
|
|
|
def compact_aligned_to_packed(aligned_data, width, height):
    """Repack a row-padded 2-bit bitmap into a continuous 2-bit stream.

    The input stores every row starting on a byte boundary (four pixels per
    byte, most significant pixel first). The output stores the same pixels
    back to back, so rows may share a byte. Returns the packed pixels as
    ``bytes``; a bitmap with zero width or zero height yields ``b''``.
    """
    if 0 in (width, height):
        return b''

    out = bytearray(math.ceil(width * height / 4))
    stride = (width + 3) // 4  # bytes occupied by one padded input row

    for row in range(height):
        src_row_start = row * stride
        dst_row_start = row * width  # index of the row's first pixel in the stream
        for col in range(width):
            # Locate the pixel inside the padded row: byte index, then 2-bit slot.
            src_byte, src_slot = divmod(col, 4)
            value = (aligned_data[src_row_start + src_byte] >> ((3 - src_slot) * 2)) & 0x3

            # Locate the same pixel inside the continuous output stream.
            dst_byte, dst_slot = divmod(dst_row_start + col, 4)
            out[dst_byte] |= value << ((3 - dst_slot) * 2)

    return bytes(out)
|
|
|
|
|
|
def verify_font_file(filepath):
    """Verify a single font header file.

    The header is treated as a compressed font when it declares an
    ``EpdFontGroup <name>Groups[]`` array. For such a font every group is
    inflated as a raw DEFLATE stream, walked glyph by glyph in byte-aligned
    (row-padded) form, compacted to the packed form, and checked against the
    ``dataOffset`` / ``dataLength`` values recorded in the Glyphs array. When
    a ``<name>GlyphToGroup[]`` array is present it decides which glyphs belong
    to which group; otherwise groups are contiguous glyph ranges.

    Args:
        filepath: Path of the generated ``.h`` file to check.

    Returns:
        A ``(name, success, message)`` tuple. ``success`` is ``None`` when the
        file has no Groups array (skipped; ``name`` is then the file's base
        name), ``True`` when every group verified, and ``False`` at the first
        problem found, with ``message`` describing it. For compressed fonts
        ``name`` is the identifier prefix captured from the Groups declaration.
    """
    # Only ASCII tokens are parsed, so decode as UTF-8 independently of the
    # platform locale and substitute undecodable bytes (for example in
    # comments) rather than aborting the whole run on them.
    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # Check if this is a compressed font (has Groups array)
    groups_match = re.search(r'static const EpdFontGroup (\w+)Groups\[\]', content)
    if not groups_match:
        return (os.path.basename(filepath), None, "uncompressed, skipping")

    font_name = groups_match.group(1)

    # Extract bitmap data (the concatenated compressed groups)
    bitmap_match = re.search(
        r'static const uint8_t ' + re.escape(font_name) + r'Bitmaps\[\d+\]\s*=\s*\{([^}]+)\}',
        content, re.DOTALL
    )
    if not bitmap_match:
        return (font_name, False, "could not find Bitmaps array")

    compressed_data = parse_hex_array(bitmap_match.group(1))

    # Extract groups
    groups_array_match = re.search(
        r'static const EpdFontGroup ' + re.escape(font_name) + r'Groups\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if not groups_array_match:
        return (font_name, False, "could not find Groups array")

    groups = parse_groups(groups_array_match.group(1))
    if not groups:
        return (font_name, False, "Groups array parsed to 0 entries; check format")

    # Extract glyphs
    glyphs_match = re.search(
        r'static const EpdGlyph ' + re.escape(font_name) + r'Glyphs\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if not glyphs_match:
        return (font_name, False, "could not find Glyphs array")

    glyphs = parse_glyphs(glyphs_match.group(1))

    # Check for glyphToGroup array (frequency-grouped fonts)
    glyph_to_group = None
    g2g_match = re.search(
        r'static const uint16_t ' + re.escape(font_name) + r'GlyphToGroup\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if g2g_match:
        glyph_to_group = parse_uint8_array(g2g_match.group(1))
        if len(glyph_to_group) != len(glyphs):
            return (font_name, False, f"glyphToGroup length ({len(glyph_to_group)}) != glyph count ({len(glyphs)})")
        # default=-1 keeps an empty mapping (no glyphs at all) from raising
        # ValueError; -1 can never reach the group count.
        max_group_id = max(glyph_to_group, default=-1)
        if max_group_id >= len(groups):
            return (font_name, False, f"glyphToGroup contains group ID {max_group_id} but only {len(groups)} groups exist")

    # Verify each group
    for gi, group in enumerate(groups):
        # Extract compressed chunk; a short slice means the offsets run past the bitmap data
        chunk = compressed_data[group['compressedOffset']:group['compressedOffset'] + group['compressedSize']]
        if len(chunk) != group['compressedSize']:
            return (font_name, False, f"group {gi}: compressed data truncated (expected {group['compressedSize']}, got {len(chunk)})")

        # Decompress with raw DEFLATE (negative wbits = no zlib header) — result is byte-aligned data
        try:
            decompressed = zlib.decompress(chunk, -15)
        except zlib.error as e:
            return (font_name, False, f"group {gi}: decompression failed: {e}")

        if len(decompressed) != group['uncompressedSize']:
            return (font_name, False, f"group {gi}: size mismatch (expected {group['uncompressedSize']}, got {len(decompressed)})")

        # Get glyph indices for this group
        group_glyph_indices = get_group_glyph_indices(group, gi, glyphs, glyph_to_group)
        if glyph_to_group is not None and len(group_glyph_indices) != group['glyphCount']:
            return (font_name, False,
                    f"group {gi}: glyphCount {group['glyphCount']} != mapping count {len(group_glyph_indices)}")

        # Walk through byte-aligned data, compact each glyph, and verify against packed format.
        # Both offsets are relative to the start of this group.
        byte_aligned_offset = 0
        packed_offset = 0

        for glyph_idx in group_glyph_indices:
            if glyph_idx >= len(glyphs):
                return (font_name, False, f"group {gi}: glyph index {glyph_idx} out of range")
            glyph = glyphs[glyph_idx]
            width = glyph['width']
            height = glyph['height']

            if width == 0 or height == 0:
                # Zero-size glyphs should have dataOffset == current packed_offset and dataLength == 0
                if glyph['dataOffset'] != packed_offset:
                    return (font_name, False, f"group {gi}, glyph {glyph_idx}: zero-size glyph dataOffset {glyph['dataOffset']} != expected packed offset {packed_offset}")
                if glyph['dataLength'] != 0:
                    return (font_name, False, f"group {gi}, glyph {glyph_idx}: zero-size glyph dataLength {glyph['dataLength']} != expected 0")
                continue

            aligned_size = ((width + 3) // 4) * height
            packed_size = math.ceil(width * height / 4)

            # Verify packed offset and size match glyph metadata
            if glyph['dataOffset'] != packed_offset:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: dataOffset {glyph['dataOffset']} != expected packed offset {packed_offset}")
            if glyph['dataLength'] != packed_size:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: dataLength {glyph['dataLength']} != expected packed length {packed_size} "
                        f"(width={width}, height={height})")

            # Extract byte-aligned data for this glyph
            if byte_aligned_offset + aligned_size > len(decompressed):
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: byte-aligned data extends beyond decompressed buffer "
                        f"(offset={byte_aligned_offset}, size={aligned_size}, buf_size={len(decompressed)})")

            aligned_glyph = decompressed[byte_aligned_offset:byte_aligned_offset + aligned_size]

            # Compact to packed and confirm the result has the expected packed length
            packed_glyph = compact_aligned_to_packed(aligned_glyph, width, height)
            if len(packed_glyph) != packed_size:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: compacted size {len(packed_glyph)} != expected {packed_size}")

            byte_aligned_offset += aligned_size
            packed_offset += packed_size

        # Verify total byte-aligned size matches uncompressedSize
        if byte_aligned_offset != group['uncompressedSize']:
            return (font_name, False, f"group {gi}: total byte-aligned size {byte_aligned_offset} != uncompressedSize {group['uncompressedSize']}")

    extra_info = ""
    if glyph_to_group is not None:
        extra_info = " (frequency-grouped)"
    return (font_name, True, f"{len(groups)} groups, {len(glyphs)} glyphs OK{extra_info}")
|
|
|
|
|
|
def main():
    """Command-line entry point: verify every font header in a directory.

    Expects the directory as the first command-line argument. Each ``.h``
    file in it except ``all.h`` is checked in sorted name order; a PASS or
    FAIL line is printed per compressed font, followed by a summary. Exits
    with status 1 on a missing/invalid argument or when any file failed.
    """
    argv = sys.argv
    if len(argv) < 2:
        print(f"Usage: {argv[0]} <font_headers_directory>", file=sys.stderr)
        sys.exit(1)

    font_dir = argv[1]
    if not os.path.isdir(font_dir):
        print(f"Error: {font_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    headers = [name for name in os.listdir(font_dir) if name.endswith('.h') and name != 'all.h']
    headers.sort()

    passed = failed = skipped = 0
    for filename in headers:
        _font_name, success, message = verify_font_file(os.path.join(font_dir, filename))

        # None marks an uncompressed header: counted, but not reported per file.
        if success is None:
            skipped += 1
            continue

        if success:
            passed += 1
            print(f" PASS: {filename} ({message})")
        else:
            failed += 1
            print(f" FAIL: {filename} - {message}")

    print(f"\nResults: {passed} passed, {failed} failed, {skipped} skipped (uncompressed)")

    if failed > 0:
        sys.exit(1)
|
|
|
|
|
|
# Run the verifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|