Files
crosspoint-reader-mod/lib/EpdFont/scripts/verify_compression.py
Adrian Wilkins-Caruana f1e9dc7f30 perf: font-compression improvements (#1056)
## Purpose

This PR includes some preparatory changes that are needed for an
upcoming performant CJK font feature. The changes have no impact on
render time and heap allocation for latin text. **Despite this, I think
these changes stand on their own as a better font
compression/decompression implementation.**

## Summary

- Font decompressor rewrite: Replaced the 4-slot LRU group cache with a
two-tier system — a page buffer (glyphs prewarmed before rendering
begins) and a hot-group fallback (last decompressed group retained for
non-prewarmed
  glyphs). 
- Byte-aligned compressed bitmap format: Glyph bitmaps within compressed
groups are now stored row-padded rather than tightly packed before
DEFLATE compression, improving compression ratios by making identical
pixel rows produce
identical byte patterns. Glyphs are compacted back to packed format on
demand at render time. Reduces flash size by 155 KB.
- Page prewarm system: Added `Page::collectText` and
`Page::getDominantStyle` to extract per-style glyph requirements before
rendering, and `GfxRenderer::prewarmFontCache` to pre-decompress only
the groups needed for the dominant style
   — eliminating mid-render decompression for the common case.
- UTF-8 robustness fixes: `utf8NextCodepoint` now validates continuation
bytes and returns a replacement glyph on malformed input;
`ChapterHtmlSlimParser` correctly preserves incomplete multi-byte
sequences across word-buffer flush
  boundaries rather than splitting them.

---

### AI Usage

While CrossPoint doesn't restrict the use of AI tools in contributions,
please be transparent about their usage, as this helps set the right
context for reviewers.

Did you use AI tools to help write this code? _**YES**_ Architecture and
design was done by me, refined a bit by Claude. Code mostly by Claude,
but not entirely.
2026-03-11 21:05:46 +01:00

269 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Round-trip verification for compressed font headers.
Parses each generated .h file in the given directory, identifies compressed fonts
(those with a Groups array), decompresses each group (byte-aligned bitmap format),
compacts to packed format, and verifies the data matches expected glyph sizes.
Supports both contiguous-group fonts (Latin) and frequency-grouped fonts (CJK)
with glyphToGroup mapping arrays.
"""
import math
import os
import re
import sys
import zlib
def parse_hex_array(text):
    """Return the bytes encoded in a C hex array string such as '{ 0xAB, 0xCD, ... }'."""
    # Each element is a two-digit hex literal; anything else is ignored.
    return bytes(int(pair, 16) for pair in re.findall(r'0x([0-9A-Fa-f]{2})', text))
def parse_uint8_array(text):
    """Extract uint8/uint16 values from a C array string like '{ 0, 1, 0xFF, ... }'"""
    # Accept both hex (0x..) and decimal literals; base 0 lets int() decide.
    tokens = re.findall(r'\b0x[0-9A-Fa-f]+\b|\b\d+\b', text)
    return [int(token, 0) for token in tokens]
def parse_groups(text):
    """Parse EpdFontGroup array entries: { compressedOffset, compressedSize, uncompressedSize, glyphCount, firstGlyphIndex }"""
    field_names = ('compressedOffset', 'compressedSize', 'uncompressedSize',
                   'glyphCount', 'firstGlyphIndex')
    entry_re = re.compile(r'\{\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\}')
    # One dict per brace-delimited 5-tuple, fields in declaration order.
    return [dict(zip(field_names, map(int, m.groups()))) for m in entry_re.finditer(text)]
def parse_glyphs(text):
    """Parse EpdGlyph array entries: { width, height, advanceX, left, top, dataLength, dataOffset }"""
    field_names = ('width', 'height', 'advanceX', 'left', 'top', 'dataLength', 'dataOffset')
    # left/top (and in principle any field) may be negative, hence -?\d+.
    num = r'(-?\d+)'
    entry_re = re.compile(r'\{\s*' + num + (r'\s*,\s*' + num) * 6 + r'\s*\}')
    return [dict(zip(field_names, map(int, m.groups()))) for m in entry_re.finditer(text)]
def get_group_glyph_indices(group, group_index, glyphs, glyph_to_group):
    """Get the ordered list of glyph indices belonging to a group."""
    if glyph_to_group is None:
        # Contiguous layout (Latin): glyphCount sequential indices from firstGlyphIndex.
        start = group['firstGlyphIndex']
        return list(range(start, start + group['glyphCount']))
    # Frequency-grouped layout (CJK): membership comes from the mapping array.
    return [idx for idx in range(len(glyphs)) if glyph_to_group[idx] == group_index]
def compact_aligned_to_packed(aligned_data, width, height):
    """Convert a byte-aligned 2-bit bitmap to packed format (reverse of to_byte_aligned).

    Byte-aligned format starts every row on a byte boundary; packed format
    streams pixels continuously across row boundaries at 4 pixels per byte.
    """
    if width == 0 or height == 0:
        return b''
    stride = (width + 3) // 4  # bytes per row in the byte-aligned layout
    packed = bytearray(math.ceil(width * height / 4))
    for row in range(height):
        row_base = row * stride
        for col in range(width):
            # Pull the 2-bit pixel out of the row-aligned source byte.
            src_byte = aligned_data[row_base + col // 4]
            pixel = (src_byte >> ((3 - col % 4) * 2)) & 0x3
            # Drop it into the continuous packed bit stream.
            pos = row * width + col
            packed[pos // 4] |= pixel << ((3 - pos % 4) * 2)
    return bytes(packed)
def verify_font_file(filepath):
    """Verify a single compressed font header round-trips correctly.

    Parses the Groups/Bitmaps/Glyphs arrays (and the optional GlyphToGroup
    mapping for frequency-grouped fonts), decompresses every group with raw
    DEFLATE, compacts each glyph from byte-aligned to packed layout, and
    cross-checks sizes and offsets against the glyph metadata.

    Returns (font_name, success, message) where success is True on a clean
    round-trip, False on any verification failure, and None when the file is
    an uncompressed font (no Groups array).
    """
    # Generated headers are plain ASCII/UTF-8; be explicit so parsing does
    # not depend on the platform default encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Only compressed fonts declare an EpdFontGroup array.
    groups_match = re.search(r'static const EpdFontGroup (\w+)Groups\[\]', content)
    if not groups_match:
        return (os.path.basename(filepath), None, "uncompressed, skipping")
    font_name = groups_match.group(1)

    # Extract the compressed bitmap blob.
    bitmap_match = re.search(
        r'static const uint8_t ' + re.escape(font_name) + r'Bitmaps\[\d+\]\s*=\s*\{([^}]+)\}',
        content, re.DOTALL
    )
    if not bitmap_match:
        return (font_name, False, "could not find Bitmaps array")
    compressed_data = parse_hex_array(bitmap_match.group(1))

    # Extract the group table.
    groups_array_match = re.search(
        r'static const EpdFontGroup ' + re.escape(font_name) + r'Groups\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if not groups_array_match:
        return (font_name, False, "could not find Groups array")
    groups = parse_groups(groups_array_match.group(1))
    if not groups:
        return (font_name, False, "Groups array parsed to 0 entries; check format")

    # Extract the glyph metadata table.
    glyphs_match = re.search(
        r'static const EpdGlyph ' + re.escape(font_name) + r'Glyphs\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if not glyphs_match:
        return (font_name, False, "could not find Glyphs array")
    glyphs = parse_glyphs(glyphs_match.group(1))

    # Frequency-grouped (CJK) fonts carry a glyph-index -> group-ID mapping.
    glyph_to_group = None
    g2g_match = re.search(
        r'static const uint16_t ' + re.escape(font_name) + r'GlyphToGroup\[\]\s*=\s*\{(.+?)\};',
        content, re.DOTALL
    )
    if g2g_match:
        glyph_to_group = parse_uint8_array(g2g_match.group(1))
        if len(glyph_to_group) != len(glyphs):
            return (font_name, False, f"glyphToGroup length ({len(glyph_to_group)}) != glyph count ({len(glyphs)})")
        max_group_id = max(glyph_to_group)
        if max_group_id >= len(groups):
            return (font_name, False, f"glyphToGroup contains group ID {max_group_id} but only {len(groups)} groups exist")

    # Round-trip every group.
    for gi, group in enumerate(groups):
        # Slice this group's compressed chunk out of the bitmap blob.
        chunk = compressed_data[group['compressedOffset']:group['compressedOffset'] + group['compressedSize']]
        if len(chunk) != group['compressedSize']:
            return (font_name, False, f"group {gi}: compressed data truncated (expected {group['compressedSize']}, got {len(chunk)})")
        # Raw DEFLATE (negative wbits = no zlib header); result is byte-aligned data.
        try:
            decompressed = zlib.decompress(chunk, -15)
        except zlib.error as e:
            return (font_name, False, f"group {gi}: decompression failed: {e}")
        if len(decompressed) != group['uncompressedSize']:
            return (font_name, False, f"group {gi}: size mismatch (expected {group['uncompressedSize']}, got {len(decompressed)})")

        # Determine which glyphs belong to this group.
        group_glyph_indices = get_group_glyph_indices(group, gi, glyphs, glyph_to_group)
        if glyph_to_group is not None and len(group_glyph_indices) != group['glyphCount']:
            return (font_name, False,
                    f"group {gi}: glyphCount {group['glyphCount']} != mapping count {len(group_glyph_indices)}")

        # Walk the byte-aligned data, compacting each glyph and checking the
        # packed offsets/lengths recorded in the glyph metadata.
        byte_aligned_offset = 0
        packed_offset = 0
        for glyph_idx in group_glyph_indices:
            if glyph_idx >= len(glyphs):
                return (font_name, False, f"group {gi}: glyph index {glyph_idx} out of range")
            glyph = glyphs[glyph_idx]
            width = glyph['width']
            height = glyph['height']
            if width == 0 or height == 0:
                # Zero-size glyphs occupy no data but must still point at the
                # current packed offset with a zero length.
                if glyph['dataOffset'] != packed_offset:
                    return (font_name, False, f"group {gi}, glyph {glyph_idx}: zero-size glyph dataOffset {glyph['dataOffset']} != expected packed offset {packed_offset}")
                if glyph['dataLength'] != 0:
                    return (font_name, False, f"group {gi}, glyph {glyph_idx}: zero-size glyph dataLength {glyph['dataLength']} != expected 0")
                continue
            aligned_size = ((width + 3) // 4) * height  # rows padded to byte boundary
            packed_size = math.ceil(width * height / 4)  # continuous 2-bit stream
            # Packed offset/length must match the glyph metadata exactly.
            if glyph['dataOffset'] != packed_offset:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: dataOffset {glyph['dataOffset']} != expected packed offset {packed_offset}")
            if glyph['dataLength'] != packed_size:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: dataLength {glyph['dataLength']} != expected packed length {packed_size} "
                        f"(width={width}, height={height})")
            # The glyph's byte-aligned bytes must fit in the decompressed buffer.
            if byte_aligned_offset + aligned_size > len(decompressed):
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: byte-aligned data extends beyond decompressed buffer "
                        f"(offset={byte_aligned_offset}, size={aligned_size}, buf_size={len(decompressed)})")
            aligned_glyph = decompressed[byte_aligned_offset:byte_aligned_offset + aligned_size]
            # Compact to packed layout and check the resulting size; pixel
            # values are 2-bit by construction (the compactor masks with 0x3).
            packed_glyph = compact_aligned_to_packed(aligned_glyph, width, height)
            if len(packed_glyph) != packed_size:
                return (font_name, False, f"group {gi}, glyph {glyph_idx}: compacted size {len(packed_glyph)} != expected {packed_size}")
            byte_aligned_offset += aligned_size
            packed_offset += packed_size

        # Every decompressed byte must be accounted for by some glyph.
        if byte_aligned_offset != group['uncompressedSize']:
            return (font_name, False, f"group {gi}: total byte-aligned size {byte_aligned_offset} != uncompressedSize {group['uncompressedSize']}")

    extra_info = ""
    if glyph_to_group is not None:
        extra_info = " (frequency-grouped)"
    return (font_name, True, f"{len(groups)} groups, {len(glyphs)} glyphs OK{extra_info}")
def main():
    """Verify every font header in the directory given on the command line.

    Scans all .h files (except the aggregate all.h), verifies each compressed
    font, prints a per-font PASS/FAIL line plus a summary, and exits with a
    non-zero status on usage errors or any verification failure.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <font_headers_directory>", file=sys.stderr)
        sys.exit(1)
    font_dir = sys.argv[1]
    if not os.path.isdir(font_dir):
        print(f"Error: {font_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    # all.h is an aggregate include, not a font header.
    files = sorted(f for f in os.listdir(font_dir) if f.endswith('.h') and f != 'all.h')
    passed = 0
    failed = 0
    skipped = 0
    for filename in files:
        filepath = os.path.join(font_dir, filename)
        # Bug fix: the font name returned by verify_font_file was discarded
        # and the literal placeholder "(unknown)" was printed instead.
        font_name, success, message = verify_font_file(filepath)
        if success is None:
            skipped += 1
        elif success:
            passed += 1
            print(f" PASS: {font_name} ({message})")
        else:
            failed += 1
            print(f" FAIL: {font_name} - {message}")
    print(f"\nResults: {passed} passed, {failed} failed, {skipped} skipped (uncompressed)")
    if failed > 0:
        sys.exit(1)


if __name__ == '__main__':
    main()