Files
crosspoint-reader-mod/lib/EpdFont/scripts/fontconvert.py
Adrian Wilkins-Caruana 052f497b9e fix: force auto-hinting for Bookerly to fix inconsistent stem widths (#1098)
## Summary

Bookerly's native TrueType hinting is effectively a no-op at the sizes
used here, causing FreeType to place stems at inconsistent sub-pixel
positions. This results in the 'k' stem (8-bit fringe: 0x38=56) falling
just below the 2-bit quantization threshold while 'l' and 'h' stems
(fringes: 0x4C=76, 0x40=64) land above it --- making 'k' visibly
narrower (2.00px vs 2.33px effective width).

FreeType's auto-hinter snaps all stems to consistent grid positions,
normalizing effective stem width to 2.67px across all glyphs.

Adds --force-autohint flag to fontconvert.py and applies it to Bookerly
only. NotoSans, OpenDyslexic, and Ubuntu fonts are unaffected.

Here is an example of before/after. Take notice of the vertical stems on
characters like `l`, `k`, `n`, `i`, etc. The font is Bookerly 12pt
regular:

**BEFORE**:

![before](https://github.com/user-attachments/assets/65b2acab-ad95-489e-885e-e3a0163cc252)

**AFTER**:


![after](https://github.com/user-attachments/assets/d09a8b5d-40af-4a7d-b622-e1b2cabcce85)

Claude generated this script to quantitatively determine that this
change makes the vertical stems on a variety of characters more
consistent for Bookerly _only_.

<details>
  <summary>Python script</summary>
    
  ```python
#!/usr/bin/env python3
"""Compare stem consistency across all font families with and without
auto-hinting.

Run from repo root:
    python3 compare_all_fonts.py
"""

import freetype

# Resolution (dots per inch) passed to FreeType when sizing each face.
DPI = 150
# Characters whose leftmost vertical stem is measured.
CHARS = ["k", "l", "h", "i", "b", "d"]
# Point sizes at which every font is rendered.
SIZES = [12, 14, 16, 18]

# Font family name -> path of the regular-weight font file. The paths are
# relative, so the script has to be run from the repository root.
FONTS = {
    "Bookerly": "lib/EpdFont/builtinFonts/source/Bookerly/Bookerly-Regular.ttf",
    "NotoSans": "lib/EpdFont/builtinFonts/source/NotoSans/NotoSans-Regular.ttf",
    "OpenDyslexic": "lib/EpdFont/builtinFonts/source/OpenDyslexic/OpenDyslexic-Regular.otf",
    "Ubuntu": "lib/EpdFont/builtinFonts/source/Ubuntu/Ubuntu-Regular.ttf",
}

# Hinting mode name -> FreeType load flags used to render glyphs in that mode.
MODES = {
    "default": freetype.FT_LOAD_RENDER,
    "autohint": freetype.FT_LOAD_RENDER | freetype.FT_LOAD_FORCE_AUTOHINT,
}


def q4to2(v):
    """Quantize a 4-bit grey level to a 2-bit level (0-3).

    Values of 12 and above map to 3, 8-11 to 2, 4-7 to 1, and anything
    below 4 to 0.
    """
    for level, threshold in ((3, 12), (2, 8), (1, 4)):
        if v >= threshold:
            return level
    return 0


def get_stem_eff(face, char, flags):
    """Measure the effective width of the leftmost stem of ``char``.

    The glyph is loaded with ``flags``, every pixel is reduced to a 2-bit
    level with ``q4to2``, and for each row in the middle half of the bitmap
    the first run of non-zero pixels is summed and divided by 3 (the
    maximum 2-bit level). The result is the mean over those rows, rounded
    to two decimals.

    Returns ``None`` when the face has no glyph for ``char``, when the
    rendered bitmap is empty, or when no measured row contains any ink.
    """
    glyph_index = face.get_char_index(ord(char))
    if glyph_index == 0:
        return None
    face.load_glyph(glyph_index, flags)

    bitmap = face.glyph.bitmap
    width, rows = bitmap.width, bitmap.rows
    if width == 0 or rows == 0:
        return None

    # Quantized copy of the bitmap: levels[y][x] is the 2-bit level of a pixel.
    buffer, pitch = bitmap.buffer, bitmap.pitch
    levels = [
        [q4to2(buffer[y * pitch + x] >> 4) for x in range(width)]
        for y in range(rows)
    ]

    # Measure leftmost stem in stable middle rows
    stem_widths = []
    for row in levels[rows // 4:rows - rows // 4]:
        ink_seen = False
        run_total = 0
        for level in row:
            if level > 0:
                ink_seen = True
                run_total += level
            elif ink_seen:
                # First gap after the stem: the leftmost run is complete.
                break
        if ink_seen:
            stem_widths.append(run_total / 3.0)

    if not stem_widths:
        return None
    return round(sum(stem_widths) / len(stem_widths), 2)


def main():
    """Print stem widths and their spread for every font, size and mode.

    For each entry in ``FONTS`` (skipped when the face cannot be opened),
    and for each size in ``SIZES``, one row is printed per hinting mode in
    ``MODES``. A row lists the stem width of every character in ``CHARS``
    followed by the spread (max - min) of the measurable ones; spreads
    above 0.5 are flagged as inconsistent.
    """
    for font_name, font_path in FONTS.items():
        # Probe the file once so a missing font is reported and skipped.
        try:
            freetype.Face(font_path)
        except Exception:
            print(f"\n  {font_name}: SKIPPED (file not found)")
            continue

        print(f"\n{'=' * 80}")
        print(f"  {font_name}")
        print(f"{'=' * 80}")

        for size in SIZES:
            print(f"\n  {size}pt:")
            print(f"  {'':6s}", end="")
            for c in CHARS:
                print(f"  '{c}'  ", end="")
            print("  | spread")

            for mode_name, flags in MODES.items():
                face = freetype.Face(font_path)
                face.set_char_size(size << 6, size << 6, DPI, DPI)
                vals = []
                print(f"  {mode_name:6s}", end="")
                for c in CHARS:
                    v = get_stem_eff(face, c, flags)
                    vals.append(v)
                    # Test against None explicitly so the N/A decision
                    # matches the ``valid`` filter below.
                    print(f"  {v:5.2f}" if v is not None else "    N/A", end="")

                valid = [v for v in vals if v is not None]
                spread = max(valid) - min(valid) if len(valid) >= 2 else 0
                marker = " <-- inconsistent" if spread > 0.5 else ""
                print(f"  | {spread:.2f}{marker}")


if __name__ == "__main__":
    main()

  ```
  
</details>

Here are the results. The table compares how the font-generation
`autohint` flag affects the range of stem widths across various
characters. A lower `spread` means that glyph stroke widths should
appear more consistent.
```
    Spread = max stem width - min stem width across glyphs (lower = more consistent):                                                          
                                                                                                                                               
    ┌──────────────┬──────┬─────────┬──────────┬──────────┐                                                                                    
    │     Font     │ Size │ Default │ Autohint │  Winner  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │ Bookerly     │ 12pt │ 1.49    │ 1.12     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 14pt │ 1.39    │ 1.13     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 16pt │ 1.38    │ 1.16     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 18pt │ 1.90    │ 1.58     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │ NotoSans     │ 12pt │ 1.16    │ 0.94     │ mixed    │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 14pt │ 0.83    │ 1.14     │ default  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 16pt │ 1.41    │ 1.51     │ default  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 18pt │ 1.74    │ 1.63     │ mixed    │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │ OpenDyslexic │ 12pt │ 2.22    │ 1.44     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 14pt │ 2.57    │ 3.29     │ default  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 16pt │ 3.13    │ 2.60     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 18pt │ 3.21    │ 3.23     │ ~tied    │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │ Ubuntu       │ 12pt │ 1.25    │ 1.31     │ default  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 14pt │ 1.41    │ 1.64     │ default  │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 16pt │ 2.21    │ 1.71     │ autohint │                                                                                    
    ├──────────────┼──────┼─────────┼──────────┼──────────┤                                                                                    
    │              │ 18pt │ 1.80    │ 1.71     │ autohint │                                                                                    
    └──────────────┴──────┴─────────┴──────────┴──────────┘                                                                                    
```


---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? I used AI to make sure I'm
not doing something stupid, since I'm not a typography expert. I made
the changes though.
2026-02-23 22:13:08 +03:00

424 lines
15 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!python3
import freetype
import zlib
import sys
import re
import math
import argparse
from collections import namedtuple
# Originally from https://github.com/vroland/epdiy

# Command-line interface.
parser = argparse.ArgumentParser(
    description="Generate a header file from a font to be used with epdiy.",
)
parser.add_argument("name", action="store", help="name of the font.")
parser.add_argument("size", type=int, help="font size to use.")
parser.add_argument(
    "fontstack",
    action="store",
    nargs='+',
    help="list of font files, ordered by descending priority.",
)
parser.add_argument(
    "--2bit",
    dest="is2Bit",
    action="store_true",
    help="generate 2-bit greyscale bitmap instead of 1-bit black and white.",
)
parser.add_argument(
    "--additional-intervals",
    dest="additional_intervals",
    action="append",
    help="Additional code point intervals to export as min,max. This argument can be repeated.",
)
parser.add_argument(
    "--compress",
    dest="compress",
    action="store_true",
    help="Compress glyph bitmaps using DEFLATE with group-based compression.",
)
parser.add_argument(
    "--force-autohint",
    dest="force_autohint",
    action="store_true",
    help="Force FreeType auto-hinter instead of native font hinting. Improves stem width consistency for fonts with weak or no native TrueType hints.",
)
args = parser.parse_args()

# One record per exported glyph. Every field except code_point is written
# to the generated glyph table.
GlyphProps = namedtuple(
    "GlyphProps",
    ["width", "height", "advance_x", "left", "top", "data_length", "data_offset", "code_point"],
)

# Faces are kept in command-line order; glyph lookup tries them in that order.
font_stack = list(map(freetype.Face, args.fontstack))
is2Bit = args.is2Bit
size = args.size
font_name = args.name

# Flags handed to FreeType for every glyph load; the auto-hinter is opt-in.
load_flags = (
    freetype.FT_LOAD_RENDER | freetype.FT_LOAD_FORCE_AUTOHINT
    if args.force_autohint
    else freetype.FT_LOAD_RENDER
)
# Inclusive Unicode code point intervals to export.
# Entries here may overlap and appear in any order: the list is sorted and
# overlapping ranges are merged further down, before any glyph is loaded.
intervals = [
    ### Basic Latin ###
    # ASCII letters, digits, punctuation, control characters
    (0x0000, 0x007F),

    ### Latin-1 Supplement ###
    # Accented characters for Western European languages
    (0x0080, 0x00FF),

    ### Latin Extended-A ###
    # Eastern European and Baltic languages
    (0x0100, 0x017F),

    ### General Punctuation (core subset) ###
    # Smart quotes, en dash, em dash, ellipsis, NO-BREAK SPACE
    (0x2000, 0x206F),

    ### Basic Symbols From "Latin-1 + Misc" ###
    # dashes, quotes, prime marks
    (0x2010, 0x203A),
    # misc punctuation
    (0x2040, 0x205F),
    # common currency symbols
    (0x20A0, 0x20CF),

    ### Combining Diacritical Marks (minimal subset) ###
    # Needed for proper rendering of many extended Latin languages
    (0x0300, 0x036F),

    ### Greek & Coptic ###
    # Used in science, maths, philosophy, some academic texts
    # (0x0370, 0x03FF),

    ### Cyrillic ###
    # Russian, Ukrainian, Bulgarian, etc.
    (0x0400, 0x04FF),

    ### Math Symbols (common subset) ###
    # Superscripts and Subscripts
    (0x2070, 0x209F),
    # General math operators
    (0x2200, 0x22FF),
    # Arrows
    (0x2190, 0x21FF),

    ### CJK ###
    # Disabled by default; uncomment the ranges that are needed.
    # Core Unified Ideographs
    # (0x4E00, 0x9FFF),
    # # Extension A
    # (0x3400, 0x4DBF),
    # # Extension B
    # (0x20000, 0x2A6DF),
    # # Extension CF
    # (0x2A700, 0x2EBEF),
    # # Extension G
    # (0x30000, 0x3134F),
    # # Hiragana
    # (0x3040, 0x309F),
    # # Katakana
    # (0x30A0, 0x30FF),
    # # Katakana Phonetic Extensions
    # (0x31F0, 0x31FF),
    # # Halfwidth Katakana
    # (0xFF60, 0xFF9F),
    # # Hangul Syllables
    # (0xAC00, 0xD7AF),
    # # Hangul Jamo
    # (0x1100, 0x11FF),
    # # Hangul Compatibility Jamo
    # (0x3130, 0x318F),
    # # Hangul Jamo Extended-A
    # (0xA960, 0xA97F),
    # # Hangul Jamo Extended-B
    # (0xD7B0, 0xD7FF),
    # # CJK Radicals Supplement
    # (0x2E80, 0x2EFF),
    # # Kangxi Radicals
    # (0x2F00, 0x2FDF),
    # # CJK Symbols and Punctuation
    # (0x3000, 0x303F),
    # # CJK Compatibility Forms
    # (0xFE30, 0xFE4F),
    # # CJK Compatibility Ideographs
    # (0xF900, 0xFAFF),

    ### Specials
    # Replacement Character
    (0xFFFD, 0xFFFD),
]
def _parse_interval(spec):
    """Parse one ``min,max`` argument into an inclusive ``(min, max)`` tuple.

    Each bound is read with ``int(..., base=0)``, so decimal, ``0x`` hex,
    ``0o`` octal and ``0b`` binary literals are accepted.

    Raises:
        ValueError: if ``spec`` is not exactly two integers, or if the
            first bound is greater than the second.
    """
    bounds = [int(n, base=0) for n in spec.split(",")]
    if len(bounds) != 2:
        raise ValueError(f"expected min,max but got {spec!r}")
    if bounds[0] > bounds[1]:
        # An inverted interval would survive validation further down and
        # produce a negative glyph-offset step in the generated table.
        raise ValueError(f"min is greater than max in {spec!r}")
    return tuple(bounds)


# Extra intervals requested with --additional-intervals (may be empty).
add_ints = []
if args.additional_intervals:
    try:
        add_ints = [_parse_interval(i) for i in args.additional_intervals]
    except ValueError as err:
        # Report bad input as a normal usage error instead of a traceback.
        parser.error(f"invalid --additional-intervals value: {err}")
def norm_floor(val):
    """Divide ``val`` by 64 (6 fractional bits) and round down to an int."""
    scale = 1 << 6
    return int(math.floor(val / scale))
def norm_ceil(val):
    """Divide ``val`` by 64 (6 fractional bits) and round up to an int."""
    scale = 1 << 6
    return int(math.ceil(val / scale))
def chunks(l, n):
    """Yield consecutive slices of ``l`` that hold at most ``n`` items each.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[begin:begin + n] for begin in range(0, len(l), n))
def load_glyph(code_point):
    """Load ``code_point`` from the first face in ``font_stack`` that has it.

    Faces are tried in stack order. The first one that maps the code point
    to a non-zero glyph index loads that glyph with ``load_flags`` and is
    returned, so the glyph is then available through ``face.glyph``.

    Returns ``None``, after printing a message to stderr, when no face in
    the stack provides the code point.
    """
    for candidate in font_stack:
        glyph_index = candidate.get_char_index(code_point)
        if glyph_index > 0:
            candidate.load_glyph(glyph_index, load_flags)
            return candidate
    print(f"code point {code_point} ({hex(code_point)}) not found in font stack!", file=sys.stderr)
    return None
# Combine the built-in and user-supplied intervals, then merge overlapping
# ones so that no code point is exported twice.
unmerged_intervals = sorted(intervals + add_ints)
intervals = []  # refilled below with the intervals that survive validation
unvalidated_intervals = []
for i_start, i_end in unmerged_intervals:
    # Intervals are inclusive, so an interval that starts exactly on the
    # previous interval's last code point already overlaps it. Adjacent
    # intervals (start == previous end + 1) are deliberately kept separate.
    if unvalidated_intervals and i_start <= unvalidated_intervals[-1][1]:
        prev_start, prev_end = unvalidated_intervals[-1]
        unvalidated_intervals[-1] = (prev_start, max(prev_end, i_end))
        continue
    unvalidated_intervals.append((i_start, i_end))
# Keep only code points that some face in the font stack provides. Each
# merged interval is split around the missing code points, and the
# resulting runs are appended to ``intervals``.
# NOTE(review): glyphs are loaded here before set_char_size() runs below;
# confirm FreeType tolerates loading with no character size set.
for range_start, range_end in unvalidated_intervals:
    run_start = range_start
    for code_point in range(range_start, range_end + 1):
        if load_glyph(code_point) is not None:
            continue
        # Missing glyph: close the run that ended just before it, if any.
        if run_start < code_point:
            intervals.append((run_start, code_point - 1))
        run_start = code_point + 1
    # Flush the trailing run unless the interval ended on a missing glyph.
    if run_start != range_end + 1:
        intervals.append((run_start, range_end))

# The size is passed shifted left by 6 bits, at 150 dpi in both directions.
char_size = size << 6
for face in font_stack:
    face.set_char_size(char_size, char_size, 150, 150)
def _pack_4bit(buffer, width):
    """Pack 8-bit coverage values into 4-bit pixels, two pixels per byte.

    Within a row, even columns are stored in the low nibble and odd columns
    in the high nibble. A row of odd width ends with a byte whose high
    nibble is zero, so every row takes ``ceil(width / 2)`` bytes.

    ``buffer`` is indexed as if it held exactly ``width`` bytes per row.
    """
    packed = []
    px = 0
    for i, v in enumerate(buffer):
        x = i % width
        if x % 2 == 0:
            px = v >> 4
        else:
            px = px | (v & 0xF0)
            packed.append(px)
            px = 0
        # End of an odd-width row: flush the lone low nibble.
        if x == width - 1 and width % 2 > 0:
            packed.append(px)
            px = 0
    return packed


def _downsample_2bit(pixels4g, width, rows):
    """Reduce a 4-bit packed bitmap to 2 bits per pixel, four pixels per byte.

    Levels: 0-3 white, 4-7 light grey, 8-11 dark grey, 12-15 black.
    Pixels are packed most-significant bits first and continuously across
    rows (no per-row padding); a final partial byte is zero-padded in its
    low bits.
    """
    pixels2b = []
    px = 0
    pitch = (width // 2) + (width % 2)  # bytes per row in pixels4g
    for y in range(rows):
        for x in range(width):
            px = px << 2
            bm = pixels4g[y * pitch + (x // 2)]
            bm = (bm >> ((x % 2) * 4)) & 0xF
            if bm >= 12:
                px += 3
            elif bm >= 8:
                px += 2
            elif bm >= 4:
                px += 1
            if (y * width + x) % 4 == 3:
                pixels2b.append(px)
                px = 0
    if (width * rows) % 4 != 0:
        px = px << (4 - (width * rows) % 4) * 2
        pixels2b.append(px)
    return pixels2b


def _downsample_1bit(pixels4g, width, rows):
    """Reduce a 4-bit packed bitmap to 1 bit per pixel, eight pixels per byte.

    Any 4-bit value of 2 or more is treated as black (bit set). Pixels are
    packed most-significant bit first and continuously across rows; a final
    partial byte is zero-padded in its low bits.
    """
    pixelsbw = []
    px = 0
    pitch = (width // 2) + (width % 2)  # bytes per row in pixels4g
    for y in range(rows):
        for x in range(width):
            px = px << 1
            bm = pixels4g[y * pitch + (x // 2)]
            # Even columns live in the low nibble, odd ones in the high nibble.
            px += 1 if ((x & 1) == 0 and bm & 0xE > 0) or ((x & 1) == 1 and bm & 0xE0 > 0) else 0
            if (y * width + x) % 8 == 7:
                pixelsbw.append(px)
                px = 0
    if (width * rows) % 8 != 0:
        px = px << (8 - (width * rows) % 8)
        pixelsbw.append(px)
    return pixelsbw


# Render every validated code point and collect (GlyphProps, packed bytes)
# pairs in code point order. ``total_size`` tracks the running offset of
# each glyph's data in the uncompressed bitmap array.
total_size = 0
all_glyphs = []
for i_start, i_end in intervals:
    for code_point in range(i_start, i_end + 1):
        face = load_glyph(code_point)
        bitmap = face.glyph.bitmap

        # Build out 4-bit greyscale bitmap, then downsample to the target depth.
        pixels4g = _pack_4bit(bitmap.buffer, bitmap.width)
        if is2Bit:
            pixels = _downsample_2bit(pixels4g, bitmap.width, bitmap.rows)
        else:
            pixels = _downsample_1bit(pixels4g, bitmap.width, bitmap.rows)

        # Build output data
        packed = bytes(pixels)
        glyph = GlyphProps(
            width=bitmap.width,
            height=bitmap.rows,
            advance_x=norm_floor(face.glyph.advance.x),
            left=face.glyph.bitmap_left,
            top=face.glyph.bitmap_top,
            data_length=len(packed),
            data_offset=total_size,
            code_point=code_point,
        )
        total_size += len(packed)
        all_glyphs.append((glyph, packed))
# pipe seems to be a good heuristic for the "real" descender
# The face that provides '|' supplies the height, ascender and descender
# written to the font data struct at the end of the generated header.
face = load_glyph(ord('|'))
if face is None:
    # Fail before any header output instead of crashing on ``face.size``
    # after most of the header has already been printed.
    sys.exit("fontconvert: no font in the stack provides '|', which is needed for the font metrics")

# Flatten the per-glyph results into the two parallel output arrays.
glyph_data = []
glyph_props = []
for props, packed in all_glyphs:
    glyph_data.extend(packed)
    glyph_props.append(props)
compress = args.compress

# Build groups for compression
if compress:
    # Script-based grouping: glyphs that co-occur in typical text rendering
    # are grouped together for efficient LRU caching on the embedded target.
    # Since glyphs are in codepoint order, glyphs in the same Unicode block
    # are contiguous in the array and form natural groups.
    SCRIPT_GROUP_RANGES = [
        (0x0000, 0x007F),  # ASCII
        (0x0080, 0x00FF),  # Latin-1 Supplement
        (0x0100, 0x017F),  # Latin Extended-A
        (0x0300, 0x036F),  # Combining Diacritical Marks
        (0x0400, 0x04FF),  # Cyrillic
        (0x2000, 0x206F),  # General Punctuation
        (0x2070, 0x209F),  # Superscripts & Subscripts
        (0x20A0, 0x20CF),  # Currency Symbols
        (0x2190, 0x21FF),  # Arrows
        (0x2200, 0x22FF),  # Math Operators
        (0xFFFD, 0xFFFD),  # Replacement Character
    ]

    def get_script_group(code_point):
        """Return the index of the range containing ``code_point``, or -1."""
        for i, (start, end) in enumerate(SCRIPT_GROUP_RANGES):
            if start <= code_point <= end:
                return i
        return -1

    # Split the glyph array into runs of consecutive glyphs that share a
    # script group.
    groups = []  # list of (first_glyph_index, glyph_count)
    current_group_id = None
    group_start = 0
    group_count = 0
    for i, (props, packed) in enumerate(all_glyphs):
        sg = get_script_group(props.code_point)
        if sg != current_group_id:
            if group_count > 0:
                groups.append((group_start, group_count))
            current_group_id = sg
            group_start = i
            group_count = 1
        else:
            group_count += 1
    if group_count > 0:
        groups.append((group_start, group_count))

    # Compress each group
    compressed_groups = []  # list of (compressed_bytes, uncompressed_size, glyph_count, first_glyph_index)
    compressed_bitmap_data = []

    # Also build modified glyph props with within-group offsets
    modified_glyph_props = list(glyph_props)
    for first_idx, count in groups:
        # Concatenate bitmap data for this group
        group_data = bytearray()
        for gi in range(first_idx, first_idx + count):
            props, packed = all_glyphs[gi]
            # Update glyph's dataOffset to be within-group offset
            modified_glyph_props[gi] = modified_glyph_props[gi]._replace(
                data_offset=len(group_data),
            )
            group_data += packed

        # Compress with raw DEFLATE (no zlib/gzip header)
        compressor = zlib.compressobj(level=9, wbits=-15)
        compressed = compressor.compress(bytes(group_data)) + compressor.flush()
        compressed_groups.append((compressed, len(group_data), count, first_idx))
        compressed_bitmap_data.extend(compressed)

    glyph_props = modified_glyph_props

    total_compressed = len(compressed_bitmap_data)
    total_uncompressed = len(glyph_data)
    # Guard the ratio: there may be no bitmap bytes at all.
    percent = 100 * total_compressed / total_uncompressed if total_uncompressed else 0.0
    print(f"// Compression: {total_uncompressed} -> {total_compressed} bytes ({percent:.1f}%), {len(groups)} groups", file=sys.stderr)
# ---- Emit the generated header on stdout ----

# File banner, include guard and the shared type definitions.
print(f"""/**
* generated by fontconvert.py
* name: {font_name}
* size: {size}
* mode: {'2-bit' if is2Bit else '1-bit'}{' compressed: true' if compress else ''}
* Command used: {' '.join(sys.argv)}
*/
#pragma once
#include "EpdFontData.h"
""")

# Bitmap bytes: the compressed stream when --compress is given, otherwise
# the raw packed glyph data. Sixteen bytes are written per line.
bitmap_bytes = compressed_bitmap_data if compress else glyph_data
print(f"static const uint8_t {font_name}Bitmaps[{len(bitmap_bytes)}] = {{")
for row in chunks(bitmap_bytes, 16):
    print(" " + " ".join(f"0x{b:02X}," for b in row))
print("};\n")

# Glyph table: every GlyphProps field except the trailing code_point,
# followed by the character itself as a comment.
print(f"static const EpdGlyph {font_name}Glyphs[] = {{")
for g in glyph_props:
    fields = ", ".join(f"{a}" for a in g[:-1])
    # Code point 92 (backslash) is spelled out instead of printed literally.
    label = '<backslash>' if g.code_point == 92 else chr(g.code_point)
    print(" { " + fields, "},", f"// {label}")
print("};\n")

# Interval table: first code point, last code point, and the index of the
# interval's first glyph in the glyph table.
print(f"static const EpdUnicodeInterval {font_name}Intervals[] = {{")
offset = 0
for i_start, i_end in intervals:
    print(f" {{ 0x{i_start:X}, 0x{i_end:X}, 0x{offset:X} }},")
    offset += i_end - i_start + 1
print("};\n")

# Group table (compressed output only): offset and size of each group in
# the compressed stream, its uncompressed size, glyph count and first glyph.
if compress:
    print(f"static const EpdFontGroup {font_name}Groups[] = {{")
    compressed_offset = 0
    for compressed, uncompressed_size, count, first_idx in compressed_groups:
        print(f" {{ {compressed_offset}, {len(compressed)}, {uncompressed_size}, {count}, {first_idx} }},")
        compressed_offset += len(compressed)
    print("};\n")

# Top-level font descriptor tying the tables together.
print(f"static const EpdFontData {font_name} = {{")
font_data_fields = [
    f"{font_name}Bitmaps",
    f"{font_name}Glyphs",
    f"{font_name}Intervals",
    f"{len(intervals)}",
    f"{norm_ceil(face.size.height)}",
    f"{norm_ceil(face.size.ascender)}",
    f"{norm_floor(face.size.descender)}",
    'true' if is2Bit else 'false',
]
if compress:
    font_data_fields += [f"{font_name}Groups", f"{len(compressed_groups)}"]
else:
    font_data_fields += ["nullptr", "0"]
for field in font_data_fields:
    print(f" {field},")
print("};")