#!/usr/bin/env python3 """ Generate a small EPUB with prose that exercises kerning and ligature edge cases. Kerning pairs targeted (Basic Latin — "western" scope, ASCII): AV, AW, AY, AT, AC, AG, AO, AQ, AU FA, FO, Fe, Fo, Fr, Fy LT, LV, LW, LY PA, Pe, Po TA, Te, To, Tr, Ty, Tu, Ta, Tw VA, Ve, Vo, Vy, Va WA, We, Wo, Wa, Wy YA, Ya, Ye, Yo, Yu Av, Aw, Ay ov, oy, ow, ox rv, ry, rw "r." "r," (right-side space after r) f., f, Kerning pairs targeted (Latin-1 Supplement — "western" scope, non-ASCII): Tö, Tü, Tä (German: Töchter, Türkei, Tänzer) Vö, Vä (German: Vögel, Väter) Wü, Wö (German: Würde, Wörter) Fü, Fé, Fê (German/French: Für, Février, Fête) Äu (German: Äußerst) Öf (German: Öffnung — also exercises ff ligature) Üb (German: Über) Àl, Àp (French: À la, À propos) Pè, Pé (French: Père, Pétanque) Ré (French: République, Rémy) Ño, Ñu (Spanish: niño, Muñoz) Eñ (Spanish: España) Ça, Çe (French: Ça, Garçon) Åk (Scandinavian: Åkesson) Ør (Scandinavian: Ørsted) Æs, Cæ (Scandinavian/archaic: Cæsar, æsthetic) ße, ßb (German: Straße, weißblau) «L, «V, r», é» (guillemets: « and ») „G, ‚W (German-style low-9 quotation marks) …" (horizontal ellipsis adjacent to quotes) Kerning pairs targeted (Latin Extended-A — "latin" scope additions): Tě, Tř (Czech: Těšín, Třebíč) Vě (Czech: Věra, věda) Př (Czech: Příbram, příroda) Wą, Wę (Polish: Wąchock, Węgry) Łó, Łu, Ły (Polish: Łódź, Łukasz, łyżka) Čá, Če (Czech: Čáslav, České) Ří, Řa, Ře (Czech: Říjen, Řád, Řeka) Šk, Št (Czech/Slovak: Škoda, Šťastný) Ží, Žá (Czech: život, žádný) Ať (Czech) Tő, Vő (Hungarian: tőke, vőlegény) İs (Turkish: İstanbul) Ğa, Ğı (Turkish: dağ, Beyoğlu) Ligature sequences targeted (ASCII): fi, fl, ff, ffi, ffl, ft, fb, fh, fj, fk st, ct (historical) Th (common Th ligature) Ligature sequences in Latin-1 Supplement context: fi adjacent to accented chars: définition, magnifique, officière fl adjacent to accented chars: réflexion, soufflé ff adjacent to accented chars: Öffnung, différent, souffrir ffi adjacent to 
accented chars: efficacité, officière ffl adjacent to accented chars: soufflé Æ/æ (U+00C6/U+00E6): Cæsar, Ærø, mediæval, encyclopædia, æsthetic Ligature sequences in Latin Extended-A context: fi near Extended-A chars: filozofie, firma, finále, fikir fl near Extended-A chars: flétnista, flétna, refleks ff near Extended-A chars: offikás œ (U+0153): cœur, sœur, œuvre, bœuf, manœuvre ij (U+0133): ijzer, vrij, bijzonder, ijverig Kerning pairs targeted (Latin Extended-B — U+0180–024F): Ța, Țe, Țo, Țu (Romanian: T-comma overhang, like T) Șa, Șe, Și (Romanian: S-comma descender) Tș, Vș (Latin T/V followed by Romanian s-comma) Tơ, Vơ, Tư, Vư (Vietnamese: horn diacritics under T/V overhangs) Ƒa, Ƒo, Ƒe (African: F-hook pairs) DŽ, Dž, LJ, Lj, NJ, Nj (Croatian digraph ligatures) Tǎ, Tǒ, Tǔ (Pinyin: caron vowels under T overhang) Tǖ, Tǘ, Tǚ, Tǜ (Pinyin: u-diaeresis with tone marks) Kerning pairs targeted (Greek & Coptic — U+0370–03FF): Γα, Γε, Γο, Γυ, Γρ (Γ overhang, like Latin T / Cyrillic Г) Τα, Τε, Το, Τυ, Τρ (Τ overhang, identical to Latin T) Αυ, Αν, Ατ, Αδ (Α diagonal, like Latin A) Υα, Υε, Υο (Υ diagonal, like Latin Y) Ρα, Ρε, Ρο (Ρ bowl, like Latin P) Φα, Φο, Φυ (Φ wide circular) Δα, Δε, Δο (Δ triangular base) Λα, Λε, Λο (Λ inverted-V) «Γ, «Τ, ε», ο» (guillemets in Greek context) Kerning pairs targeted (Cyrillic — U+0400–04FF): Ге, Го, Гу, Га, Гр (Г has overhanging crossbar like T/F) Та, Те, То, Ту, Тр, Ті, Тя (Т = Latin T shape) Ра, Ре, Ро, Ру (Р = Latin P shape) Ау, Ав, Ат, Ад (А = Latin A shape) Ув, Уд, Ук, Ум (У = Latin Y shape — diagonal) Фа, Фо, Фу (Ф = wide circular letter) Да, Де, До, Ду (Д has descending serifs) Ла, Ле, Ло, Лу (Л = inverted V shape) Ча, Чо, Чу (Ч has overhanging stroke) «Г, «Т, «В, р», е» (guillemets in Cyrillic context) Ukrainian: Її, Єв, Ґа Bulgarian: Щу, Жа, Юл Combining marks targeted (U+0300–U+036F — Combining Diacritical Marks): U+0300 grave, U+0301 acute, U+0302 circumflex, U+0303 tilde U+0304 macron, U+0306 breve, U+0307 dot above, 
U+0308 diaeresis U+030A ring above, U+030B double acute, U+030C caron U+0323 dot below (Vietnamese stacking) U+0327 cedilla, U+0328 ogonek U+031B horn (Vietnamese) Decomposed equivalents of precomposed characters (NFD vs NFC): o+U+0308 vs ö, e+U+0301 vs é, e+U+0302 vs ê, a+U+0300 vs à, etc. Multiple combining marks on one base character: e+U+0302+U+0323 (Vietnamese ệ), u+U+031B+U+0301, etc. Combining marks adjacent to kerning pairs: To+U+0308 (decomposed Tö), Vo+U+0308, Wu+U+0308, etc. Combining marks adjacent to ligature sequences: de+U+0301+fi (définition), re+U+0301+fl (réflexion), etc. Extended Latin-A decomposed compositions: e+U+030C (ě), r+U+030C (ř), a+U+0328 (ą), s+U+0327 (ş), D+U+030C (Ď), etc. Precomposed vs decomposed side-by-side comparison (Latin-1 and Extended-A) Also includes: Quotes around kerning-sensitive letters (e.g. "AWAY", "Typography") Numerals with kerning (10, 17, 74, 47) Punctuation adjacency (T., V., W., Y.) """ import io import os import zipfile import uuid from datetime import datetime try: from PIL import Image, ImageDraw, ImageFont except ImportError: print("Please install Pillow: pip install Pillow") exit(1) _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) _BOOKERLY_FONT = os.path.join( _PROJECT_ROOT, "lib", "EpdFont", "builtinFonts", "source", "Bookerly", "Bookerly-Regular.ttf", ) def _get_font(size=20): """Get the Bookerly font at the requested size, with system fallbacks.""" paths = [_BOOKERLY_FONT] for path in paths: try: return ImageFont.truetype(path, size) except (OSError, IOError): continue return ImageFont.load_default(size) def _draw_text_centered(draw, y, text, font, fill, width): bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] x = (width - text_width) // 2 draw.text((x, y), text, font=font, fill=fill) def create_cover_image(): """Generate a cover image matching the original layout and return JPEG bytes.""" width, height = 536, 800 bg_color = (30, 42, 58) text_color 
= (225, 220, 205) img = Image.new("RGB", (width, height), bg_color) draw = ImageDraw.Draw(img) font_title = _get_font(72) font_subtitle = _get_font(26) font_author = _get_font(14) font_ornament = _get_font(64) title_lines = ["Kerning", "& Ligature", "Edge Cases"] title_y = 92 for line in title_lines: _draw_text_centered(draw, title_y, line, font_title, text_color, width) title_y += 90 ornament_y = title_y + 10 _draw_text_centered(draw, ornament_y, "*", font_ornament, text_color, width) subtitle_y = ornament_y + 72 _draw_text_centered(draw, subtitle_y, "A Typographer\u2019s Compendium", font_subtitle, text_color, width) _draw_text_centered(draw, height - 70, "CROSSPOINT TEST FIXTURES", font_author, text_color, width) buf = io.BytesIO() img.save(buf, "JPEG", quality=90) return buf.getvalue() BOOK_UUID = str(uuid.uuid4()) TITLE = "Kerning & Ligature Edge Cases" AUTHOR = "Crosspoint Test Fixtures" DATE = datetime.now().strftime("%Y-%m-%d") # ── XHTML content pages ────────────────────────────────────────────── CHAPTER_1 = """\ Chapter 1 – The Typographer's Affliction

Chapter 1
The Typographer’s Affliction

AVERY WATT always wanted to be a typographer. Years of careful study at Yale had taught him that every typeface holds a secret: the negative space between letters matters as much as the strokes themselves. “AWAY with sloppy kerning!” he would thunder at his apprentices, waving a proof sheet covered in red annotations.

The office of Watt & Yardley, Fine Typography occupied the top floor of an old factory on Waverly Avenue. On the frosted glass of the door, gold leaf spelled WATT & YARDLEY in Caslon capitals. Beneath it, in smaller letters: Purveyors of Tasteful Composition.

Today Avery sat at his desk, frowning at a page of proofs. The client — a wealthy patron named Lydia Thornton-Foxwell — had commissioned a lavish coffee-table volume on the history of calligraphy. It was the sort of project Avery loved: difficult, fussy, and likely to be appreciated by fewer than forty people on Earth.

“Look at this,” he muttered to his assistant, Vera Young. He tapped the offending line with a pencil. “The ‘AW’ pair in DRAWN is too loose. And the ‘To’ in ‘Towards’ — the overhang of the T-crossbar should tuck over the lowercase o. This is first-rate typeface work; we can’t afford sloppy fit.”

Vera adjusted her glasses and peered at the proof. “You’re right. The ‘Ty’ in ‘Typography’ also looks off. And further down — see the ‘VA’ in ‘VAULTED’? The diagonals aren’t meshing at all.”

“Exactly!” Avery slapped the desk. “We’ll need to revisit every pair: AV, AW, AT, AY, FA, Fe, LT, LV, LW, LY, PA, TA, Te, To, Tu, Tw, VA, Ve, Vo, WA, Wa, YA, Ya — the whole catalogue. I want this volume to be flawless.”

He leaned back and stared at the ceiling. Forty-seven years of typesetting had left Avery with impeccable standards and a permanent squint. He could spot a miskerned ‘AT’ pair from across the room. “Fetch the reference sheets,” he told Vera. “And coffee. Strong coffee.”

""" CHAPTER_2 = """\ Chapter 2 – Ligatures in the Afflicted Offices

Chapter 2
Ligatures in the Afflicted Offices

The first difficulty arose with ligatures. Avery was fiercely attached to the classic fi and fl ligatures — the ones where the terminal of the f swings gracefully into the dot of the i or the ascender of the l. Without them, he felt, the page looked ragged and unfinished.

“A fine figure of a man,” he read aloud from the proofs, testing the fi combination. “The daffodils in the field were in full flower, their ruffled petals fluttering in the stiff breeze.” He nodded — the fi and fl joins looked clean. But then he frowned. “What about the double-f ligatures? ‘Affixed,’ ‘baffled,’ ‘scaffolding,’ ‘offload’ — we need the ff, ffi, and ffl forms.”

Vera flipped through the character map. “The typeface supports ff, fi, fl, ffi, and ffl. But I’m not sure about the rarer ones — ft, fb, fh, fj, fk.”

“Test them,” Avery said. “Set a line: The loft’s rooftop offered a deft, soft refuge. That gives us ft. Now try: halfback, offbeat. That’s fb. For fh: The wolfhound sniffed the foxhole. And fj — well, that’s mostly in loanwords. Fjord and fjeld are the usual suspects. Fk is almost nonexistent in English; skip it.”

Vera typed dutifully. “What about the historical st and ct ligatures? I know some revival faces include them.”

“Yes! The ‘st’ ligature in words like first, strongest, last, masterful, fastidious — it gives the page a lovely archaic flavour. And ‘ct’ in strictly, perfectly, tactful, connected, architectural, instructed. Mrs. Thornton-Foxwell specifically requested them.”

He paused, then added: “And don’t forget the Th ligature. The word ‘The’ appears thousands of times in any book. If we can join the T and the h into a graceful Th, the texture of every page improves. Set The thrush sat on the thatched roof of the theatre, thinking. There — Th six times in one sentence.”

""" CHAPTER_3 = """\ Chapter 3 – The Proof of the Pudding

Chapter 3
The Proof of the Pudding

Two weeks later, the revised proofs arrived. Avery carried them to the window and held them up to the light. The paper was a beautiful warm ivory, the ink a deep, true black.

He began to read, his eye scanning every pair. “AWAY TO YESTERDAY” ran the chapter title, in large capitals. The AW was tight, the AY tucked in, the TO well-fitted, the YE elegantly kerned. He exhaled slowly.

“Page fourteen,” he murmured. “After years of toil, the faithful craftsman affixed the final flourish to the magnificent oak panel.” The fi in faithful, the ffi in affixed, the fi in final, the fl in flourish, the fi in magnificent — all were perfectly joined. The ft in craftsman and after showed a subtle but satisfying connection.

He turned to page seventeen. The text was denser here, a scholarly passage on the evolution of letterforms. Effective typographic practice requires an officer’s efficiency and a professor’s perfectionism. Suffice it to say that afflicted typesetters often find themselves baffled by the sheer profusion of difficulties.

Avery counted: the passage contained ff four times, fi six times, ffl once (in “baffled” — wait, no, that was ff+l+ed), and ffi twice (in “officer’s” and “efficiency”). He smiled. The ligatures were holding up perfectly.

The kerning was impeccable too. In the word “ATAVISTIC” — set as a pull-quote in small capitals — the AT pair was snug, the AV nestled tightly, and the TI showed just the right clearance. Lower down, a passage about calligraphers in various countries offered a feast of tricky pairs:

Twelve Welsh calligraphers traveled to Avignon, where they studied Venetian lettering techniques. Years later, they returned to Pwllheli, Tywyn, and Aberystwyth, bringing with them a wealth of knowledge about vowel placement, Tuscan ornament, and Lombardic versals.

The Tw in Twelve, the We in Welsh, the Av in Avignon, the Ve in Venetian, the Ye in Years, the Ty in Tywyn, the Tu in Tuscan, the Lo in Lombardic — every pair sat comfortably on the baseline, with not a hair’s breadth of excess space.

""" CHAPTER_4 = """\ Chapter 4 – Punctuation and Numerals

Chapter 4
Punctuation and Numerals

“Now for the tricky part,” Avery said, reaching for a loupe. Kerning around punctuation was notoriously fiddly. A period after a capital V or W or Y could leave an ugly gap; a comma after an r or an f needed careful attention.

He set a test passage: Dr. Foxwell arrived at 7:47 a.m. on the 14th of November. “Truly,” she declared, “your work is perfect.” “We try,” Avery replied, “but perfection is elusive.”

The r-comma in “your,” the r-period in “Dr.” and “Mr.”, the f-period in “Prof.” — all needed to be set so that the punctuation didn’t drift too far from the preceding letter. Avery had seen appalling examples where the period after a V seemed to float in space, marooned from the word it belonged to.

“V. S. Naipaul,” he muttered, setting the name in various sizes. “W. B. Yeats. T. S. Eliot. P. G. Wodehouse. F. Scott Fitzgerald. Y. Mishima.” Each initial-period-space sequence was a potential trap. At display sizes the gaps yawned; at text sizes they could vanish into a murky blur.

Numerals brought their own challenges. The figures 1, 4, and 7 were the worst offenders — their open shapes created awkward spacing next to rounder digits. “Set these,” Avery instructed: 10, 17, 47, 74, 114, 747, 1471. Vera typed them in both tabular and proportional figures. The tabular set looked even but wasteful; the proportional set was compact but needed kerning between 7 and 4, and between 1 and 7.

“And fractions,” Avery added. “Try ½, ¼, ¾, and the arbitrary ones: 3/8, 5/16, 7/32. The virgule kerning against the numerals is always a headache.”

By five o’clock they had tested every combination Avery could think of. The proofs, now bristling with pencil marks and sticky notes, were ready for the foundry. “Tomorrow,” Avery said, “we tackle the italic and the bold. And after that — the small capitals.”

Vera groaned. “You’re a perfectionist, Avery Watt.”

“Naturally,” he replied. “That’s what they pay us for.”

""" CHAPTER_5 = """\ Chapter 5 – A Glossary of Troublesome Pairs

Chapter 5
A Glossary of Troublesome Pairs

As a final flourish, Avery drafted an appendix for the volume: a glossary of every kerning pair and ligature that had given him grief over forty-seven years. Vera typed it up while Avery dictated.

Kerning Pairs

AV — As in AVID, AVIARY, AVOCADO, TRAVESTY, CAVALIER.
AW — As in AWAY, AWARD, AWNING, DRAWN, BRAWL, SHAWL.
AY — As in AYAH, LAYER, PLAYER, PRAYER, BAYONET.
AT — As in ATLAS, ATTIC, LATERAL, WATER, PLATTER.
AC — As in ACORN, ACCURATE, BACON, PLACATE.
AG — As in AGAIN, AGATE, DRAGON, STAGGER.
AO — As in KAOLIN, PHARAOH, EXTRAORDINARY.
AQ — As in AQUA, AQUIFER, AQUILINE, OPAQUE.
AU — As in AUTHOR, AUTUMN, HAUL, VAULT.
FA — As in FACE, FACTOR, SOFA, AFFAIR.
FO — As in FOLLOW, FORCE, COMFORT, BEFORE.
Fe — As in February, feline, festival.
Fo — As in Forsyth, forever, fortune.
Fr — As in France, fragile, friction.
Fy — As in Fyodor, fytte.
LT — As in ALTITUDE, EXALT, RESULT, VAULT.
LV — As in SILVER, SOLVE, INVOLVE, VALVE.
LW — As in ALWAYS, RAILWAY, HALLWAY.
LY — As in TRULY, ONLY, HOLY, UGLY.
PA — As in PACE, PALACE, COMPANION, SEPARATE.
TA — As in TABLE, TASTE, GUITAR, FATAL.
Te — As in Ten, temple, tender.
To — As in Tomorrow, together, towards.
Tr — As in Travel, trouble, triumph.
Tu — As in Tuesday, tulip, tumble.
Tw — As in Twelve, twenty, twilight.
Ty — As in Tyrant, typical, type.
VA — As in VALUE, VAGUE, CANVAS, OVAL.
Ve — As in Venice, verse, venture.
Vo — As in Voice, volume, voyage.
Wa — As in Water, watch, wander.
We — As in Welcome, weather, welfare.
Wo — As in Wonder, worry, worship.
Ya — As in Yard, yacht, yawn.
Ye — As in Yellow, yesterday, yeoman.
Yo — As in Young, yoke, yoga.
Yu — As in Yukon, Yugoslavia, yule.

Ligatures

fi — fifty, fiction, filter, definite, affirm, magnify.
fl — flag, flair, flame, floor, influence, reflect.
ff — affair, affect, affirm, afford, buffalo, coffin, daffodil, differ, effect, effort, offend, offer, office, scaffold, stiff, suffocate, traffic, waffle.
ffi — affidavit, affiliated, affirmative, baffling (wait — that is ffl!), coefficient, coffin, daffiness, diffident, efficient, efficacy, muffin, officious, paraffin, sufficient, trafficking.
ffl — affluent, baffled, muffle, offload, piffle, raffle, riffle, ruffle, scaffold, scuffle, shuffle, sniffle, stiffly, truffle, waffle.
ft — after, craft, deft, drift, gift, left, loft, raft, shaft, shift, soft, swift, theft, tuft, waft.
fb — halfback, offbeat, surfboard.
fh — wolfhound, cliffhanger, halfhearted.
st — strong, first, last, must, fast, mist, ghost, roast, trust, artist, honest, forest, harvest, modest.
ct — act, fact, strict, direct, perfect, connect, collect, distinct, instruct, architect, effect, exact, expect.
Th — The, This, That, There, Their, They, Than, Though, Through, Thought, Thousand, Thrive, Throne, Thatch.

“There,” Avery said, setting down his pencil. “If a typesetter can handle every word in that glossary without a single misfit, miskerned, or malformed glyph, they deserve their weight in Garamond.”

""" CHAPTER_6 = """\ Chapter 6 – Western European Accents

Chapter 6
Western European Accents

Before the calligraphy volume was even bound, Mrs. Thornton-Foxwell rang with a revision. Half the captions were in French and German, the bibliography included Scandinavian and Spanish sources, and the whole thing needed to work in those languages too. “The accented characters,” she said. “They must be perfect.”

Avery sighed. The Latin-1 Supplement block — the accented vowels, cedillas, tildes, and special letters of Western European typography — would double his kerning workload. Every pair he had already perfected for plain ASCII now had accented variants.

German Pairs

German was the first test. Avery set a paragraph: Töchter saßen über den Büchern. Vögel flogen über die Wälder. Die Würde des Menschen ist unantastbar. Tänzer übten in der Türkei. The Tö in “Töchter” was telling — the umlaut dots on the ö sat precisely where the crossbar of the T wanted to extend. Vö in “Vögel” had a similar conflict: the V’s diagonal met the ö at an angle that the umlaut dots complicated. Wü in “Würde” and Wö in “Wörter” each demanded individual adjustment. Tü in “Türkei” and Tä in “Tänzer” added two more accented vowels to the T’s already long list of right-side partners.

“And don’t forget Öffnung,” Avery said. “The Öf pair is tricky enough, but ‘Öffnung’ also contains an ff ligature right after the umlaut. A double test.” He set more examples: Äußerst sorgfältig prüfte er die Größe der Straße. Für die Grüße seiner Füße brauchte er Maßband. The Äu in “Äußerst,” the Fü in “Für,” the Grü in “Grüße” — every pairing of accented vowels against consonants needed attention. The ß (eszett) in “Straße,” “Grüße,” and “Füße” had its own right-side bearing issues: ße and ßb in “weißblau” required careful attention, as the eszett’s unusual tail affected spacing against the following letter. Üb in “Über” and “Übung” placed an umlaut directly over the narrow U, which could collide with ascenders in the line above.

German punctuation style added another layer of complexity. „Guten Tag,“ sagte er. ‚Warum nicht?‘ The low opening quotes — „ (U+201E) and ‚ (U+201A) — sat on the baseline rather than hanging near the cap height, changing the spacing dynamics against the following capital letter. The „G pair, the ‚W pair — these were entirely different animals from their English-style “G and ‘W counterparts.

French Pairs

French was rich in accented characters. Fête de la République. Père Noël arriva en Février. À la recherche du café idéal. À propos de rien. The Fê in “Fête,” the Pè in “Père,” the Fé in “Février,” the Àl in “À la,” the Àp in “À propos” — each involved a diacritical mark that could interfere with kerning. The Ré in “République” needed the accent on the É to clear the shoulder of the R.

French also offered excellent ligature-with-accent test cases: La définition de l’efficacité réside dans la réflexion. L’officière vérifia les différentes soufflés. Il souffrit magnifiquement. The fi in “définition” and “magnifiquement,” the ffi in “efficacité” and “officière,” the fl in “réflexion,” the ff in “différentes” and “souffrir,” the ffl in “soufflés” — all occurred in words where accented characters sat adjacent to the ligature sequence. This was precisely the sort of combination that exposed rendering bugs.

Then there was Ça. “The cedilla on the Ç,” Avery explained, “descends below the baseline just like a comma. Ça and Çe are pairs we must not ignore.” He added: Ça va? Garçon, un café crème, s’il vous plaît.

French typography also used guillemets instead of quotation marks. « Venez ici, » dit-elle. « Regardez la beauté de ces lettres. » The kerning between « and the following letter («V, «R, «L), and between the preceding letter and » (r», é», s»), required their own adjustments — the angular shapes of the guillemets created different spacing needs from curly quotation marks.

Spanish and Portuguese

Spanish contributed the tilde-N. El niño soñó con el año nuevo en España. Señor Muñoz enseñaba con cariño. The Ño in “niño” and “año,” the Ñu in “Muñoz,” the Eñ in “España” — the tilde sat high, potentially colliding with ascenders in the line above and altering the perceived spacing of the pair. ESPAÑA and AÑO in capitals were particularly demanding: the Ñ’s tilde could feel disconnected from the diagonal strokes of a flanking A.

Portuguese added its own accents: A tradição da nação é a educação. Três irmãos viviam em São Paulo. The ão sequence in “tradição” and “nação,” the ãos in “irmãos,” the ês in “Três” — all involved characters with tildes or circumflexes that changed vertical clearance.

Scandinavian and the Æ Ligature

The Scandinavian languages brought Å, Ø, and the Æ ligature into play. Åkesson reste till Ørsted via Ærø. Mediæval æsthetics influenced Encyclopædia entries about Cæsar.

The Åk in “Åkesson” placed a ring-above diacritical directly over the A’s apex — a collision risk with the line above. Ør in “Ørsted” combined the O-stroke with a tight r pairing. And Æ (U+00C6) was itself a ligature glyph: the visual fusion of A and E into a single character. Kerning Æ against its neighbors — Ær, Æs, Cæ, mediæ — required treating it as a wide glyph with unique sidebearings.

Typographic Punctuation

Vera looked up from her notes. “Should I add the en dash and ellipsis tests? We’ve been using em dashes everywhere, but en dashes kern differently.”

“Yes,” Avery said. “Set: pages 47–74, the years 1910–1947. The en dash sits higher than a hyphen and is narrower than an em dash, so it creates different spacing against the flanking digits.”

“And for the ellipsis: The answer was… not what he expected. ‘Well…’ she trailed off. “Vraiment…” murmured the Frenchman. The horizontal ellipsis — a single glyph at U+2026, not three periods — needs its own kerning against adjacent quotation marks, letters, and spaces. The pair …” and …’ are especially important: the ellipsis must not crash into the closing quote.”

""" CHAPTER_7 = """\ Chapter 7 – Beyond the Western Alphabet

Chapter 7
Beyond the Western Alphabet

Just when Avery thought the project was finished, Lydia Thornton-Foxwell rang with a new request. She wanted a companion volume — a survey of calligraphic traditions across Central and Eastern Europe, with chapters on Polish, Czech, Hungarian, and Turkish lettering. “The same standard of kerning,” she insisted. “Every pair, every ligature.”

Avery groaned. The Latin Extended characters — the haceks, ogoneks, acutes, and cedillas of Slavic and Turkic alphabets — would multiply his kerning tables enormously. But he was a professional. He reached for his reference books and began.

Czech Pairs

The Czech language was a minefield of diacritics. Avery set a test paragraph: Těšín leží nedaleko Třebíče. Příbram a Přerov jsou města, kde se Věra učila vědě. Čáslav leží na jih od Českého Brodu. He examined the Tě pair in “Těšín” — the crossbar of the T needed to tuck over the ě just as it would over a plain e. The Tř in “Třebíče” was trickier; the caron on the ř changed its vertical profile.

“And look at these,” he said to Vera. “Př in ‘Příbram’ and ‘Přerov’ — the overhang of the P’s bowl over the ř is critical. Vě in ‘Věra’ and ‘vědě’ — the diagonal of the V must relate correctly to the caron.”

He continued with more Czech pairs: Říjen je krásný měsíc. Řeka teče přes Řad obchodních domů. Škoda vyrábí automobily. Šťastný den! Život není žádná procházka. The Ří in “Říjen,” the Ře in “Řeka,” the Šk in “Škoda,” the Šť in “Šťastný,” the Ži in “Život,” the žá in “žádná” — each demanded individual attention. Ať he added to the list: the Czech word “ať” was tiny but the kerning between A and ť mattered in display settings.

Polish Pairs

Polish was equally demanding. Wąchock to małe miasteczko. Węgry sąsiadują z Polską. Łódź jest trzecim co do wielkości miastem. Łukasz mieszka w Łucku. Łyżka leży na stole.

The Wą in “Wąchock” was crucial — the ogonek on the ą dangled below the baseline, and the W’s diagonal had to account for it. Similarly, Wę in “Węgry” needed the same care. The Ł with its stroke was a special case: Łó in “Łódź,” Łu in “Łukasz” and “Łuck,” Ły in “Łyżka” — the horizontal bar through the L altered every right-side pairing.

Hungarian and Turkish Pairs

Hungarian brought the double-acute characters. A tőke növekedett. A vőlegény megérkezett. Fűző készítette az ételt. The Tő in “tőke” and Vő in “vőlegény” were new territory — the double acute over the ő added height that could collide with ascenders in the line above.

Turkish was another story entirely. İstanbul’da yaşıyoruz. Beyoğlu güzel bir semt. Dağdan inen yol Şişli’ye ulaşır. The İs in “İstanbul” was distinctive — the dotted capital I (İ) sat differently from a standard I. Ğa and Ğı pairs appeared in words like “dağ” (mountain), where the breve on the Ğ changed the letter’s visual weight. The Şi in “Şişli” required the cedilla of the Ş to clear the descending stroke gracefully.

Ligatures Across Extended Latin

Ligature handling grew more complex with extended characters. Avery tested sequences where fi and fl appeared near or adjacent to diacritical marks: Filozofie vyžaduje přesné myšlení. Firma z Třebíče exportuje finále do celého světa. Flétnista hrál na flétnu.

The fi in “Filozofie,” “Firma,” and “finále” all needed proper ligature joining even when surrounded by Extended-A characters. The fl in “Flétnista” and “flétnu” similarly demanded clean joins. Polish offered its own test cases: Refleks jest szybki. Oficjalny dokument leży na biurku. Afirmacja jest ważna w filozofii. The fl in “Refleks,” the fi in “Oficjalny,” “Afirmacja,” and “filozofii” — all exercised the ligature engine in a Latin Extended-A context.

Turkish added another dimension: Fikir özgürlüğün temelidir. Fişek havaya fırlatıldı. The fi in “Fikir” and “Fişek” tested whether the ligature engine correctly handled the Turkish dotless-ı (ı) and dotted-İ (İ) distinction.

French Œ and Dutch ij

Two Latin Extended-A characters were themselves ligatures by heritage. The French œ (o-e ligature) appeared in: Le cœur de l’œuvre bat au rythme des sœurs. Le bœuf traverse la manœuvre avec aplomb. Though modern French treats œ as a single letter rather than a typographic ligature, its glyph still required careful kerning against adjacent characters — the œu in “cœur,” the œv in “œuvre,” the bœ in “bœuf.”

Dutch provided the ij digraph. Het ijzer is sterk. Zij is ijverig en bijzonder vrij in haar oordeel. The ij glyph, occupying a single codepoint (U+0133), needed its own kerning entries — particularly the pairs Hij, Zij, bij, and vrij, where the preceding letter’s right-side bearing abutted the unusual shape of the ij.

Extended-A Kerning Glossary

Avery appended a supplementary glossary to his earlier catalogue:

Tě — As in Těšín, těžký, tělo.
Tř — As in Třebíč, třída, tři.
Vě — As in Věra, věda, věž.
Př — As in Příbram, příroda, přítel.
Wą — As in Wąchock, wąski, wąwóz.
Wę — As in Węgry, węzeł, Węgierska.
Łó — As in Łódź, łódź, łóżko.
Łu — As in Łukasz, Łuck, łuk.
Ły — As in Łyżka, łydka, łysy.
Čá — As in Čáslav, část, čáp.
Če — As in České, český, čelo.
Ří — As in Říjen, říční, řízení.
Ře — As in Řeka, řeč, řemeslo.
Šk — As in Škoda, škála, školák.
Šť — As in Šťastný.
Ži — As in Život, živý, živnost.
Žá — As in Žádný, žák, žár.
Ať — As in ať (Czech: “let” / “whether”).
Tő — As in tőke, tőr, tőlegény.
Vő — As in vőlegény, vőfél.
İs — As in İstanbul, İstiklal, İslam.
Ğa — As in dağ, yağmur, ğaraj.
Şi — As in Şişli, şifa, şirin.

“If we can kern all of these correctly,” Avery declared, “we’ll have covered every major Latin-script language in Europe and beyond. Not just the Western set — the full Latin range.”

Vera looked at the list and sighed. “I’ll put the kettle on. This is going to be a long night.”

""" CHAPTER_8 = """\ Chapter 8 – The Cyrillic Challenge

Chapter 8
The Cyrillic Challenge

The companion volume was barely off the press when Mrs. Thornton-Foxwell telephoned again. “Avery, darling, I’ve been in contact with a collector in Saint Petersburg. He wants the calligraphy survey extended to cover Cyrillic traditions — Russian, Ukrainian, Bulgarian. The same standard.”

Avery set down his coffee. Cyrillic was an entirely new script, with its own letterforms and its own kerning nightmares. Several Cyrillic letters shared shapes with their Latin counterparts — А resembled A, Р resembled P, Т resembled T — but many others were unique. He would need to kern every pair from scratch.

The Overhanging Letters

The most troublesome Cyrillic letter was Г (Ge). Its shape — a horizontal crossbar extending rightward from a vertical stem, like a reversed L — created an overhang that demanded tight kerning against every following letter. Avery set his first test: Генерал Гоголь говорил о Гусарах. Грамота Галилея поразила Германию.

The Ге in “Генерал” was critical — the crossbar of Г needed to tuck over the lowercase е without crushing it. Го in “Гоголь” demanded similar attention, as did Гу in “Гусарах” and Гр in “Грамота.” Га in “Галилея” rounded out the set.

Т (Te) presented the same challenge as its Latin twin T. Там Татьяна тихо ткала ткань. Тепло текло из Тульского камина. Три тысячи труб пели в Тяньцзинь. Every pair — Та, Те, Ту, Тр, Ти, Тя — required the T-crossbar to reach over the following lowercase letter.

Ч (Che) had a subtler overhang. Часы пробили четверть четверга. Чудо! Чорное море. The Ча, Чу, Чо pairs each had different spacing needs depending on the round or straight shape of the following vowel.

The Diagonal Letters

У (U) was the Cyrillic counterpart of the Latin Y — a letter whose diagonals created open space against adjacent characters. Уверенность Удалось укрепить. Ум устремился вперёд. The Ув, Уд, Ук, Ум pairs all needed tighter kerning than the default sidebearings provided.

А (A) and Л (El) were equally demanding. Аудитория Авиатор Атлас Адресат. Лампа Ленинград Лондон Луна. The Ау, Ав, Ат, Ад pairs mirrored the Latin AV/AW/AT family. The Л (El), with its inverted-V left stroke, created unique spacing against а, е, о, у.

Round and Complex Letters

Р (Er) was the Cyrillic P — a letter with a bowl that overhung the following character. Работа Речи России Русский. The Ра, Ре, Ро, Ру pairs echoed the Latin Pa, Pe, Po challenge.

Ф (Ef) was the widest Cyrillic letter — a circle bisected by a vertical stem. Факультет Фонтанка Фура. The Фа, Фо, Фу pairs needed generous clearance on both sides of the circle.

Д (De) had descending serifs that complicated baseline kerning. Дальний День Дома Думать. The Да, Де, До, Ду pairs were unique to Cyrillic — no Latin letter had quite the same descending structure.

Ukrainian and Bulgarian

Ukrainian added its own characters. Її мати немає рівних. Європа чекає. Ґанок виріс на Ґрунті. The Її pair (Yi + yi) tested the double-dotted characters unique to Ukrainian. Єв in “Європа” tested the Ukrainian Ye against a following consonant. Ґа and Ґр in “Ґанок” and “Ґрунті” tested the upturn-Ge (Ґ), a letter unique to Ukrainian.

Bulgarian Cyrillic had its own typographic traditions. Щука щастлива жена живееше в Железник. Юлия ютилась. The Щу pair tested the complex Shcha with its descender against a round vowel. Жа and Же tested the wide Zhe. Юл in “Юлия” placed the round Yu against the narrow El.

Cyrillic with Guillemets

Russian typography uses guillemets as quotation marks, just like French. «Говорите тише,» — сказала она. «Тихо!» «Всё будет хорошо,» — ответил он. The «Г, «Т, «В pairs — guillemet against the overhanging Ge, the crossbarred Te, and the round Ve — each needed individual spacing. On the closing side, р» and е» presented the same challenges as their Latin counterparts.

Cyrillic Kerning Glossary

Avery appended the Cyrillic pairs to his growing catalogue:

Га — Галилея, газета.
Ге — Генерал, герой.
Го — Гоголь, город.
Гу — Гусары, губерния.
Гр — Грамота, граница.
Та — Там, также, танец.
Те — Тепло, текст, тело.
То — Только, товар.
Ту — Тульский, туча.
Тр — Три, труба.
Тя — Тяньцзинь.
Ра — Работа, разум.
Ре — Речи, река.
Ро — Россия, род.
Ру — Русский, рука.
Ау — Аудитория.
Ав — Авиатор.
Ат — Атлас, атом.
Ад — Адресат.
Ув — Уверенность.
Уд — Удалось.
Ук — Укрепить.
Ум — Ум, умник.
Да — Дальний, дата.
Де — День, дело.
До — Дома, дорога.
Ла — Лампа, лавка.
Ле — Ленинград, лес.
Ло — Лондон, лодка.
Ча — Часы, чай.
Чо — Чорное, чорт.
Чу — Чудо, чувство.
Фа — Факультет.
Фо — Фонтанка.

“Cyrillic has fewer kerning traps than Latin,” Avery reflected, “but the ones it has are severe. Г and Т dominate every page of Russian text, and if they’re not kerned properly, the whole paragraph looks like it’s falling apart.”

Vera glanced at the stack of proofs — now three volumes deep — and smiled wearily. “At least there are no Cyrillic ligatures.”

“Yet,” said Avery.

""" CHAPTER_9 = """\ Chapter 9 – Latin Extended-B

Chapter 9
Latin Extended-B

Months passed. Avery had just begun to relax when the telephone rang again. This time it was not Mrs. Thornton-Foxwell but her publisher, a harried man named Grigor, who explained that the calligraphy survey had attracted interest from scholars in Bucharest, Hanoi, and Lagos. “We need Romanian, Vietnamese, and several West African languages,” he said. “Plus Croatian digraphs and Pinyin romanization. Latin Extended-B, the whole block.”

Avery looked at the Unicode chart for U+0180–U+024F and sighed. It was a miscellany: characters from a dozen unrelated traditions, each with its own typographic demands.

Romanian

Romanian was the most urgent addition. The language required two characters that looked deceptively like their cedilla-bearing Latin Extended-A cousins but were typographically distinct: Ș (S with comma below, U+0218) and Ț (T with comma below, U+021A). Avery set a test paragraph: Țara noastră este frumoasă. Șase sute de școlari au venit la Țărăncuța. Țesătura ținutului este unică. Șeful stației știa totul.

The Ța pair in “Țara” and the Ță pair in “Țărăncuța” were the key tests — the comma descender on Ț distinguished it from the cedilla-T (Ţ, U+0162) of Latin Extended-A, but the crossbar overhang was identical. Țe in “Țesătura” and Țo demanded the same T-crossbar tucking as their ASCII equivalents. Șa and Șe in “Șase” and “Șeful” needed the comma below to clear the baseline without colliding with descenders in the line below.

“The tricky part,” Avery told Vera, “is that Romanian also uses ă (a-breve, from Latin Extended-A) and î (i-circumflex, from Latin-1), and ț (t-comma) interacts with both. The pair Ță in ‘Țărăncuța’ tests the comma-below against the breve-above — a vertical sandwich of diacritics.”

Vietnamese

Vietnamese typography brought the horn diacritic into play. The characters Ơ (O with horn, U+01A0) and Ư (U with horn, U+01AF) appeared constantly in Vietnamese text. Avery set: Ơi ! Ngươi Việt Nam yêu thương đất nước. Vừa đẹp vừa hay. Tươi sáng rỡi.

The horn on Ơ and Ư extended to the upper right of the letter, creating potential collisions with the following character. Tư and Vư were particularly demanding: the T-crossbar or V-diagonal needed to accommodate the horn’s extra width. Similarly, Tơ placed the T’s crossbar over a horned lowercase o — the horn could crash into the crossbar at small sizes.

Croatian Digraphs

Croatian contributed its titular digraph ligatures. The Unicode block included precomposed forms: DŽ (U+01C4), Dž (U+01C5), dž (U+01C6), LJ (U+01C7), Lj (U+01C8), lj (U+01C9), NJ (U+01CA), Nj (U+01CB), nj (U+01CC). These were single codepoints representing two-letter combinations, each with unique glyph widths. Džep je velik. Ljeto je toplo. Njiva je zelena. Džamija stoji na brdu.

“These digraphs are wider than normal letters,” Avery observed. “Kerning Dž against a following lowercase vowel is unlike kerning D or Ž individually — the combined glyph has its own sidebearings. Same for Lj and Nj.”

Pinyin Tone Marks

Mandarin Chinese romanization — Pinyin — used Latin letters with caron and diaeresis-plus-tone combinations that fell squarely in Extended-B. Nǐ hǎo! Wǒ shi Zhōngguó rén. Lǚshi zhǔyi Tǎmen de fāyīn.

The ǎ (a with caron) under a T-crossbar in “Tǎmen” presented the same challenge as Czech Tě — but the Pinyin context meant it appeared in entirely different words. The diaeresis-plus-tone characters were uniquely demanding: ǖ (u-diaeresis-macron), ǘ (u-diaeresis-acute), ǚ (u-diaeresis-caron), ǜ (u-diaeresis-grave) each stacked two diacritical marks above the u, creating height that could collide with the preceding T-crossbar. Lǜshi Tǖ Vǘ — Avery set each combination and winced at the vertical crowding.

African Languages

West African languages used hooked and barred variants of familiar Latin letters. Ɓala ɓe Ɗala ɗe. Ƒarin kowa ya san. Ɛdiɲ ɔkɔ nɔ. The Ɓ (B-hook) and Ɗ (D-hook) had descending hooks that affected baseline spacing. Ƒ (F-hook) shared the overhang issues of a standard F but with an added complication: the hook at the bottom altered the letter’s center of gravity. Ƒa, Ƒo, Ƒe all needed individual attention — the hook pulled the eye downward while the crossbar demanded tuck-over kerning above.

“And the open vowels,” Avery added. “Ɛ (open E, U+0190) and ɔ (open O, U+0254) have wider apertures than their standard counterparts. Every consonant-to-open-vowel pair needs rechecking.”

Extended-B Kerning Glossary

Avery appended to his catalogue:

Ța — As in Țara, țară.
Țe — As in Țesătura, țesut.
Țo — As in Țoca, țocul.
Șa — As in Șase, șarpe.
Șe — As in Șeful, șed.
Tơ — As in Tơi, tơi sáng.
Tư — As in Tươi, tương lai.
Vơ — As in Vơi, vơ.
Vư — As in Vừa, vươn.
Tǎ — As in Tǎmen (Pinyin).
ǖ — As in nǖ (Pinyin: female).
Ƒa — As in Ƒarin (Hausa).
Ƒo — As in Ƒoto (Hausa).

""" CHAPTER_10 = """\ Chapter 10 – Greek & Coptic

Chapter 10
Greek & Coptic

The final challenge arrived not by telephone but by post: a handwritten letter from a professor of classics at the University of Athens, requesting that the calligraphy survey include a chapter on the Greek alphabet. “The birthplace of Western lettering,” the professor wrote, “deserves proper typographic treatment.”

Avery could hardly disagree. Greek was where it all began — the ancestor of Latin, Cyrillic, and Coptic. And the Greek alphabet had its own kerning nightmares, many of them eerily familiar.

The Overhanging Letters

Γ (Gamma, U+0393) was the Greek counterpart of the Cyrillic Г and a close relative of the Latin T. Its horizontal stroke extended rightward, creating the same tuck-over demands. Avery set: Γαλήνη γαλάζια Γεωργία Γοργόνα Γραμματική Γυμνάσιο.

The Γα in “Γαλήνη” and “γαλάζια” needed tight kerning — the horizontal bar of Γ had to reach over the α without crushing it. Γε in “Γεωργία” was equally sensitive. Γο in “Γοργόνα”, Γρ in “Γραμματική”, and Γυ in “Γυμνάσιο” completed the set of Gamma’s right-side partners.

Τ (Tau, U+03A4) was structurally identical to the Latin T. Ταξίδι ταχύ Τεχνολογία Τοποθεσία Τρίγωνο Τυρί. Every pair — Τα, Τε, Το, Τρ, Τυ — demanded the crossbar to tuck over the following lowercase letter, just as in Latin.

The Diagonal Letters

Υ (Upsilon, U+03A5) mirrored the Latin Y’s diagonal challenges. Υγεία υπάρχει Υπουργός Υποθήκη. The Υγ, Υπ pairs showed the same open spacing that the Latin Y created against following lowercase letters.

Α (Alpha, U+0391) and Λ (Lambda, U+039B) were the Greek equivalents of A and an inverted V. Αυτός Ανατολή Ατλαντικός Αδελφός. Λαμπρή Λευκάδα Λονδίνο. The Αυ, Αν, Ατ, Αδ pairs followed the same diagonal-against-vertical pattern as Latin AV, AW, AT. Λα, Λε, Λο needed the inverted-V’s right stroke to relate cleanly to the following round or vertical letter.

Round and Complex Letters

Ρ (Rho, U+03A1) was the Greek P — bowl overhanging the following character. Ραδιόφωνο Ρεύμα Ροδός ρόδα. The Ρα, Ρε, Ρο pairs echoed the Latin Pa, Pe, Po and Cyrillic Ра, Ре, Ро challenges.

Φ (Phi, U+03A6) was one of the widest Greek letters — a circle bisected by a vertical stem, like the Cyrillic Ф. Φαντασία Φοίνικας φυσική. The Φα, Φο, Φυ pairs needed generous clearance for the circle’s width.

Δ (Delta, U+0394) had a triangular shape with a wide base, unlike anything in Latin. Δασκάλα Δελφοί Δούναβης διάβαση. The Δα, Δε, Δο pairs needed the wide base to relate to the following letter without excessive gaps.

Greek with Guillemets and Polytonic

Modern Greek typography, like French and Russian, uses guillemets. «Γεια σας,» είπε. «Τι κάνετε;» «Καλά,» απάντησε. The «Γ and «Τ pairs tested the guillemet against overhanging capitals, while ε» and ο» tested closing spacing.

Greek also carried a rich tradition of polytonic accents — the acute (΄), grave, circumflex, rough breathing, and smooth breathing marks that adorned classical and katharevousa texts. Ἀθήνα ἐστί μεγάλη πόλη. Ὀ κόσμος εἶναι ωραῖος. Though polytonic marks are handled by combining characters (from the U+0300 block), their visual interaction with kerning pairs remained — a breathing mark over an Alpha could encroach on the preceding or following letter’s space.

Greek in Scientific Text

Beyond natural language, Greek letters appeared constantly in scientific and mathematical prose. The wavelength λ is inversely proportional to frequency ν. The ratio π/φ appears in the golden angle. Angle θ subtends arc αβ, while Σ denotes summation and Δ denotes change.

“When Greek letters appear inline with Latin text,” Avery explained, “the kerning engine must handle cross-script pairs: Latin-T followed by Greek-α, or Greek-σ followed by a Latin comma. These hybrid pairs are rare but they matter in any book that discusses physics, mathematics, or engineering.”

Greek Kerning Glossary

Avery added the final appendix to his growing catalogue:

Γα — Γαλήνη, γαλαξίας.
Γε — Γεωργία, γερός.
Γο — Γοργόνα, γονιός.
Γυ — Γυμνάσιο, γύρος.
Γρ — Γραμματική.
Τα — Ταξίδι, ταχύ.
Τε — Τεχνολογία.
Το — Τοποθεσία.
Τυ — Τυρί, τυχερός.
Αυ — Αυτός, αυλή.
Αν — Ανατολή.
Ατ — Ατλαντικός.
Αδ — Αδελφός.
Υγ — Υγεία.
Υπ — Υπουργός.
Ρα — Ραδιόφωνο.
Ρε — Ρεύμα.
Ρο — Ροδός.
Φα — Φαντασία.
Φο — Φοίνικας.
Δα — Δασκάλα.
Δε — Δελφοί.
Λα — Λαμπρή.
Λε — Λευκάδα.
Λο — Λονδίνο.

“And with that,” Avery said, setting down his pencil for the last time, “we have covered every script from the Acropolis to the Urals, from the Rhine to the Mekong. If a typesetter can render every word in these chapters without a single miskerned pair, they have earned my respect.”

Vera closed her notebook and smiled. “Shall I put the kettle on one last time?”

“Please,” said Avery. “And make it strong.”

""" CHAPTER_11 = """\ Chapter 11 – Combining Marks

Chapter 11
Combining Marks

Avery had thought the project was finally complete when Vera placed a new stack of proofs on his desk. “These came from a different typesetter,” she explained. “Their system outputs decomposed Unicode — every accented letter is split into a base character followed by one or more combining diacritical marks.”

Avery stared. “You mean instead of ö as a single glyph, they send ö? And instead of é, they send é?”

“Exactly. The renderer has to overlay the combining mark onto the preceding base character — centred horizontally, with proper vertical clearance, and without advancing the cursor. If it gets any of that wrong, the diacritics float off into space or crash into neighbouring letters.”

Single Combining Marks

Avery began with the most common combining diacritical marks from the U+0300 block. He set each one after a simple base character to verify placement:

à (a + grave), é (e + acute), î (i + circumflex), õ (o + tilde), ü (u + diaeresis), å (a + ring above), ç (c + cedilla), ę (e + ogonek), ž (z + caron), ő (o + double acute), ā (a + macron), ĕ (e + breve), ż (z + dot above).

“Each mark must sit centred over its base character,” Avery said, “with at least a pixel of clearance between the top of the base glyph and the bottom of the combining mark. If the mark drifts left or right, the reader sees a broken letter.”

Decomposed German

He turned to German text rendered entirely in decomposed form. Every umlaut and eszett combination that had worked perfectly in Chapter 6 now needed to survive the decomposition:

Töchter saßen über den Büchern. Vögel flogen über die Wälder. Die Würde des Menschen ist unantastbar. Tänzer übten in der Türkei. Öffnung der Ämter war um zehn Uhr. Äußerst sorgfältig prüfte er die Größe der Straße.

The Tö in “Töchter” was the critical test — the T-crossbar had to kern correctly against the base o, while the combining diaeresis (U+0308) sat above without shifting the cursor. Vö in “Vögel,” Wü in “Würde,” and Tä in “Tänzer” each exercised a different kerning pair with a decomposed umlaut. The Öf in “Öffnung” tested a combining mark immediately before a double-f ligature.

Decomposed French

French offered its own decomposition challenges. Avery set the same passage from Chapter 6, but with every accent decomposed:

Fête de la République. Père Noël arriva en Février. À la recherche du café idéal. Ça va? Garçon, un café crème, s’il vous plaît.

The Fê in “Fête” placed a combining circumflex over the e after the F — both the F-overhang kerning and the mark placement had to work simultaneously. Ré in “République” tested acute placement after an R. The À in “À la” placed a combining grave accent on a capital A, which had to clear the apex of the letterform.

Combining Marks and Ligatures

The most demanding test combined decomposed diacritics with ligature sequences. In precomposed text, the ligature engine only saw single-codepoint accented letters. With decomposition, a combining mark could sit between a base character and the start of a ligature, or immediately after one:

La définition de l’efficacité réside dans la réflexion. L’officière vérifia les différentes soufflés. Il souffrit magnifiquement. Défiant toute difficulté, le greffier affirma l’efficience du système.

The fi ligature in “définition” came right after a combining acute on the e. The ffi in “efficacité” was followed by a combining acute. The fl in “réflexion” came after a combining acute. The ff in “différentes” contained a combining mark between the ligature and the following vowel. Each of these sequences tested whether the combining mark handler and the ligature engine interacted correctly.

Multiple Combining Marks

Some writing systems required two or even three combining marks on a single base character. Vietnamese was the classic example, where a vowel could carry both a diacritical mark (circumflex, horn, or breve) and a tone mark (acute, grave, hook above, tilde, or dot below):

Việt Nam yêu thứơng đất nứớc. Tời sáng rò̀i.

The ệ in “Việt” stacked a combining circumflex (U+0302) and a combining dot below (U+0323) on a single base e. Both marks had to be positioned correctly relative to the base glyph and to each other — the circumflex above and the dot below the baseline. The ứ sequences placed a combining horn (U+031B) and a combining acute (U+0301) on the same base u, testing whether the second mark used the base character’s metrics rather than the first combining mark’s.

Combining Marks in Extended Latin

The Czech and Polish texts from Chapter 7 could also appear in decomposed form. Avery set a test paragraph:

Těšín leží nedaleko Třebíče. Příbram a Přerov jsou města. Věra se učila vědě. Čáslav leží na jih od Českého Brodu. Wąchock to małe miasteczko. Węgry sąsiadują z Polską.

The Tě in “Těšín” placed a combining caron over e after the T-crossbar — the same visual result as the precomposed ě, but assembled from parts. Each subsequent caron and acute in the sentence tested a different base-plus-mark combination. The Polish ogonek (U+0328) in “Wąchock” and “Węgry” tested a below-baseline combining mark, which had to clear descenders in the line below without disrupting the W kerning.

Combining Marks with Capitals

Capital letters presented additional challenges because their greater height left less room for marks above. Avery tested each common combining mark on capitals:

À propos. Ágnes. Âme. Ão. Ärger. Åkesson. Ǎlef. Ève. Émile. Être. Ìtalo. Íngrid. Île. Òslo. Óscar. Ôter. Õtelo. Öffnung. Ùbald. Último. Ûnion. Übung. Ñoquí.

The combining marks on capitals sat higher than on lowercase letters, and each mark needed to clear the top of the letterform. In particular, Ä (A + combining diaeresis) and Ö (O + combining diaeresis) had to match their precomposed equivalents Ä and Ö visually — any discrepancy would be immediately obvious to the reader.

Precomposed vs. Decomposed Comparison

As a final verification, Avery set the same sentence in both forms, one after the other, so the typesetter could compare them directly:

Precomposed: Töchter übten in der Türkei. Vögel flogen über die Wälder. Fête de la République. À la recherche du café. Ça va?

Decomposed: Töchter übten in der Türkei. Vögel flogen über die Wälder. Fête de la République. À la recherche du café. Ça va?

“If those two lines are indistinguishable on screen,” Avery said, “the combining mark renderer is working correctly. Any difference in spacing, vertical position, or glyph alignment means something is wrong.”

Vera studied both lines through the loupe. “They look identical to me.”

Extended Latin Composition

“But what about the Latin Extended-A characters?” Vera asked. “The old composition table only covered grave, acute, circumflex, tilde, diaeresis, and cedilla. Characters like ě (e-caron), ř (r-caron), ą (a-ogonek), ł (l-stroke), and ű (u-double-acute) were never composed from decomposed input.”

Precomposed: Těšín leží nedaleko Třebíče. Příbram a Přerov jsou města. Věra se učila vědě. Čáslav leží na jih od Českého Brodu.

Decomposed: Těšín lěží nedaleko Třebíče. Příbram a Přerov jsou města. Věra se učila vědě. Čáslav lěží na jih od Českého Brodu.

Precomposed: Wąchock to małe miasteczko. Węgry sąsiadują z Polską. Gşrün Über Şen Ďále.

Decomposed: Wąchock to małe miasteczko. Węgry sąsiadują z Polską. Gşrün Über Şen Ďále.

“With the new composition table these should be indistinguishable,” Avery said. “Carons, ogoneks, cedillas, double acutes — all composed from their parts into the same precomposed codepoints the font expects.”

“Then we’re done,” Avery said. “Eleven chapters, four scripts, three hundred kerning pairs, two dozen ligature sequences, and now combining marks. If the renderer survives all of that, it can handle anything a publisher throws at it.”

He set down his pencil and reached for his coffee. It was cold.

""" COVER_XHTML = """\ Cover Kerning & Ligature Edge Cases """ STYLESHEET = """\ body { font-family: serif; margin: 2em; line-height: 1.6; } h1 { font-size: 1.5em; text-align: center; margin-bottom: 1.5em; line-height: 1.3; } h2 { font-size: 1.15em; margin-top: 1.5em; margin-bottom: 0.5em; } p { text-indent: 1.5em; margin: 0.25em 0; text-align: justify; } blockquote p { text-indent: 0; margin: 0.5em 1.5em; font-style: italic; } """ CONTAINER_XML = """\ """ CONTENT_OPF = f"""\ urn:uuid:{BOOK_UUID} {TITLE} {AUTHOR} en {DATE} {DATE}T00:00:00Z """ TOC_XHTML = """\ Table of Contents

Kerning & Ligature Edge Cases

""" def build_epub(output_path: str): cover_data = create_cover_image() with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf: zf.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED) zf.writestr("META-INF/container.xml", CONTAINER_XML) zf.writestr("OEBPS/content.opf", CONTENT_OPF) zf.writestr("OEBPS/toc.xhtml", TOC_XHTML) zf.writestr("OEBPS/style.css", STYLESHEET) zf.writestr("OEBPS/cover.jpg", cover_data) zf.writestr("OEBPS/cover.xhtml", COVER_XHTML) zf.writestr("OEBPS/chapter1.xhtml", CHAPTER_1) zf.writestr("OEBPS/chapter2.xhtml", CHAPTER_2) zf.writestr("OEBPS/chapter3.xhtml", CHAPTER_3) zf.writestr("OEBPS/chapter4.xhtml", CHAPTER_4) zf.writestr("OEBPS/chapter5.xhtml", CHAPTER_5) zf.writestr("OEBPS/chapter6.xhtml", CHAPTER_6) zf.writestr("OEBPS/chapter7.xhtml", CHAPTER_7) zf.writestr("OEBPS/chapter8.xhtml", CHAPTER_8) zf.writestr("OEBPS/chapter9.xhtml", CHAPTER_9) zf.writestr("OEBPS/chapter10.xhtml", CHAPTER_10) zf.writestr("OEBPS/chapter11.xhtml", CHAPTER_11) print(f"EPUB written to {output_path}") if __name__ == "__main__": project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) out = os.path.join(project_root, "test", "epubs", "test_kerning_ligature.epub") os.makedirs(os.path.dirname(out), exist_ok=True) build_epub(out)