1710 lines
86 KiB
Python
1710 lines
86 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Generate a small EPUB with prose that exercises kerning and ligature edge cases.
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Basic Latin — "western" scope, ASCII):
|
|||
|
|
AV, AW, AY, AT, AC, AG, AO, AQ, AU
|
|||
|
|
FA, FO, Fe, Fo, Fr, Fy
|
|||
|
|
LT, LV, LW, LY
|
|||
|
|
PA, Pe, Po
|
|||
|
|
TA, Te, To, Tr, Ty, Tu, Ta, Tw
|
|||
|
|
VA, Ve, Vo, Vy, Va
|
|||
|
|
WA, We, Wo, Wa, Wy
|
|||
|
|
YA, Ya, Ye, Yo, Yu
|
|||
|
|
Av, Aw, Ay
|
|||
|
|
ov, oy, ow, ox
|
|||
|
|
rv, ry, rw
|
|||
|
|
"r." "r," (right-side space after r)
|
|||
|
|
f., f,
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Latin-1 Supplement — "western" scope, non-ASCII):
|
|||
|
|
Tö, Tü, Tä (German: Töchter, Türkei, Tänzer)
|
|||
|
|
Vö, Vä (German: Vögel, Väter)
|
|||
|
|
Wü, Wö (German: Würde, Wörter)
|
|||
|
|
Fü, Fé, Fê (German/French: Für, Février, Fête)
|
|||
|
|
Äu (German: Äußerst)
|
|||
|
|
Öf (German: Öffnung — also exercises ff ligature)
|
|||
|
|
Üb (German: Über)
|
|||
|
|
Àl, Àp (French: À la, À propos)
|
|||
|
|
Pè, Pé (French: Père, Pétanque)
|
|||
|
|
Ré (French: République, Rémy)
|
|||
|
|
Ño, Ñu (Spanish: niño, Muñoz)
|
|||
|
|
Eñ (Spanish: España)
|
|||
|
|
Ça, Çe (French: Ça, Garçon)
|
|||
|
|
Åk (Scandinavian: Åkesson)
|
|||
|
|
Ør (Scandinavian: Ørsted)
|
|||
|
|
Æs, Cæ (Scandinavian/archaic: Cæsar, æsthetic)
|
|||
|
|
ße, ßb (German: Straße, weißblau)
|
|||
|
|
«L, «V, r», é» (guillemets: « and »)
|
|||
|
|
„G, ‚W (German-style low-9 quotation marks)
|
|||
|
|
…" (horizontal ellipsis adjacent to quotes)
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Latin Extended-A — "latin" scope additions):
|
|||
|
|
Tě, Tř (Czech: Těšín, Třebíč)
|
|||
|
|
Vě (Czech: Věra, věda)
|
|||
|
|
Př (Czech: Příbram, příroda)
|
|||
|
|
Wą, Wę (Polish: Wąchock, Węgry)
|
|||
|
|
Łó, Łu, Ły (Polish: Łódź, Łukasz, łyżka)
|
|||
|
|
Čá, Če (Czech: Čáslav, České)
|
|||
|
|
Ří, Řa, Ře (Czech: Říjen, Řád, Řeka)
|
|||
|
|
Šk, Št (Czech/Slovak: Škoda, Šťastný)
|
|||
|
|
Ží, Žá (Czech: život, žádný)
|
|||
|
|
Ať (Czech)
|
|||
|
|
Tő, Vő (Hungarian: tőke, vőlegény)
|
|||
|
|
İs (Turkish: İstanbul)
|
|||
|
|
Ğa, Ğı (Turkish: dağ, Beyoğlu)
|
|||
|
|
|
|||
|
|
Ligature sequences targeted (ASCII):
|
|||
|
|
fi, fl, ff, ffi, ffl, ft, fb, fh, fj, fk
|
|||
|
|
st, ct (historical)
|
|||
|
|
Th (common Th ligature)
|
|||
|
|
|
|||
|
|
Ligature sequences in Latin-1 Supplement context:
|
|||
|
|
fi adjacent to accented chars: définition, magnifique, officière
|
|||
|
|
fl adjacent to accented chars: réflexion, soufflé
|
|||
|
|
ff adjacent to accented chars: Öffnung, différent, souffrir
|
|||
|
|
ffi adjacent to accented chars: efficacité, officière
|
|||
|
|
ffl adjacent to accented chars: soufflé
|
|||
|
|
Æ/æ (U+00C6/U+00E6): Cæsar, Ærø, mediæval, encyclopædia, æsthetic
|
|||
|
|
|
|||
|
|
Ligature sequences in Latin Extended-A context:
|
|||
|
|
fi near Extended-A chars: filozofie, firma, finále, fikir
|
|||
|
|
fl near Extended-A chars: flétnista, flétna, refleks
|
|||
|
|
ff near Extended-A chars: offikás
|
|||
|
|
œ (U+0153): cœur, sœur, œuvre, bœuf, manœuvre
|
|||
|
|
ij (U+0133): ijzer, vrij, bijzonder, ijverig
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Latin Extended-B — U+0180–024F):
|
|||
|
|
Ța, Țe, Țo, Țu (Romanian: T-comma overhang, like T)
|
|||
|
|
Șa, Șe, Și (Romanian: S-comma descender)
|
|||
|
|
Tș, Vș (Latin T/V followed by Romanian s-comma)
|
|||
|
|
Tơ, Vơ, Tư, Vư (Vietnamese: horn diacritics under T/V overhangs)
|
|||
|
|
Ƒa, Ƒo, Ƒe (African: F-hook pairs)
|
|||
|
|
DŽ, Dž, LJ, Lj, NJ, Nj (Croatian digraph ligatures)
|
|||
|
|
Tǎ, Tǒ, Tǔ (Pinyin: caron vowels under T overhang)
|
|||
|
|
Tǖ, Tǘ, Tǚ, Tǜ (Pinyin: u-diaeresis with tone marks)
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Greek & Coptic — U+0370–03FF):
|
|||
|
|
Γα, Γε, Γο, Γυ, Γρ (Γ overhang, like Latin T / Cyrillic Г)
|
|||
|
|
Τα, Τε, Το, Τυ, Τρ (Τ overhang, identical to Latin T)
|
|||
|
|
Αυ, Αν, Ατ, Αδ (Α diagonal, like Latin A)
|
|||
|
|
Υα, Υε, Υο (Υ diagonal, like Latin Y)
|
|||
|
|
Ρα, Ρε, Ρο (Ρ bowl, like Latin P)
|
|||
|
|
Φα, Φο, Φυ (Φ wide circular)
|
|||
|
|
Δα, Δε, Δο (Δ triangular base)
|
|||
|
|
Λα, Λε, Λο (Λ inverted-V)
|
|||
|
|
«Γ, «Τ, ε», ο» (guillemets in Greek context)
|
|||
|
|
|
|||
|
|
Kerning pairs targeted (Cyrillic — U+0400–04FF):
|
|||
|
|
Ге, Го, Гу, Га, Гр (Г has overhanging crossbar like T/F)
|
|||
|
|
Та, Те, То, Ту, Тр, Ті, Тя (Т = Latin T shape)
|
|||
|
|
Ра, Ре, Ро, Ру (Р = Latin P shape)
|
|||
|
|
Ау, Ав, Ат, Ад (А = Latin A shape)
|
|||
|
|
Ув, Уд, Ук, Ум (У = Latin Y shape — diagonal)
|
|||
|
|
Фа, Фо, Фу (Ф = wide circular letter)
|
|||
|
|
Да, Де, До, Ду (Д has descending serifs)
|
|||
|
|
Ла, Ле, Ло, Лу (Л = inverted V shape)
|
|||
|
|
Ча, Чо, Чу (Ч has overhanging stroke)
|
|||
|
|
«Г, «Т, «В, р», е» (guillemets in Cyrillic context)
|
|||
|
|
Ukrainian: Її, Єв, Ґа
|
|||
|
|
Bulgarian: Щу, Жа, Юл
|
|||
|
|
|
|||
|
|
Combining marks targeted (U+0300–U+036F — Combining Diacritical Marks):
|
|||
|
|
U+0300 grave, U+0301 acute, U+0302 circumflex, U+0303 tilde
|
|||
|
|
U+0304 macron, U+0306 breve, U+0307 dot above, U+0308 diaeresis
|
|||
|
|
U+030A ring above, U+030B double acute, U+030C caron
|
|||
|
|
U+0323 dot below (Vietnamese stacking)
|
|||
|
|
U+0327 cedilla, U+0328 ogonek
|
|||
|
|
U+031B horn (Vietnamese)
|
|||
|
|
|
|||
|
|
Decomposed equivalents of precomposed characters (NFD vs NFC):
|
|||
|
|
o+U+0308 vs ö, e+U+0301 vs é, e+U+0302 vs ê, a+U+0300 vs à, etc.
|
|||
|
|
Multiple combining marks on one base character:
|
|||
|
|
e+U+0302+U+0323 (Vietnamese ệ), u+U+031B+U+0301, etc.
|
|||
|
|
Combining marks adjacent to kerning pairs:
|
|||
|
|
To+U+0308 (decomposed Tö), Vo+U+0308, Wu+U+0308, etc.
|
|||
|
|
Combining marks adjacent to ligature sequences:
|
|||
|
|
de+U+0301+fi (définition), re+U+0301+fl (réflexion), etc.
|
|||
|
|
Extended Latin-A decomposed compositions:
|
|||
|
|
e+U+030C (ě), r+U+030C (ř), a+U+0328 (ą), s+U+0327 (ş), D+U+030C (Ď), etc.
|
|||
|
|
Precomposed vs decomposed side-by-side comparison (Latin-1 and Extended-A)
|
|||
|
|
|
|||
|
|
Also includes:
|
|||
|
|
Quotes around kerning-sensitive letters (e.g. "AWAY", "Typography")
|
|||
|
|
Numerals with kerning (10, 17, 74, 47)
|
|||
|
|
Punctuation adjacency (T., V., W., Y.)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import io
|
|||
|
|
import os
|
|||
|
|
import zipfile
|
|||
|
|
import uuid
|
|||
|
|
from datetime import datetime
|
|||
|
|
|
|||
|
|
# Pillow is the script's only third-party dependency; it renders the cover.
try:
    from PIL import Image, ImageDraw, ImageFont
except ImportError:
    # Raise SystemExit directly rather than calling the ``exit`` helper: that
    # helper is injected by the ``site`` module for interactive use and is
    # missing when the interpreter runs with ``-S``. A string argument is
    # written to stderr and the process exits with status 1.
    raise SystemExit("Please install Pillow: pip install Pillow")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Directory two levels above this file's absolute path.
_PROJECT_ROOT = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)

# Path of the Bookerly regular face, resolved relative to _PROJECT_ROOT.
_BOOKERLY_FONT = os.path.join(
    _PROJECT_ROOT,
    "lib",
    "EpdFont",
    "builtinFonts",
    "source",
    "Bookerly",
    "Bookerly-Regular.ttf",
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _get_font(size=20):
    """Return the bundled Bookerly face at ``size``, else Pillow's default font.

    Args:
        size: Requested font size, forwarded unchanged to Pillow.

    Returns:
        The font loaded from ``_BOOKERLY_FONT`` when that file can be opened;
        otherwise whatever ``ImageFont.load_default`` provides.
    """
    try:
        return ImageFont.truetype(_BOOKERLY_FONT, size)
    except OSError:
        # Bundled font missing or unreadable: fall through to the default
        # face. (IOError is an alias of OSError on Python 3.)
        pass

    try:
        return ImageFont.load_default(size)
    except TypeError:
        # Pillow releases older than 10.1.0 do not accept a size argument;
        # use their fixed-size default font rather than crashing.
        return ImageFont.load_default()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _draw_text_centered(draw, y, text, font, fill, width):
    """Draw ``text`` at vertical position ``y``, centred within ``width``.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        y: Vertical coordinate passed to ``draw.text``.
        text: String to render.
        font: Font used both for measuring and for drawing.
        fill: Colour passed to ``draw.text``.
        width: Total horizontal extent the text is centred in.
    """
    # Measure at the origin; only the horizontal extent of the box is used.
    left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    start_x = (width - (right - left)) // 2
    draw.text((start_x, y), text, font=font, fill=fill)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_cover_image():
    """Render the book cover with Pillow and return it as JPEG bytes.

    The cover is a 536x800 RGB canvas carrying, top to bottom, a three-line
    title, an asterisk ornament, a subtitle and a footer line near the bottom
    edge. Every line is centred horizontally.

    Returns:
        bytes: The encoded JPEG (quality 90).
    """
    canvas_w, canvas_h = 536, 800
    background = (30, 42, 58)
    ink = (225, 220, 205)

    canvas = Image.new("RGB", (canvas_w, canvas_h), background)
    pen = ImageDraw.Draw(canvas)

    def centered(top, text, face):
        # Every line on the cover shares the same colour and canvas width.
        _draw_text_centered(pen, top, text, face, ink, canvas_w)

    title_face = _get_font(72)
    subtitle_face = _get_font(26)
    footer_face = _get_font(14)
    ornament_face = _get_font(64)

    # Title lines start at y=92 and advance 90 pixels per line.
    title_top, title_step = 92, 90
    title_lines = ("Kerning", "& Ligature", "Edge Cases")
    for row, line in enumerate(title_lines):
        centered(title_top + row * title_step, line, title_face)

    # The ornament sits 10 pixels below where a further title line would go.
    ornament_top = title_top + len(title_lines) * title_step + 10
    centered(ornament_top, "*", ornament_face)

    centered(ornament_top + 72, "A Typographer\u2019s Compendium", subtitle_face)
    centered(canvas_h - 70, "CROSSPOINT TEST FIXTURES", footer_face)

    encoded = io.BytesIO()
    canvas.save(encoded, "JPEG", quality=90)
    return encoded.getvalue()
|
|||
|
|
|
|||
|
|
# Book metadata. BOOK_UUID is a freshly generated random UUID and DATE is the
# current local date; both are computed once, when the module is imported.
TITLE = "Kerning & Ligature Edge Cases"
AUTHOR = "Crosspoint Test Fixtures"
BOOK_UUID = str(uuid.uuid4())
DATE = f"{datetime.now():%Y-%m-%d}"
|
|||
|
|
|
|||
|
|
# ── XHTML content pages ──────────────────────────────────────────────
|
|||
|
|
|
|||
|
|
CHAPTER_1 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 1 – The Typographer's Affliction</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 1<br/>The Typographer’s Affliction</h1>
|
|||
|
|
|
|||
|
|
<p>AVERY WATT always wanted to be a typographer. Years of careful study
|
|||
|
|
at Yale had taught him that every typeface holds a secret: the negative
|
|||
|
|
space between letters matters as much as the strokes themselves. “AWAY
|
|||
|
|
with sloppy kerning!” he would thunder at his apprentices, waving a
|
|||
|
|
proof sheet covered in red annotations.</p>
|
|||
|
|
|
|||
|
|
<p>The office of <i>Watt &amp; Yardley, Fine Typography</i> occupied the top
|
|||
|
|
floor of an old factory on Waverly Avenue. On the frosted glass of the
|
|||
|
|
door, gold leaf spelled WATT &amp; YARDLEY in Caslon capitals. Beneath it,
|
|||
|
|
in smaller letters: <i>Purveyors of Tasteful Composition.</i></p>
|
|||
|
|
|
|||
|
|
<p>Today Avery sat at his desk, frowning at a page of proofs. The client
|
|||
|
|
— a wealthy patron named Lydia Thornton-Foxwell — had commissioned
|
|||
|
|
a lavish coffee-table volume on the history of calligraphy. It was the
|
|||
|
|
sort of project Avery loved: difficult, fussy, and likely to be
|
|||
|
|
appreciated by fewer than forty people on Earth.</p>
|
|||
|
|
|
|||
|
|
<p>“Look at this,” he muttered to his assistant, Vera Young. He tapped
|
|||
|
|
the offending line with a pencil. “The ‘AW’ pair in DRAWN is too
|
|||
|
|
loose. And the ‘To’ in ‘Towards’ — the overhang of the T-crossbar
|
|||
|
|
should tuck over the lowercase o. This is first-rate typeface work; we
|
|||
|
|
can’t afford sloppy fit.”</p>
|
|||
|
|
|
|||
|
|
<p>Vera adjusted her glasses and peered at the proof. “You’re right. The
|
|||
|
|
‘Ty’ in ‘Typography’ also looks off. And further down — see the
|
|||
|
|
‘VA’ in ‘VAULTED’? The diagonals aren’t meshing at all.”</p>
|
|||
|
|
|
|||
|
|
<p>“Exactly!” Avery slapped the desk. “We’ll need to revisit every pair:
|
|||
|
|
AV, AW, AT, AY, FA, Fe, LT, LV, LW, LY, PA, TA, Te, To, Tu, Tw, VA,
|
|||
|
|
Ve, Vo, WA, Wa, YA, Ya — the whole catalogue. I want this volume to be
|
|||
|
|
flawless.”</p>
|
|||
|
|
|
|||
|
|
<p>He leaned back and stared at the ceiling. Forty-seven years of
|
|||
|
|
typesetting had left Avery with impeccable standards and a permanent
|
|||
|
|
squint. He could spot a miskerned ‘AT’ pair from across the room.
|
|||
|
|
“Fetch the reference sheets,” he told Vera. “And coffee. Strong
|
|||
|
|
coffee.”</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
CHAPTER_2 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 2 – Ligatures in the Afflicted Offices</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 2<br/>Ligatures in the Afflicted Offices</h1>
|
|||
|
|
|
|||
|
|
<p>The first difficulty arose with ligatures. Avery was fiercely attached
|
|||
|
|
to the classic <i>fi</i> and <i>fl</i> ligatures — the ones where the
|
|||
|
|
terminal of the f swings gracefully into the dot of the i or the
|
|||
|
|
ascender of the l. Without them, he felt, the page looked ragged and
|
|||
|
|
unfinished.</p>
|
|||
|
|
|
|||
|
|
<p>“A fine figure of a man,” he read aloud from the proofs, testing the
|
|||
|
|
fi combination. “The daffodils in the field were in full flower, their
|
|||
|
|
ruffled petals fluttering in the stiff breeze.” He nodded — the fi
|
|||
|
|
and fl joins looked clean. But then he frowned. “What about the
|
|||
|
|
double-f ligatures? ‘Affixed,’ ‘baffled,’ ‘scaffolding,’
|
|||
|
|
‘offload’ — we need the ff, ffi, and ffl forms.”</p>
|
|||
|
|
|
|||
|
|
<p>Vera flipped through the character map. “The typeface supports ff, fi,
|
|||
|
|
fl, ffi, and ffl. But I’m not sure about the rarer ones — ft, fb,
|
|||
|
|
fh, fj, fk.”</p>
|
|||
|
|
|
|||
|
|
<p>“Test them,” Avery said. “Set a line: <i>The loft’s rooftop offered a
|
|||
|
|
deft, soft refuge.</i> That gives us ft. Now try: <i>halfback, offbeat.</i>
|
|||
|
|
That’s fb. For fh: <i>The wolfhound sniffed the foxhole.</i> And fj —
|
|||
|
|
well, that’s mostly in loanwords. <i>Fjord</i> and <i>fjeld</i> are the
|
|||
|
|
usual suspects. Fk is almost nonexistent in English; skip it.”</p>
|
|||
|
|
|
|||
|
|
<p>Vera typed dutifully. “What about the historical st and ct ligatures?
|
|||
|
|
I know some revival faces include them.”</p>
|
|||
|
|
|
|||
|
|
<p>“Yes! The ‘st’ ligature in words like <i>first, strongest, last,
|
|||
|
|
masterful, fastidious</i> — it gives the page a lovely archaic flavour.
|
|||
|
|
And ‘ct’ in <i>strictly, perfectly, tactful, connected, architectural,
|
|||
|
|
instructed.</i> Mrs. Thornton-Foxwell specifically requested them.”</p>
|
|||
|
|
|
|||
|
|
<p>He paused, then added: “And don’t forget the Th ligature. The word
|
|||
|
|
‘The’ appears thousands of times in any book. If we can join the T and
|
|||
|
|
the h into a graceful Th, the texture of every page improves. Set
|
|||
|
|
<i>The thrush sat on the thatched roof of the theatre, thinking.</i>
|
|||
|
|
There — Th six times in one sentence.”</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
CHAPTER_3 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 3 – The Proof of the Pudding</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 3<br/>The Proof of the Pudding</h1>
|
|||
|
|
|
|||
|
|
<p>Two weeks later, the revised proofs arrived. Avery carried them to the
|
|||
|
|
window and held them up to the light. The paper was a beautiful warm
|
|||
|
|
ivory, the ink a deep, true black.</p>
|
|||
|
|
|
|||
|
|
<p>He began to read, his eye scanning every pair. “AWAY TO YESTERDAY”
|
|||
|
|
ran the chapter title, in large capitals. The AW was tight, the AY
|
|||
|
|
tucked in, the TO well-fitted, the YE elegantly kerned. He exhaled
|
|||
|
|
slowly.</p>
|
|||
|
|
|
|||
|
|
<p>“Page fourteen,” he murmured. “<i>After years of toil, the faithful
|
|||
|
|
craftsman affixed the final flourish to the magnificent oak
|
|||
|
|
panel.</i>” The fi in <i>faithful</i>, the ffi in <i>affixed</i>, the fi in
|
|||
|
|
<i>final</i>, the fl in <i>flourish</i>, the fi in <i>magnificent</i> — all were
|
|||
|
|
perfectly joined. The ft in <i>craftsman</i> and <i>after</i> showed a subtle
|
|||
|
|
but satisfying connection.</p>
|
|||
|
|
|
|||
|
|
<p>He turned to page seventeen. The text was denser here, a scholarly
|
|||
|
|
passage on the evolution of letterforms. <i>Effective typographic
|
|||
|
|
practice requires an officer’s efficiency and a professor’s
|
|||
|
|
perfectionism. Suffice it to say that afflicted typesetters often find
|
|||
|
|
themselves baffled by the sheer profusion of difficulties.</i></p>
|
|||
|
|
|
|||
|
|
<p>Avery counted: the passage contained <i>ff</i> four times, <i>fi</i> six
|
|||
|
|
times, <i>ffl</i> once (in “baffled” — wait, no, that was ff+l+ed), and
|
|||
|
|
<i>ffi</i> twice (in “officer’s” and “efficiency”). He smiled. The
|
|||
|
|
ligatures were holding up perfectly.</p>
|
|||
|
|
|
|||
|
|
<p>The kerning was impeccable too. In the word “ATAVISTIC” — set as a
|
|||
|
|
pull-quote in small capitals — the AT pair was snug, the AV nestled
|
|||
|
|
tightly, and the TI showed just the right clearance. Lower down, a
|
|||
|
|
passage about calligraphers in various countries offered a feast of
|
|||
|
|
tricky pairs:</p>
|
|||
|
|
|
|||
|
|
<blockquote><p><i>Twelve Welsh calligraphers traveled to Avignon, where they
|
|||
|
|
studied Venetian lettering techniques. Years later, they returned to
|
|||
|
|
Pwllheli, Tywyn, and Aberystwyth, bringing with them a wealth of
|
|||
|
|
knowledge about vowel placement, Tuscan ornament, and Lombardic
|
|||
|
|
versals.</i></p></blockquote>
|
|||
|
|
|
|||
|
|
<p>The Tw in <i>Twelve</i>, the We in <i>Welsh</i>, the Av in <i>Avignon</i>, the Ve
|
|||
|
|
in <i>Venetian</i>, the Ye in <i>Years</i>, the Ty in <i>Tywyn</i>, the Tu in
|
|||
|
|
<i>Tuscan</i>, the Lo in <i>Lombardic</i> — every pair sat comfortably on the
|
|||
|
|
baseline, with not a hair’s breadth of excess space.</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
CHAPTER_4 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 4 – Punctuation and Numerals</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 4<br/>Punctuation and Numerals</h1>
|
|||
|
|
|
|||
|
|
<p>“Now for the tricky part,” Avery said, reaching for a loupe. Kerning
|
|||
|
|
around punctuation was notoriously fiddly. A period after a capital V
|
|||
|
|
or W or Y could leave an ugly gap; a comma after an r or an f needed
|
|||
|
|
careful attention.</p>
|
|||
|
|
|
|||
|
|
<p>He set a test passage: <i>Dr. Foxwell arrived at 7:47 a.m. on the 14th
|
|||
|
|
of November. “Truly,” she declared, “your work is perfect.” “We
|
|||
|
|
try,” Avery replied, “but perfection is elusive.”</i></p>
|
|||
|
|
|
|||
|
|
<p>The r-comma in “your,” the r-period in “Dr.” and “Mr.”, the
|
|||
|
|
f-period in “Prof.” — all needed to be set so that the punctuation
|
|||
|
|
didn’t drift too far from the preceding letter. Avery had seen
|
|||
|
|
appalling examples where the period after a V seemed to float in space,
|
|||
|
|
marooned from the word it belonged to.</p>
|
|||
|
|
|
|||
|
|
<p>“V. S. Naipaul,” he muttered, setting the name in various sizes.
|
|||
|
|
“W. B. Yeats. T. S. Eliot. P. G. Wodehouse. F. Scott Fitzgerald.
|
|||
|
|
Y. Mishima.” Each initial-period-space sequence was a potential trap.
|
|||
|
|
At display sizes the gaps yawned; at text sizes they could vanish
|
|||
|
|
into a murky blur.</p>
|
|||
|
|
|
|||
|
|
<p>Numerals brought their own challenges. The figures 1, 4, and 7 were
|
|||
|
|
the worst offenders — their open shapes created awkward spacing next to
|
|||
|
|
rounder digits. “Set these,” Avery instructed: <i>10, 17, 47, 74, 114,
|
|||
|
|
747, 1471.</i> Vera typed them in both tabular and proportional figures.
|
|||
|
|
The tabular set looked even but wasteful; the proportional set was
|
|||
|
|
compact but needed kerning between 7 and 4, and between 1 and 7.</p>
|
|||
|
|
|
|||
|
|
<p>“And fractions,” Avery added. “Try ½, ¼, ¾, and the arbitrary
|
|||
|
|
ones: 3/8, 5/16, 7/32. The virgule kerning against the numerals is
|
|||
|
|
always a headache.”</p>
|
|||
|
|
|
|||
|
|
<p>By five o’clock they had tested every combination Avery could think
|
|||
|
|
of. The proofs, now bristling with pencil marks and sticky notes, were
|
|||
|
|
ready for the foundry. “Tomorrow,” Avery said, “we tackle the italic
|
|||
|
|
and the bold. And after that — the small capitals.”</p>
|
|||
|
|
|
|||
|
|
<p>Vera groaned. “You’re a perfectionist, Avery Watt.”</p>
|
|||
|
|
|
|||
|
|
<p>“Naturally,” he replied. “That’s what they pay us for.”</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
CHAPTER_5 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 5 – A Glossary of Troublesome Pairs</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 5<br/>A Glossary of Troublesome Pairs</h1>
|
|||
|
|
|
|||
|
|
<p>As a final flourish, Avery drafted an appendix for the volume: a
|
|||
|
|
glossary of every kerning pair and ligature that had given him grief
|
|||
|
|
over forty-seven years. Vera typed it up while Avery dictated.</p>
|
|||
|
|
|
|||
|
|
<h2>Kerning Pairs</h2>
|
|||
|
|
|
|||
|
|
<p><b>AV</b> — As in AVID, AVIARY, AVOCADO, TRAVESTY, CAVALIER.<br/>
|
|||
|
|
<b>AW</b> — As in AWAY, AWARD, AWNING, DRAWN, BRAWL, SHAWL.<br/>
|
|||
|
|
<b>AY</b> — As in AYAH, LAYER, PLAYER, PRAYER, BAYONET.<br/>
|
|||
|
|
<b>AT</b> — As in ATLAS, ATTIC, LATERAL, WATER, PLATTER.<br/>
|
|||
|
|
<b>AC</b> — As in ACORN, ACCURATE, BACON, PLACATE.<br/>
|
|||
|
|
<b>AG</b> — As in AGAIN, AGATE, DRAGON, STAGGER.<br/>
|
|||
|
|
<b>AO</b> — As in KAOLIN, PHARAOH, EXTRAORDINARY.<br/>
|
|||
|
|
<b>AQ</b> — As in AQUA, AQUIFER, AQUILINE, OPAQUE.<br/>
|
|||
|
|
<b>AU</b> — As in AUTHOR, AUTUMN, HAUL, VAULT.<br/>
|
|||
|
|
<b>FA</b> — As in FACE, FACTOR, SOFA, AFFAIR.<br/>
|
|||
|
|
<b>FO</b> — As in FOLLOW, FORCE, COMFORT, BEFORE.<br/>
|
|||
|
|
<b>Fe</b> — As in February, feline, festival.<br/>
|
|||
|
|
<b>Fo</b> — As in Forsyth, forever, fortune.<br/>
|
|||
|
|
<b>Fr</b> — As in France, fragile, friction.<br/>
|
|||
|
|
<b>Fy</b> — As in Fyodor, fytte.<br/>
|
|||
|
|
<b>LT</b> — As in ALTITUDE, EXALT, RESULT, VAULT.<br/>
|
|||
|
|
<b>LV</b> — As in SILVER, SOLVE, INVOLVE, VALVE.<br/>
|
|||
|
|
<b>LW</b> — As in ALWAYS, RAILWAY, HALLWAY.<br/>
|
|||
|
|
<b>LY</b> — As in TRULY, ONLY, HOLY, UGLY.<br/>
|
|||
|
|
<b>PA</b> — As in PACE, PALACE, COMPANION, SEPARATE.<br/>
|
|||
|
|
<b>TA</b> — As in TABLE, TASTE, GUITAR, FATAL.<br/>
|
|||
|
|
<b>Te</b> — As in Ten, temple, tender.<br/>
|
|||
|
|
<b>To</b> — As in Tomorrow, together, towards.<br/>
|
|||
|
|
<b>Tr</b> — As in Travel, trouble, triumph.<br/>
|
|||
|
|
<b>Tu</b> — As in Tuesday, tulip, tumble.<br/>
|
|||
|
|
<b>Tw</b> — As in Twelve, twenty, twilight.<br/>
|
|||
|
|
<b>Ty</b> — As in Tyrant, typical, type.<br/>
|
|||
|
|
<b>VA</b> — As in VALUE, VAGUE, CANVAS, OVAL.<br/>
|
|||
|
|
<b>Ve</b> — As in Venice, verse, venture.<br/>
|
|||
|
|
<b>Vo</b> — As in Voice, volume, voyage.<br/>
|
|||
|
|
<b>Wa</b> — As in Water, watch, wander.<br/>
|
|||
|
|
<b>We</b> — As in Welcome, weather, welfare.<br/>
|
|||
|
|
<b>Wo</b> — As in Wonder, worry, worship.<br/>
|
|||
|
|
<b>Ya</b> — As in Yard, yacht, yawn.<br/>
|
|||
|
|
<b>Ye</b> — As in Yellow, yesterday, yeoman.<br/>
|
|||
|
|
<b>Yo</b> — As in Young, yoke, yoga.<br/>
|
|||
|
|
<b>Yu</b> — As in Yukon, Yugoslavia, yule.</p>
|
|||
|
|
|
|||
|
|
<h2>Ligatures</h2>
|
|||
|
|
|
|||
|
|
<p><b>fi</b> — fifty, fiction, filter, definite, affirm, magnify.<br/>
|
|||
|
|
<b>fl</b> — flag, flair, flame, floor, influence, reflect.<br/>
|
|||
|
|
<b>ff</b> — affair, affect, affirm, afford, buffalo, coffin, daffodil,
|
|||
|
|
differ, effect, effort, offend, offer, office, scaffold, stiff,
|
|||
|
|
suffocate, traffic, waffle.<br/>
|
|||
|
|
<b>ffi</b> — affidavit, affiliated, affirmative, baffling (wait — that
|
|||
|
|
is ffl!), coefficient, coffin, daffiness, diffident, efficient,
|
|||
|
|
efficacy, muffin, officious, paraffin, sufficient, trafficking.<br/>
|
|||
|
|
<b>ffl</b> — affluent, baffled, muffle, offload, piffle, raffle, riffle,
|
|||
|
|
ruffle, scaffold, scuffle, shuffle, sniffle, stiffly, truffle,
|
|||
|
|
waffle.<br/>
|
|||
|
|
<b>ft</b> — after, craft, deft, drift, gift, left, loft, raft, shaft,
|
|||
|
|
shift, soft, swift, theft, tuft, waft.<br/>
|
|||
|
|
<b>fb</b> — halfback, offbeat, surfboard.<br/>
|
|||
|
|
<b>fh</b> — wolfhound, cliffhanger, halfhearted.<br/>
|
|||
|
|
<b>st</b> — strong, first, last, must, fast, mist, ghost, roast, trust,
|
|||
|
|
artist, honest, forest, harvest, modest.<br/>
|
|||
|
|
<b>ct</b> — act, fact, strict, direct, perfect, connect, collect,
|
|||
|
|
distinct, instruct, architect, effect, exact, expect.<br/>
|
|||
|
|
<b>Th</b> — The, This, That, There, Their, They, Than, Though, Through,
|
|||
|
|
Thought, Thousand, Thrive, Throne, Thatch.</p>
|
|||
|
|
|
|||
|
|
<p>“There,” Avery said, setting down his pencil. “If a typesetter can
|
|||
|
|
handle every word in that glossary without a single misfit, miskerned,
|
|||
|
|
or malformed glyph, they deserve their weight in Garamond.”</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
CHAPTER_6 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 6 – Western European Accents</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 6<br/>Western European Accents</h1>
|
|||
|
|
|
|||
|
|
<p>Before the calligraphy volume was even bound, Mrs. Thornton-Foxwell
|
|||
|
|
rang with a revision. Half the captions were in French and German, the
|
|||
|
|
bibliography included Scandinavian and Spanish sources, and the whole
|
|||
|
|
thing needed to work in those languages too. “The accented characters,”
|
|||
|
|
she said. “They must be perfect.”</p>
|
|||
|
|
|
|||
|
|
<p>Avery sighed. The Latin-1 Supplement block — the accented vowels,
|
|||
|
|
cedillas, tildes, and special letters of Western European typography
|
|||
|
|
— would double his kerning workload. Every pair he had already
|
|||
|
|
perfected for plain ASCII now had accented variants.</p>
|
|||
|
|
|
|||
|
|
<h2>German Pairs</h2>
|
|||
|
|
|
|||
|
|
<p>German was the first test. Avery set a paragraph: <i>Töchter saßen
|
|||
|
|
über den Büchern. Vögel flogen über die Wälder. Die Würde
|
|||
|
|
des Menschen ist unantastbar. Tänzer übten in der Türkei.</i>
|
|||
|
|
The Tö in “Töchter” was telling — the umlaut dots on the
|
|||
|
|
ö sat precisely where the crossbar of the T wanted to extend.
|
|||
|
|
Vö in “Vögel” had a similar conflict: the V’s diagonal
|
|||
|
|
met the ö at an angle that the umlaut dots complicated. Wü in
|
|||
|
|
“Würde” and Wö in “Wörter” each demanded individual
|
|||
|
|
adjustment. Tü in “Türkei” and Tä in “Tänzer”
|
|||
|
|
added two more accented vowels to the T’s already long list of
|
|||
|
|
right-side partners.</p>
|
|||
|
|
|
|||
|
|
<p>“And don’t forget Öffnung,” Avery said. “The Öf pair is
|
|||
|
|
tricky enough, but ‘Öffnung’ also contains an ff ligature right
|
|||
|
|
after the umlaut. A double test.” He set more examples: <i>Äußerst
|
|||
|
|
sorgfältig prüfte er die Größe der Straße. Für die
|
|||
|
|
Grüße seiner Füße brauchte er Maßband.</i> The Äu
|
|||
|
|
in “Äußerst,” the Fü in “Für,” the Grü in
|
|||
|
|
“Grüße” — every pairing of accented vowels against
|
|||
|
|
consonants needed attention. The ß (eszett) in “Straße,”
|
|||
|
|
“Grüße,” and “Füße” had its own right-side bearing
|
|||
|
|
issues: ße and ßb in “weißblau” required careful attention,
|
|||
|
|
as the eszett’s unusual tail affected spacing against the
|
|||
|
|
following letter. Üb in “Über” and “Übung” placed
|
|||
|
|
an umlaut directly over the narrow U, which could collide with
|
|||
|
|
ascenders in the line above.</p>
|
|||
|
|
|
|||
|
|
<p>German punctuation style added another layer of complexity.
|
|||
|
|
„Guten Tag,“ sagte er. ‚Warum nicht?‘ The low opening
|
|||
|
|
quotes — „ (U+201E) and ‚ (U+201A) — sat on the baseline
|
|||
|
|
rather than hanging near the cap height, changing the spacing dynamics
|
|||
|
|
against the following capital letter. The „G pair, the
|
|||
|
|
‚W pair — these were entirely different animals from their
|
|||
|
|
English-style “G and ‘W counterparts.</p>
|
|||
|
|
|
|||
|
|
<h2>French Pairs</h2>
|
|||
|
|
|
|||
|
|
<p>French was rich in accented characters. <i>Fête de la République.
|
|||
|
|
Père Noël arriva en Février. À la recherche du
|
|||
|
|
café idéal. À propos de rien.</i> The Fê in
|
|||
|
|
“Fête,” the Pè in “Père,” the Fé in
|
|||
|
|
“Février,” the Àl in “À la,” the Àp in
|
|||
|
|
“À propos” — each involved a diacritical mark that could
|
|||
|
|
interfere with kerning. The Ré in “République” needed the
|
|||
|
|
accent on the É to clear the shoulder of the R.</p>
|
|||
|
|
|
|||
|
|
<p>French also offered excellent ligature-with-accent test cases:
|
|||
|
|
<i>La définition de l’efficacité réside dans la
|
|||
|
|
réflexion. L’officière vérifia les différentes
|
|||
|
|
soufflés. Il souffrit magnifiquement.</i> The fi in
|
|||
|
|
“définition” and “magnifiquement,” the ffi in
|
|||
|
|
“efficacité” and “officière,” the fl in
|
|||
|
|
“réflexion,” the ff in “différentes” and
|
|||
|
|
“souffrir,” the ffl in “soufflés” — all occurred in
|
|||
|
|
words where accented characters sat adjacent to the ligature sequence.
|
|||
|
|
This was precisely the sort of combination that exposed rendering
|
|||
|
|
bugs.</p>
|
|||
|
|
|
|||
|
|
<p>Then there was Ça. “The cedilla on the Ç,” Avery explained,
|
|||
|
|
“descends below the baseline just like a comma. Ça and Çe are
|
|||
|
|
pairs we must not ignore.” He added: <i>Ça va? Garçon, un
|
|||
|
|
café crème, s’il vous plaît.</i></p>
|
|||
|
|
|
|||
|
|
<p>French typography also used guillemets instead of quotation marks.
|
|||
|
|
« Venez ici, » dit-elle. « Regardez la
|
|||
|
|
beauté de ces lettres. » The kerning between « and the
|
|||
|
|
following letter («V, «R, «L), and between the preceding
|
|||
|
|
letter and » (r», é», s»), required their own
|
|||
|
|
adjustments — the angular shapes of the guillemets created different
|
|||
|
|
spacing needs from curly quotation marks.</p>
|
|||
|
|
|
|||
|
|
<h2>Spanish and Portuguese</h2>
|
|||
|
|
|
|||
|
|
<p>Spanish contributed the tilde-N. <i>El niño soñó con el
|
|||
|
|
año nuevo en España. Señor Muñoz enseñaba con
|
|||
|
|
cariño.</i> The Ño in “niño” and “año,” the
|
|||
|
|
Ñu in “Muñoz,” the Eñ in “España” — the
|
|||
|
|
tilde sat high, potentially colliding with ascenders in the line above
|
|||
|
|
and altering the perceived spacing of the pair. ESPAÑA and AÑO
|
|||
|
|
in capitals were particularly demanding: the Ñ’s tilde could
|
|||
|
|
feel disconnected from the diagonal strokes of a flanking A.</p>
|
|||
|
|
|
|||
|
|
<p>Portuguese added its own accents: <i>A tradição da nação
|
|||
|
|
é a educação. Três irmãos viviam em São Paulo.</i>
|
|||
|
|
The ão sequence in “tradição” and “nação,”
|
|||
|
|
the ãos in “irmãos,” the ês in “Três” — all
|
|||
|
|
involved characters with tildes or circumflexes that changed vertical
|
|||
|
|
clearance.</p>
|
|||
|
|
|
|||
|
|
<h2>Scandinavian and the Æ Ligature</h2>
|
|||
|
|
|
|||
|
|
<p>The Scandinavian languages brought Å, Ø, and the Æ ligature
|
|||
|
|
into play. <i>Åkesson reste till Ørsted via Ærø.
|
|||
|
|
Mediæval æsthetics influenced Encyclopædia entries about
|
|||
|
|
Cæsar.</i></p>
|
|||
|
|
|
|||
|
|
<p>The Åk in “Åkesson” placed a ring-above diacritical directly
|
|||
|
|
over the A’s apex — a collision risk with the line above. Ør in
|
|||
|
|
“Ørsted” combined the O-stroke with a tight r pairing. And
|
|||
|
|
Æ (U+00C6) was itself a ligature glyph: the visual fusion of A and E
|
|||
|
|
into a single character. Kerning Æ against its neighbors —
|
|||
|
|
Ær, Æs, Cæ, mediæ — required treating it as a wide glyph
|
|||
|
|
with unique sidebearings.</p>
|
|||
|
|
|
|||
|
|
<h2>Typographic Punctuation</h2>
|
|||
|
|
|
|||
|
|
<p>Vera looked up from her notes. “Should I add the en dash and ellipsis
|
|||
|
|
tests? We’ve been using em dashes everywhere, but en dashes kern
|
|||
|
|
differently.”</p>
|
|||
|
|
|
|||
|
|
<p>“Yes,” Avery said. “Set: <i>pages 47–74, the years
|
|||
|
|
1910–1947.</i> The en dash sits higher than a hyphen and is narrower
|
|||
|
|
than an em dash, so it creates different spacing against the flanking
|
|||
|
|
digits.”</p>
|
|||
|
|
|
|||
|
|
<p>“And for the ellipsis: <i>The answer was… not what he expected.
|
|||
|
|
‘Well…’ she trailed off. “Vraiment…”
|
|||
|
|
murmured the Frenchman.</i> The horizontal ellipsis — a single glyph
|
|||
|
|
at U+2026, not three periods — needs its own kerning against adjacent
|
|||
|
|
quotation marks, letters, and spaces. The pair …” and
|
|||
|
|
…’ are especially important: the ellipsis must not crash
|
|||
|
|
into the closing quote.”</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
# XHTML content document for Chapter 7 ("Beyond the Western Alphabet").
# The narrative embeds sample sentences that exercise Latin Extended-A
# kerning pairs and ligatures: Czech, Polish, Hungarian, Turkish, the
# French oe ligature and the Dutch ij ligature (U+0133).
# The literal opens with a line continuation so that the XML declaration is
# the very first character of the string, as XML well-formedness requires.
# U+0133 is written as a numeric character reference so that it cannot be
# silently normalised into the two-letter sequence "ij".
CHAPTER_7 = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Chapter 7 – Beyond the Western Alphabet</title>
<link rel="stylesheet" type="text/css" href="style.css"/></head>
<body>
<h1>Chapter 7<br/>Beyond the Western Alphabet</h1>

<p>Just when Avery thought the project was finished, Lydia Thornton-Foxwell
rang with a new request. She wanted a companion volume — a survey of
calligraphic traditions across Central and Eastern Europe, with chapters
on Polish, Czech, Hungarian, and Turkish lettering. “The same standard
of kerning,” she insisted. “Every pair, every ligature.”</p>

<p>Avery groaned. The Latin Extended characters — the haceks, ogoneks,
acutes, and cedillas of Slavic and Turkic alphabets — would multiply
his kerning tables enormously. But he was a professional. He reached
for his reference books and began.</p>

<h2>Czech Pairs</h2>

<p>The Czech language was a minefield of diacritics. Avery set a test
paragraph: <i>Těšín leží nedaleko Třebíče. Příbram a Přerov
jsou města, kde se Věra učila vědě. Čáslav leží
na jih od Českého Brodu.</i> He examined the
Tě pair in “Těšín” — the crossbar of the T needed to tuck
over the ě just as it would over a plain e. The Tř in
“Třebíče” was trickier; the caron on the ř changed its
vertical profile.</p>

<p>“And look at these,” he said to Vera. “Př in ‘Příbram’
and ‘Přerov’ — the overhang of the P’s bowl over the ř
is critical. Vě in ‘Věra’ and ‘vědě’ — the
diagonal of the V must relate correctly to the caron.”</p>

<p>He continued with more Czech pairs: <i>Říjen je krásný měsíc.
Řeka teče přes Řad obchodních domů. Škoda vyrábí
automobily. Šťastný den! Život není žádná procházka.</i>
The Ří in “Říjen,” the Ře in “Řeka,” the Šk in
“Škoda,” the Šť in “Šťastný,” the Ži in
“Život,” the žá in “žádná” — each demanded
individual attention. Ať he added to the list: the Czech word
“ať” was tiny but the kerning between A and ť mattered in
display settings.</p>

<h2>Polish Pairs</h2>

<p>Polish was equally demanding. <i>Wąchock to małe miasteczko.
Węgry sąsiadują z Polską. Łódź jest trzecim co do
wielkości miastem. Łukasz mieszka w Łucku. Łyżka
leży na stole.</i></p>

<p>The Wą in “Wąchock” was crucial — the ogonek on the
ą dangled below the baseline, and the W’s diagonal had to
account for it. Similarly, Wę in “Węgry” needed the same
care. The Ł with its stroke was a special case: Łó in
“Łódź,” Łu in “Łukasz” and “Łuck,” Ły in
“Łyżka” — the horizontal bar through the L altered every
right-side pairing.</p>

<h2>Hungarian and Turkish Pairs</h2>

<p>Hungarian brought the double-acute characters. <i>A tőke
növekedett. A vőlegény megérkezett. Fűző
készítette az ételt.</i> The Tő in “tőke”
and Vő in “vőlegény” were new territory — the double
acute over the ő added height that could collide with ascenders
in the line above.</p>

<p>Turkish was another story entirely. <i>İstanbul’da yaşıyoruz.
Beyoğlu güzel bir semt. Dağdan inen yol
Şişli’ye ulaşır.</i> The İs in “İstanbul”
was distinctive — the dotted capital I (İ) sat differently from a
standard I. Ğa and Ğı pairs appeared in words like
“dağ” (mountain), where the breve on the Ğ changed the
letter’s visual weight. The Şi in “Şişli”
required the cedilla of the Ş to clear the descending stroke
gracefully.</p>

<h2>Ligatures Across Extended Latin</h2>

<p>Ligature handling grew more complex with extended characters. Avery
tested sequences where fi and fl appeared near or adjacent to
diacritical marks: <i>Filozofie vyžaduje přesné
myšlení. Firma z Třebíče exportuje finále
do celého světa. Flétnista hrál na
flétnu.</i></p>

<p>The fi in “Filozofie,” “Firma,” and “finále”
all needed proper ligature joining even when surrounded by Extended-A
characters. The fl in “Flétnista” and “flétnu”
similarly demanded clean joins. Polish offered its own test cases:
<i>Refleks jest szybki. Oficjalny dokument leży na biurku.
Afirmacja jest ważna w filozofii.</i> The fl in
“Refleks,” the fi in “Oficjalny” and “filozofii,”
the fi in “Afirmacja” — all exercised the ligature engine in
a Latin Extended-A context.</p>

<p>Turkish added another dimension: <i>Fikir özgürlüğün
temelidir. Fişek havaya fırlatıldı.</i> The fi in
“Fikir” and “Fişek” tested whether the ligature engine
correctly handled the Turkish dotless-ı (ı) and
dotted-İ (İ) distinction.</p>

<h2>French Œ and Dutch &#x0133;</h2>

<p>Two Latin Extended-A characters were themselves ligatures by heritage.
The French œ (o-e ligature) appeared in: <i>Le cœur de l’œuvre
bat au rythme des sœurs. Le bœuf traverse la manœuvre
avec aplomb.</i> Though modern French treats œ as a single
letter rather than a typographic ligature, its glyph still required
careful kerning against adjacent characters — the œu in
“cœur,” the œv in “œuvre,” the bœ in
“bœuf.”</p>

<p>Dutch provided the &#x0133; digraph. <i>Het &#x0133;zer is sterk. Z&#x0133; is &#x0133;verig en
b&#x0133;zonder vr&#x0133; in haar oordeel.</i> The &#x0133; glyph, occupying a single
codepoint (U+0133), needed its own kerning entries — particularly
the pairs H&#x0133;, Z&#x0133;, b&#x0133;, and vr&#x0133;, where the preceding letter’s
right-side bearing abutted the unusual shape of the &#x0133;.</p>

<h2>Extended-A Kerning Glossary</h2>

<p>Avery appended a supplementary glossary to his earlier catalogue:</p>

<p><b>Tě</b> — As in Těšín, těžký, tělo.<br/>
<b>Tř</b> — As in Třebíč, třída, tři.<br/>
<b>Vě</b> — As in Věra, věda, věž.<br/>
<b>Př</b> — As in Příbram, příroda, přítel.<br/>
<b>Wą</b> — As in Wąchock, wąski, wąwóz.<br/>
<b>Wę</b> — As in Węgry, węzeł, Węgierska.<br/>
<b>Łó</b> — As in Łódź, łódź, łóżko.<br/>
<b>Łu</b> — As in Łukasz, Łuck, łuk.<br/>
<b>Ły</b> — As in Łyżka, łydka, łysy.<br/>
<b>Čá</b> — As in Čáslav, část, čáp.<br/>
<b>Če</b> — As in České, český, čelo.<br/>
<b>Ří</b> — As in Říjen, říční, řízení.<br/>
<b>Ře</b> — As in Řeka, řeč, řemeslo.<br/>
<b>Šk</b> — As in Škoda, škála, školák.<br/>
<b>Šť</b> — As in Šťastný.<br/>
<b>Ži</b> — As in Život, živý, živnost.<br/>
<b>Žá</b> — As in Žádný, žák, žár.<br/>
<b>Ať</b> — As in ať (Czech: “let” / “whether”).<br/>
<b>Tő</b> — As in tőke, tőr, tőzsde.<br/>
<b>Vő</b> — As in vőlegény, vőfél.<br/>
<b>İs</b> — As in İstanbul, İstiklal, İslam.<br/>
<b>Ğa</b> — As in dağ, yağmur, ağaç.<br/>
<b>Şi</b> — As in Şişli, şifa, şirin.</p>

<p>“If we can kern all of these correctly,” Avery declared,
“we’ll have covered every major Latin-script language in
Europe and beyond. Not just the Western set — the full Latin
range.”</p>

<p>Vera looked at the list and sighed. “I’ll put the kettle on.
This is going to be a long night.”</p>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
# XHTML content document for Chapter 8 ("The Cyrillic Challenge").
# The narrative embeds Russian, Ukrainian and Bulgarian sample text that
# exercises Cyrillic kerning pairs (overhanging, diagonal and round letters)
# as well as guillemet-adjacent pairs, followed by a glossary of the pairs.
# The literal opens with a line continuation so that the XML declaration is
# the very first character of the string, as XML well-formedness requires.
CHAPTER_8 = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Chapter 8 – The Cyrillic Challenge</title>
<link rel="stylesheet" type="text/css" href="style.css"/></head>
<body>
<h1>Chapter 8<br/>The Cyrillic Challenge</h1>

<p>The companion volume was barely off the press when Mrs. Thornton-Foxwell
telephoned again. “Avery, darling, I’ve been in contact with
a collector in Saint Petersburg. He wants the calligraphy survey
extended to cover Cyrillic traditions — Russian, Ukrainian, Bulgarian.
The same standard.”</p>

<p>Avery set down his coffee. Cyrillic was an entirely new script, with its
own letterforms and its own kerning nightmares. Several Cyrillic letters
shared shapes with their Latin counterparts — А resembled A,
Р resembled P, Т resembled T — but many others were
unique. He would need to kern every pair from scratch.</p>

<h2>The Overhanging Letters</h2>

<p>The most troublesome Cyrillic letter was Г (Ge). Its shape —
a horizontal crossbar extending rightward from a vertical stem, like a
reversed L — created an overhang that demanded tight kerning against
every following letter. Avery set his first test: <i>Генерал
Гоголь говорил о Гусарах.
Грамота Галилея
поразила Германию.</i></p>

<p>The Ге in “Генерал” was
critical — the crossbar of Г needed to tuck over the
lowercase е without crushing it. Го in
“Гоголь” demanded similar attention, as did
Гу in “Гусарах” and Гр in
“Грамота.” Га in
“Галилея” rounded out the set.</p>

<p>Т (Te) presented the same challenge as its Latin twin T. <i>Там
Татьяна тихо ткала
ткань. Тепло текло из
Тульского камина.
Три тысячи труб
пели в Тяньцзинь.</i>
Every pair — Та, Те, Ту, Тр,
Ти, Тя — required the T-crossbar to reach over the
following lowercase letter.</p>

<p>Ч (Che) had a subtler overhang. <i>Часы
пробили четверть
четверга. Чудо!
Чорное море.</i> The
Ча, Чу, Чо pairs each had different spacing needs
depending on the round or straight shape of the following vowel.</p>

<h2>The Diagonal Letters</h2>

<p>У (U) was the Cyrillic counterpart of the Latin Y — a letter whose
diagonals created open space against adjacent characters. <i>Уверенность
Удалось укрепить.
Ум устремился вперёд.</i>
The Ув, Уд, Ук, Ум pairs all
needed tighter kerning than the default sidebearings provided.</p>

<p>А (A) and Л (El) were equally demanding. <i>Аудитория
Авиатор Атлас
Адресат. Лампа
Ленинград Лондон
Луна.</i> The Ау, Ав, Ат,
Ад pairs mirrored the Latin AV/AW/AT family. The Л (El),
with its inverted-V left stroke, created unique spacing against
а, е, о, у.</p>

<h2>Round and Complex Letters</h2>

<p>Р (Er) was the Cyrillic P — a letter with a bowl that overhung
the following character. <i>Работа
Речи России
Русский.</i> The Ра,
Ре, Ро, Ру pairs echoed the Latin Pa, Pe, Po
challenge.</p>

<p>Ф (Ef) was the widest Cyrillic letter — a circle bisected by a
vertical stem. <i>Факультет
Фонтанка Фура.</i>
The Фа, Фо, Фу pairs needed generous clearance
on both sides of the circle.</p>

<p>Д (De) had descending serifs that complicated baseline kerning.
<i>Дальний День
Дома Думать.</i> The
Да, Де, До, Ду pairs were unique to
Cyrillic — no Latin letter had quite the same descending structure.</p>

<h2>Ukrainian and Bulgarian</h2>

<p>Ukrainian added its own characters. <i>Її мати
немає рівних.
Європа чекає.
Ґанок виріс
на Ґрунті.</i>
The Її pair (Yi + yi) tested the double-dotted characters
unique to Ukrainian. Єв in “Європа”
tested the Ukrainian Ye against a following consonant. Ґа and
Ґр in “Ґанок” and
“Ґрунті” tested the upturn-Ge
(Ґ), a letter unique to Ukrainian.</p>

<p>Bulgarian Cyrillic had its own typographic traditions. <i>Щука
щастлива жена
живееше в Железник.
Юлия ютилась.</i>
The Щу pair tested the complex Shcha with its descender
against a round vowel. Жа and Же tested the wide Zhe.
Юл in “Юлия” placed the round Yu
against the narrow El.</p>

<h2>Cyrillic with Guillemets</h2>

<p>Russian typography uses guillemets as quotation marks, just like French.
«Говорите тише,»
— сказала она.
«Тихо!»
«Всё будет
хорошо,» —
ответил он.
The «Г, «Т, «В pairs — guillemet
against the overhanging Ge, the crossbarred Te, and the round Ve —
each needed individual spacing. On the closing side, р» and
е» presented the same challenges as their Latin counterparts.</p>

<h2>Cyrillic Kerning Glossary</h2>

<p>Avery appended the Cyrillic pairs to his growing catalogue:</p>

<p><b>Га</b> — Галилея, газета.<br/>
<b>Ге</b> — Генерал, герой.<br/>
<b>Го</b> — Гоголь, город.<br/>
<b>Гу</b> — Гусары, губерния.<br/>
<b>Гр</b> — Грамота, граница.<br/>
<b>Та</b> — Там, также, танец.<br/>
<b>Те</b> — Тепло, текст, тело.<br/>
<b>То</b> — Только, товар.<br/>
<b>Ту</b> — Тульский, туча.<br/>
<b>Тр</b> — Три, труба.<br/>
<b>Тя</b> — Тяньцзинь.<br/>
<b>Ра</b> — Работа, разум.<br/>
<b>Ре</b> — Речи, река.<br/>
<b>Ро</b> — Россия, род.<br/>
<b>Ру</b> — Русский, рука.<br/>
<b>Ау</b> — Аудитория.<br/>
<b>Ав</b> — Авиатор.<br/>
<b>Ат</b> — Атлас, атом.<br/>
<b>Ад</b> — Адресат.<br/>
<b>Ув</b> — Уверенность.<br/>
<b>Уд</b> — Удалось.<br/>
<b>Ук</b> — Укрепить.<br/>
<b>Ум</b> — Ум, умник.<br/>
<b>Да</b> — Дальний, дата.<br/>
<b>Де</b> — День, дело.<br/>
<b>До</b> — Дома, дорога.<br/>
<b>Ла</b> — Лампа, лавка.<br/>
<b>Ле</b> — Ленинград, лес.<br/>
<b>Ло</b> — Лондон, лодка.<br/>
<b>Ча</b> — Часы, чай.<br/>
<b>Чо</b> — Чорное, чорт.<br/>
<b>Чу</b> — Чудо, чувство.<br/>
<b>Фа</b> — Факультет.<br/>
<b>Фо</b> — Фонтанка.</p>

<p>“Cyrillic has fewer kerning traps than Latin,” Avery reflected,
“but the ones it has are severe. Г and Т dominate every
page of Russian text, and if they’re not kerned properly, the whole
paragraph looks like it’s falling apart.”</p>

<p>Vera glanced at the stack of proofs — now three volumes deep —
and smiled wearily. “At least there are no Cyrillic ligatures.”</p>

<p>“Yet,” said Avery.</p>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
# XHTML content document for Chapter 9 ("Latin Extended-B").
# The narrative embeds Romanian, Vietnamese, Croatian, Pinyin and West
# African sample text that exercises kerning pairs involving characters the
# prose attributes to U+0180–U+024F, followed by a glossary of the pairs.
# The literal opens with a line continuation so that the XML declaration is
# the very first character of the string, as XML well-formedness requires.
# Characters that are easily confused with a look-alike or normalised into a
# multi-letter sequence (T-cedilla U+0162, the digraphs U+01C4–U+01CC) are
# written as numeric character references to keep the codepoint explicit.
CHAPTER_9 = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Chapter 9 – Latin Extended-B</title>
<link rel="stylesheet" type="text/css" href="style.css"/></head>
<body>
<h1>Chapter 9<br/>Latin Extended-B</h1>

<p>Months passed. Avery had just begun to relax when the telephone rang
again. This time it was not Mrs. Thornton-Foxwell but her publisher,
a harried man named Grigor, who explained that the calligraphy survey
had attracted interest from scholars in Bucharest, Hanoi, and Lagos.
“We need Romanian, Vietnamese, and several West African
languages,” he said. “Plus Croatian digraphs and Pinyin
romanization. Latin Extended-B, the whole block.”</p>

<p>Avery looked at the Unicode chart for U+0180–U+024F and sighed.
It was a miscellany: characters from a dozen unrelated traditions,
each with its own typographic demands.</p>

<h2>Romanian</h2>

<p>Romanian was the most urgent addition. The language required two
characters that looked deceptively like their Latin-1 cousins but
were typographically distinct: Ș (S with comma below, U+0218)
and Ț (T with comma below, U+021A). Avery set a test
paragraph: <i>Țara noastră este frumoasă.
Șase sute de școlari au venit la Țărăncuța.
Țesătura ținutului este unică.
Șeful stației știa totul.</i></p>

<p>The Ță pair in “Țara” and
“Țărăncuța” was the key test —
the comma descender on Ț distinguished it from the cedilla-T
(&#x0162;) found in legacy Romanian text, but the crossbar overhang was identical.
Țe in “Țesătura” and Țo demanded the same
T-crossbar tucking as their ASCII equivalents. Șa and Șe in
“Șase” and “Șeful” needed the comma below to
clear the baseline without colliding with descenders in the line
below.</p>

<p>“The tricky part,” Avery told Vera, “is that Romanian also uses
ă (a-breve) from Latin Extended-A and î (i-circumflex) from Latin-1, and
ț (t-comma) interacts with both. The pair ță in
‘Țărăncuța’ tests the comma-below
against the breve-above — a vertical sandwich of diacritics.”</p>

<h2>Vietnamese</h2>

<p>Vietnamese typography brought the horn diacritic into play. The
characters Ơ (O with horn, U+01A0) and Ư
(U with horn, U+01AF) appeared constantly in Vietnamese text. Avery
set: <i>Ơi ! Ngươi Việt Nam
yêu thương đất nước.
Vừa đẹp vừa hay. Tươi sáng
rỡi.</i></p>

<p>The horn on Ơ and Ư extended to the upper right of the
letter, creating potential collisions with the following character.
Tư and Vư were particularly demanding: the T-crossbar or
V-diagonal needed to accommodate the horn’s extra width. Similarly,
Tơ placed the T’s crossbar over a horned lowercase o — the
horn could crash into the crossbar at small sizes.</p>

<h2>Croatian Digraphs</h2>

<p>Croatian contributed its titular digraph ligatures. The Unicode
block included precomposed forms: &#x01C4; (U+01C4), &#x01C5; (U+01C5),
&#x01C6; (U+01C6), &#x01C7; (U+01C7), &#x01C8; (U+01C8), &#x01C9; (U+01C9),
&#x01CA; (U+01CA), &#x01CB; (U+01CB), &#x01CC; (U+01CC). These were single codepoints
representing two-letter combinations, each with unique glyph widths.
<i>&#x01C5;ep je velik. &#x01C8;eto je toplo. &#x01CB;iva je zelena.
&#x01C5;amija stoji na brdu.</i></p>

<p>“These digraphs are wider than normal letters,” Avery observed.
“Kerning &#x01C5; against a following lowercase vowel is unlike
kerning D or Ž individually — the combined glyph has its own
sidebearings. Same for &#x01C8; and &#x01CB;.”</p>

<h2>Pinyin Tone Marks</h2>

<p>Mandarin Chinese romanization — Pinyin — used Latin letters
with caron and diaeresis-plus-tone combinations that fell squarely
in Extended-B. <i>Nǐ hǎo! Wǒ shi
Zhōngguó rén. Lǚshi zhǔyi
Tǎmen de fāyīn.</i></p>

<p>The ǎ (a with caron) under a T-crossbar in “Tǎmen”
presented the same challenge as Czech Tě — but the Pinyin
context meant it appeared in entirely different words. The diaeresis-
plus-tone characters were uniquely demanding: ǖ (u-diaeresis-
macron), ǘ (u-diaeresis-acute), ǚ (u-diaeresis-caron),
ǜ (u-diaeresis-grave) each stacked two diacritical marks above
the u, creating height that could collide with the preceding
T-crossbar. <i>Lǜshi Tǖ Vǘ</i> — Avery set each
combination and winced at the vertical crowding.</p>

<h2>African Languages</h2>

<p>West African languages used hooked and barred variants of familiar
Latin letters. <i>Ɓala ɓe Ɗala ɗe.
Ƒarin kowa ya san. Ɛdiɲ ɔkɔ nɔ.</i>
The Ɓ (B-hook) and Ɗ (D-hook) had descending hooks that
affected baseline spacing. Ƒ (F-hook) shared the overhang
issues of a standard F but with an added complication: the hook at the
bottom altered the letter’s center of gravity. Ƒa,
Ƒo, Ƒe all needed individual attention — the hook
pulled the eye downward while the crossbar demanded tuck-over
kerning above.</p>

<p>“And the open vowels,” Avery added. “Ɛ
(open E, U+0190) and ɔ (open O, U+0254) have wider apertures
than their standard counterparts. Every consonant-to-open-vowel pair
needs rechecking.”</p>

<h2>Extended-B Kerning Glossary</h2>

<p>Avery appended to his catalogue:</p>

<p><b>Ța</b> — As in Țara, țară.<br/>
<b>Țe</b> — As in Țesătura, țesut.<br/>
<b>Țo</b> — As in Țoca, țocul.<br/>
<b>Șa</b> — As in Șase, șarpe.<br/>
<b>Șe</b> — As in Șeful, șed.<br/>
<b>Tơ</b> — As in Tơi, tơi sáng.<br/>
<b>Tư</b> — As in Tươi, tương lai.<br/>
<b>Vơ</b> — As in Vơi, vơ.<br/>
<b>Vư</b> — As in Vừa, vươn.<br/>
<b>Tǎ</b> — As in Tǎmen (Pinyin).<br/>
<b>Tǖ</b> — As in nǖ (Pinyin: female).<br/>
<b>Ƒa</b> — As in Ƒarin (Hausa).<br/>
<b>Ƒo</b> — As in Ƒoto (Hausa).</p>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
# XHTML content document for Chapter 10 ("Greek & Coptic").
# The narrative embeds modern and polytonic Greek sample text, guillemet
# pairs and Greek letters inside English scientific prose, followed by a
# glossary of the Greek kerning pairs.
# The literal opens with a line continuation so that the XML declaration is
# the very first character of the string, as XML well-formedness requires.
# The ampersand in the chapter title is escaped as &amp; because a bare "&"
# is not allowed in XML character data.
CHAPTER_10 = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Chapter 10 – Greek &amp; Coptic</title>
<link rel="stylesheet" type="text/css" href="style.css"/></head>
<body>
<h1>Chapter 10<br/>Greek &amp; Coptic</h1>

<p>The final challenge arrived not by telephone but by post: a handwritten
letter from a professor of classics at the University of Athens,
requesting that the calligraphy survey include a chapter on the Greek
alphabet. “The birthplace of Western lettering,” the professor
wrote, “deserves proper typographic treatment.”</p>

<p>Avery could hardly disagree. Greek was where it all began — the
ancestor of Latin, Cyrillic, and Coptic. And the Greek alphabet had
its own kerning nightmares, many of them eerily familiar.</p>

<h2>The Overhanging Letters</h2>

<p>Γ (Gamma, U+0393) was the Greek counterpart of the Cyrillic
Г and a close relative of the Latin T. Its horizontal stroke
extended rightward, creating the same tuck-over demands. Avery set:
<i>Γαλήνη γαλάζια
Γεωργία Γοργόνα
Γραμματική
Γυμνάσιο.</i></p>

<p>The Γα in “Γαλήνη” and
“γαλάζια” needed tight
kerning — the horizontal bar of Γ had to reach over the
α without crushing it. Γε in
“Γεωργία” was equally
sensitive. Γο in “Γοργόνα”,
Γρ in “Γραμματική”,
and Γυ in “Γυμνάσιο”
completed the set of Gamma’s right-side partners.</p>

<p>Τ (Tau, U+03A4) was structurally identical to the Latin T.
<i>Ταξίδι ταχύ
Τεχνολογία
Τοποθεσία
Τρίγωνο
Τυρί.</i>
Every pair — Τα, Τε, Το,
Τρ, Τυ — demanded the crossbar to tuck over
the following lowercase letter, just as in Latin.</p>

<h2>The Diagonal Letters</h2>

<p>Υ (Upsilon, U+03A5) mirrored the Latin Y’s diagonal
challenges. <i>Υγεία
υπάρχει
Υπουργός
Υποθήκη.</i>
The Υγ, Υπ pairs showed the same open spacing
that the Latin Y created against following lowercase letters.</p>

<p>Α (Alpha, U+0391) and Λ (Lambda, U+039B) were the
Greek equivalents of A and an inverted V. <i>Αυτός
Ανατολή Ατλαντικός
Αδελφός. Λαμπρή
Λευκάδα Λονδίνο.</i>
The Αυ, Αν, Ατ, Αδ pairs
followed the same diagonal-against-vertical pattern as Latin AV, AW,
AT. Λα, Λε, Λο needed the inverted-V’s
right stroke to relate cleanly to the following round or vertical
letter.</p>

<h2>Round and Complex Letters</h2>

<p>Ρ (Rho, U+03A1) was the Greek P — bowl overhanging the
following character. <i>Ραδιόφωνο
Ρεύμα Ροδός
ρόδα.</i> The Ρα,
Ρε, Ρο pairs echoed the Latin Pa, Pe, Po and
Cyrillic Ра, Ре, Ро challenges.</p>

<p>Φ (Phi, U+03A6) was one of the widest Greek letters — a circle
bisected by a vertical stem, like the Cyrillic Ф.
<i>Φαντασία
Φοίνικας
φυσική.</i> The Φα,
Φο, Φυ pairs needed generous clearance for the
circle’s width.</p>

<p>Δ (Delta, U+0394) had a triangular shape with a wide base,
unlike anything in Latin. <i>Δασκάλα
Δελφοί Δούναβης
διάβαση.</i> The Δα,
Δε, Δο pairs needed the wide base to relate
to the following letter without excessive gaps.</p>

<h2>Greek with Guillemets and Polytonic</h2>

<p>Modern Greek typography, like French and Russian, uses guillemets.
«Γεια σας,»
είπε. «Τι
κάνετε;»
«Καλά,» απάντησε.
The «Γ and «Τ pairs tested the guillemet
against overhanging capitals, while ε» and ο»
tested closing spacing.</p>

<p>Greek also carried a rich tradition of polytonic accents — the
acute (΄), grave, circumflex, rough breathing, and smooth
breathing marks that adorned classical and katharevousa texts.
<i>Ἀθήνα ἐστί
μεγάλη πόλη.
Ὀ κόσμος
εἶναι ωραῖος.</i>
Though polytonic marks are handled by combining characters (from the
U+0300 block), their visual interaction with kerning pairs
remained — a breathing mark over an Alpha could encroach on the
preceding or following letter’s space.</p>

<h2>Greek in Scientific Text</h2>

<p>Beyond natural language, Greek letters appeared constantly in
scientific and mathematical prose. <i>The wavelength λ is
inversely proportional to frequency ν. The ratio π/φ
appears in the golden angle. Angle θ subtends arc αβ,
while Σ denotes summation and Δ denotes change.</i></p>

<p>“When Greek letters appear inline with Latin text,” Avery
explained, “the kerning engine must handle cross-script pairs:
Latin-T followed by Greek-α, or Greek-σ followed by
a Latin comma. These hybrid pairs are rare but they matter in
any book that discusses physics, mathematics, or engineering.”</p>

<h2>Greek Kerning Glossary</h2>

<p>Avery added the final appendix to his growing catalogue:</p>

<p><b>Γα</b> — Γαλήνη, γαλαξίας.<br/>
<b>Γε</b> — Γεωργία, γερός.<br/>
<b>Γο</b> — Γοργόνα, γονιός.<br/>
<b>Γυ</b> — Γυμνάσιο, γύρος.<br/>
<b>Γρ</b> — Γραμματική.<br/>
<b>Τα</b> — Ταξίδι, ταχύ.<br/>
<b>Τε</b> — Τεχνολογία.<br/>
<b>Το</b> — Τοποθεσία.<br/>
<b>Τυ</b> — Τυρί, τυχερός.<br/>
<b>Αυ</b> — Αυτός, αυλή.<br/>
<b>Αν</b> — Ανατολή.<br/>
<b>Ατ</b> — Ατλαντικός.<br/>
<b>Αδ</b> — Αδελφός.<br/>
<b>Υγ</b> — Υγεία.<br/>
<b>Υπ</b> — Υπουργός.<br/>
<b>Ρα</b> — Ραδιόφωνο.<br/>
<b>Ρε</b> — Ρεύμα.<br/>
<b>Ρο</b> — Ροδός.<br/>
<b>Φα</b> — Φαντασία.<br/>
<b>Φο</b> — Φοίνικας.<br/>
<b>Δα</b> — Δασκάλα.<br/>
<b>Δε</b> — Δελφοί.<br/>
<b>Λα</b> — Λαμπρή.<br/>
<b>Λε</b> — Λευκάδα.<br/>
<b>Λο</b> — Λονδίνο.</p>

<p>“And with that,” Avery said, setting down his pencil for the last
time, “we have covered every script from the Acropolis to the
Urals, from the Rhine to the Mekong. If a typesetter can render
every word in these chapters without a single miskerned pair,
they have earned my respect.”</p>

<p>Vera closed her notebook and smiled. “Shall I put the kettle on
one last time?”</p>

<p>“Please,” said Avery. “And make it strong.”</p>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
CHAPTER_11 = """\
|
|||
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|||
|
|
<head><title>Chapter 11 – Combining Marks</title>
|
|||
|
|
<link rel="stylesheet" type="text/css" href="style.css"/></head>
|
|||
|
|
<body>
|
|||
|
|
<h1>Chapter 11<br/>Combining Marks</h1>
|
|||
|
|
|
|||
|
|
<p>Avery had thought the project was finally complete when Vera placed
|
|||
|
|
a new stack of proofs on his desk. “These came from a different
|
|||
|
|
typesetter,” she explained. “Their system outputs decomposed
|
|||
|
|
Unicode — every accented letter is split into a base character
|
|||
|
|
followed by one or more combining diacritical marks.”</p>
|
|||
|
|
|
|||
|
|
<p>Avery stared. “You mean instead of ö as a single glyph, they
|
|||
|
|
send ö? And instead of é, they send é?”</p>
|
|||
|
|
|
|||
|
|
<p>“Exactly. The renderer has to overlay the combining mark onto
|
|||
|
|
the preceding base character — centred horizontally, with proper
|
|||
|
|
vertical clearance, and without advancing the cursor. If it gets any
|
|||
|
|
of that wrong, the diacritics float off into space or crash into
|
|||
|
|
neighbouring letters.”</p>
|
|||
|
|
|
|||
|
|
<h2>Single Combining Marks</h2>
|
|||
|
|
|
|||
|
|
<p>Avery began with the most common combining diacritical marks from the
|
|||
|
|
U+0300 block. He set each one after a simple base character to verify
|
|||
|
|
placement:</p>
|
|||
|
|
|
|||
|
|
<p><i>à (a + grave), é (e + acute), î (i + circumflex),
|
|||
|
|
õ (o + tilde), ü (u + diaeresis), å (a + ring above),
|
|||
|
|
ç (c + cedilla), ę (e + ogonek), ž (z + caron),
|
|||
|
|
ő (o + double acute), ā (a + macron),
|
|||
|
|
ĕ (e + breve), ż (z + dot above).</i></p>
|
|||
|
|
|
|||
|
|
<p>“Each mark must sit centred over its base character,” Avery
|
|||
|
|
said, “with at least a pixel of clearance between the top of the
|
|||
|
|
base glyph and the bottom of the combining mark. If the mark drifts
|
|||
|
|
left or right, the reader sees a broken letter.”</p>
|
|||
|
|
|
|||
|
|
<h2>Decomposed German</h2>
|
|||
|
|
|
|||
|
|
<p>He turned to German text rendered entirely in decomposed form.
|
|||
|
|
Every umlaut and eszett combination that had worked perfectly in
|
|||
|
|
Chapter 6 now needed to survive the decomposition:</p>
|
|||
|
|
|
|||
|
|
<p><i>Töchter saßen über den Büchern.
|
|||
|
|
Vögel flogen über die Wälder. Die Würde
|
|||
|
|
des Menschen ist unantastbar. Tänzer übten in der
|
|||
|
|
Türkei. Öffnung der Ämter war um zehn Uhr.
|
|||
|
|
Äußerst sorgfältig prüfte er die
|
|||
|
|
Größe der Straße.</i></p>
|
|||
|
|
|
|||
|
|
<p>The Tö in “Töchter” was the critical
|
|||
|
|
test — the T-crossbar had to kern correctly against the
|
|||
|
|
base o, while the combining diaeresis (U+0308) sat above
|
|||
|
|
without shifting the cursor. Vö in “Vögel,”
|
|||
|
|
Wü in “Würde,” and Tä in
|
|||
|
|
“Tänzer” each exercised a different kerning pair
|
|||
|
|
with a decomposed umlaut. The Öf in “Öffnung”
|
|||
|
|
tested a combining mark immediately before a double-f ligature.</p>
|
|||
|
|
|
|||
|
|
<h2>Decomposed French</h2>
|
|||
|
|
|
|||
|
|
<p>French offered its own decomposition challenges. Avery set the same
|
|||
|
|
passage from Chapter 6, but with every accent decomposed:</p>
|
|||
|
|
|
|||
|
|
<p><i>Fête de la République. Père Noël
|
|||
|
|
arriva en Février. À la recherche du café
|
|||
|
|
idéal. Ça va? Garçon, un café
|
|||
|
|
crème, s’il vous plaît.</i></p>
|
|||
|
|
|
|||
|
|
<p>The Fê in “Fête” placed a combining
|
|||
|
|
circumflex over the e after the F — both the F-overhang kerning
|
|||
|
|
and the mark placement had to work simultaneously.
|
|||
|
|
Ré in “République” tested acute placement
|
|||
|
|
after an R. The À in “À la” placed a
|
|||
|
|
combining grave accent on a capital A, which had to clear the apex
|
|||
|
|
of the letterform.</p>
|
|||
|
|
|
|||
|
|
<h2>Combining Marks and Ligatures</h2>
|
|||
|
|
|
|||
|
|
<p>The most demanding test combined decomposed diacritics with ligature
|
|||
|
|
sequences. In precomposed text, the ligature engine only saw
|
|||
|
|
single-codepoint accented letters. With decomposition, a combining
|
|||
|
|
mark could sit between a base character and the start of a ligature,
|
|||
|
|
or immediately after one:</p>
|
|||
|
|
|
|||
|
|
<p><i>La définition de l’efficacité réside
|
|||
|
|
dans la réflexion. L’officière
|
|||
|
|
vérifia les différentes soufflés. Il souffrit
|
|||
|
|
magnifiquement. Défiant toute difficulté, le
|
|||
|
|
greffier affirma l’efficience du système.</i></p>
|
|||
|
|
|
|||
|
|
<p>The fi ligature in “définition” came right after
|
|||
|
|
a combining acute on the e. The ffi in
|
|||
|
|
“efficacité” was followed by a combining acute.
|
|||
|
|
The fl in “réflexion” came after a combining
|
|||
|
|
acute. The ff in “différentes” contained a combining
|
|||
|
|
mark between the ligature and the following vowel. Each of these
|
|||
|
|
sequences tested whether the combining mark handler and the ligature
|
|||
|
|
engine interacted correctly.</p>
|
|||
|
|
|
|||
|
|
<h2>Multiple Combining Marks</h2>
|
|||
|
|
|
|||
|
|
<p>Some writing systems required two or even three combining marks on
|
|||
|
|
a single base character. Vietnamese was the classic example, where
|
|||
|
|
a vowel could carry both a diacritical mark (circumflex, horn, or
|
|||
|
|
breve) and a tone mark (acute, grave, hook above, tilde, or dot
|
|||
|
|
below):</p>
|
|||
|
|
|
|||
|
|
<p><i>Việt Nam yêu thứơng
|
|||
|
|
đất nứớc.
|
|||
|
|
Tời sáng rò̀i.</i></p>
|
|||
|
|
|
|||
|
|
<p>The ệ in “Việt” stacked
|
|||
|
|
a combining circumflex (U+0302) and a combining dot below (U+0323) on
|
|||
|
|
a single base e. Both marks had to be positioned correctly relative to
|
|||
|
|
the base glyph and to each other — the circumflex above and the
|
|||
|
|
dot below the baseline. The ứ sequences placed a
|
|||
|
|
combining horn (U+031B) and a combining acute (U+0301) on the same
|
|||
|
|
base u, testing whether the second mark used the base character’s
|
|||
|
|
metrics rather than the first combining mark’s.</p>
|
|||
|
|
|
|||
|
|
<h2>Combining Marks in Extended Latin</h2>
|
|||
|
|
|
|||
|
|
<p>The Czech and Polish texts from Chapter 7 could also appear in
|
|||
|
|
decomposed form. Avery set a test paragraph:</p>
|
|||
|
|
|
|||
|
|
<p><i>Těšín leží nedaleko
|
|||
|
|
Třebíče. Příbram a
|
|||
|
|
Přerov jsou města. Věra se učila
|
|||
|
|
vědě. Čáslav leží
|
|||
|
|
na jih od Českého Brodu. Wąchock to
|
|||
|
|
małe miasteczko. Węgry sąsiadują z
|
|||
|
|
Polską.</i></p>
|
|||
|
|
|
|||
|
|
<p>The Tě in “Těšín” placed
|
|||
|
|
a combining caron over e after the T-crossbar — the same visual
|
|||
|
|
result as the precomposed ě, but assembled from parts. Each
|
|||
|
|
subsequent caron and acute in the sentence tested a different
|
|||
|
|
base-plus-mark combination. The Polish ogonek (U+0328) in
|
|||
|
|
“Wąchock” and “Węgry” tested
|
|||
|
|
a below-baseline combining mark, which had to clear descenders in the
|
|||
|
|
line below without disrupting the W kerning.</p>
|
|||
|
|
|
|||
|
|
<h2>Combining Marks with Capitals</h2>
|
|||
|
|
|
|||
|
|
<p>Capital letters presented additional challenges because their greater
|
|||
|
|
height left less room for marks above. Avery tested each common
|
|||
|
|
combining mark on capitals:</p>
|
|||
|
|
|
|||
|
|
<p><i>À propos. Ágnes. Âme. Ão.
|
|||
|
|
Ärger. Åkesson. Ǎlef. Ève.
|
|||
|
|
Émile. Être. Ìtalo. Íngrid.
|
|||
|
|
Île. Òslo. Óscar. Ôter.
|
|||
|
|
Õtelo. Öffnung. Ùbald. Último.
|
|||
|
|
Ûnion. Übung. Ñoquí.</i></p>
|
|||
|
|
|
|||
|
|
<p>The combining marks on capitals sat higher than on lowercase letters,
|
|||
|
|
and each mark needed to clear the top of the letterform. In particular,
|
|||
|
|
Ä (A + combining diaeresis) and Ö (O + combining
|
|||
|
|
diaeresis) had to match their precomposed equivalents Ä and
|
|||
|
|
Ö visually — any discrepancy would be immediately obvious
|
|||
|
|
to the reader.</p>
|
|||
|
|
|
|||
|
|
<h2>Precomposed vs. Decomposed Comparison</h2>
|
|||
|
|
|
|||
|
|
<p>As a final verification, Avery set the same sentence in both forms,
|
|||
|
|
one after the other, so the typesetter could compare them directly:</p>
|
|||
|
|
|
|||
|
|
<p><b>Precomposed:</b> <i>Töchter übten in der Türkei.
|
|||
|
|
Vögel flogen über die Wälder. Fête de la
|
|||
|
|
République. À la recherche du café.
|
|||
|
|
Ça va?</i></p>
|
|||
|
|
|
|||
|
|
<p><b>Decomposed:</b> <i>Töchter übten in der
|
|||
|
|
Türkei. Vögel flogen über die
|
|||
|
|
Wälder. Fête de la République.
|
|||
|
|
À la recherche du café. Ça va?</i></p>
|
|||
|
|
|
|||
|
|
<p>“If those two lines are indistinguishable on screen,” Avery
|
|||
|
|
said, “the combining mark renderer is working correctly. Any
|
|||
|
|
difference in spacing, vertical position, or glyph alignment means
|
|||
|
|
something is wrong.”</p>
|
|||
|
|
|
|||
|
|
<p>Vera studied both lines through the loupe. “They look identical
|
|||
|
|
to me.”</p>
|
|||
|
|
|
|||
|
|
<h2>Extended Latin Composition</h2>
|
|||
|
|
|
|||
|
|
<p>“But what about the Latin Extended-A characters?” Vera
|
|||
|
|
asked. “The old composition table only covered grave, acute,
|
|||
|
|
circumflex, tilde, diaeresis, and cedilla. Characters like
|
|||
|
|
ě (e-caron), ř (r-caron), ą (a-ogonek),
|
|||
|
|
ł (l-stroke), and ű (u-double-acute)
|
|||
|
|
were never composed from decomposed input.”</p>
|
|||
|
|
|
|||
|
|
<p><b>Precomposed:</b> <i>Těšín leží
|
|||
|
|
nedaleko Třebíče. Příbram a Přerov
|
|||
|
|
jsou města. Věra se učila vědě.
|
|||
|
|
Čáslav leží na jih od Českého
|
|||
|
|
Brodu.</i></p>
|
|||
|
|
|
|||
|
|
<p><b>Decomposed:</b> <i>Těšín
|
|||
|
|
lez&#x030C;i&#x0301; nedaleko Tr&#x030C;ebi&#x0301;c&#x030C;e.
|
|||
|
|
Příbram a Přerov jsou města.
|
|||
|
|
Věra se učila vědě.
|
|||
|
|
C&#x030C;a&#x0301;slav lez&#x030C;i&#x0301; na jih od
|
|||
|
|
Českého Brodu.</i></p>
|
|||
|
|
|
|||
|
|
<p><b>Precomposed:</b> <i>Wąchock to małe miasteczko.
|
|||
|
|
Węgry sąsiadują z Polską. Gşrün
|
|||
|
|
Über Şen Ďále.</i></p>
|
|||
|
|
|
|||
|
|
<p><b>Decomposed:</b> <i>Wąchock to małe miasteczko.
|
|||
|
|
Węgry sąsiadują z Polską.
|
|||
|
|
Gşrün Über Şen
|
|||
|
|
Ďále.</i></p>
|
|||
|
|
|
|||
|
|
<p>“With the new composition table these should be
|
|||
|
|
indistinguishable,” Avery said. “Carons, ogoneks,
|
|||
|
|
cedillas, double acutes — all composed from their parts into the
|
|||
|
|
same precomposed codepoints the font expects.”</p>
|
|||
|
|
|
|||
|
|
<p>“Then we’re done,” Avery said. “Eleven chapters,
|
|||
|
|
four scripts, three hundred kerning pairs, two dozen ligature sequences,
|
|||
|
|
and now combining marks. If the renderer survives all of that, it can
|
|||
|
|
handle anything a publisher throws at it.”</p>
|
|||
|
|
|
|||
|
|
<p>He set down his pencil and reached for his coffee. It was cold.</p>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
# Cover page written to OEBPS/cover.xhtml by build_epub(); it only shows
# cover.jpg.  The ampersand in the alt text must be escaped as &amp; or the
# document is not well-formed XML.
COVER_XHTML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><title>Cover</title>
<style>
body { margin: 0; padding: 0; text-align: center; }
img { max-width: 100%; max-height: 100%; }
</style>
</head>
<body>
<img src="cover.jpg" alt="Kerning &amp; Ligature Edge Cases"/>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
# CSS written to OEBPS/style.css by build_epub() and linked from the chapter
# and table-of-contents pages.
STYLESHEET = """\
body {
font-family: serif;
margin: 2em;
line-height: 1.6;
}
h1 {
font-size: 1.5em;
text-align: center;
margin-bottom: 1.5em;
line-height: 1.3;
}
h2 {
font-size: 1.15em;
margin-top: 1.5em;
margin-bottom: 0.5em;
}
p {
text-indent: 1.5em;
margin: 0.25em 0;
text-align: justify;
}
blockquote p {
text-indent: 0;
margin: 0.5em 1.5em;
font-style: italic;
}
"""
|
|||
|
|
|
|||
|
|
# OCF container descriptor written to META-INF/container.xml by build_epub();
# it points the reading system at the package document OEBPS/content.opf.
CONTAINER_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"""
|
|||
|
|
|
|||
|
|
# EPUB 3 package document written to OEBPS/content.opf by build_epub().
# The manifest lists every file placed in the archive under OEBPS/ and the
# spine fixes the reading order: cover, table of contents, chapters 1-11.
# NOTE(review): BOOK_UUID, TITLE, AUTHOR and DATE are interpolated verbatim;
# presumably they contain no bare "&" or "<" and DATE is YYYY-MM-DD (it is
# reused as the date part of dcterms:modified) - confirm at their definitions.
CONTENT_OPF = f"""\
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId" version="3.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="BookId">urn:uuid:{BOOK_UUID}</dc:identifier>
<dc:title>{TITLE}</dc:title>
<dc:creator>{AUTHOR}</dc:creator>
<dc:language>en</dc:language>
<dc:date>{DATE}</dc:date>
<meta property="dcterms:modified">{DATE}T00:00:00Z</meta>
<meta name="cover" content="cover-image"/>
</metadata>
<manifest>
<item id="cover-image" href="cover.jpg" media-type="image/jpeg" properties="cover-image"/>
<item id="cover" href="cover.xhtml" media-type="application/xhtml+xml"/>
<item id="style" href="style.css" media-type="text/css"/>
<item id="ch1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
<item id="ch2" href="chapter2.xhtml" media-type="application/xhtml+xml"/>
<item id="ch3" href="chapter3.xhtml" media-type="application/xhtml+xml"/>
<item id="ch4" href="chapter4.xhtml" media-type="application/xhtml+xml"/>
<item id="ch5" href="chapter5.xhtml" media-type="application/xhtml+xml"/>
<item id="ch6" href="chapter6.xhtml" media-type="application/xhtml+xml"/>
<item id="ch7" href="chapter7.xhtml" media-type="application/xhtml+xml"/>
<item id="ch8" href="chapter8.xhtml" media-type="application/xhtml+xml"/>
<item id="ch9" href="chapter9.xhtml" media-type="application/xhtml+xml"/>
<item id="ch10" href="chapter10.xhtml" media-type="application/xhtml+xml"/>
<item id="ch11" href="chapter11.xhtml" media-type="application/xhtml+xml"/>
<item id="toc" href="toc.xhtml" media-type="application/xhtml+xml" properties="nav"/>
</manifest>
<spine>
<itemref idref="cover"/>
<itemref idref="toc"/>
<itemref idref="ch1"/>
<itemref idref="ch2"/>
<itemref idref="ch3"/>
<itemref idref="ch4"/>
<itemref idref="ch5"/>
<itemref idref="ch6"/>
<itemref idref="ch7"/>
<itemref idref="ch8"/>
<itemref idref="ch9"/>
<itemref idref="ch10"/>
<itemref idref="ch11"/>
</spine>
</package>
"""
|
|||
|
|
|
|||
|
|
# EPUB 3 navigation document written to OEBPS/toc.xhtml by build_epub().
# Ampersands in the heading and in the Chapter 10 entry are escaped as &amp;
# so the file remains well-formed XML.
TOC_XHTML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"
xml:lang="en" lang="en">
<head><title>Table of Contents</title>
<link rel="stylesheet" type="text/css" href="style.css"/></head>
<body>
<h1>Kerning &amp; Ligature Edge Cases</h1>
<nav epub:type="toc">
<ol>
<li><a href="chapter1.xhtml">Chapter 1 – The Typographer’s Affliction</a></li>
<li><a href="chapter2.xhtml">Chapter 2 – Ligatures in the Afflicted Offices</a></li>
<li><a href="chapter3.xhtml">Chapter 3 – The Proof of the Pudding</a></li>
<li><a href="chapter4.xhtml">Chapter 4 – Punctuation and Numerals</a></li>
<li><a href="chapter5.xhtml">Chapter 5 – A Glossary of Troublesome Pairs</a></li>
<li><a href="chapter6.xhtml">Chapter 6 – Western European Accents</a></li>
<li><a href="chapter7.xhtml">Chapter 7 – Beyond the Western Alphabet</a></li>
<li><a href="chapter8.xhtml">Chapter 8 – The Cyrillic Challenge</a></li>
<li><a href="chapter9.xhtml">Chapter 9 – Latin Extended-B</a></li>
<li><a href="chapter10.xhtml">Chapter 10 – Greek &amp; Coptic</a></li>
<li><a href="chapter11.xhtml">Chapter 11 – Combining Marks</a></li>
</ol>
</nav>
</body>
</html>
"""
|
|||
|
|
|
|||
|
|
|
|||
|
|
def build_epub(output_path: str) -> None:
    """Assemble the kerning/ligature test EPUB and write it to *output_path*.

    The archive holds the ``mimetype`` entry, the OCF container descriptor,
    the package document, the navigation document, the stylesheet, the cover
    image with its XHTML wrapper, and chapters 1-11.

    Args:
        output_path: Destination path of the ``.epub`` file.  An existing file
            is overwritten because the archive is opened in ``"w"`` mode.
    """
    cover_data = create_cover_image()
    chapters = (
        CHAPTER_1,
        CHAPTER_2,
        CHAPTER_3,
        CHAPTER_4,
        CHAPTER_5,
        CHAPTER_6,
        CHAPTER_7,
        CHAPTER_8,
        CHAPTER_9,
        CHAPTER_10,
        CHAPTER_11,
    )

    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
        # The mimetype entry goes first and is stored uncompressed, unlike
        # every other member, which uses the archive default (deflate).
        zf.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED)
        zf.writestr("META-INF/container.xml", CONTAINER_XML)
        zf.writestr("OEBPS/content.opf", CONTENT_OPF)
        zf.writestr("OEBPS/toc.xhtml", TOC_XHTML)
        zf.writestr("OEBPS/style.css", STYLESHEET)
        zf.writestr("OEBPS/cover.jpg", cover_data)
        zf.writestr("OEBPS/cover.xhtml", COVER_XHTML)
        # Member names chapter1.xhtml .. chapter11.xhtml match the hrefs used
        # in the manifest and the table of contents.
        for number, chapter in enumerate(chapters, start=1):
            zf.writestr(f"OEBPS/chapter{number}.xhtml", chapter)

    print(f"EPUB written to {output_path}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # The project root is taken to be the parent of the directory containing
    # this script; the EPUB is written to <project_root>/test/epubs/.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    out = os.path.join(project_root, "test", "epubs", "test_kerning_ligature.epub")
    # Create the output directory on first run; harmless if it already exists.
    os.makedirs(os.path.dirname(out), exist_ok=True)
    build_epub(out)
|