Rewrite add_glyphs, use alias table.
This is a rewrite of add_glyphs in third_party/color_emoji. The primary motivation was to move special aliasing rules out of that code and use an external aliases file instead. This new version is a bit more thorough about aliasing, and hopefully a little easier to read. The new add_glyphs takes its parameters using keywords, so the invocation in the Makefile changed (as well as the path to the tool). emoji_aliases.txt was extended to add the flag aliases that were formerly defined in the old add_glyphs code. add_aliases was modified so the name of the alias file could be passed in as a parameter to the main utility function that reads the alias mapping from the file. The new code expects all glyphs used by the template GSUB tables to be named in the GlyphOrder table, but doesn't require the cmap and hmtx table to be fleshed out. The new code fleshes these out when it processes the sequences to add. As a result the cmap and hmtx tables in the template were truncated. The new code also sorts the GlyphOrder table when it extends/rebuilds it.
pull/93/head
parent
e5146fcecb
commit
616ed3a600
5
Makefile
5
Makefile
|
@ -28,7 +28,8 @@ ZOPFLIPNG = zopflipng
|
|||
OPTIPNG = optipng
|
||||
|
||||
EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
|
||||
ADD_GLYPHS = third_party/color_emoji/add_glyphs.py
|
||||
ADD_GLYPHS = add_glyphs.py
|
||||
ADD_GLYPHS_FLAGS = -a emoji_aliases.txt
|
||||
PUA_ADDER = map_pua_emoji.py
|
||||
VS_ADDER = add_vs_cmap.py # from nototools
|
||||
|
||||
|
@ -193,7 +194,7 @@ endif
|
|||
# Run make without -j if this happens.
|
||||
|
||||
%.ttx: %.ttx.tmpl $(ADD_GLYPHS) $(ALL_COMPRESSED_FILES)
|
||||
@python $(ADD_GLYPHS) "$<" "$@" "$(COMPRESSED_DIR)/emoji_u"
|
||||
@python $(ADD_GLYPHS) -f "$<" -o "$@" -d "$(COMPRESSED_DIR)" $(ADD_GLYPHS_FLAGS)
|
||||
|
||||
%.ttf: %.ttx
|
||||
@rm -f "$@"
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
<GlyphID id="1" name="null"/>
|
||||
<GlyphID id="2" name="nonmarkingreturn"/>
|
||||
<GlyphID id="3" name="space"/>
|
||||
<GlyphID id="4" name="u200D"/>
|
||||
<GlyphID id="4" name="uni200D"/>
|
||||
<GlyphID id="5" name="uE0030"/>
|
||||
<GlyphID id="6" name="uE0031"/>
|
||||
<GlyphID id="7" name="uE0032"/>
|
||||
|
@ -191,72 +191,6 @@
|
|||
<mtx name="null" width="0" lsb="0"/>
|
||||
<mtx name="nonmarkingreturn" width="2550" lsb="0"/>
|
||||
<mtx name="space" width="2550" lsb="0"/>
|
||||
<mtx name="u200D" width="0" lsb="0"/>
|
||||
<mtx name="uE0030" width="0" lsb="0"/>
|
||||
<mtx name="uE0031" width="0" lsb="0"/>
|
||||
<mtx name="uE0032" width="0" lsb="0"/>
|
||||
<mtx name="uE0033" width="0" lsb="0"/>
|
||||
<mtx name="uE0034" width="0" lsb="0"/>
|
||||
<mtx name="uE0035" width="0" lsb="0"/>
|
||||
<mtx name="uE0036" width="0" lsb="0"/>
|
||||
<mtx name="uE0037" width="0" lsb="0"/>
|
||||
<mtx name="uE0038" width="0" lsb="0"/>
|
||||
<mtx name="uE0039" width="0" lsb="0"/>
|
||||
<mtx name="uE0061" width="0" lsb="0"/>
|
||||
<mtx name="uE0062" width="0" lsb="0"/>
|
||||
<mtx name="uE0063" width="0" lsb="0"/>
|
||||
<mtx name="uE0064" width="0" lsb="0"/>
|
||||
<mtx name="uE0065" width="0" lsb="0"/>
|
||||
<mtx name="uE0066" width="0" lsb="0"/>
|
||||
<mtx name="uE0067" width="0" lsb="0"/>
|
||||
<mtx name="uE0068" width="0" lsb="0"/>
|
||||
<mtx name="uE0069" width="0" lsb="0"/>
|
||||
<mtx name="uE006A" width="0" lsb="0"/>
|
||||
<mtx name="uE006B" width="0" lsb="0"/>
|
||||
<mtx name="uE006C" width="0" lsb="0"/>
|
||||
<mtx name="uE006D" width="0" lsb="0"/>
|
||||
<mtx name="uE006E" width="0" lsb="0"/>
|
||||
<mtx name="uE006F" width="0" lsb="0"/>
|
||||
<mtx name="uE0070" width="0" lsb="0"/>
|
||||
<mtx name="uE0071" width="0" lsb="0"/>
|
||||
<mtx name="uE0072" width="0" lsb="0"/>
|
||||
<mtx name="uE0073" width="0" lsb="0"/>
|
||||
<mtx name="uE0074" width="0" lsb="0"/>
|
||||
<mtx name="uE0075" width="0" lsb="0"/>
|
||||
<mtx name="uE0076" width="0" lsb="0"/>
|
||||
<mtx name="uE0077" width="0" lsb="0"/>
|
||||
<mtx name="uE0078" width="0" lsb="0"/>
|
||||
<mtx name="uE0079" width="0" lsb="0"/>
|
||||
<mtx name="uE007A" width="0" lsb="0"/>
|
||||
<mtx name="uE007F" width="0" lsb="0"/>
|
||||
<mtx name="u1F3F4" width="0" lsb="0"/>
|
||||
<mtx name="uFE82B" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E6" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E7" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E8" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E9" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EA" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EB" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EC" width="0" lsb="0"/>
|
||||
<mtx name="u1F1ED" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EE" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EF" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F0" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F1" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F2" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F3" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F4" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F5" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F6" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F7" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F8" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F9" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FA" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FB" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FC" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FD" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FE" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FF" width="0" lsb="0"/>
|
||||
</hmtx>
|
||||
|
||||
<cmap>
|
||||
|
@ -265,72 +199,6 @@
|
|||
<map code="0x0" name="null"/><!-- <control> -->
|
||||
<map code="0xd" name="nonmarkingreturn"/>
|
||||
<map code="0x20" name="space"/>
|
||||
<map code="0x200d" name="u200D"/>
|
||||
<map code="0xE0030" name="uE0030"/>
|
||||
<map code="0xE0031" name="uE0031"/>
|
||||
<map code="0xE0032" name="uE0032"/>
|
||||
<map code="0xE0033" name="uE0033"/>
|
||||
<map code="0xE0034" name="uE0034"/>
|
||||
<map code="0xE0035" name="uE0035"/>
|
||||
<map code="0xE0036" name="uE0036"/>
|
||||
<map code="0xE0037" name="uE0037"/>
|
||||
<map code="0xE0038" name="uE0038"/>
|
||||
<map code="0xE0039" name="uE0039"/>
|
||||
<map code="0xE0061" name="uE0061"/>
|
||||
<map code="0xE0062" name="uE0062"/>
|
||||
<map code="0xE0063" name="uE0063"/>
|
||||
<map code="0xE0064" name="uE0064"/>
|
||||
<map code="0xE0065" name="uE0065"/>
|
||||
<map code="0xE0066" name="uE0066"/>
|
||||
<map code="0xE0067" name="uE0067"/>
|
||||
<map code="0xE0068" name="uE0068"/>
|
||||
<map code="0xE0069" name="uE0069"/>
|
||||
<map code="0xE006A" name="uE006A"/>
|
||||
<map code="0xE006B" name="uE006B"/>
|
||||
<map code="0xE006C" name="uE006C"/>
|
||||
<map code="0xE006D" name="uE006D"/>
|
||||
<map code="0xE006E" name="uE006E"/>
|
||||
<map code="0xE006F" name="uE006F"/>
|
||||
<map code="0xE0070" name="uE0070"/>
|
||||
<map code="0xE0071" name="uE0071"/>
|
||||
<map code="0xE0072" name="uE0072"/>
|
||||
<map code="0xE0073" name="uE0073"/>
|
||||
<map code="0xE0074" name="uE0074"/>
|
||||
<map code="0xE0075" name="uE0075"/>
|
||||
<map code="0xE0076" name="uE0076"/>
|
||||
<map code="0xE0077" name="uE0077"/>
|
||||
<map code="0xE0078" name="uE0078"/>
|
||||
<map code="0xE0079" name="uE0079"/>
|
||||
<map code="0xE007A" name="uE007A"/>
|
||||
<map code="0xE007F" name="uE007F"/>
|
||||
<map code="0x1F3F4" name="u1F3F4"/>
|
||||
<map code="0xFE82B" name="uFE82B"/>
|
||||
<map code="0x1F1E6" name="u1F1E6"/>
|
||||
<map code="0x1F1E7" name="u1F1E7"/>
|
||||
<map code="0x1F1E8" name="u1F1E8"/>
|
||||
<map code="0x1F1E9" name="u1F1E9"/>
|
||||
<map code="0x1F1EA" name="u1F1EA"/>
|
||||
<map code="0x1F1EB" name="u1F1EB"/>
|
||||
<map code="0x1F1EC" name="u1F1EC"/>
|
||||
<map code="0x1F1ED" name="u1F1ED"/>
|
||||
<map code="0x1F1EE" name="u1F1EE"/>
|
||||
<map code="0x1F1EF" name="u1F1EF"/>
|
||||
<map code="0x1F1F0" name="u1F1F0"/>
|
||||
<map code="0x1F1F1" name="u1F1F1"/>
|
||||
<map code="0x1F1F2" name="u1F1F2"/>
|
||||
<map code="0x1F1F3" name="u1F1F3"/>
|
||||
<map code="0x1F1F4" name="u1F1F4"/>
|
||||
<map code="0x1F1F5" name="u1F1F5"/>
|
||||
<map code="0x1F1F6" name="u1F1F6"/>
|
||||
<map code="0x1F1F7" name="u1F1F7"/>
|
||||
<map code="0x1F1F8" name="u1F1F8"/>
|
||||
<map code="0x1F1F9" name="u1F1F9"/>
|
||||
<map code="0x1F1FA" name="u1F1FA"/>
|
||||
<map code="0x1F1FB" name="u1F1FB"/>
|
||||
<map code="0x1F1FC" name="u1F1FC"/>
|
||||
<map code="0x1F1FD" name="u1F1FD"/>
|
||||
<map code="0x1F1FE" name="u1F1FE"/>
|
||||
<map code="0x1F1FF" name="u1F1FF"/>
|
||||
</cmap_format_12>
|
||||
</cmap>
|
||||
|
||||
|
|
|
@ -36,9 +36,14 @@ def seq_to_str(seq):
|
|||
|
||||
|
||||
def read_emoji_aliases():
|
||||
alias_path = path.join(DATA_ROOT, 'emoji_aliases.txt')
|
||||
return read_emoji_aliases(alias_path)
|
||||
|
||||
|
||||
def read_emoji_aliases(filename):
|
||||
result = {}
|
||||
|
||||
with open(path.join(DATA_ROOT, 'emoji_aliases.txt'), 'r') as f:
|
||||
with open(filename, 'r') as f:
|
||||
for line in f:
|
||||
ix = line.find('#')
|
||||
if (ix > -1):
|
||||
|
@ -47,8 +52,8 @@ def read_emoji_aliases():
|
|||
if not line:
|
||||
continue
|
||||
als, trg = (s.strip() for s in line.split(';'))
|
||||
als_seq = tuple([int(x, 16) for x in als.split('_')])
|
||||
try:
|
||||
als_seq = tuple([int(x, 16) for x in als.split('_')])
|
||||
trg_seq = tuple([int(x, 16) for x in trg.split('_')])
|
||||
except:
|
||||
print 'cannot process alias %s -> %s' % (als, trg)
|
||||
|
|
|
@ -0,0 +1,403 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Extend a ttx file with additional data.
|
||||
|
||||
Takes a ttx file and one or more directories containing image files named
|
||||
after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
|
||||
tables in the source ttx file based on these sequences, and writes out a new
|
||||
ttx file.
|
||||
|
||||
This can also apply aliases from an alias file."""
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import os
|
||||
from os import path
|
||||
import re
|
||||
import sys
|
||||
|
||||
from fontTools import ttx
|
||||
from fontTools.ttLib.tables import otTables
|
||||
|
||||
import add_emoji_gsub
|
||||
import add_aliases
|
||||
|
||||
sys.path.append(
|
||||
path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
|
||||
from png import PNG
|
||||
|
||||
|
||||
def get_seq_to_file(image_dir, prefix, suffix):
  """Return a mapping from codepoint sequences to files in the given directory,
  for files that match the prefix and suffix.  File names with this prefix and
  suffix should consist of codepoints in hex separated by underscore.  'fe0f'
  (the codepoint of the emoji presentation variation selector) is stripped from
  the sequence.

  Raises Exception on an unparseable name, an out-of-range codepoint, or a
  duplicate sequence.
  """
  start = len(prefix)
  limit = -len(suffix)
  seq_to_file = {}
  for name in os.listdir(image_dir):
    if not (name.startswith(prefix) and name.endswith(suffix)):
      continue
    try:
      cps = [int(s, 16) for s in name[start:limit].split('_')]
      seq = tuple(cp for cp in cps if cp != 0xfe0f)
    except ValueError:
      # only catch hex-parse failures; a bare except would mask real errors
      raise Exception('could not parse "%s"' % name)
    for cp in cps:
      if not (0 <= cp <= 0x10ffff):
        raise Exception('bad codepoint(s) in "%s"' % name)
    if seq in seq_to_file:
      raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir))
    seq_to_file[seq] = path.join(image_dir, name)
  return seq_to_file
|
||||
|
||||
|
||||
def collect_seq_to_file(image_dirs, prefix, suffix):
  """Build one sequence-to-file mapping across several directories.

  Directories are processed in order; when the same sequence appears in more
  than one directory, the file from the later directory wins.
  """
  combined = {}
  for directory in image_dirs:
    combined.update(get_seq_to_file(directory, prefix, suffix))
  return combined
|
||||
|
||||
|
||||
def remap_values(seq_to_file, map_fn):
  """Return a copy of the dict with each value replaced by map_fn(value)."""
  # items() rather than the Python-2-only iteritems(), for portability
  return {k: map_fn(v) for k, v in seq_to_file.items()}
|
||||
|
||||
|
||||
def get_png_file_to_advance_mapper(lineheight):
  """Return a function mapping a png file name to an advance.

  The advance is the image width scaled as if the image height were
  lineheight, rounded to the nearest integer.
  """
  def advance_for_file(filename):
    width, height = PNG(filename).get_size()
    return int(round(float(lineheight) * width / height))
  return advance_for_file
|
||||
|
||||
|
||||
def cp_name(cp):
  """return uniXXXX or uXXXXX(X) as a name for the glyph mapped to this cp."""
  prefix = 'uni' if cp <= 0xffff else 'u'
  return '%s%04X' % (prefix, cp)
|
||||
|
||||
|
||||
def seq_name(seq):
  """Sequences of length one get the cp_name.  Others start with 'u' followed
  by two or more 4-to-6-digit hex strings separated by underscore."""
  if len(seq) != 1:
    return 'u' + '_'.join('%04X' % cp for cp in seq)
  return cp_name(seq[0])
|
||||
|
||||
|
||||
def collect_cps(seqs):
  """Return the set of all codepoints appearing in any of the sequences."""
  return set().union(*seqs)
|
||||
|
||||
|
||||
def get_glyphorder_cps_and_truncate(glyphOrder):
  """Scan glyphOrder for names that correspond to a single codepoint using the
  'u(ni)XXXXXX' syntax.  All names that don't match are moved to the front of
  the glyphOrder list in their original order, and the list is truncated.  The
  ones that do match are returned as a set of codepoints."""
  single_cp_name = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
  cps = set()
  kept_names = []
  for name in glyphOrder:
    match = single_cp_name.match(name)
    if match:
      cps.add(int(match.group(1), 16))
    else:
      kept_names.append(name)
  # slice-assign so the caller's list object itself is truncated in place
  glyphOrder[:] = kept_names
  return cps
|
||||
|
||||
|
||||
def get_all_seqs(font, seq_to_advance):
  """Copy the sequences from seq_to_advance and extend them with single-
  codepoint sequences from the GlyphOrder table as well as those internal
  to sequences in seq_to_advance.  Reduces the GlyphOrder table."""
  all_seqs = set(seq_to_advance)
  # collect_cps also picks up cps that only occur inside longer sequences
  cps = collect_cps(all_seqs)
  glyph_order = font.getGlyphOrder()
  # pull single-cp names out of glyphOrder, truncating it to the remainder
  cps |= get_glyphorder_cps_and_truncate(glyph_order)
  # every cp seen anywhere becomes a single-codepoint sequence
  all_seqs.update((cp,) for cp in cps)
  return all_seqs
|
||||
|
||||
|
||||
def get_font_cmap(font):
  """Return the first cmap in the font, we assume it exists and is a unicode
  cmap."""
  first_subtable = font['cmap'].tables[0]
  return first_subtable.cmap
|
||||
|
||||
|
||||
def add_glyph_data(font, seqs, seq_to_advance):
  """Add hmtx and GlyphOrder data for all sequences in seqs, and ensures there's
  a cmap entry for each single-codepoint sequence.  Seqs not in seq_to_advance
  will get a zero advance."""

  # We allow the template cmap to omit mappings for single-codepoint glyphs
  # defined in the template's GlyphOrder table.  Similarly, the hmtx table can
  # omit advances.  We assume glyphs named 'uniXXXX' or 'uXXXXX(X)' in the
  # GlyphOrder table correspond to codepoints based on the name; we don't
  # attempt to handle other types of names and these must occur in the cmap and
  # hmtx tables in the template.
  #
  # seq_to_advance maps sequences (including single codepoints) to advances.
  # All codepoints in these sequences will be added to the cmap.  Some cps
  # in these sequences have no corresponding single-codepoint sequence, they
  # will also get added.
  #
  # The added codepoints have no advance information, so will get a zero
  # advance.

  cmap = get_font_cmap(font)
  hmtx = font['hmtx'].metrics

  # We don't expect sequences to be in the glyphOrder, since we removed all the
  # single-cp sequences from it and don't expect it to already contain names
  # corresponding to multiple-cp sequencess.  But just in case, we use
  # reverseGlyphMap to avoid duplicating names accidentally.

  updatedGlyphOrder = False
  reverseGlyphMap = font.getReverseGlyphMap()

  # Order the glyphs by grouping all the single-codepoint sequences first,
  # then order by sequence so that related sequences are together.  We group
  # by single-codepoint sequence first in order to keep these glyphs together--
  # they're used in the coverage tables for some of the substitutions, and
  # those tables can be more compact this way.
  for seq in sorted(seqs, key=lambda s: (0 if len(s) == 1 else 1, s)):
    name = seq_name(seq)
    if len(seq) == 1:
      cmap[seq[0]] = name
    # sequences not present in seq_to_advance default to a zero advance
    advance = seq_to_advance.get(seq, 0)
    hmtx[name] = [advance, 0]
    if name not in reverseGlyphMap:
      font.glyphOrder.append(name)
      updatedGlyphOrder=True

  if updatedGlyphOrder:
    # getReverseGlyphMap cached this; drop the cache so it gets rebuilt.
    # NOTE(review): relies on a private fontTools attribute
    # ('_reverseGlyphOrderDict') — confirm it exists in the fontTools in use.
    delattr(font, '_reverseGlyphOrderDict')
|
||||
|
||||
|
||||
def add_aliases_to_cmap(font, aliases):
  """Some aliases might map a single codepoint to some other sequence.  These
  should map directly to the glyph for that sequence in the cmap.  (Others will
  map via GSUB).
  """
  if not aliases:
    return

  single_cp_sources = [seq for seq in aliases if len(seq) == 1]
  if not single_cp_sources:
    return

  cmap = get_font_cmap(font)
  for source in single_cp_sources:
    # point the aliased codepoint straight at the target sequence's glyph
    cmap[source[0]] = seq_name(aliases[source])
|
||||
|
||||
|
||||
def get_rtl_seq(seq):
  """Return the rtl variant of the sequence, if it has one, else the empty
  sequence.
  """
  # Sequences with ZWJ or TAG_END in them will reflect.  Fitzpatrick modifiers
  # however do not, so if we reflect we make a pass to swap them back into their
  # logical order.

  ZWJ = 0x200d
  TAG_END = 0xe007f
  def is_fitzpatrick(cp):
    return 0x1f3fb <= cp <= 0x1f3ff

  if not (ZWJ in seq or TAG_END in seq):
    return ()

  rev_seq = list(seq)
  rev_seq.reverse()
  # range rather than the Python-2-only xrange; identical behavior here
  for i in range(1, len(rev_seq)):
    if is_fitzpatrick(rev_seq[i-1]):
      # swap the modifier back after the emoji it logically follows
      rev_seq[i-1], rev_seq[i] = rev_seq[i], rev_seq[i-1]
  return tuple(rev_seq)
|
||||
|
||||
|
||||
def get_gsub_ligature_lookup(font):
  """If the font does not have a GSUB table, create one with a ligature
  substitution lookup.  If it does, ensure the first lookup is a properly
  initialized ligature substitution lookup.  Return the lookup."""

  # The template might include more lookups after lookup 0, if it has a
  # GSUB table.
  if 'GSUB' not in font:
    ligature_subst = otTables.LigatureSubst()
    ligature_subst.ligatures = {}

    lookup = otTables.Lookup()
    lookup.LookupType = 4  # 4 == GSUB ligature substitution
    lookup.LookupFlag = 0
    lookup.SubTableCount = 1
    lookup.SubTable = [ligature_subst]

    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
  else:
    lookup = font['GSUB'].table.LookupList.Lookup[0]
    assert lookup.LookupFlag == 0

    # importXML doesn't fully init GSUB structures, so help it out
    if not hasattr(lookup, 'LookupType'):
      st = lookup.SubTable[0]
      assert st.LookupType == 4
      setattr(lookup, 'LookupType', 4)

      if not hasattr(st, 'ligatures'):
        setattr(st, 'ligatures', {})

  return lookup
|
||||
|
||||
|
||||
def add_ligature_sequences(font, seqs, aliases):
  """Add ligature substitutions to the font's GSUB lookup for every
  multi-codepoint sequence in seqs, and for every multi-codepoint alias
  source (mapped to its target's glyph).  RTL variants are added too."""

  seq_to_target_name = {
      seq: seq_name(seq) for seq in seqs if len(seq) > 1}
  if aliases:
    seq_to_target_name.update({
        seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1})
  if not seq_to_target_name:
    return

  # items() rather than the Python-2-only iteritems(), for portability
  rtl_seq_to_target_name = {
      get_rtl_seq(seq): name for seq, name in seq_to_target_name.items()}
  seq_to_target_name.update(rtl_seq_to_target_name)
  # sequences that don't have rtl variants get mapped to the empty sequence,
  # delete it.
  if () in seq_to_target_name:
    del seq_to_target_name[()]

  # organize by first codepoint in sequence
  keyed_ligatures = collections.defaultdict(list)
  for t in seq_to_target_name.items():
    first_cp = t[0][0]
    keyed_ligatures[first_cp].append(t)

  def add_ligature(lookup, cmap, seq, name):
    # The sequences consist of codepoints, but the entries in the ligature
    # table are glyph names.  Aliasing can give single codepoints names based
    # on sequences (e.g. 'guardsman' with 'male guardsman') so we map the
    # codepoints through the cmap to get the glyph names.
    glyph_names = [cmap[cp] for cp in seq]

    lig = otTables.Ligature()
    lig.CompCount = len(seq)
    lig.Component = glyph_names[1:]
    lig.LigGlyph = name

    ligatures = lookup.SubTable[0].ligatures
    first_name = glyph_names[0]
    try:
      ligatures[first_name].append(lig)
    except KeyError:
      ligatures[first_name] = [lig]

  lookup = get_gsub_ligature_lookup(font)
  cmap = get_font_cmap(font)
  for first_cp in sorted(keyed_ligatures):
    pairs = keyed_ligatures[first_cp]

    # Sort longest first, this ensures longer sequences with common prefixes
    # are handled before shorter ones.  The secondary sort is a standard
    # sort on the codepoints in the sequence.
    pairs.sort(key=lambda pair: (-len(pair[0]), pair[0]))
    for seq, name in pairs:
      add_ligature(lookup, cmap, seq, name)
|
||||
|
||||
|
||||
def update_font_data(font, seq_to_advance, aliases):
  """Update the font's cmap, hmtx, GSUB, and GlyphOrder tables."""
  # Order matters: get_all_seqs truncates the GlyphOrder table, and
  # add_glyph_data rebuilds it (and cmap/hmtx) before aliases are layered
  # on top and the ligature substitutions are generated.
  seqs = get_all_seqs(font, seq_to_advance)
  add_glyph_data(font, seqs, seq_to_advance)
  add_aliases_to_cmap(font, aliases)
  add_ligature_sequences(font, seqs, aliases)
|
||||
|
||||
|
||||
def apply_aliases(seq_dict, aliases):
  """Aliases is a mapping from sequence to replacement sequence.  We can use
  an alias if the target is a key in the dictionary.  Furthermore, if the
  source is a key in the dictionary, we can delete it.  This updates the
  dictionary and returns the usable aliases."""
  usable_aliases = {}
  # items() rather than the Python-2-only iteritems(); we mutate seq_dict,
  # never aliases, so iterating aliases directly is safe.
  for src, dst in aliases.items():
    if dst in seq_dict:
      usable_aliases[src] = dst
      if src in seq_dict:
        del seq_dict[src]
  return usable_aliases
|
||||
|
||||
|
||||
def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file):
  """Read the template ttx from in_file, add glyph data for the images found
  in image_dirs (applying aliases from aliases_file if given), and write the
  result to out_file.  Only '.png' images are supported."""
  if ext != '.png':
    raise Exception('extension "%s" not supported' % ext)

  seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
  if not seq_to_file:
    raise ValueError(
        'no sequences with prefix "%s" and extension "%s" in %s' % (
            prefix, ext, ', '.join(image_dirs)))

  aliases = None
  if aliases_file:
    aliases = add_aliases.read_emoji_aliases(aliases_file)
    # keep only aliases whose target has an image, dropping sources that
    # already have their own image
    aliases = apply_aliases(seq_to_file, aliases)

  font = ttx.TTFont()
  font.importXML(in_file)

  # advances are proportional to image aspect ratio, scaled by the line height
  lineheight = font['hhea'].ascent - font['hhea'].descent
  map_fn = get_png_file_to_advance_mapper(lineheight)
  seq_to_advance = remap_values(seq_to_file, map_fn)

  update_font_data(font, seq_to_advance, aliases)

  font.saveXML(out_file)
|
||||
|
||||
|
||||
def main():
  """Parse command-line arguments and run update_ttx."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-f', '--in_file', help='ttx input file', metavar='file', required=True)
  parser.add_argument(
      '-o', '--out_file', help='ttx output file', metavar='file', required=True)
  parser.add_argument(
      '-d', '--image_dirs', help='directories containing image files',
      nargs='+', metavar='dir', required=True)
  parser.add_argument(
      '-p', '--prefix', help='file prefix (default "emoji_u")',
      metavar='pfx', default='emoji_u')
  parser.add_argument(
      # fixed: help text was missing its closing parenthesis
      '-e', '--ext', help='file extension (default ".png", currently only '
      '".png" is supported)', metavar='ext', default='.png')
  parser.add_argument(
      '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
      nargs='?', metavar='file')
  args = parser.parse_args()

  update_ttx(
      args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
      args.aliases)
|
|
@ -1,4 +1,6 @@
|
|||
# alias table
|
||||
# from;to
|
||||
# the 'from' sequence should be represented by the image for the 'to' sequence
|
||||
# 'fe0f' is not in these sequences
|
||||
1f3c3;1f3c3_200d_2642 # RUNNER -> man running
|
||||
1f3c3_1f3fb;1f3c3_1f3fb_200d_2642 # light skin tone
|
||||
|
@ -192,3 +194,10 @@
|
|||
26f9_1f3fe;26f9_1f3fe_200d_2642 # medium-dark skin tone
|
||||
26f9_1f3ff;26f9_1f3ff_200d_2642 # dark skin tone
|
||||
fe82b;unknown_flag # no name -> no name
|
||||
|
||||
# flag aliases
|
||||
1f1e7_1f1fb;1f1f3_1f1f4 # BV -> NO
|
||||
1f1e8_1f1f5;1f1eb_1f1f7 # CP -> FR
|
||||
1f1ed_1f1f2;1f1e6_1f1fa # HM -> AU
|
||||
1f1f8_1f1ef;1f1f3_1f1f4 # SJ -> NO
|
||||
1f1fa_1f1f2;1f1fa_1f1f8 # UM -> US
|
||||
|
|
|
@ -1,282 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import collections, glob, os, re, sys
|
||||
from fontTools import ttx
|
||||
from fontTools.ttLib.tables import otTables
|
||||
from png import PNG
|
||||
|
||||
# PUA character for unknown flag. This avoids the legacy emoji pua values, but
|
||||
# is in the same area.
|
||||
UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
|
||||
|
||||
sys.path.append(
|
||||
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
|
||||
import add_emoji_gsub
|
||||
|
||||
|
||||
def is_vs(cp):
  """True if cp is a variation selector (U+FE00..U+FE0F)."""
  return 0xfe00 <= cp <= 0xfe0f
|
||||
|
||||
def is_fitzpatrick(gname):
  """True if glyph name gname ('uXXXXX') is a Fitzpatrick skin-tone modifier
  (U+1F3FB..U+1F3FF)."""
  value = int(gname[1:], 16)
  return 0x1f3fb <= value <= 0x1f3ff
|
||||
|
||||
def codes_to_string(codes):
  """Convert a '_'-separated string of hex codepoints into a unicode string.

  NOTE(review): Python 2 only (uses unichr); a narrow build or bad hex raises
  ValueError.
  """
  if "_" in codes:
    pieces = codes.split ("_")
    string = "".join ([unichr (int (code, 16)) for code in pieces])
  else:
    try:
      string = unichr (int (codes, 16))
    except:
      # re-raise with the offending text for easier debugging
      raise ValueError("uh-oh, no unichr for '%s'" % codes)
  return string
|
||||
|
||||
|
||||
def glyph_sequence(string):
  """Return the list of glyph names forming a ligature for string."""
  # sequence of names of glyphs that form a ligature
  # variation selectors are stripped
  return ["u%04X" % ord(char) for char in string if not is_vs(ord(char))]
|
||||
|
||||
|
||||
def glyph_name(string):
  """Return the name of the ligature glyph for string."""
  # name of a ligature
  # includes variation selectors when present
  return "_".join (["u%04X" % ord (char) for char in string])
|
||||
|
||||
|
||||
def add_ligature (font, seq, name):
  """Add a single ligature (glyph-name sequence seq -> glyph name) to the
  first GSUB ligature lookup, creating the GSUB table if it is missing."""
  if 'GSUB' not in font:
    ligature_subst = otTables.LigatureSubst()
    ligature_subst.ligatures = {}

    lookup = otTables.Lookup()
    lookup.LookupType = 4  # 4 == GSUB ligature substitution
    lookup.LookupFlag = 0
    lookup.SubTableCount = 1
    lookup.SubTable = [ligature_subst]

    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
  else:
    lookup = font['GSUB'].table.LookupList.Lookup[0]
    # assert lookup.LookupType == 4
    assert lookup.LookupFlag == 0

    # importXML doesn't fully init GSUB structures, so help it out
    if not hasattr(lookup, 'LookupType'):
      st = lookup.SubTable[0]
      assert st.LookupType == 4
      setattr(lookup, 'LookupType', 4)

      if not hasattr(st, 'ligatures'):
        setattr(st, 'ligatures', {})

  ligatures = lookup.SubTable[0].ligatures

  lig = otTables.Ligature()
  lig.CompCount = len(seq)
  lig.Component = seq[1:]
  lig.LigGlyph = name

  # ligatures are keyed by the first glyph name of the sequence
  first = seq[0]
  try:
    ligatures[first].append(lig)
  except KeyError:
    ligatures[first] = [lig]
|
||||
|
||||
|
||||
# Ligating sequences for emoji that already have a defined codepoint,
|
||||
# to match the sequences for the related emoji with no codepoint.
|
||||
# The key is the name of the glyph with the codepoint, the value is the
|
||||
# name of the sequence in filename form.
|
||||
EXTRA_SEQUENCES = {
|
||||
'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB
|
||||
'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM
|
||||
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
|
||||
}
|
||||
|
||||
# Flag aliases - from: to
|
||||
FLAG_ALIASES = {
|
||||
'BV': 'NO',
|
||||
'CP': 'FR',
|
||||
'HM': 'AU',
|
||||
'SJ': 'NO',
|
||||
'UM': 'US',
|
||||
}
|
||||
|
||||
if len (sys.argv) < 4:
|
||||
print >>sys.stderr, """
|
||||
Usage:
|
||||
|
||||
add_glyphs.py font.ttx out-font.ttx strike-prefix...
|
||||
|
||||
This will search for files that have strike-prefix followed by one or more
|
||||
hex numbers (separated by underscore if more than one), and end in ".png".
|
||||
For example, if strike-prefix is "icons/u", then files with names like
|
||||
"icons/u1F4A9.png" or "icons/u1F1EF_1F1F5.png" will be loaded. The script
|
||||
then adds cmap, htmx, and potentially GSUB entries for the Unicode
|
||||
characters found. The advance width will be chosen based on image aspect
|
||||
ratio. If Unicode values outside the BMP are desired, the existing cmap
|
||||
table should be of the appropriate (format 12) type. Only the first cmap
|
||||
table and the first GSUB lookup (if existing) are modified.
|
||||
"""
|
||||
sys.exit (1)
|
||||
|
||||
in_file = sys.argv[1]
|
||||
out_file = sys.argv[2]
|
||||
img_prefixen = sys.argv[3:]
|
||||
del sys.argv
|
||||
|
||||
font = ttx.TTFont()
|
||||
font.importXML (in_file)
|
||||
|
||||
img_files = {}
|
||||
for img_prefix in img_prefixen:
|
||||
glb = "%s*.png" % img_prefix
|
||||
print "Looking for images matching '%s'." % glb
|
||||
for img_file in glob.glob (glb):
|
||||
codes = img_file[len (img_prefix):-4]
|
||||
u = codes_to_string(codes)
|
||||
if u in img_files:
|
||||
print 'overwriting %s with %s' % (img_files[u], img_file)
|
||||
img_files[u] = img_file
|
||||
if not img_files:
|
||||
raise Exception ("No image files found in '%s'." % glb)
|
||||
|
||||
ascent = font['hhea'].ascent
|
||||
descent = -font['hhea'].descent
|
||||
|
||||
g = font['GlyphOrder'].glyphOrder
|
||||
c = font['cmap'].tables[0].cmap
|
||||
h = font['hmtx'].metrics
|
||||
|
||||
# Sort the characters by length, then codepoint, to keep the order stable
|
||||
# and avoid adding empty glyphs for multi-character glyphs if any piece is
|
||||
# also included.
|
||||
img_pairs = img_files.items ()
|
||||
img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))
|
||||
|
||||
glyph_names = set()
|
||||
ligatures = {}
|
||||
|
||||
def add_lig_sequence(ligatures, seq, n):
  """Record glyph-name sequence seq -> glyph name n in ligatures, adding an
  RTL variant when the sequence contains ZWJ or a tag end."""
  # We have emoji sequences using regional indicator symbols, tags,
  # ZWJ, fitzpatrick modifiers, and combinations of ZWJ and fitzpatrick
  # modifiers.  Currently, Harfbuzz special-cases the fitzpatrick
  # modifiers to treat them as combining marks instead of as Other
  # Neutral, which unicode says they are, and processes them
  # in visual order (at least in some circumstances).  So to handle
  # emoji sequences in an RTL context we need GSUB sequences that match
  # this order.
  # Regional indicator symbols are LTR, and emoji+fitzpatrick are
  # effectively LTR, so we only reorder sequences with ZWJ or tags.  If
  # however the ZWJ sequence has fitzpatrick modifiers, those need to
  # still follow the emoji they logically follow, so simply reversing the
  # sequence doesn't work.  This code assumes the lig sequence is valid.
  tseq = tuple(seq)
  if tseq in ligatures:
    print 'lig sequence %s, replace %s with %s' % (
        tseq, ligatures[tseq], n)
  ligatures[tseq] = n
  if 'u200D' in seq or 'uE007F' in seq:
    # build the RTL (visually reversed) variant
    rev_seq = seq[:]
    rev_seq.reverse()
    # swap fitzpatrick modifiers back after the emoji they modify
    for i in xrange(1, len(rev_seq)):
      if is_fitzpatrick(rev_seq[i - 1]):
        tmp = rev_seq[i]
        rev_seq[i] = rev_seq[i-1]
        rev_seq[i-1] = tmp

    trseq = tuple(rev_seq)
    # if trseq in ligatures:
    #   print 'rev lig sequence %s, replace %s with %s' % (
    #     trseq, ligatures[trseq], n)
    ligatures[trseq] = n
|
||||
|
||||
|
||||
for (u, filename) in img_pairs:
|
||||
n = glyph_name (u)
|
||||
glyph_names.add(n)
|
||||
# print "Adding glyph for %s" % n
|
||||
|
||||
g.append (n)
|
||||
for char in u:
|
||||
cp = ord(char)
|
||||
if cp not in c and not is_vs(cp):
|
||||
name = glyph_name (char)
|
||||
if name not in glyph_names:
|
||||
g.append(name)
|
||||
c[cp] = name
|
||||
if len (u) > 1:
|
||||
h[name] = [0, 0]
|
||||
(img_width, img_height) = PNG (filename).get_size ()
|
||||
advance = int (round ((float (ascent+descent) * img_width / img_height)))
|
||||
h[n] = [advance, 0]
|
||||
if len (u) > 1:
|
||||
seq = glyph_sequence(u)
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
|
||||
for n in EXTRA_SEQUENCES:
|
||||
if n in glyph_names:
|
||||
seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
else:
|
||||
print 'extras: no glyph for %s' % n
|
||||
|
||||
# Add missing regional indicator sequences and flag aliases
|
||||
# if we support any.
|
||||
regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200))
|
||||
|
||||
def _is_flag_sequence(t):
|
||||
return len(t) == 2 and t[0] in regional_names and t[1] in regional_names
|
||||
|
||||
have_flags = False
|
||||
for k in ligatures:
|
||||
if _is_flag_sequence(k):
|
||||
have_flags = True
|
||||
break
|
||||
|
||||
if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
|
||||
raise ValueError(
|
||||
'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
|
||||
|
||||
# sigh, too many separate files with the same code.
|
||||
# copied from add_emoji_gsub.
|
||||
def _reg_indicator(letter):
|
||||
assert 'A' <= letter <= 'Z'
|
||||
return 0x1F1E6 + ord(letter) - ord('A')
|
||||
|
||||
def _reg_lig_sequence(flag_name):
|
||||
"""Returns a tuple of strings naming the codepoints that form the ligature."""
|
||||
assert len(flag_name) == 2
|
||||
return tuple('u%X' % _reg_indicator(cp) for cp in flag_name)
|
||||
|
||||
def _reg_lig_name(flag_name):
|
||||
"""Returns a glyph name for the flag name."""
|
||||
return '_'.join(_reg_lig_sequence(flag_name))
|
||||
|
||||
if have_flags:
|
||||
print 'Adding flag aliases.'
|
||||
for flag_from, flag_to in FLAG_ALIASES.iteritems():
|
||||
seq = _reg_lig_sequence(flag_from)
|
||||
name = _reg_lig_name(flag_to)
|
||||
add_lig_sequence(ligatures, seq, name)
|
||||
|
||||
keyed_ligatures = collections.defaultdict(list)
|
||||
for k, v in ligatures.iteritems():
|
||||
first = k[0]
|
||||
keyed_ligatures[first].append((k, v))
|
||||
|
||||
for base in sorted(keyed_ligatures):
|
||||
pairs = keyed_ligatures[base]
|
||||
# print 'base %s has %d sequences' % (base, len(pairs))
|
||||
|
||||
# Sort longest first, this ensures longer sequences with common prefixes
|
||||
# are handled before shorter ones. It would be better to have multiple
|
||||
# lookups, most likely.
|
||||
pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True)
|
||||
for seq, name in pairs:
|
||||
# print seq, name
|
||||
add_ligature(font, seq, name)
|
||||
|
||||
font.saveXML (out_file)
|
Loading…
Reference in New Issue