Merge remote-tracking branch 'upstream/master'
commit
f03c96c57a
5
Makefile
5
Makefile
|
@ -28,7 +28,8 @@ ZOPFLIPNG = zopflipng
|
|||
OPTIPNG = optipng
|
||||
|
||||
EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
|
||||
ADD_GLYPHS = third_party/color_emoji/add_glyphs.py
|
||||
ADD_GLYPHS = add_glyphs.py
|
||||
ADD_GLYPHS_FLAGS = -a emoji_aliases.txt
|
||||
PUA_ADDER = map_pua_emoji.py
|
||||
VS_ADDER = add_vs_cmap.py # from nototools
|
||||
|
||||
|
@ -193,7 +194,7 @@ endif
|
|||
# Run make without -j if this happens.
|
||||
|
||||
%.ttx: %.ttx.tmpl $(ADD_GLYPHS) $(ALL_COMPRESSED_FILES)
|
||||
@python $(ADD_GLYPHS) "$<" "$@" "$(COMPRESSED_DIR)/emoji_u"
|
||||
@python $(ADD_GLYPHS) -f "$<" -o "$@" -d "$(COMPRESSED_DIR)" $(ADD_GLYPHS_FLAGS)
|
||||
|
||||
%.ttf: %.ttx
|
||||
@rm -f "$@"
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
<GlyphID id="1" name="null"/>
|
||||
<GlyphID id="2" name="nonmarkingreturn"/>
|
||||
<GlyphID id="3" name="space"/>
|
||||
<GlyphID id="4" name="u200D"/>
|
||||
<GlyphID id="4" name="uni200D"/>
|
||||
<GlyphID id="5" name="uE0030"/>
|
||||
<GlyphID id="6" name="uE0031"/>
|
||||
<GlyphID id="7" name="uE0032"/>
|
||||
|
@ -191,72 +191,6 @@
|
|||
<mtx name="null" width="0" lsb="0"/>
|
||||
<mtx name="nonmarkingreturn" width="2550" lsb="0"/>
|
||||
<mtx name="space" width="2550" lsb="0"/>
|
||||
<mtx name="u200D" width="0" lsb="0"/>
|
||||
<mtx name="uE0030" width="0" lsb="0"/>
|
||||
<mtx name="uE0031" width="0" lsb="0"/>
|
||||
<mtx name="uE0032" width="0" lsb="0"/>
|
||||
<mtx name="uE0033" width="0" lsb="0"/>
|
||||
<mtx name="uE0034" width="0" lsb="0"/>
|
||||
<mtx name="uE0035" width="0" lsb="0"/>
|
||||
<mtx name="uE0036" width="0" lsb="0"/>
|
||||
<mtx name="uE0037" width="0" lsb="0"/>
|
||||
<mtx name="uE0038" width="0" lsb="0"/>
|
||||
<mtx name="uE0039" width="0" lsb="0"/>
|
||||
<mtx name="uE0061" width="0" lsb="0"/>
|
||||
<mtx name="uE0062" width="0" lsb="0"/>
|
||||
<mtx name="uE0063" width="0" lsb="0"/>
|
||||
<mtx name="uE0064" width="0" lsb="0"/>
|
||||
<mtx name="uE0065" width="0" lsb="0"/>
|
||||
<mtx name="uE0066" width="0" lsb="0"/>
|
||||
<mtx name="uE0067" width="0" lsb="0"/>
|
||||
<mtx name="uE0068" width="0" lsb="0"/>
|
||||
<mtx name="uE0069" width="0" lsb="0"/>
|
||||
<mtx name="uE006A" width="0" lsb="0"/>
|
||||
<mtx name="uE006B" width="0" lsb="0"/>
|
||||
<mtx name="uE006C" width="0" lsb="0"/>
|
||||
<mtx name="uE006D" width="0" lsb="0"/>
|
||||
<mtx name="uE006E" width="0" lsb="0"/>
|
||||
<mtx name="uE006F" width="0" lsb="0"/>
|
||||
<mtx name="uE0070" width="0" lsb="0"/>
|
||||
<mtx name="uE0071" width="0" lsb="0"/>
|
||||
<mtx name="uE0072" width="0" lsb="0"/>
|
||||
<mtx name="uE0073" width="0" lsb="0"/>
|
||||
<mtx name="uE0074" width="0" lsb="0"/>
|
||||
<mtx name="uE0075" width="0" lsb="0"/>
|
||||
<mtx name="uE0076" width="0" lsb="0"/>
|
||||
<mtx name="uE0077" width="0" lsb="0"/>
|
||||
<mtx name="uE0078" width="0" lsb="0"/>
|
||||
<mtx name="uE0079" width="0" lsb="0"/>
|
||||
<mtx name="uE007A" width="0" lsb="0"/>
|
||||
<mtx name="uE007F" width="0" lsb="0"/>
|
||||
<mtx name="u1F3F4" width="0" lsb="0"/>
|
||||
<mtx name="uFE82B" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E6" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E7" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E8" width="0" lsb="0"/>
|
||||
<mtx name="u1F1E9" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EA" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EB" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EC" width="0" lsb="0"/>
|
||||
<mtx name="u1F1ED" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EE" width="0" lsb="0"/>
|
||||
<mtx name="u1F1EF" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F0" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F1" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F2" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F3" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F4" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F5" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F6" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F7" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F8" width="0" lsb="0"/>
|
||||
<mtx name="u1F1F9" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FA" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FB" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FC" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FD" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FE" width="0" lsb="0"/>
|
||||
<mtx name="u1F1FF" width="0" lsb="0"/>
|
||||
</hmtx>
|
||||
|
||||
<cmap>
|
||||
|
@ -265,72 +199,6 @@
|
|||
<map code="0x0" name="null"/><!-- <control> -->
|
||||
<map code="0xd" name="nonmarkingreturn"/>
|
||||
<map code="0x20" name="space"/>
|
||||
<map code="0x200d" name="u200D"/>
|
||||
<map code="0xE0030" name="uE0030"/>
|
||||
<map code="0xE0031" name="uE0031"/>
|
||||
<map code="0xE0032" name="uE0032"/>
|
||||
<map code="0xE0033" name="uE0033"/>
|
||||
<map code="0xE0034" name="uE0034"/>
|
||||
<map code="0xE0035" name="uE0035"/>
|
||||
<map code="0xE0036" name="uE0036"/>
|
||||
<map code="0xE0037" name="uE0037"/>
|
||||
<map code="0xE0038" name="uE0038"/>
|
||||
<map code="0xE0039" name="uE0039"/>
|
||||
<map code="0xE0061" name="uE0061"/>
|
||||
<map code="0xE0062" name="uE0062"/>
|
||||
<map code="0xE0063" name="uE0063"/>
|
||||
<map code="0xE0064" name="uE0064"/>
|
||||
<map code="0xE0065" name="uE0065"/>
|
||||
<map code="0xE0066" name="uE0066"/>
|
||||
<map code="0xE0067" name="uE0067"/>
|
||||
<map code="0xE0068" name="uE0068"/>
|
||||
<map code="0xE0069" name="uE0069"/>
|
||||
<map code="0xE006A" name="uE006A"/>
|
||||
<map code="0xE006B" name="uE006B"/>
|
||||
<map code="0xE006C" name="uE006C"/>
|
||||
<map code="0xE006D" name="uE006D"/>
|
||||
<map code="0xE006E" name="uE006E"/>
|
||||
<map code="0xE006F" name="uE006F"/>
|
||||
<map code="0xE0070" name="uE0070"/>
|
||||
<map code="0xE0071" name="uE0071"/>
|
||||
<map code="0xE0072" name="uE0072"/>
|
||||
<map code="0xE0073" name="uE0073"/>
|
||||
<map code="0xE0074" name="uE0074"/>
|
||||
<map code="0xE0075" name="uE0075"/>
|
||||
<map code="0xE0076" name="uE0076"/>
|
||||
<map code="0xE0077" name="uE0077"/>
|
||||
<map code="0xE0078" name="uE0078"/>
|
||||
<map code="0xE0079" name="uE0079"/>
|
||||
<map code="0xE007A" name="uE007A"/>
|
||||
<map code="0xE007F" name="uE007F"/>
|
||||
<map code="0x1F3F4" name="u1F3F4"/>
|
||||
<map code="0xFE82B" name="uFE82B"/>
|
||||
<map code="0x1F1E6" name="u1F1E6"/>
|
||||
<map code="0x1F1E7" name="u1F1E7"/>
|
||||
<map code="0x1F1E8" name="u1F1E8"/>
|
||||
<map code="0x1F1E9" name="u1F1E9"/>
|
||||
<map code="0x1F1EA" name="u1F1EA"/>
|
||||
<map code="0x1F1EB" name="u1F1EB"/>
|
||||
<map code="0x1F1EC" name="u1F1EC"/>
|
||||
<map code="0x1F1ED" name="u1F1ED"/>
|
||||
<map code="0x1F1EE" name="u1F1EE"/>
|
||||
<map code="0x1F1EF" name="u1F1EF"/>
|
||||
<map code="0x1F1F0" name="u1F1F0"/>
|
||||
<map code="0x1F1F1" name="u1F1F1"/>
|
||||
<map code="0x1F1F2" name="u1F1F2"/>
|
||||
<map code="0x1F1F3" name="u1F1F3"/>
|
||||
<map code="0x1F1F4" name="u1F1F4"/>
|
||||
<map code="0x1F1F5" name="u1F1F5"/>
|
||||
<map code="0x1F1F6" name="u1F1F6"/>
|
||||
<map code="0x1F1F7" name="u1F1F7"/>
|
||||
<map code="0x1F1F8" name="u1F1F8"/>
|
||||
<map code="0x1F1F9" name="u1F1F9"/>
|
||||
<map code="0x1F1FA" name="u1F1FA"/>
|
||||
<map code="0x1F1FB" name="u1F1FB"/>
|
||||
<map code="0x1F1FC" name="u1F1FC"/>
|
||||
<map code="0x1F1FD" name="u1F1FD"/>
|
||||
<map code="0x1F1FE" name="u1F1FE"/>
|
||||
<map code="0x1F1FF" name="u1F1FF"/>
|
||||
</cmap_format_12>
|
||||
</cmap>
|
||||
|
||||
|
|
|
@ -36,9 +36,14 @@ def seq_to_str(seq):
|
|||
|
||||
|
||||
def read_emoji_aliases():
|
||||
alias_path = path.join(DATA_ROOT, 'emoji_aliases.txt')
|
||||
return read_emoji_aliases(alias_path)
|
||||
|
||||
|
||||
def read_emoji_aliases(filename):
|
||||
result = {}
|
||||
|
||||
with open(path.join(DATA_ROOT, 'emoji_aliases.txt'), 'r') as f:
|
||||
with open(filename, 'r') as f:
|
||||
for line in f:
|
||||
ix = line.find('#')
|
||||
if (ix > -1):
|
||||
|
@ -47,8 +52,8 @@ def read_emoji_aliases():
|
|||
if not line:
|
||||
continue
|
||||
als, trg = (s.strip() for s in line.split(';'))
|
||||
als_seq = tuple([int(x, 16) for x in als.split('_')])
|
||||
try:
|
||||
als_seq = tuple([int(x, 16) for x in als.split('_')])
|
||||
trg_seq = tuple([int(x, 16) for x in trg.split('_')])
|
||||
except:
|
||||
print 'cannot process alias %s -> %s' % (als, trg)
|
||||
|
|
|
@ -0,0 +1,403 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Extend a ttx file with additional data.
|
||||
|
||||
Takes a ttx file and one or more directories containing image files named
|
||||
after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
|
||||
tables in the source ttx file based on these sequences, and writes out a new
|
||||
ttx file.
|
||||
|
||||
This can also apply aliases from an alias file."""
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import os
|
||||
from os import path
|
||||
import re
|
||||
import sys
|
||||
|
||||
from fontTools import ttx
|
||||
from fontTools.ttLib.tables import otTables
|
||||
|
||||
import add_emoji_gsub
|
||||
import add_aliases
|
||||
|
||||
sys.path.append(
|
||||
path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
|
||||
from png import PNG
|
||||
|
||||
|
||||
def get_seq_to_file(image_dir, prefix, suffix):
  """Map codepoint sequences to the image files in image_dir that encode them.

  Only file names that start with prefix and end with suffix are considered.
  The part between prefix and suffix must be hex codepoints separated by
  underscores.  0xfe0f (the emoji presentation variation selector) is dropped
  from the resulting sequence.

  Args:
    image_dir: directory to list.
    prefix: required start of the file name.
    suffix: required end of the file name (may be empty).

  Returns:
    A dict from tuple-of-int codepoint sequences to the file path
    (image_dir joined with the file name).

  Raises:
    Exception: if a matching name cannot be parsed as hex, contains a value
      outside 0..0x10ffff, or yields a sequence already seen in this directory.
  """
  start = len(prefix)
  seq_to_file = {}
  for name in os.listdir(image_dir):
    if not (name.startswith(prefix) and name.endswith(suffix)):
      continue

    # Compute the end index explicitly: slicing with -len(suffix) would use
    # -0 == 0 for an empty suffix and always produce an empty stem.
    stem = name[start:len(name) - len(suffix)]
    try:
      cps = [int(s, 16) for s in stem.split('_')]
    except ValueError:
      # int() is the only thing that can fail here; don't mask other errors.
      raise Exception('could not parse "%s"' % name)

    for cp in cps:
      if not (0 <= cp <= 0x10ffff):
        raise Exception('bad codepoint(s) in "%s"' % name)

    seq = tuple(cp for cp in cps if cp != 0xfe0f)
    if seq in seq_to_file:
      raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir))
    seq_to_file[seq] = path.join(image_dir, name)
  return seq_to_file
|
||||
|
||||
|
||||
def collect_seq_to_file(image_dirs, prefix, suffix):
  """Merge the get_seq_to_file mappings of several directories.

  Directories are processed in the order given; when the same sequence is
  found in more than one directory, the file from the later directory wins.
  """
  merged = {}
  for directory in image_dirs:
    found = get_seq_to_file(directory, prefix, suffix)
    for seq in found:
      merged[seq] = found[seq]
  return merged
|
||||
|
||||
|
||||
def remap_values(seq_to_file, map_fn):
  """Return a new dict with the same keys and each value passed through map_fn.

  Uses items() rather than the Python-2-only iteritems() so the function runs
  unchanged on both Python 2 and Python 3.
  """
  return {k: map_fn(v) for k, v in seq_to_file.items()}
|
||||
|
||||
|
||||
def get_png_file_to_advance_mapper(lineheight):
  """Return a function mapping a png file name to an advance width.

  The returned function reads the image size via PNG(filename).get_size() and
  scales the width by lineheight / height, rounded to the nearest int.
  """
  def advance_for(filename):
    size = PNG(filename).get_size()
    width, height = size
    scaled = float(lineheight) * width / height
    return int(round(scaled))

  return advance_for
|
||||
|
||||
|
||||
def cp_name(cp):
  """Glyph name for a single codepoint: 'uniXXXX' up to 0xffff, else 'uXXXXX(X)'."""
  if cp > 0xffff:
    prefix = 'u'
  else:
    prefix = 'uni'
  return prefix + '%04X' % cp
|
||||
|
||||
|
||||
def seq_name(seq):
  """Glyph name for a codepoint sequence.

  A one-element sequence is named by cp_name.  Any other sequence is 'u'
  followed by the codepoints as upper-case hex (at least four digits each)
  joined with underscores.
  """
  if len(seq) != 1:
    hex_parts = ['%04X' % cp for cp in seq]
    return 'u' + '_'.join(hex_parts)
  return cp_name(seq[0])
|
||||
|
||||
|
||||
def collect_cps(seqs):
  """Return the set of all codepoints that occur in any of the sequences."""
  return {cp for seq in seqs for cp in seq}
|
||||
|
||||
|
||||
def get_glyphorder_cps_and_truncate(glyphOrder):
  """Strip single-codepoint glyph names out of glyphOrder, in place.

  A name matches when it is 'u' or 'uni' followed by four to six hex digits.
  Matching names are removed from the list and their codepoints are
  returned as a set.  Names that do not match stay in the list in their
  original relative order.
  """
  single_cp_name = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
  found_cps = set()
  retained = []
  for glyph in glyphOrder:
    match = single_cp_name.match(glyph)
    if match is None:
      retained.append(glyph)
    else:
      found_cps.add(int(match.group(1), 16))
  # Slice-assign so the caller's list object itself is shortened.
  glyphOrder[:] = retained
  return found_cps
|
||||
|
||||
|
||||
def get_all_seqs(font, seq_to_advance):
  """Return every sequence that needs a glyph.

  The result starts as the keys of seq_to_advance and is extended with a
  one-element sequence for each codepoint that appears inside any of those
  keys or is named by a single-codepoint entry in the font's glyph order.
  The single-codepoint names are removed from the font's glyph order list
  as a side effect.
  """
  all_seqs = set(seq_to_advance)

  # Codepoints used anywhere inside a sequence, including interior ones.
  cps = collect_cps(all_seqs)

  # Pull the codepoint-named glyphs out of the glyph order; only the other
  # names remain in that list afterwards.
  cps |= get_glyphorder_cps_and_truncate(font.getGlyphOrder())

  all_seqs |= {(cp,) for cp in cps}
  return all_seqs
|
||||
|
||||
|
||||
def get_font_cmap(font):
  """Return the mapping of the font's first cmap subtable.

  The subtable is assumed to exist and to be a unicode cmap.
  """
  first_table = font['cmap'].tables[0]
  return first_table.cmap
|
||||
|
||||
|
||||
def add_glyph_data(font, seqs, seq_to_advance):
  """Add hmtx and GlyphOrder entries for every sequence in seqs.

  Each one-element sequence also gets a cmap entry.  A sequence that has no
  entry in seq_to_advance gets an advance of zero.
  """
  # The template may leave out cmap mappings and hmtx advances for glyphs
  # whose GlyphOrder name is 'uniXXXX' or 'uXXXXX(X)'; those names are taken
  # to denote the codepoint.  Glyphs with any other kind of name are not
  # handled here and must already be present in the template's cmap and hmtx.
  #
  # seqs includes one-element sequences for codepoints that only occur inside
  # longer sequences.  These have no advance data, hence the zero default.
  cmap = get_font_cmap(font)
  metrics = font['hmtx'].metrics

  # Multi-codepoint names are not expected in the glyph order already, and
  # the single-codepoint ones were removed earlier, but consult the reverse
  # map anyway so that no name is appended twice.
  known_names = font.getReverseGlyphMap()

  # Single-codepoint sequences first, then the rest, each group in sequence
  # order.  Keeping the single-codepoint glyphs contiguous lets the coverage
  # tables of some substitutions be more compact.
  seq_list = list(seqs)
  singles = sorted(s for s in seq_list if len(s) == 1)
  others = sorted(s for s in seq_list if len(s) != 1)

  appended = False
  for seq in singles + others:
    glyph = seq_name(seq)
    if len(seq) == 1:
      cmap[seq[0]] = glyph
    metrics[glyph] = [seq_to_advance.get(seq, 0), 0]
    if glyph in known_names:
      continue
    font.glyphOrder.append(glyph)
    appended = True

  if appended:
    # Presumably the font's cached reverse glyph map; removed because the
    # glyph order changed underneath it -- TODO confirm against fontTools.
    delattr(font, '_reverseGlyphOrderDict')
|
||||
|
||||
|
||||
def add_aliases_to_cmap(font, aliases):
  """Point the cmap entry of each single-codepoint alias at its target glyph.

  aliases maps source sequences to target sequences.  Only sources of
  length one are handled here, by mapping the codepoint directly to the
  glyph named after the target sequence.  Longer sources are left for GSUB.
  Does nothing when aliases is empty or None, or has no such sources.
  """
  if not aliases:
    return

  single_cp_sources = [src for src in aliases if len(src) == 1]
  if not single_cp_sources:
    return

  cmap = get_font_cmap(font)
  for src in single_cp_sources:
    target_name = seq_name(aliases[src])
    cmap[src[0]] = target_name
|
||||
|
||||
|
||||
def get_rtl_seq(seq):
  """Return the rtl variant of the sequence, or () if it has none.

  Only sequences containing ZWJ (0x200d) or TAG_END (0xe007f) have an rtl
  variant.  The variant is the reversed sequence, except that each
  Fitzpatrick modifier (0x1f3fb..0x1f3ff) is moved back so that it again
  directly follows the codepoint it followed in the original.

  Args:
    seq: a sequence of int codepoints.

  Returns:
    A tuple of codepoints, or the empty tuple.
  """
  ZWJ = 0x200d
  TAG_END = 0xe007f

  def is_fitzpatrick(cp):
    return 0x1f3fb <= cp <= 0x1f3ff

  if not (ZWJ in seq or TAG_END in seq):
    return ()

  rev_seq = list(reversed(seq))
  i = 1
  while i < len(rev_seq):
    if is_fitzpatrick(rev_seq[i - 1]):
      # After reversal the modifier precedes its base; swap the pair.
      rev_seq[i - 1], rev_seq[i] = rev_seq[i], rev_seq[i - 1]
      # Step over the modifier just placed at i.  Testing it again on the
      # next iteration would keep swapping it rightwards until it reached
      # the end of the sequence.
      i += 2
    else:
      i += 1
  return tuple(rev_seq)
|
||||
|
||||
|
||||
def get_gsub_ligature_lookup(font):
  """Return the font's first GSUB lookup, set up for ligature substitution.

  If the font has no GSUB table, one is created containing a single ligature
  substitution lookup (type 4) with an empty ligatures dict.  Otherwise
  lookup 0 of the existing table is used, and any of LookupType and the first
  subtable's ligatures dict that are missing are filled in.

  Raises:
    AssertionError: if the existing lookup's LookupFlag is not 0, or its
      LookupType is missing and the first subtable's LookupType is not 4.
  """
  if 'GSUB' not in font:
    ligature_subst = otTables.LigatureSubst()
    ligature_subst.ligatures = {}

    lookup = otTables.Lookup()
    lookup.LookupType = 4
    lookup.LookupFlag = 0
    lookup.SubTableCount = 1
    lookup.SubTable = [ligature_subst]

    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
    return lookup

  # The template might include more lookups after lookup 0.
  lookup = font['GSUB'].table.LookupList.Lookup[0]
  assert lookup.LookupFlag == 0

  # importXML doesn't fully init GSUB structures, so help it out.  The
  # subtable is fetched unconditionally so the ligatures check below does
  # not depend on whether LookupType happened to be missing.
  st = lookup.SubTable[0]
  if not hasattr(lookup, 'LookupType'):
    assert st.LookupType == 4
    lookup.LookupType = 4

  if not hasattr(st, 'ligatures'):
    st.ligatures = {}

  return lookup
|
||||
|
||||
|
||||
def add_ligature_sequences(font, seqs, aliases):
  """Add a GSUB ligature for every multi-codepoint sequence and alias.

  Args:
    font: the font to update; its first GSUB lookup receives the ligatures.
    seqs: iterable of codepoint tuples; those longer than one codepoint map
      to the glyph named after themselves.
    aliases: dict from source sequence to target sequence, or None/empty.
      Sources longer than one codepoint map to the target's glyph.

  For each such sequence its rtl variant (see get_rtl_seq), if any, is mapped
  to the same glyph.  Does nothing if there are no multi-codepoint sequences.
  """
  seq_to_target_name = {
      seq: seq_name(seq) for seq in seqs if len(seq) > 1}
  if aliases:
    seq_to_target_name.update({
        seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1})
  if not seq_to_target_name:
    return

  # items() rather than iteritems() so this runs on Python 2 and Python 3.
  rtl_seq_to_target_name = {
      get_rtl_seq(seq): name for seq, name in seq_to_target_name.items()}
  seq_to_target_name.update(rtl_seq_to_target_name)
  # Sequences without an rtl variant all map to the empty sequence; drop it.
  seq_to_target_name.pop((), None)

  # Organize by first codepoint in sequence.
  keyed_ligatures = collections.defaultdict(list)
  for seq, name in seq_to_target_name.items():
    keyed_ligatures[seq[0]].append((seq, name))

  def add_ligature(lookup, cmap, seq, name):
    # The sequences consist of codepoints, but the entries in the ligature
    # table are glyph names.  Aliasing can give single codepoints names based
    # on sequences (e.g. 'guardsman' with 'male guardsman') so we map the
    # codepoints through the cmap to get the glyph names.
    glyph_names = [cmap[cp] for cp in seq]

    lig = otTables.Ligature()
    lig.CompCount = len(seq)
    lig.Component = glyph_names[1:]
    lig.LigGlyph = name

    ligatures = lookup.SubTable[0].ligatures
    ligatures.setdefault(glyph_names[0], []).append(lig)

  lookup = get_gsub_ligature_lookup(font)
  cmap = get_font_cmap(font)
  for first_cp in sorted(keyed_ligatures):
    pairs = keyed_ligatures[first_cp]

    # Sort longest first, this ensures longer sequences with common prefixes
    # are handled before shorter ones.  The secondary sort is a standard
    # sort on the codepoints in the sequence.
    pairs.sort(key=lambda pair: (-len(pair[0]), pair[0]))
    for seq, name in pairs:
      add_ligature(lookup, cmap, seq, name)
|
||||
|
||||
|
||||
def update_font_data(font, seq_to_advance, aliases):
  """Update the font's cmap, hmtx, GSUB, and GlyphOrder tables.

  Collects all sequences needing glyphs, adds their glyph data, applies the
  single-codepoint aliases to the cmap, then adds the ligature sequences.
  """
  all_seqs = get_all_seqs(font, seq_to_advance)
  add_glyph_data(font, all_seqs, seq_to_advance)
  add_aliases_to_cmap(font, aliases)
  add_ligature_sequences(font, all_seqs, aliases)
|
||||
|
||||
|
||||
def apply_aliases(seq_dict, aliases):
  """Select the usable aliases and remove their sources from seq_dict.

  Args:
    seq_dict: dict keyed by sequence; modified in place.
    aliases: dict from source sequence to replacement sequence.

  Returns:
    A dict holding the aliases whose target is a key of seq_dict.  The source
    of each returned alias is deleted from seq_dict if it was present.
  """
  usable_aliases = {}
  # items() rather than iteritems() so this runs on Python 2 and Python 3.
  for k, v in aliases.items():
    if v in seq_dict:
      usable_aliases[k] = v
      seq_dict.pop(k, None)
  return usable_aliases
|
||||
|
||||
|
||||
def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file):
  """Read the ttx in_file, extend it from the images, and write out_file.

  Images are the files in image_dirs whose names start with prefix and end
  with ext.  Only '.png' is accepted for ext.  If aliases_file is given, the
  aliases it defines are applied as well.

  Raises:
    Exception: if ext is not '.png'.
    ValueError: if no matching image files are found.
  """
  if ext != '.png':
    raise Exception('extension "%s" not supported' % ext)

  seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
  if len(seq_to_file) == 0:
    dir_list = ', '.join(image_dirs)
    raise ValueError(
        'no sequences with prefix "%s" and extension "%s" in %s' % (
            prefix, ext, dir_list))

  usable_aliases = None
  if aliases_file:
    all_aliases = add_aliases.read_emoji_aliases(aliases_file)
    # Also removes the alias sources from seq_to_file.
    usable_aliases = apply_aliases(seq_to_file, all_aliases)

  font = ttx.TTFont()
  font.importXML(in_file)

  # Advances are scaled from the image aspect ratio to the font line height.
  hhea = font['hhea']
  lineheight = hhea.ascent - hhea.descent
  seq_to_advance = remap_values(
      seq_to_file, get_png_file_to_advance_mapper(lineheight))

  update_font_data(font, seq_to_advance, usable_aliases)

  font.saveXML(out_file)
|
||||
|
||||
|
||||
def main():
  """Parse the command line and run update_ttx with the given options."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-f', '--in_file', help='ttx input file', metavar='file', required=True)
  parser.add_argument(
      '-o', '--out_file', help='ttx output file', metavar='file', required=True)
  parser.add_argument(
      '-d', '--image_dirs', help='directories containing image files',
      nargs='+', metavar='dir', required=True)
  parser.add_argument(
      '-p', '--prefix', help='file prefix (default "emoji_u")',
      metavar='pfx', default='emoji_u')
  # The help text previously lacked its closing parenthesis.
  parser.add_argument(
      '-e', '--ext', help='file extension (default ".png", currently only '
      '".png" is supported)', metavar='ext', default='.png')
  # With nargs='?', a bare -a uses const; omitting -a leaves aliases as None.
  parser.add_argument(
      '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
      nargs='?', metavar='file')
  args = parser.parse_args()

  update_ttx(
      args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
      args.aliases)


if __name__ == '__main__':
  main()
|
|
@ -1,4 +1,6 @@
|
|||
# alias table
|
||||
# from;to
|
||||
# the 'from' sequence should be represented by the image for the 'to' sequence
|
||||
# 'fe0f' is not in these sequences
|
||||
1f3c3;1f3c3_200d_2642 # RUNNER -> man running
|
||||
1f3c3_1f3fb;1f3c3_1f3fb_200d_2642 # light skin tone
|
||||
|
@ -192,3 +194,10 @@
|
|||
26f9_1f3fe;26f9_1f3fe_200d_2642 # medium-dark skin tone
|
||||
26f9_1f3ff;26f9_1f3ff_200d_2642 # dark skin tone
|
||||
fe82b;unknown_flag # no name -> no name
|
||||
|
||||
# flag aliases
|
||||
1f1e7_1f1fb;1f1f3_1f1f4 # BV -> NO
|
||||
1f1e8_1f1f5;1f1eb_1f1f7 # CP -> FR
|
||||
1f1ed_1f1f2;1f1e6_1f1fa # HM -> AU
|
||||
1f1f8_1f1ef;1f1f3_1f1f4 # SJ -> NO
|
||||
1f1fa_1f1f2;1f1fa_1f1f8 # UM -> US
|
||||
|
|
|
@ -1,282 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import collections, glob, os, re, sys
|
||||
from fontTools import ttx
|
||||
from fontTools.ttLib.tables import otTables
|
||||
from png import PNG
|
||||
|
||||
# PUA character for unknown flag. This avoids the legacy emoji pua values, but
|
||||
# is in the same area.
|
||||
UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
|
||||
|
||||
sys.path.append(
|
||||
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
|
||||
import add_emoji_gsub
|
||||
|
||||
|
||||
def is_vs(cp):
  """True if cp is in the range 0xfe00..0xfe0f (the variation selectors)."""
  return 0xfe00 <= cp <= 0xfe0f
|
||||
|
||||
def is_fitzpatrick(gname):
  """True if the glyph name denotes a codepoint in 0x1f3fb..0x1f3ff.

  gname is one leading character followed by hex digits, e.g. 'u1F3FB'.
  """
  hex_digits = gname[1:]
  value = int(hex_digits, 16)
  return not (value < 0x1f3fb or value > 0x1f3ff)
|
||||
|
||||
def codes_to_string(codes):
  """Convert underscore-separated hex codepoints to the string they spell.

  Args:
    codes: text such as '1F4A9' or '1F1EF_1F1F5'.

  Returns:
    The string made of the corresponding characters.

  Raises:
    ValueError: if a piece is not valid hex or has no character.
  """
  try:
    to_char = unichr  # Python 2
  except NameError:
    to_char = chr  # Python 3
  try:
    # split() returns the whole text as one piece when there is no
    # underscore, so one code path serves both forms.
    return u"".join(to_char(int(code, 16)) for code in codes.split("_"))
  except (ValueError, OverflowError):
    raise ValueError("uh-oh, no unichr for '%s'" % codes)
|
||||
|
||||
|
||||
def glyph_sequence(string):
  """Return the glyph names of the characters that form a ligature.

  Each character becomes 'u' plus its codepoint in upper-case hex (at least
  four digits).  Variation selectors are left out.
  """
  names = []
  for char in string:
    cp = ord(char)
    if is_vs(cp):
      continue
    names.append("u%04X" % cp)
  return names
|
||||
|
||||
|
||||
def glyph_name(string):
  """Return the name of the ligature glyph for string.

  The name is the per-character 'uXXXX' names joined with underscores.
  Variation selectors are included when present.
  """
  parts = ("u%04X" % ord(char) for char in string)
  return "_".join(parts)
|
||||
|
||||
|
||||
def add_ligature(font, seq, name):
  """Add a ligature substituting the glyph sequence seq with glyph name.

  Args:
    font: the font to update.  If it has no GSUB table, one is created with
      a single ligature substitution lookup; otherwise lookup 0 is used.
    seq: list of component glyph names; the first one keys the ligature.
    name: glyph name of the resulting ligature.

  Raises:
    AssertionError: if the existing lookup's LookupFlag is not 0, or its
      LookupType is missing and the first subtable's LookupType is not 4.
  """
  if 'GSUB' not in font:
    ligature_subst = otTables.LigatureSubst()
    ligature_subst.ligatures = {}

    lookup = otTables.Lookup()
    lookup.LookupType = 4
    lookup.LookupFlag = 0
    lookup.SubTableCount = 1
    lookup.SubTable = [ligature_subst]

    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
  else:
    lookup = font['GSUB'].table.LookupList.Lookup[0]
    assert lookup.LookupFlag == 0

    # importXML doesn't fully init GSUB structures, so help it out.  The
    # subtable is fetched unconditionally so the ligatures check does not
    # depend on whether LookupType happened to be missing.
    st = lookup.SubTable[0]
    if not hasattr(lookup, 'LookupType'):
      assert st.LookupType == 4
      lookup.LookupType = 4

    if not hasattr(st, 'ligatures'):
      st.ligatures = {}

  ligatures = lookup.SubTable[0].ligatures

  lig = otTables.Ligature()
  lig.CompCount = len(seq)
  lig.Component = seq[1:]
  lig.LigGlyph = name

  ligatures.setdefault(seq[0], []).append(lig)
|
||||
|
||||
|
||||
# Ligating sequences for emoji that already have a defined codepoint,
|
||||
# to match the sequences for the related emoji with no codepoint.
|
||||
# The key is the name of the glyph with the codepoint, the value is the
|
||||
# name of the sequence in filename form.
|
||||
EXTRA_SEQUENCES = {
|
||||
'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB
|
||||
'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM
|
||||
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
|
||||
}
|
||||
|
||||
# Flag aliases - from: to
|
||||
FLAG_ALIASES = {
|
||||
'BV': 'NO',
|
||||
'CP': 'FR',
|
||||
'HM': 'AU',
|
||||
'SJ': 'NO',
|
||||
'UM': 'US',
|
||||
}
|
||||
|
||||
if len (sys.argv) < 4:
|
||||
print >>sys.stderr, """
|
||||
Usage:
|
||||
|
||||
add_glyphs.py font.ttx out-font.ttx strike-prefix...
|
||||
|
||||
This will search for files that have strike-prefix followed by one or more
|
||||
hex numbers (separated by underscore if more than one), and end in ".png".
|
||||
For example, if strike-prefix is "icons/u", then files with names like
|
||||
"icons/u1F4A9.png" or "icons/u1F1EF_1F1F5.png" will be loaded. The script
|
||||
then adds cmap, htmx, and potentially GSUB entries for the Unicode
|
||||
characters found. The advance width will be chosen based on image aspect
|
||||
ratio. If Unicode values outside the BMP are desired, the existing cmap
|
||||
table should be of the appropriate (format 12) type. Only the first cmap
|
||||
table and the first GSUB lookup (if existing) are modified.
|
||||
"""
|
||||
sys.exit (1)
|
||||
|
||||
in_file = sys.argv[1]
|
||||
out_file = sys.argv[2]
|
||||
img_prefixen = sys.argv[3:]
|
||||
del sys.argv
|
||||
|
||||
font = ttx.TTFont()
|
||||
font.importXML (in_file)
|
||||
|
||||
img_files = {}
|
||||
for img_prefix in img_prefixen:
|
||||
glb = "%s*.png" % img_prefix
|
||||
print "Looking for images matching '%s'." % glb
|
||||
for img_file in glob.glob (glb):
|
||||
codes = img_file[len (img_prefix):-4]
|
||||
u = codes_to_string(codes)
|
||||
if u in img_files:
|
||||
print 'overwriting %s with %s' % (img_files[u], img_file)
|
||||
img_files[u] = img_file
|
||||
if not img_files:
|
||||
raise Exception ("No image files found in '%s'." % glb)
|
||||
|
||||
ascent = font['hhea'].ascent
|
||||
descent = -font['hhea'].descent
|
||||
|
||||
g = font['GlyphOrder'].glyphOrder
|
||||
c = font['cmap'].tables[0].cmap
|
||||
h = font['hmtx'].metrics
|
||||
|
||||
# Sort the characters by length, then codepoint, to keep the order stable
|
||||
# and avoid adding empty glyphs for multi-character glyphs if any piece is
|
||||
# also included.
|
||||
img_pairs = img_files.items ()
|
||||
img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))
|
||||
|
||||
glyph_names = set()
|
||||
ligatures = {}
|
||||
|
||||
def add_lig_sequence(ligatures, seq, n):
  """Record that glyph-name sequence seq ligates to glyph n.

  Args:
    ligatures: dict from tuple of glyph names to ligature glyph name;
      updated in place.
    seq: sequence of glyph names ('uXXXX' form).
    n: name of the ligature glyph.

  If seq contains 'u200D' or 'uE007F', the reversed sequence is recorded as
  well, with each Fitzpatrick modifier moved back behind the glyph it
  followed in seq.  A message is printed when seq replaces an entry.
  """
  # We have emoji sequences using regional indicator symbols, tags, ZWJ,
  # fitzpatrick modifiers, and combinations of ZWJ and fitzpatrick modifiers.
  # Currently, Harfbuzz special-cases the fitzpatrick modifiers to treat them
  # as combining marks instead of as Other Neutral, which unicode says they
  # are, and processes them in visual order (at least in some
  # circumstances).  So to handle emoji sequences in an RTL context we need
  # GSUB sequences that match this order.
  # Regional indicator symbols are LTR, and emoji+fitzpatrick are effectively
  # LTR, so we only reorder sequences with ZWJ or tags.  If however the ZWJ
  # sequence has fitzpatrick modifiers, those need to still follow the emoji
  # they logically follow, so simply reversing the sequence doesn't work.
  # This code assumes the lig sequence is valid.

  def _is_modifier(gname):
    return 0x1f3fb <= int(gname[1:], 16) <= 0x1f3ff

  tseq = tuple(seq)
  if tseq in ligatures:
    # Single pre-formatted argument: prints the same on Python 2 and 3.
    print('lig sequence %s, replace %s with %s' % (
        tseq, ligatures[tseq], n))
  ligatures[tseq] = n

  if 'u200D' in seq or 'uE007F' in seq:
    # list() rather than seq[:] so tuple input can be reversed too.
    rev_seq = list(reversed(seq))
    i = 1
    while i < len(rev_seq):
      if _is_modifier(rev_seq[i - 1]):
        rev_seq[i - 1], rev_seq[i] = rev_seq[i], rev_seq[i - 1]
        # Step over the modifier just placed at i; testing it again would
        # keep swapping it rightwards to the end of the sequence.
        i += 2
      else:
        i += 1
    ligatures[tuple(rev_seq)] = n
|
||||
|
||||
|
||||
for (u, filename) in img_pairs:
|
||||
n = glyph_name (u)
|
||||
glyph_names.add(n)
|
||||
# print "Adding glyph for %s" % n
|
||||
|
||||
g.append (n)
|
||||
for char in u:
|
||||
cp = ord(char)
|
||||
if cp not in c and not is_vs(cp):
|
||||
name = glyph_name (char)
|
||||
if name not in glyph_names:
|
||||
g.append(name)
|
||||
c[cp] = name
|
||||
if len (u) > 1:
|
||||
h[name] = [0, 0]
|
||||
(img_width, img_height) = PNG (filename).get_size ()
|
||||
advance = int (round ((float (ascent+descent) * img_width / img_height)))
|
||||
h[n] = [advance, 0]
|
||||
if len (u) > 1:
|
||||
seq = glyph_sequence(u)
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
|
||||
for n in EXTRA_SEQUENCES:
|
||||
if n in glyph_names:
|
||||
seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
else:
|
||||
print 'extras: no glyph for %s' % n
|
||||
|
||||
# Add missing regional indicator sequences and flag aliases
|
||||
# if we support any.
|
||||
regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200))
|
||||
|
||||
def _is_flag_sequence(t):
|
||||
return len(t) == 2 and t[0] in regional_names and t[1] in regional_names
|
||||
|
||||
have_flags = False
|
||||
for k in ligatures:
|
||||
if _is_flag_sequence(k):
|
||||
have_flags = True
|
||||
break
|
||||
|
||||
if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
|
||||
raise ValueError(
|
||||
'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
|
||||
|
||||
# sigh, too many separate files with the same code.
|
||||
# copied from add_emoji_gsub.
|
||||
def _reg_indicator(letter):
|
||||
assert 'A' <= letter <= 'Z'
|
||||
return 0x1F1E6 + ord(letter) - ord('A')
|
||||
|
||||
def _reg_lig_sequence(flag_name):
|
||||
"""Returns a tuple of strings naming the codepoints that form the ligature."""
|
||||
assert len(flag_name) == 2
|
||||
return tuple('u%X' % _reg_indicator(cp) for cp in flag_name)
|
||||
|
||||
def _reg_lig_name(flag_name):
  """Returns a glyph name for the flag name.

  The name is the two glyph names from _reg_lig_sequence joined by '_'.
  """
  components = _reg_lig_sequence(flag_name)
  return '_'.join(components)
|
||||
|
||||
if have_flags:
|
||||
print 'Adding flag aliases.'
|
||||
for flag_from, flag_to in FLAG_ALIASES.iteritems():
|
||||
seq = _reg_lig_sequence(flag_from)
|
||||
name = _reg_lig_name(flag_to)
|
||||
add_lig_sequence(ligatures, seq, name)
|
||||
|
||||
keyed_ligatures = collections.defaultdict(list)
|
||||
for k, v in ligatures.iteritems():
|
||||
first = k[0]
|
||||
keyed_ligatures[first].append((k, v))
|
||||
|
||||
for base in sorted(keyed_ligatures):
|
||||
pairs = keyed_ligatures[base]
|
||||
# print 'base %s has %d sequences' % (base, len(pairs))
|
||||
|
||||
# Sort longest first, this ensures longer sequences with common prefixes
|
||||
# are handled before shorter ones. It would be better to have multiple
|
||||
# lookups, most likely.
|
||||
pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True)
|
||||
for seq, name in pairs:
|
||||
# print seq, name
|
||||
add_ligature(font, seq, name)
|
||||
|
||||
font.saveXML (out_file)
|
Loading…
Reference in New Issue