wip - update emoji tooling to support tr51 sequences
parent
deff1a6545
commit
0f227e7834
3
Makefile
3
Makefile
|
@ -78,7 +78,8 @@ flag-symlinks: $(WAVED_FLAGS)
|
|||
|
||||
$(PNG128_FLAGS): flag-symlinks
|
||||
|
||||
EMOJI_PNG128 = ./png/128/emoji_u
|
||||
#EMOJI_PNG128 = ./png/128/emoji_u
|
||||
EMOJI_PNG128 = /tmp/placeholder_emoji_plus/emoji_u
|
||||
|
||||
EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
|
||||
ADD_GLYPHS = third_party/color_emoji/add_glyphs.py
|
||||
|
|
|
@ -4,6 +4,10 @@
|
|||
<GlyphOrder>
|
||||
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
|
||||
<GlyphID id="0" name=".notdef"/>
|
||||
<GlyphID id="1" name="null"/>
|
||||
<GlyphID id="2" name="nonmarkingreturn"/>
|
||||
<GlyphID id="3" name="space"/>
|
||||
<GlyphID id="4" name="u200D"/>
|
||||
</GlyphOrder>
|
||||
|
||||
<head>
|
||||
|
@ -119,12 +123,19 @@
|
|||
|
||||
<hmtx>
|
||||
<mtx name=".notdef" width="2550" lsb="0"/>
|
||||
<mtx name="null" width="0" lsb="0"/>
|
||||
<mtx name="nonmarkingreturn" width="2550" lsb="0"/>
|
||||
<mtx name="space" width="2550" lsb="0"/>
|
||||
<mtx name="u200D" width="0" lsb="0"/>
|
||||
</hmtx>
|
||||
|
||||
<cmap>
|
||||
<tableVersion version="0"/>
|
||||
<cmap_format_12 platformID="3" platEncID="10" language="0" format="12" reserved="0" length="1" nGroups="1">
|
||||
<map code="0x0" name=".notdef"/><!-- <control> -->
|
||||
<map code="0xd" name="nonmarkingreturn"/>
|
||||
<map code="0x20" name="space"/>
|
||||
<map code="0x200d" name="u200D"/>
|
||||
</cmap_format_12>
|
||||
</cmap>
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
# delete dst, then:
|
||||
# copy the placeholders to dst
|
||||
# then copy the noto images to dst
|
||||
# then copy the draft images to dst, after lower-casing the names and
# replacing each image with its highest-numbered "(N)" variant, if any
|
||||
|
||||
import glob
|
||||
import os
|
||||
from os import path
|
||||
import re
|
||||
import shutil
|
||||
|
||||
DST = "/tmp/placeholder_emoji_plus"
|
||||
|
||||
SRC_PLACEHOLDER = "/tmp/placeholder_emoji"
|
||||
SRC_NOTO = "/usr/local/google/users/dougfelt/newnoto/noto-emoji/png/128"
|
||||
SRC_DRAFT = "/usr/local/google/home/dougfelt/Downloads/PNG_latest_working_draft"
|
||||
|
||||
# First, scan the draft images and select which ones to use. This does
|
||||
# two things:
|
||||
# - The download package returns all the images, including previous versions.
|
||||
# Ensure we use the one with the highest version.
|
||||
# - The names often mix case. Make sure we have all lower case names.
|
||||
#
|
||||
# If something seems amiss, we fail.
|
||||
|
||||
UPDATED_NAMES = {}
|
||||
FIXED_NAMES = {}
|
||||
VAR_PAT = re.compile(r'(.*?)\((\d+)\)\.png')
|
||||
for fname in glob.glob(path.join(SRC_DRAFT, '*.png')):
|
||||
name = path.basename(fname)
|
||||
m = VAR_PAT.match(name)
|
||||
if m:
|
||||
name = '%s.png' % m.group(1).lower()
|
||||
version = int(m.group(2))
|
||||
if version > UPDATED_NAMES.get(name, (0, None))[0]:
|
||||
print 'update %s to version %d' % (name, version)
|
||||
UPDATED_NAMES[name] = (version, fname)
|
||||
else:
|
||||
name = name.lower()
|
||||
FIXED_NAMES[name] = fname
|
||||
|
||||
for name in UPDATED_NAMES:
|
||||
if name not in FIXED_NAMES:
|
||||
raise Exception('updated name %s not in names' % name)
|
||||
fname = UPDATED_NAMES[name][1]
|
||||
print 'using updated image %s for %s' % (fname, name)
|
||||
FIXED_NAMES[name] = fname
|
||||
|
||||
# Now, recreate the destination directory and copy the data into it.
|
||||
|
||||
if path.isdir(DST):
|
||||
shutil.rmtree(DST)
|
||||
os.makedirs(DST)
|
||||
|
||||
SKIP_PLACEHOLDERS = frozenset([
|
||||
'emoji_u1f468_200d_1f469_200d_1f466.png',
|
||||
'emoji_u1f469_200d_2764_fe0f_200d_1f468.png',
|
||||
'emoji_u1f469_200d_2764_fe0f_200d_1f48b_200d_1f468.png',
|
||||
])
|
||||
|
||||
for fname in glob.glob(path.join(SRC_PLACEHOLDER, '*.png')):
|
||||
basename = path.basename(fname)
|
||||
if basename in SKIP_PLACEHOLDERS:
|
||||
print 'skip %s' % basename
|
||||
continue
|
||||
shutil.copy(fname, DST)
|
||||
|
||||
for fname in glob.glob(path.join(SRC_NOTO, '*.png')):
|
||||
shutil.copy(fname, DST)
|
||||
|
||||
for name, fname in FIXED_NAMES.iteritems():
|
||||
shutil.copy(fname, path.join(DST, name))
|
|
@ -0,0 +1,95 @@
|
|||
import os
|
||||
from os import path
|
||||
import subprocess
|
||||
|
||||
OUTPUT_DIR = '/tmp/placeholder_emoji'
|
||||
|
||||
def generate_image(name, text):
|
||||
print name, text.replace('\n', '_')
|
||||
subprocess.check_call(
|
||||
['convert', '-size', '100x100', 'label:%s' % text,
|
||||
'%s/%s' % (OUTPUT_DIR, name)])
|
||||
|
||||
def is_color_patch(cp):
    """Return True if code point `cp` lies in 0x1F3FB..0x1F3FF inclusive."""
    return 0x1f3fb <= cp <= 0x1f3ff
|
||||
|
||||
def has_color_patch(values):
    """Return True if any code point in `values` satisfies is_color_patch."""
    return any(is_color_patch(cp) for cp in values)
|
||||
|
||||
def regional_to_ascii(cp):
    """Return the letter for `cp`, counting from 'A' at code point 0x1F1E6."""
    offset = cp - 0x1f1e6
    return unichr(ord('A') + offset)
|
||||
|
||||
def is_flag_sequence(values):
    """Return True if `values` is exactly two code points, each within the
    26 consecutive code points starting at 0x1F1E6."""
    return len(values) == 2 and all(0 <= cp - 0x1f1e6 <= 25 for cp in values)
|
||||
|
||||
def is_keycap_sequence(values):
    """Return True if `values` has two code points and the second is 0x20E3."""
    if len(values) != 2:
        return False
    return values[1] == 0x20e3
|
||||
|
||||
def get_keycap_text(values):
    """Return the label for a keycap sequence: its first character between
    two hyphens."""
    key = unichr(values[0])
    # Hyphens are used as the surround because convert gags on '['.
    return '-%c-' % key
|
||||
|
||||
# Code point -> one-letter abbreviation used in placeholder labels.
# A value of '-' marks a code point that is accepted but left out of the label.
char_map = {
    0x1f468: 'M',
    0x1f469: 'W',
    0x1f466: 'B',
    0x1f467: 'G',
    0x2764: 'H',  # heavy black heart, no var sel
    0x1f48b: 'K',  # kiss mark
    0x200D: '-',  # zwj placeholder
    0xfe0f: '-',  # variation selector placeholder
    0x1f441: 'I',  # Eye
    0x1f5e8: 'W',  # 'witness' (left speech bubble)
}


def get_combining_text(values):
    """Return the label for a sequence by abbreviating each code point.

    Each code point is looked up in char_map; '-' entries are dropped and the
    remaining letters are concatenated in order.  Returns None as soon as a
    code point has no entry in char_map.
    """
    letters = []
    for cp in values:
        letter = char_map.get(cp)
        if not letter:
            return None
        if letter == '-':
            continue
        letters.append(letter)
    return ''.join(letters)
|
||||
|
||||
|
||||
if not path.isdir(OUTPUT_DIR):
|
||||
os.makedirs(OUTPUT_DIR)
|
||||
|
||||
with open('sequences.txt', 'r') as f:
|
||||
for seq in f:
|
||||
seq = seq.strip()
|
||||
text = None
|
||||
values = [int(code, 16) for code in seq.split('_')]
|
||||
if len(values) == 1:
|
||||
val = values[0]
|
||||
text = '%04X' % val # ensure upper case format
|
||||
elif is_flag_sequence(values):
|
||||
text = ''.join(regional_to_ascii(cp) for cp in values)
|
||||
elif has_color_patch(values):
|
||||
print 'skipping color patch sequence %s' % seq
|
||||
elif is_keycap_sequence(values):
|
||||
text = get_keycap_text(values)
|
||||
else:
|
||||
text = get_combining_text(values)
|
||||
if not text:
|
||||
print 'missing %s' % seq
|
||||
|
||||
if text:
|
||||
if len(text) > 3:
|
||||
if len(text) == 4:
|
||||
hi = text[:2]
|
||||
lo = text[2:]
|
||||
else:
|
||||
hi = text[:-3]
|
||||
lo = text[-3:]
|
||||
text = '%s\n%s' % (hi, lo)
|
||||
generate_image('emoji_u%s.png' % seq, text)
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import glob, os, sys
|
||||
import collections, glob, os, sys
|
||||
from fontTools import ttx
|
||||
from fontTools.ttLib.tables import otTables
|
||||
from png import PNG
|
||||
|
@ -10,11 +10,31 @@ sys.path.append(
|
|||
import add_emoji_gsub
|
||||
|
||||
|
||||
def is_vs(cp):
    """Return True if code point `cp` lies in 0xFE00..0xFE0F inclusive."""
    return 0xfe00 <= cp <= 0xfe0f
|
||||
|
||||
def codes_to_string(codes):
    """Convert '_'-separated hex code points (filename form) to a string.

    A value without '_' is handled by the same path, because splitting it
    yields a single piece.
    """
    return "".join(unichr(int(piece, 16)) for piece in codes.split("_"))
|
||||
|
||||
|
||||
def glyph_sequence(string):
    """Return the list of glyph names ('uXXXX') that form a ligature.

    Characters for which is_vs is true (variation selectors) are stripped.
    """
    names = []
    for char in string:
        cp = ord(char)
        if is_vs(cp):
            continue
        names.append("u%04X" % cp)
    return names
|
||||
|
||||
|
||||
def glyph_name(string):
    """Return the name of a ligature glyph: 'uXXXX' per character, joined
    by '_'.

    Variation selectors are included in the name when present.
    """
    parts = ["u%04X" % ord(char) for char in string]
    return "_".join(parts)
|
||||
|
||||
|
||||
def add_ligature (font, string):
|
||||
def add_ligature (font, seq, name):
|
||||
if 'GSUB' not in font:
|
||||
ligature_subst = otTables.LigatureSubst()
|
||||
ligature_subst.ligatures = {}
|
||||
|
@ -34,17 +54,27 @@ def add_ligature (font, string):
|
|||
ligatures = lookup.SubTable[0].ligatures
|
||||
|
||||
lig = otTables.Ligature()
|
||||
lig.CompCount = len(string)
|
||||
lig.Component = [glyph_name(ch) for ch in string[1:]]
|
||||
lig.LigGlyph = glyph_name(string)
|
||||
lig.CompCount = len(seq)
|
||||
lig.Component = seq[1:]
|
||||
lig.LigGlyph = name
|
||||
|
||||
first = glyph_name(string[0])
|
||||
first = seq[0]
|
||||
try:
|
||||
ligatures[first].append(lig)
|
||||
except KeyError:
|
||||
ligatures[first] = [lig]
|
||||
|
||||
|
||||
# Ligating sequences for emoji that already have a defined codepoint,
|
||||
# to match the sequences for the related emoji with no codepoint.
|
||||
# The key is the name of the glyph with the codepoint, the value is the
|
||||
# name of the sequence in filename form.
|
||||
EXTRA_SEQUENCES = {
|
||||
'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB
|
||||
'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM
|
||||
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
|
||||
}
|
||||
|
||||
if len (sys.argv) < 4:
|
||||
print >>sys.stderr, """
|
||||
Usage:
|
||||
|
@ -65,22 +95,21 @@ table and the first GSUB lookup (if existing) are modified.
|
|||
|
||||
in_file = sys.argv[1]
|
||||
out_file = sys.argv[2]
|
||||
img_prefix = sys.argv[3]
|
||||
img_prefixen = sys.argv[3:]
|
||||
del sys.argv
|
||||
|
||||
font = ttx.TTFont()
|
||||
font.importXML (in_file)
|
||||
|
||||
img_files = {}
|
||||
for img_prefix in img_prefixen:
|
||||
glb = "%s*.png" % img_prefix
|
||||
print "Looking for images matching '%s'." % glb
|
||||
for img_file in glob.glob (glb):
|
||||
codes = img_file[len (img_prefix):-4]
|
||||
if "_" in codes:
|
||||
pieces = codes.split ("_")
|
||||
u = "".join ([unichr (int (code, 16)) for code in pieces])
|
||||
else:
|
||||
u = unichr (int (codes, 16))
|
||||
u = codes_to_string(codes)
|
||||
if u in img_files:
|
||||
print 'overwriting %s with %s' % (img_files[u], imag_file)
|
||||
img_files[u] = img_file
|
||||
if not img_files:
|
||||
raise Exception ("No image files found in '%s'." % glb)
|
||||
|
@ -98,20 +127,71 @@ h = font['hmtx'].metrics
|
|||
img_pairs = img_files.items ()
|
||||
img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))
|
||||
|
||||
glyph_names = set()
|
||||
ligatures = {}
|
||||
|
||||
def add_lig_sequence(ligatures, seq, n):
    """Record glyph name `n` for the glyph-name sequence `seq`.

    `ligatures` maps a tuple of glyph names to a ligature glyph name and is
    updated in place.  An existing entry is overwritten, with a message.

    Sequences containing ZWJ ('u200D') are assumed to be emoji 'ligatures'
    for which the fully reversed (rtl) order is also valid, so the reversed
    sequence is recorded too.  Internal permutations are not.
    """
    forward = tuple(seq)
    if forward in ligatures:
        print('lig sequence %s, replace %s with %s' % (
            forward, ligatures[forward], n))
    ligatures[forward] = n

    if 'u200D' not in seq:
        return

    rev_seq = seq[:]
    rev_seq.reverse()
    backward = tuple(rev_seq)
    if backward in ligatures:
        print('rev lig sequence %s, replace %s with %s' % (
            backward, ligatures[backward], n))
    ligatures[backward] = n
|
||||
|
||||
|
||||
for (u, filename) in img_pairs:
|
||||
print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
|
||||
n = glyph_name (u)
|
||||
glyph_names.add(n)
|
||||
|
||||
g.append (n)
|
||||
for char in u:
|
||||
if char not in c:
|
||||
cp = ord(char)
|
||||
if cp not in c and not is_vs(cp):
|
||||
name = glyph_name (char)
|
||||
c[ord (char)] = name
|
||||
c[cp] = name
|
||||
if len (u) > 1:
|
||||
h[name] = [0, 0]
|
||||
(img_width, img_height) = PNG (filename).get_size ()
|
||||
advance = int (round ((float (ascent+descent) * img_width / img_height)))
|
||||
h[n] = [advance, 0]
|
||||
if len (u) > 1:
|
||||
add_ligature (font, u)
|
||||
seq = glyph_sequence(u)
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
|
||||
for n in EXTRA_SEQUENCES:
|
||||
if n in glyph_names:
|
||||
seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
|
||||
add_lig_sequence(ligatures, seq, n)
|
||||
else:
|
||||
print 'extras: no glyph for %s' % n
|
||||
|
||||
|
||||
keyed_ligatures = collections.defaultdict(list)
|
||||
for k, v in ligatures.iteritems():
|
||||
first = k[0]
|
||||
keyed_ligatures[first].append((k, v))
|
||||
|
||||
for base in sorted(keyed_ligatures):
|
||||
pairs = keyed_ligatures[base]
|
||||
print 'base %s has %d sequences' % (base, len(pairs))
|
||||
# Sort longest first, this ensures longer sequences with common prefixes
|
||||
# are handled before shorter ones. It would be better to have multiple
|
||||
# lookups, most likely.
|
||||
pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True)
|
||||
for seq, name in pairs:
|
||||
print seq, name
|
||||
add_ligature(font, seq, name)
|
||||
|
||||
font.saveXML (out_file)
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
|
||||
import sys, struct, StringIO
|
||||
from png import PNG
|
||||
|
||||
import os
|
||||
from os import path
|
||||
|
||||
def get_glyph_name_from_gsub (string, font, cmap_dict):
|
||||
ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures
|
||||
|
@ -83,6 +84,7 @@ class CBDT:
|
|||
write_func = self.image_write_func (image_format)
|
||||
for glyph in glyphs:
|
||||
img_file = glyph_filenames[glyph]
|
||||
print 'writing data for glyph %s' % path.basename(img_file)
|
||||
offset = self.tell ()
|
||||
write_func (PNG (img_file))
|
||||
self.glyph_maps.append (GlyphMap (glyph, offset, image_format))
|
||||
|
@ -108,6 +110,7 @@ class CBDT:
|
|||
line_ascent = ascent * y_ppem / float (upem)
|
||||
y_bearing = int (round (line_ascent - .5 * (line_height - height)))
|
||||
advance = width
|
||||
print "small glyph metrics h: %d w: %d a: %d" % (height, width, advance)
|
||||
# smallGlyphMetrics
|
||||
# Type Name
|
||||
# BYTE height
|
||||
|
@ -115,10 +118,14 @@ class CBDT:
|
|||
# CHAR BearingX
|
||||
# CHAR BearingY
|
||||
# BYTE Advance
|
||||
try:
    self.write (struct.pack ("BBbbB",
                             height, width,
                             x_bearing, y_bearing,
                             advance))
except struct.error:
    # struct.pack raises struct.error when a value does not fit its one-byte
    # field; re-raise with all five values so the offending one is visible.
    raise ValueError("h: %d w: %d a: %d x: %d y: %d" % (
        height, width, advance, x_bearing, y_bearing))
|
||||
|
||||
def write_format1 (self, png):
|
||||
|
||||
|
@ -437,8 +444,10 @@ By default they are dropped.
|
|||
eblc.write_header ()
|
||||
eblc.start_strikes (len (img_prefixes))
|
||||
|
||||
for img_prefix in img_prefixes:
|
||||
def is_vs(cp):
    """Return True if code point `cp` lies in 0xFE00..0xFE0F inclusive."""
    return 0xfe00 <= cp <= 0xfe0f
|
||||
|
||||
for img_prefix in img_prefixes:
|
||||
print
|
||||
|
||||
img_files = {}
|
||||
|
@ -448,9 +457,14 @@ By default they are dropped.
|
|||
codes = img_file[len (img_prefix):-4]
|
||||
if "_" in codes:
|
||||
pieces = codes.split ("_")
|
||||
uchars = "".join ([unichr (int (code, 16)) for code in pieces])
|
||||
cps = [int(code, 16) for code in pieces]
|
||||
uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)])
|
||||
else:
|
||||
uchars = unichr (int (codes, 16))
|
||||
cp = int(codes, 16)
|
||||
if is_vs(cp):
|
||||
print "ignoring unexpected vs input %04x" % cp
|
||||
continue
|
||||
uchars = unichr(cp)
|
||||
img_files[uchars] = img_file
|
||||
if not img_files:
|
||||
raise Exception ("No image files found in '%s'." % glb)
|
||||
|
@ -460,7 +474,11 @@ By default they are dropped.
|
|||
advance = width = height = 0
|
||||
for uchars, img_file in img_files.items ():
|
||||
if len (uchars) == 1:
|
||||
try:
|
||||
glyph_name = unicode_cmap.cmap[ord (uchars)]
|
||||
except:
|
||||
print "no cmap entry for %x" % ord(uchars)
|
||||
raise ValueError("%x" % ord(uchars))
|
||||
else:
|
||||
glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap)
|
||||
glyph_id = font.getGlyphID (glyph_name)
|
||||
|
@ -476,7 +494,7 @@ By default they are dropped.
|
|||
|
||||
glyphs = sorted (glyph_imgs.keys ())
|
||||
if not glyphs:
|
||||
raise Exception ("No common characteres found between font and '%s'." % glb)
|
||||
raise Exception ("No common characters found between font and '%s'." % glb)
|
||||
print "Embedding images for %d glyphs for this strike." % len (glyphs)
|
||||
|
||||
advance, width, height = (div (x, len (glyphs)) for x in (advance, width, height))
|
||||
|
|
Loading…
Reference in New Issue