Merge pull request #50 from dougfelt/svg_updates

Update svg emoji tools.
pull/52/head
dougfelt 2016-04-13 11:39:24 -07:00
commit 7c92c3cfdd
4 changed files with 208 additions and 147 deletions

View File

@ -19,28 +19,25 @@
import argparse
import glob
import logging
import os
import re
import sys
# find the noto root, so we can get nototools
# alternatively we could just define PYTHONPATH or always run this from
# noto root, but for testing we might not always be doing that.
_noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
sys.path.append(_noto_root)
from fontTools.ttLib.tables import otTables
from fontTools.ttLib.tables import _g_l_y_f
from fontTools.ttLib.tables import S_V_G_ as SVG
from fontTools import ttx
from nototools import tool_utils
import add_emoji_gsub
import svg_builder
import svg_cleaner
class FontBuilder(object):
"""A utility for mutating a ttx font. This maintains glyph_order, cmap, and hmtx tables,
and optionally GSUB, glyf, and SVN tables as well."""
"""A utility for mutating a ttx font. This maintains glyph_order, cmap, and
hmtx tables, and optionally GSUB, glyf, and SVG tables as well."""
def __init__(self, font):
self.font = font;
@ -49,8 +46,8 @@ class FontBuilder(object):
self.hmtx = font['hmtx'].metrics
def init_gsub(self):
"""Call this if you are going to add ligatures to the font. Creates a GSUB table
if there isn't one already."""
"""Call this if you are going to add ligatures to the font. Creates a GSUB
table if there isn't one already."""
if hasattr(self, 'ligatures'):
return
@ -73,8 +70,8 @@ class FontBuilder(object):
self.ligatures = lookup.SubTable[0].ligatures
def init_glyf(self):
"""Call this if you need to create empty glyf entries in the font when you add a new
glyph."""
"""Call this if you need to create empty glyf entries in the font when you
add a new glyph."""
if hasattr(self, 'glyphs'):
return
@ -87,8 +84,9 @@ class FontBuilder(object):
self.glyphs = font['glyf'].glyphs
def init_svg(self):
"""Call this if you expect to add SVG images in the font. This calls init_glyf since SVG
support currently requires fallback glyf records for each SVG image."""
"""Call this if you expect to add SVG images in the font. This calls
init_glyf since SVG support currently requires fallback glyf records for
each SVG image."""
if hasattr(self, 'svgs'):
return
@ -131,7 +129,8 @@ class FontBuilder(object):
self.ligatures[first] = [lig]
def _add_empty_glyph(self, glyphstr, name):
"""Create an empty glyph. If glyphstr is not a ligature, add a cmap entry for it."""
"""Create an empty glyph. If glyphstr is not a ligature, add a cmap entry
for it."""
if len(glyphstr) == 1:
self.cmap[ord(glyphstr)] = name
self.hmtx[name] = [0, 0]
@ -140,11 +139,12 @@ class FontBuilder(object):
self.glyphs[name] = _g_l_y_f.Glyph()
def add_components_and_ligature(self, glyphstr):
"""Convert glyphstr to a name and check if it already exists. If not, check if it is a
ligature (longer than one codepoint), and if it is, generate empty glyphs with cmap
entries for any missing ligature components and add a ligature record. Then generate
an empty glyph for the name. Return a tuple with the name, index, and a bool
indicating whether the glyph already existed."""
"""Convert glyphstr to a name and check if it already exists. If not, check
if it is a ligature (longer than one codepoint), and if it is, generate
empty glyphs with cmap entries for any missing ligature components and add a
ligature record. Then generate an empty glyph for the name. Return a tuple
with the name, index, and a bool indicating whether the glyph already
existed."""
name = self.glyph_name(glyphstr)
index = self.glyph_name_to_index(name)
@ -161,8 +161,8 @@ class FontBuilder(object):
return name, index, exists
def add_svg(self, doc, hmetrics, name, index):
"""Add an svg table entry. If hmetrics is not None, update the hmtx table. This
expects the glyph has already been added."""
"""Add an svg table entry. If hmetrics is not None, update the hmtx table.
This expects the glyph has already been added."""
# sanity check to make sure name and index correspond.
assert name == self.glyph_index_to_name(index)
if hmetrics:
@ -171,27 +171,27 @@ class FontBuilder(object):
self.svgs.append(svg_record)
def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1):
"""Scan files with the given prefix and extension, and return a list of (glyphstr,
filename) where glyphstr is the character or ligature, and filename is the image file
associated with it. The glyphstr is formed by decoding the filename (exclusive of the
prefix) as a sequence of hex codepoints separated by underscore. Include, if defined, is
a regex string to include only matched filenames. Exclude, if defined, is a regex string
to exclude matched filenames, and is applied after include."""
def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None):
"""Scan files with the given prefix and extension, and return a list of
(glyphstr, filename) where glyphstr is the character or ligature, and filename
is the image file associated with it. The glyphstr is formed by decoding the
filename (exclusive of the prefix) as a sequence of hex codepoints separated
by underscore. Include, if defined, is a regex string to include only matched
filenames. Exclude, if defined, is a regex string to exclude matched
filenames, and is applied after include."""
image_files = {}
glob_pat = "%s*.%s" % (prefix, ext)
leading = len(prefix)
trailing = len(ext) + 1 # include dot
if verbosity:
print "Looking for images matching '%s'." % glob_pat
logging.info("Looking for images matching '%s'.", glob_pat)
ex_count = 0
ex = re.compile(exclude) if exclude else None
inc = re.compile(include) if include else None
if verbosity and inc:
print "Including images matching '%s'." % include
if verbosity and ex:
print "Excluding images matching '%s'." % exclude
if inc:
logging.info("Including images matching '%s'.", include)
if ex:
logging.info("Excluding images matching '%s'.", exclude)
for image_file in glob.glob(glob_pat):
if inc and not inc.search(image_file):
@ -211,76 +211,83 @@ def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosi
u = unichr(int(codes, 16))
image_files[u] = image_file
if verbosity and ex_count:
print "Excluded %d files." % ex_count
if ex_count:
logging.info("Excluded %d files.", ex_count)
if not image_files:
raise Exception ("No image files matching '%s'." % glob_pat)
if verbosity:
print "Included %s files." % len(image_files)
raise Exception("No image files matching '%s'." % glob_pat)
logging.info("Matched %s files.", len(image_files))
return image_files.items()
def sort_glyphstr_tuples(glyphstr_tuples):
    """Sort a list of tuples in place by the glyph string in element 0.

    The list contains tuples whose first element is a string representing a
    character or ligature. It is sorted with shorter glyphstrs first, then
    alphabetically. This ensures that ligature components are added to the
    font before any ligatures that contain them.

    Args:
      glyphstr_tuples: mutable list of tuples; modified in place.

    Returns:
      None, like list.sort().
    """
    glyphstr_tuples.sort(key=lambda t: (len(t[0]), t[0]))
def add_image_glyphs(in_file, out_file, pairs):
    """Add images from pairs to a .ttx font and write the result.

    Args:
      in_file: path of the .ttx file to read.
      out_file: path of the .ttx file to write.
      pairs: list of (glyphstr, filename) tuples, where glyphstr is the
        character or ligature and filename is its image file. The list is
        sorted in place (see sort_glyphstr_tuples).
    """
    font = ttx.TTFont()
    font.importXML(in_file)

    sort_glyphstr_tuples(pairs)

    font_builder = FontBuilder(font)
    # We've already sorted by length, so the longest glyphstrs are at the end.
    # To see if we have ligatures, we just need to check the last one. The
    # emptiness test avoids an IndexError when there are no pairs at all.
    if pairs and len(pairs[-1][0]) > 1:
        font_builder.init_gsub()

    img_builder = svg_builder.SvgBuilder(font_builder)
    for glyphstr, filename in pairs:
        logging.debug(
            "Adding glyph for U+%s",
            ",".join(["%04X" % ord(char) for char in glyphstr]))
        img_builder.add_from_filename(glyphstr, filename)

    font.saveXML(out_file)
    logging.info("Added %s images to %s", len(pairs), out_file)
def main(argv):
    """Parse command-line arguments and add svg image glyphs to a ttx font.

    Args:
      argv: list of argument strings, without the program name.
    """
    # The default argparse formatter re-wraps the epilog, so the line breaks
    # and indentation inside this string do not affect the help output.
    usage = """This will search for files that have image_prefix followed by
    one or more hex numbers (separated by underscore if more than one), and
    end in ".svg". For example, if image_prefix is "icons/u", then files with
    names like "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded.
    The script then adds cmap, hmtx, and potentially GSUB entries for the
    Unicode characters found. The advance width will be chosen based on image
    aspect ratio. If Unicode values outside the BMP are desired, the existing
    cmap table should be of the appropriate (format 12) type. Only the first
    cmap table and the first GSUB lookup (if existing) are modified."""

    parser = argparse.ArgumentParser(
        description=(
            'Update cmap, glyf, GSUB, and hmtx tables from image glyphs.'),
        epilog=usage)
    parser.add_argument(
        'in_file', help='Input ttx file name.', metavar='fname')
    parser.add_argument(
        'out_file', help='Output ttx file name.', metavar='fname')
    parser.add_argument(
        'image_prefix', help='Location and prefix of image files.',
        metavar='path')
    parser.add_argument(
        '-i', '--include', help='include files whose name matches this regex',
        metavar='regex')
    parser.add_argument(
        '-e', '--exclude', help='exclude files whose name matches this regex',
        metavar='regex')
    parser.add_argument(
        '-l', '--loglevel', help='log level name', default='warning')
    args = parser.parse_args(argv)

    # Configure logging before any work so the info/debug messages emitted by
    # the functions below honor the requested level.
    tool_utils.setup_logging(args.loglevel)

    pairs = collect_glyphstr_file_pairs(
        args.image_prefix, 'svg', include=args.include, exclude=args.exclude)
    add_image_glyphs(args.in_file, args.out_file, pairs)
if __name__ == '__main__':

View File

@ -33,12 +33,15 @@ the files without messing with the originals."""
import argparse
import glob
import logging
import os
import os.path
import re
import shutil
import sys
from nototools import tool_utils
def _is_svg(f):
return f.endswith('.svg')
@ -48,23 +51,27 @@ def _is_svg_and_startswith_emoji(f):
def _flag_rename(f):
"""Converts file names from region-flags files (upper-case ASCII) to our expected
'encoded-codepoint-ligature' form, mapping each character to the corresponding
"""Converts a file name from two-letter upper-case ASCII to our expected
'emoji_uXXXXX_XXXXX form, mapping each character to the corresponding
regional indicator symbol."""
cp_strs = []
name, ext = os.path.splitext(f)
if len(name) != 2:
raise ValueError('illegal flag name "%s"' % f)
for cp in name:
if not ('A' <= cp <= 'Z'):
raise ValueError('illegal flag name "%s"' % f)
ncp = 0x1f1e6 - 0x41 + ord(cp)
cp_strs.append("%04x" % ncp)
return 'emoji_u%s%s' % ('_'.join(cp_strs), ext)
def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None, verbosity=1):
"""Copy files from src_dir to dst_dir that match accept_pred (all if None) and rename
using rename (if not None), replacing existing files. accept_pred takes the filename
and returns True if the file should be copied, rename takes the filename and returns a
new file name."""
def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None):
"""Copy files from src_dir to dst_dir that match accept_pred (all if None) and
rename using rename (if not None), replacing existing files. accept_pred
takes the filename and returns True if the file should be copied, rename takes
the filename and returns a new file name."""
count = 0
replace_count = 0
@ -75,66 +82,69 @@ def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None, verbosity=
src = os.path.join(src_dir, src_filename)
dst = os.path.join(dst_dir, dst_filename)
if os.path.exists(dst):
if verbosity > 1:
print "Replacing existing file " + dst
logging.debug('Replacing existing file %s', dst)
os.unlink(dst)
replace_count += 1
shutil.copy2(src, dst)
if verbosity > 1:
print "cp -p %s %s" % (src, dst)
logging.debug('cp -p %s %s', src, dst)
count += 1
if verbosity:
print "Copied/renamed %d files from %s to %s" % (count, src_dir, dst_dir)
return count, replace_count
if logging.getLogger().getEffectiveLevel() <= logging.INFO:
src_short = tool_utils.short_path(src_dir)
dst_short = tool_utils.short_path(dst_dir)
logging.info('Copied %d files (replacing %d) from %s to %s',
count, replace_count, src_short, dst_short)
def build_svg_dir(dst_dir, clean=False, emoji_dir='', flags_dir=''):
    """Copy/rename svg files from emoji_dir and then flags_dir into dst_dir.

    Files are given the standard format and prefix ('emoji_u' followed by
    codepoints expressed in hex separated by underscore). If either emoji_dir
    or flags_dir is empty, it is skipped; if both are empty a warning is
    logged and nothing is copied.

    Args:
      dst_dir: target directory; passed to tool_utils.ensure_dir_exists.
      clean: passed through to tool_utils.ensure_dir_exists; requests that
        the target dir be cleaned before proceeding.
      emoji_dir: directory of emoji svg files; only those accepted by
        _is_svg_and_startswith_emoji are copied, keeping their names.
      flags_dir: directory of region-flag svg files; those accepted by
        _is_svg are copied and renamed with _flag_rename.
    """
    dst_dir = tool_utils.ensure_dir_exists(dst_dir, clean=clean)

    # This previously tested the undefined name 'flag_dir', which raised
    # NameError on every call; the parameter is 'flags_dir'.
    if not emoji_dir and not flags_dir:
        logging.warning('Nothing to do.')
        return

    if emoji_dir:
        copy_with_rename(
            emoji_dir, dst_dir, accept_pred=_is_svg_and_startswith_emoji)

    # Flags are copied second, so a flag whose renamed file collides with an
    # emoji file replaces it (copy_with_rename replaces existing files).
    if flags_dir:
        copy_with_rename(
            flags_dir, dst_dir, accept_pred=_is_svg, rename=_flag_rename)
def main(argv):
    """Parse command-line arguments and collect svg files into one directory.

    Args:
      argv: list of argument strings, without the program name.
    """
    DEFAULT_EMOJI_DIR = '[emoji]/svg'
    DEFAULT_FLAGS_DIR = '[emoji]/third_party/region-flags/svg'

    parser = argparse.ArgumentParser(
        description='Collect svg files into target directory with prefix.')
    parser.add_argument(
        'dst_dir', help='Directory to hold copied files.', metavar='dir')
    parser.add_argument(
        '--clean', '-c', help='Replace target directory', action='store_true')
    parser.add_argument(
        '--flags_dir', '-f', metavar='dir',
        help='directory containing flag svg, default %s' % DEFAULT_FLAGS_DIR,
        default=DEFAULT_FLAGS_DIR)
    parser.add_argument(
        '--emoji_dir', '-e', metavar='dir',
        help='directory containing emoji svg, default %s' % DEFAULT_EMOJI_DIR,
        default=DEFAULT_EMOJI_DIR)
    parser.add_argument(
        '-l', '--loglevel', help='log level name/value', default='warning')
    args = parser.parse_args(argv)

    tool_utils.setup_logging(args.loglevel)

    # The directory arguments (including the '[emoji]/...' defaults) are
    # passed through tool_utils.resolve_path before use.
    args.flags_dir = tool_utils.resolve_path(args.flags_dir)
    args.emoji_dir = tool_utils.resolve_path(args.emoji_dir)
    build_svg_dir(
        args.dst_dir, clean=args.clean, emoji_dir=args.emoji_dir,
        flags_dir=args.flags_dir)
if __name__ == '__main__':
main(sys.argv[1:])

View File

@ -109,7 +109,8 @@ class SvgBuilder(object):
wid = tree.attrs.get('width')
ht = tree.attrs.get('height')
if not (wid and ht):
raise "missing viewBox and width or height attrs"
raise ValueError(
'missing viewBox and width or height attrs (%s)' % filename)
x, y, w, h = 0, 0, self._strip_px(wid), self._strip_px(ht)
# We're going to assume default values for preserveAspectRatio for now,
@ -175,9 +176,11 @@ class SvgBuilder(object):
# svg element. Unlike chrome. So either we apply an inverse transform, or
# insert a group with the clip between the svg and its children. The latter
# seems cleaner, ultimately.
clip_id = 'clip_' + ''.join(random.choice(string.ascii_lowercase) for i in range(8))
clip_text = """<g clip-path="url(#%s)"><clipPath id="%s">
<path d="M%g %gh%gv%gh%gz"/></clipPath></g>""" % (clip_id, clip_id, x, y, w, h, -w)
clip_id = 'clip_' + ''.join(
random.choice(string.ascii_lowercase) for i in range(8))
clip_text = ('<g clip-path="url(#%s)"><clipPath id="%s">'
'<path d="M%g %gh%gv%gh%gz"/></clipPath></g>' % (
clip_id, clip_id, x, y, w, h, -w))
clip_tree = cleaner.tree_from_text(clip_text)
clip_tree.contents.extend(tree.contents)
tree.contents = [clip_tree]

View File

@ -17,9 +17,14 @@
import argparse
import codecs
import os.path
import logging
import os
from os import path
import re
import sys
from nototools import tool_utils
from xml.parsers import expat
from xml.sax import saxutils
@ -115,16 +120,32 @@ class SvgCleaner(object):
class _Cleaner(object):
def _clean_elem(self, node):
viewBox, width, height = None, None, None
nattrs = {}
for k, v in node.attrs.items():
if node.name == 'svg' and k in [
'x', 'y', 'id', 'version', 'viewBox', 'width', 'height',
'enable-background', 'xml:space']:
if k == 'viewBox':
viewBox = v
elif k == 'width':
width = v
elif k == 'height':
height = v
continue
v = re.sub('\s+', ' ', v)
nattrs[k] = v
if node.name == 'svg':
if not width or not height:
if not viewBox:
raise ValueError('no viewBox, width, or height')
width, height = viewBox.split()[2:]
nattrs['width'] = width
nattrs['height'] = height
node.attrs = nattrs
# scan contents. remove any empty text nodes, or empty 'g' element nodes.
# if a 'g' element has no attrs and only one subnode, replace it with the
# subnode.
@ -214,13 +235,16 @@ class SvgCleaner(object):
return self.tree_to_text(tree)
def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
def clean_svg_files(in_dir, out_dir, match_pat=None, clean=False):
regex = re.compile(match_pat) if match_pat else None
count = 0
if not os.path.isdir(out_dir):
os.makedirs(out_dir)
if not quiet:
print 'created output directory: %s' % out_dir
if clean and path.samefile(in_dir, out_dir):
logging.error('Cannot clean %s (same as in_dir)', out_dir)
return
out_dir = tool_utils.ensure_dir_exists(out_dir, clean=clean)
cleaner = SvgCleaner()
for file_name in os.listdir(in_dir):
if regex and not regex.match(file_name):
@ -230,25 +254,42 @@ def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
result = cleaner.clean_svg(in_fp.read())
out_path = os.path.join(out_dir, file_name)
with codecs.open(out_path, 'w', 'utf-8') as out_fp:
if not quiet:
print 'wrote: %s' % out_path
logging.debug('write: %s', out_path)
out_fp.write(result)
count += 1
if not count:
print 'failed to match any files'
logging.warning('Failed to match any files')
else:
print 'processed %s files to %s' % (count, out_dir)
logging.info('Wrote %s files to %s', count, out_dir)
def main():
    """Parse sys.argv and write 'cleaned' copies of the svg files in in_dir.

    When no output directory is given, the output goes to a sibling of
    in_dir named by appending '_clean' to in_dir.
    """
    parser = argparse.ArgumentParser(
        description="Generate 'cleaned' svg files.")
    parser.add_argument(
        'in_dir', help='Input directory.', metavar='dir')
    parser.add_argument(
        '-o', '--out_dir',
        help='Output directory, defaults to sibling of in_dir',
        metavar='dir')
    parser.add_argument(
        '-c', '--clean', help='Clean output directory', action='store_true')
    parser.add_argument(
        '-r', '--regex',
        help='Regex to select files, default matches all files.',
        metavar='regex', default=None)
    parser.add_argument(
        '-l', '--loglevel', help='log level name/value', default='warning')
    args = parser.parse_args()

    tool_utils.setup_logging(args.loglevel)

    if not args.out_dir:
        # Drop one trailing slash so '_clean' is appended to the directory
        # name itself rather than creating a child named '_clean'.
        if args.in_dir.endswith('/'):
            args.in_dir = args.in_dir[:-1]
        args.out_dir = args.in_dir + '_clean'
        logging.info('Writing output to %s', args.out_dir)

    clean_svg_files(
        args.in_dir, args.out_dir, match_pat=args.regex, clean=args.clean)
if __name__ == '__main__':