From 83978272f52474fa25469da30636eb656ad4b5d3 Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 12 Apr 2016 17:07:27 -0700 Subject: [PATCH] Update svg emoji tools. - Reformat lines to 80 columns. - Use logging instead of verbose/quiet other options. - A few miscellaneous small fixes/tweaks to parameters. Removed some file-path-relative stuff that assumed old directory structure. This uses some new fns in nototools.tool_utils, see nototools#220. --- add_svg_glyphs.py | 161 ++++++++++++++++++++++--------------------- collect_emoji_svg.py | 112 ++++++++++++++++-------------- svg_builder.py | 11 +-- svg_cleaner.py | 74 ++++++++++++++++---- 4 files changed, 211 insertions(+), 147 deletions(-) diff --git a/add_svg_glyphs.py b/add_svg_glyphs.py index 191b5ffad..633700282 100755 --- a/add_svg_glyphs.py +++ b/add_svg_glyphs.py @@ -19,28 +19,25 @@ import argparse import glob +import logging import os import re import sys -# find the noto root, so we can get nototools -# alternatively we could just define PYTHONPATH or always run this from -# noto root, but for testing we might not always be doing that. -_noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')) -sys.path.append(_noto_root) - from fontTools.ttLib.tables import otTables from fontTools.ttLib.tables import _g_l_y_f from fontTools.ttLib.tables import S_V_G_ as SVG from fontTools import ttx +from nototools import tool_utils + import add_emoji_gsub import svg_builder -import svg_cleaner + class FontBuilder(object): - """A utility for mutating a ttx font. This maintains glyph_order, cmap, and hmtx tables, - and optionally GSUB, glyf, and SVN tables as well.""" + """A utility for mutating a ttx font. This maintains glyph_order, cmap, and + hmtx tables, and optionally GSUB, glyf, and SVN tables as well.""" def __init__(self, font): self.font = font; @@ -49,8 +46,8 @@ class FontBuilder(object): self.hmtx = font['hmtx'].metrics def init_gsub(self): - """Call this if you are going to add ligatures to the font. Creates a GSUB table - if there isn't one already.""" + """Call this if you are going to add ligatures to the font. Creates a GSUB + table if there isn't one already.""" if hasattr(self, 'ligatures'): return @@ -73,8 +70,8 @@ class FontBuilder(object): self.ligatures = lookup.SubTable[0].ligatures def init_glyf(self): - """Call this if you need to create empty glyf entries in the font when you add a new - glyph.""" + """Call this if you need to create empty glyf entries in the font when you + add a new glyph.""" if hasattr(self, 'glyphs'): return @@ -87,8 +84,9 @@ class FontBuilder(object): self.glyphs = font['glyf'].glyphs def init_svg(self): - """Call this if you expect to add SVG images in the font. This calls init_glyf since SVG - support currently requires fallback glyf records for each SVG image.""" + """Call this if you expect to add SVG images in the font. This calls + init_glyf since SVG support currently requires fallback glyf records for + each SVG image.""" if hasattr(self, 'svgs'): return @@ -131,7 +129,8 @@ class FontBuilder(object): self.ligatures[first] = [lig] def _add_empty_glyph(self, glyphstr, name): - """Create an empty glyph. If glyphstr is not a ligature, add a cmap entry for it.""" + """Create an empty glyph. If glyphstr is not a ligature, add a cmap entry + for it.""" if len(glyphstr) == 1: self.cmap[ord(glyphstr)] = name self.hmtx[name] = [0, 0] @@ -140,11 +139,12 @@ class FontBuilder(object): self.glyphs[name] = _g_l_y_f.Glyph() def add_components_and_ligature(self, glyphstr): - """Convert glyphstr to a name and check if it already exists. If not, check if it is a - ligature (longer than one codepoint), and if it is, generate empty glyphs with cmap - entries for any missing ligature components and add a ligature record. Then generate - an empty glyph for the name. Return a tuple with the name, index, and a bool - indicating whether the glyph already existed.""" + """Convert glyphstr to a name and check if it already exists. If not, check + if it is a ligature (longer than one codepoint), and if it is, generate + empty glyphs with cmap entries for any missing ligature components and add a + ligature record. Then generate an empty glyph for the name. Return a tuple + with the name, index, and a bool indicating whether the glyph already + existed.""" name = self.glyph_name(glyphstr) index = self.glyph_name_to_index(name) @@ -161,8 +161,8 @@ class FontBuilder(object): return name, index, exists def add_svg(self, doc, hmetrics, name, index): - """Add an svg table entry. If hmetrics is not None, update the hmtx table. This - expects the glyph has already been added.""" + """Add an svg table entry. If hmetrics is not None, update the hmtx table. + This expects the glyph has already been added.""" # sanity check to make sure name and index correspond. assert name == self.glyph_index_to_name(index) if hmetrics: @@ -171,27 +171,27 @@ class FontBuilder(object): self.svgs.append(svg_record) -def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1): - """Scan files with the given prefix and extension, and return a list of (glyphstr, - filename) where glyphstr is the character or ligature, and filename is the image file - associated with it. The glyphstr is formed by decoding the filename (exclusive of the - prefix) as a sequence of hex codepoints separated by underscore. Include, if defined, is - a regex string to include only matched filenames. Exclude, if defined, is a regex string - to exclude matched filenames, and is applied after include.""" +def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None): + """Scan files with the given prefix and extension, and return a list of + (glyphstr, filename) where glyphstr is the character or ligature, and filename + is the image file associated with it. The glyphstr is formed by decoding the + filename (exclusive of the prefix) as a sequence of hex codepoints separated + by underscore. Include, if defined, is a regex string to include only matched + filenames. Exclude, if defined, is a regex string to exclude matched + filenames, and is applied after include.""" image_files = {} glob_pat = "%s*.%s" % (prefix, ext) leading = len(prefix) trailing = len(ext) + 1 # include dot - if verbosity: - print "Looking for images matching '%s'." % glob_pat + logging.info("Looking for images matching '%s'.", glob_pat) ex_count = 0 ex = re.compile(exclude) if exclude else None inc = re.compile(include) if include else None - if verbosity and inc: - print "Including images matching '%s'." % include - if verbosity and ex: - print "Excluding images matching '%s'." % exclude + if inc: + logging.info("Including images matching '%s'.", include) + if ex: + logging.info("Excluding images matching '%s'.", exclude) for image_file in glob.glob(glob_pat): if inc and not inc.search(image_file): @@ -211,76 +211,83 @@ def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosi u = unichr(int(codes, 16)) image_files[u] = image_file - if verbosity and ex_count: - print "Excluded %d files." % ex_count + if ex_count: + logging.info("Excluded %d files.", ex_count) if not image_files: - raise Exception ("No image files matching '%s'." % glob_pat) - if verbosity: - print "Included %s files." % len(image_files) + raise Exception ("No image files matching '%s'.", glob_pat) + logging.info("Matched %s files.", len(image_files)) return image_files.items() def sort_glyphstr_tuples(glyphstr_tuples): - """The list contains tuples whose first element is a string representing a character or - ligature. It is sorted with shorter glyphstrs first, then alphabetically. This ensures - that ligature components are added to the font before any ligatures that contain them.""" + """The list contains tuples whose first element is a string representing a + character or ligature. It is sorted with shorter glyphstrs first, then + alphabetically. This ensures that ligature components are added to the font + before any ligatures that contain them.""" glyphstr_tuples.sort(key=lambda t: (len(t[0]), t[0])) -def add_image_glyphs(in_file, out_file, pairs, verbosity=1): +def add_image_glyphs(in_file, out_file, pairs): """Add images from pairs (glyphstr, filename) to .ttx file in_file and write to .ttx file out_file.""" - quiet = verbosity < 2 - font = ttx.TTFont(quiet=quiet) - font.importXML(in_file, quiet=quiet) + font = ttx.TTFont() + font.importXML(in_file) sort_glyphstr_tuples(pairs) font_builder = FontBuilder(font) - # we've already sorted by length, so the longest glyphstrs are at the end. To see if - # we have ligatures, we just need to check the last one. + # we've already sorted by length, so the longest glyphstrs are at the end. To + # see if we have ligatures, we just need to check the last one. if len(pairs[-1][0]) > 1: font_builder.init_gsub() img_builder = svg_builder.SvgBuilder(font_builder) for glyphstr, filename in pairs: - if verbosity > 1: - print "Adding glyph for U+%s" % ",".join(["%04X" % ord(char) for char in glyphstr]) + logging.debug("Adding glyph for U+%s", ",".join( + ["%04X" % ord(char) for char in glyphstr])) img_builder.add_from_filename(glyphstr, filename) - font.saveXML(out_file, quiet=quiet) - if verbosity: - print "added %s images to %s" % (len(pairs), out_file) + font.saveXML(out_file) + logging.info("Added %s images to %s", len(pairs), out_file) def main(argv): - usage = """This will search for files that have image_prefix followed by one or more - hex numbers (separated by underscore if more than one), and end in ".svg". - For example, if image_prefix is "icons/u", then files with names like - "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded. The script - then adds cmap, htmx, and potentially GSUB entries for the Unicode - characters found. The advance width will be chosen based on image aspect - ratio. If Unicode values outside the BMP are desired, the existing cmap - table should be of the appropriate (format 12) type. Only the first cmap - table and the first GSUB lookup (if existing) are modified.""" + usage = """This will search for files that have image_prefix followed by one + or more hex numbers (separated by underscore if more than one), and end in + ".svg". For example, if image_prefix is "icons/u", then files with names like + "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded. The script + then adds cmap, htmx, and potentially GSUB entries for the Unicode characters + found. The advance width will be chosen based on image aspect ratio. If + Unicode values outside the BMP are desired, the existing cmap table should be + of the appropriate (format 12) type. Only the first cmap table and the first + GSUB lookup (if existing) are modified.""" parser = argparse.ArgumentParser( - description="Update cmap, glyf, GSUB, and hmtx tables from image glyphs.", epilog=usage) - parser.add_argument('in_file', help="Input ttx file name.") - parser.add_argument('out_file', help="Output ttx file name.") - parser.add_argument('image_prefix', help="Location and prefix of image files.") - parser.add_argument('-i', '--include', help='include files whoses name matches this regex') - parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex') - parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1, - action='store_const', const=0) - parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.", - action='store_const', const=2) + description='Update cmap, glyf, GSUB, and hmtx tables from image glyphs.', + epilog=usage) + parser.add_argument( + 'in_file', help='Input ttx file name.', metavar='fname') + parser.add_argument( + 'out_file', help='Output ttx file name.', metavar='fname') + parser.add_argument( + 'image_prefix', help='Location and prefix of image files.', + metavar='path') + parser.add_argument( + '-i', '--include', help='include files whoses name matches this regex', + metavar='regex') + parser.add_argument( + '-e', '--exclude', help='exclude files whose name matches this regex', + metavar='regex') + parser.add_argument( + '-l', '--loglevel', help='log level name', default='warning') args = parser.parse_args(argv) - pairs = collect_glyphstr_file_pairs(args.image_prefix, 'svg', include=args.include, - exclude=args.exclude, verbosity=args.v) - add_image_glyphs(args.in_file, args.out_file, pairs, verbosity=args.v) + tool_utils.setup_logging(args.loglevel) + + pairs = collect_glyphstr_file_pairs( + args.image_prefix, 'svg', include=args.include, exclude=args.exclude) + add_image_glyphs(args.in_file, args.out_file, pairs) if __name__ == '__main__': diff --git a/collect_emoji_svg.py b/collect_emoji_svg.py index bb0f744a8..b7d8521ce 100755 --- a/collect_emoji_svg.py +++ b/collect_emoji_svg.py @@ -33,12 +33,15 @@ the files without messing with the originals.""" import argparse import glob +import logging import os import os.path import re import shutil import sys +from nototools import tool_utils + def _is_svg(f): return f.endswith('.svg') @@ -48,23 +51,27 @@ def _is_svg_and_startswith_emoji(f): def _flag_rename(f): - """Converts file names from region-flags files (upper-case ASCII) to our expected - 'encoded-codepoint-ligature' form, mapping each character to the corresponding + """Converts a file name from two-letter upper-case ASCII to our expected + 'emoji_uXXXXX_XXXXX form, mapping each character to the corresponding regional indicator symbol.""" cp_strs = [] name, ext = os.path.splitext(f) + if len(name) != 2: + raise ValueError('illegal flag name "%s"' % f) for cp in name: + if not ('A' <= cp <= 'Z'): + raise ValueError('illegal flag name "%s"' % f) ncp = 0x1f1e6 - 0x41 + ord(cp) cp_strs.append("%04x" % ncp) return 'emoji_u%s%s' % ('_'.join(cp_strs), ext) -def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None, verbosity=1): - """Copy files from src_dir to dst_dir that match accept_pred (all if None) and rename - using rename (if not None), replacing existing files. accept_pred takes the filename - and returns True if the file should be copied, rename takes the filename and returns a - new file name.""" +def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None): + """Copy files from src_dir to dst_dir that match accept_pred (all if None) and + rename using rename (if not None), replacing existing files. accept_pred + takes the filename and returns True if the file should be copied, rename takes + the filename and returns a new file name.""" count = 0 replace_count = 0 @@ -75,66 +82,69 @@ def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None, verbosity= src = os.path.join(src_dir, src_filename) dst = os.path.join(dst_dir, dst_filename) if os.path.exists(dst): - if verbosity > 1: - print "Replacing existing file " + dst + logging.debug('Replacing existing file %s', dst) os.unlink(dst) replace_count += 1 shutil.copy2(src, dst) - if verbosity > 1: - print "cp -p %s %s" % (src, dst) + logging.debug('cp -p %s %s', src, dst) count += 1 - if verbosity: - print "Copied/renamed %d files from %s to %s" % (count, src_dir, dst_dir) - return count, replace_count + if logging.getLogger().getEffectiveLevel() >= 20: + src_short = tool_utils.short_path(src_dir) + dst_short = tool_utils.short_path(dst_dir) + logging.info('Copied %d files (replacing %d) from %s to %s', + count, replace_count, src_short, dst_short) -def build_svg_dir(dst_dir, clean=False, flags_only=False, verbosity=1): - """Copies/renames files from noto/color_emoji/svg and then noto/third_party/region-flags/svg, - giving them the standard format and prefix ('emoji_u' followed by codepoints expressed - in hex separated by underscore). If clean, removes the target dir before proceding. - If flags_only, only does the region-flags.""" +def build_svg_dir(dst_dir, clean=False, emoji_dir='', flags_dir=''): + """Copies/renames files from emoji_dir and then flag_dir, giving them the + standard format and prefix ('emoji_u' followed by codepoints expressed in hex + separated by underscore). If clean, removes the target dir before proceding. + If either emoji_dir or flag_dir are empty, skips them.""" - if not os.path.isdir(dst_dir): - os.makedirs(dst_dir) - elif clean: - shutil.rmtree(dst_dir) - os.makedirs(dst_dir) + dst_dir = tool_utils.ensure_dir_exists(dst_dir, clean=clean) - # get files from path relative to noto - notopath = re.match("^.*/noto/", os.path.realpath(__file__)).group() + if not emoji_dir and not flag_dir: + logging.warning('Nothing to do.') + return - # copy region flags, generating new names based on the tlds. - flag_dir = os.path.join(notopath, "third_party/region-flags/svg") - count, replace_count = copy_with_rename( - flag_dir, dst_dir, accept_pred=_is_svg, rename=_flag_rename, verbosity=verbosity) + if emoji_dir: + copy_with_rename( + emoji_dir, dst_dir, accept_pred=_is_svg_and_startswith_emoji) - # copy the 'good' svg - if not flags_only: - svg_dir = os.path.join(notopath, "color_emoji/svg") - temp_count, temp_replace_count = copy_with_rename( - svg_dir, dst_dir, accept_pred=_is_svg_and_startswith_emoji, verbosity=verbosity) - count += temp_count - replace_count += temp_replace_count - - if verbosity: - if replace_count: - print "Replaced %d existing files" % replace_count - print "Created %d total files" % (count - replace_count) + if flags_dir: + copy_with_rename( + flags_dir, dst_dir, accept_pred=_is_svg, rename=_flag_rename) def main(argv): + DEFAULT_EMOJI_DIR = '[emoji]/svg' + DEFAULT_FLAGS_DIR = '[emoji]/third_party/region-flags/svg' + parser = argparse.ArgumentParser( - description="Collect svg files into target directory with prefix.") - parser.add_argument('dst_dir', help="Directory to hold symlinks to files.") - parser.add_argument('--clean', '-c', help="Replace target directory", action='store_true') - parser.add_argument('--flags_only', '-fo', help="Only copy region-flags", action='store_true') - parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1, - action='store_const', const=0) - parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.", - action='store_const', const=2) + description='Collect svg files into target directory with prefix.') + parser.add_argument( + 'dst_dir', help='Directory to hold copied files.', metavar='dir') + parser.add_argument( + '--clean', '-c', help='Replace target directory', action='store_true') + parser.add_argument( + '--flags_dir', '-f', metavar='dir', help='directory containing flag svg, ' + 'default %s' % DEFAULT_FLAGS_DIR, default=DEFAULT_FLAGS_DIR) + parser.add_argument( + '--emoji_dir', '-e', metavar='dir', + help='directory containing emoji svg, default %s' % DEFAULT_EMOJI_DIR, + default=DEFAULT_EMOJI_DIR) + parser.add_argument( + '-l', '--loglevel', help='log level name/value', default='warning') args = parser.parse_args(argv) - build_svg_dir(args.dst_dir, clean=args.clean, flags_only=args.flags_only, verbosity=args.v) + tool_utils.setup_logging(args.loglevel) + + args.flags_dir = tool_utils.resolve_path(args.flags_dir) + args.emoji_dir = tool_utils.resolve_path(args.emoji_dir) + build_svg_dir( + args.dst_dir, clean=args.clean, emoji_dir=args.emoji_dir, + flags_dir=args.flags_dir) + if __name__ == '__main__': main(sys.argv[1:]) diff --git a/svg_builder.py b/svg_builder.py index 8c1407e42..cab63b751 100755 --- a/svg_builder.py +++ b/svg_builder.py @@ -109,7 +109,8 @@ class SvgBuilder(object): wid = tree.attrs.get('width') ht = tree.attrs.get('height') if not (wid and ht): - raise "missing viewBox and width or height attrs" + raise ValueError( + 'missing viewBox and width or height attrs (%s)' % filename) x, y, w, h = 0, 0, self._strip_px(wid), self._strip_px(ht) # We're going to assume default values for preserveAspectRatio for now, @@ -175,9 +176,11 @@ class SvgBuilder(object): # svg element. Unlike chrome. So either we apply an inverse transform, or # insert a group with the clip between the svg and its children. The latter # seems cleaner, ultimately. - clip_id = 'clip_' + ''.join(random.choice(string.ascii_lowercase) for i in range(8)) - clip_text = """ - """ % (clip_id, clip_id, x, y, w, h, -w) + clip_id = 'clip_' + ''.join( + random.choice(string.ascii_lowercase) for i in range(8)) + clip_text = ('' + '' % ( + clip_id, clip_id, x, y, w, h, -w)) clip_tree = cleaner.tree_from_text(clip_text) clip_tree.contents.extend(tree.contents) tree.contents = [clip_tree] diff --git a/svg_cleaner.py b/svg_cleaner.py index be419046e..58a20bcb1 100755 --- a/svg_cleaner.py +++ b/svg_cleaner.py @@ -17,9 +17,14 @@ import argparse import codecs -import os.path +import logging +import os +from os import path import re import sys + +from nototools import tool_utils + from xml.parsers import expat from xml.sax import saxutils @@ -115,16 +120,32 @@ class SvgCleaner(object): class _Cleaner(object): def _clean_elem(self, node): + viewBox, width, height = None, None, None nattrs = {} for k, v in node.attrs.items(): if node.name == 'svg' and k in [ 'x', 'y', 'id', 'version', 'viewBox', 'width', 'height', 'enable-background', 'xml:space']: + if k == 'viewBox': + viewBox = v + elif k == 'width': + width = v + elif k == 'height': + height = v continue v = re.sub('\s+', ' ', v) nattrs[k] = v + + if node.name == 'svg': + if not width or not height: + if not viewBox: + raise ValueError('no viewBox, width, or height') + width, height = viewBox.split()[2:] + nattrs['width'] = width + nattrs['height'] = height node.attrs = nattrs + # scan contents. remove any empty text nodes, or empty 'g' element nodes. # if a 'g' element has no attrs and only one subnode, replace it with the # subnode. @@ -214,13 +235,16 @@ class SvgCleaner(object): return self.tree_to_text(tree) -def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False): +def clean_svg_files(in_dir, out_dir, match_pat=None, clean=False): regex = re.compile(match_pat) if match_pat else None count = 0 - if not os.path.isdir(out_dir): - os.makedirs(out_dir) - if not quiet: - print 'created output directory: %s' % out_dir + + if clean and path.samefile(in_dir, out_dir): + logging.error('Cannot clean %s (same as in_dir)', out_dir) + return + + out_dir = tool_utils.ensure_dir_exists(out_dir, clean=clean) + cleaner = SvgCleaner() for file_name in os.listdir(in_dir): if regex and not regex.match(file_name): @@ -230,25 +254,45 @@ def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False): result = cleaner.clean_svg(in_fp.read()) out_path = os.path.join(out_dir, file_name) with codecs.open(out_path, 'w', 'utf-8') as out_fp: - if not quiet: - print 'wrote: %s' % out_path + logging.debug('write: %s', out_path) out_fp.write(result) count += 1 if not count: - print 'failed to match any files' + logging.warning('Failed to match any files') else: - print 'processed %s files to %s' % (count, out_dir) + logging.info('Wrote %s files to %s', count, out_dir) def main(): parser = argparse.ArgumentParser( description="Generate 'cleaned' svg files.") - parser.add_argument('in_dir', help='Input directory.') - parser.add_argument('out_dir', help='Output directory.') - parser.add_argument('regex', help='Regex to select files, default matches all files.', default=None) - parser.add_argument('--quiet', '-q', help='Quiet operation.', action='store_true') + parser.add_argument( + 'in_dir', help='Input directory.', metavar='dir') + parser.add_argument( + '-o', '--out_dir', help='Output directory, defaults to sibling of in_dir', + metavar='dir') + parser.add_argument( + '-c', '--clean', help='Clean output directory', action='store_true') + parser.add_argument( + '-r', '--regex', help='Regex to select files, default matches all files.', + metavar='regex', default=None) + parser.add_argument( + '-q', '--quiet', dest='v', help='Quiet operation.', default=1, + action='store_const', const=0) + parser.add_argument( + '-l', '--loglevel', help='log level name/value', default='warning') args = parser.parse_args() - clean_svg_files(args.in_dir, args.out_dir, match_pat=args.regex, quiet=args.quiet) + + tool_utils.setup_logging(args.loglevel) + + if not args.out_dir: + if args.in_dir.endswith('/'): + args.in_dir = args.in_dir[:-1] + args.out_dir = args.in_dir + '_clean' + logging.info('Writing output to %s', args.out_dir) + + clean_svg_files( + args.in_dir, args.out_dir, match_pat=args.regex, clean=args.clean) if __name__ == '__main__':