# diff --git a/add_svg_glyphs.py b/add_svg_glyphs.py new file mode 100755 index 000000000..413f0d398 --- /dev/null +++ b/add_svg_glyphs.py @@ -0,0 +1,287 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt

"""Tool to update GSUB, hmtx, cmap, glyf tables with svg image glyphs."""

import argparse
import glob
import os
import re
import sys

# find the noto root, so we can get nototools
# alternatively we could just define PYTHONPATH or always run this from
# noto root, but for testing we might not always be doing that.
_noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
sys.path.append(_noto_root)

from fontTools.ttLib.tables import otTables
from fontTools.ttLib.tables import _g_l_y_f
from fontTools.ttLib.tables import S_V_G_ as SVG
from fontTools import ttx
from nototools import add_emoji_gsub

import svg_builder
import svg_cleaner


class FontBuilder(object):
    """A utility for mutating a ttx font.

    This maintains the glyph_order, cmap, and hmtx tables, and optionally the
    GSUB, glyf, and SVG tables as well (see init_gsub, init_glyf, init_svg).
    """

    def __init__(self, font):
        self.font = font
        self.glyph_order = font.getGlyphOrder()
        # Only the first cmap subtable is read and updated.
        self.cmap = font['cmap'].tables[0].cmap
        self.hmtx = font['hmtx'].metrics

    def init_gsub(self):
        """Call this if you are going to add ligatures to the font.

        Creates a GSUB table if there isn't one already. Safe to call more
        than once; later calls are no-ops.
        """
        if hasattr(self, 'ligatures'):
            return
        font = self.font
        if 'GSUB' not in font:
            ligature_subst = otTables.LigatureSubst()
            ligature_subst.ligatures = {}

            lookup = otTables.Lookup()
            lookup.LookupType = 4
            lookup.LookupFlag = 0
            lookup.SubTableCount = 1
            lookup.SubTable = [ligature_subst]

            font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
        else:
            # Only the first lookup of an existing GSUB is modified, and it
            # must be a plain ligature substitution.
            lookup = font['GSUB'].table.LookupList.Lookup[0]
            assert lookup.LookupType == 4
            assert lookup.LookupFlag == 0
        self.ligatures = lookup.SubTable[0].ligatures

    def init_glyf(self):
        """Call this if you need to create empty glyf entries in the font
        when you add a new glyph. Later calls are no-ops."""
        if hasattr(self, 'glyphs'):
            return
        font = self.font
        if 'glyf' not in font:
            glyf_table = _g_l_y_f.table__g_l_y_f()
            glyf_table.glyphs = {}
            glyf_table.glyphOrder = self.glyph_order
            font['glyf'] = glyf_table
        self.glyphs = font['glyf'].glyphs

    def init_svg(self):
        """Call this if you expect to add SVG images in the font.

        This calls init_glyf since SVG support currently requires fallback
        glyf records for each SVG image. Later calls are no-ops.
        """
        if hasattr(self, 'svgs'):
            return

        # svg requires glyf
        self.init_glyf()

        font = self.font
        if 'SVG ' not in font:
            svg_table = SVG.table_S_V_G_()
            svg_table.docList = []
            svg_table.colorPalettes = None
            font['SVG '] = svg_table
        self.svgs = font['SVG '].docList

    def glyph_name(self, string):
        """Return the glyph name for a character or ligature string: one
        'uXXXX' piece per character, joined by underscores."""
        return "_".join(["u%04X" % ord(char) for char in string])

    def glyph_name_to_index(self, name):
        """Return the index of name in the glyph order, or -1 if absent."""
        try:
            return self.glyph_order.index(name)
        except ValueError:
            return -1

    def glyph_index_to_name(self, glyph_index):
        """Return the glyph name at glyph_index, or '' if out of range."""
        # Reject negative values explicitly: -1 is the 'not found' result of
        # glyph_name_to_index and must not wrap around to the last glyph.
        if 0 <= glyph_index < len(self.glyph_order):
            return self.glyph_order[glyph_index]
        return ''

    def have_glyph(self, name):
        """Return True if a glyph called name is in the glyph order."""
        return self.glyph_name_to_index(name) >= 0

    def _add_ligature(self, glyphstr):
        """Add a ligature record mapping the component glyphs of glyphstr to
        the ligature glyph, keyed by the first component."""
        lig = otTables.Ligature()
        lig.CompCount = len(glyphstr)
        lig.Component = [self.glyph_name(ch) for ch in glyphstr[1:]]
        lig.LigGlyph = self.glyph_name(glyphstr)

        first = self.glyph_name(glyphstr[0])
        self.ligatures.setdefault(first, []).append(lig)

    def _add_empty_glyph(self, glyphstr, name):
        """Create an empty glyph. If glyphstr is not a ligature, add a cmap
        entry for it."""
        if len(glyphstr) == 1:
            self.cmap[ord(glyphstr)] = name
        self.hmtx[name] = [0, 0]
        self.glyph_order.append(name)
        if hasattr(self, 'glyphs'):
            self.glyphs[name] = _g_l_y_f.Glyph()

    def add_components_and_ligature(self, glyphstr):
        """Convert glyphstr to a name and check if it already exists. If
        not, check if it is a ligature (longer than one codepoint), and if it
        is, generate empty glyphs with cmap entries for any missing ligature
        components and add a ligature record. Then generate an empty glyph
        for the name.

        Return a tuple with the name, index, and a bool indicating whether
        the glyph already existed.
        """
        name = self.glyph_name(glyphstr)
        index = self.glyph_name_to_index(name)
        exists = index >= 0
        if not exists:
            if len(glyphstr) > 1:
                # Make sure the ligature table exists even if the caller did
                # not call init_gsub first (it is a no-op when already done).
                self.init_gsub()
                for char in glyphstr:
                    if ord(char) not in self.cmap:
                        self._add_empty_glyph(char, self.glyph_name(char))
                self._add_ligature(glyphstr)
            # The new glyph is appended, so its index is the current length.
            index = len(self.glyph_order)
            self._add_empty_glyph(glyphstr, name)
        return name, index, exists

    def add_svg(self, doc, hmetrics, name, index):
        """Add an svg table entry. If hmetrics is not None, update the hmtx
        table. This expects the glyph has already been added, and that
        init_svg has been called."""
        # sanity check to make sure name and index correspond.
        assert name == self.glyph_index_to_name(index)
        if hmetrics:
            self.hmtx[name] = hmetrics
        # startGlyphId, endGlyphId are the same
        svg_record = (doc, index, index)
        self.svgs.append(svg_record)


def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None,
                                verbosity=1):
    """Scan files with the given prefix and extension, and return a list of
    (glyphstr, filename) where glyphstr is the character or ligature, and
    filename is the image file associated with it.

    The glyphstr is formed by decoding the filename (exclusive of the prefix
    and extension) as a sequence of hex codepoints separated by underscore.
    Include, if defined, is a regex string to include only matched filenames.
    Exclude, if defined, is a regex string to exclude matched filenames, and
    is applied after include.

    Raises Exception if no files remain after filtering, and ValueError if a
    filename does not decode as hex codepoints.
    """
    image_files = {}
    glob_pat = "%s*.%s" % (prefix, ext)
    leading = len(prefix)
    trailing = len(ext) + 1  # include dot
    if verbosity:
        print("Looking for images matching '%s'." % glob_pat)
    ex_count = 0
    ex = re.compile(exclude) if exclude else None
    inc = re.compile(include) if include else None
    if verbosity and inc:
        print("Including images matching '%s'." % include)
    if verbosity and ex:
        print("Excluding images matching '%s'." % exclude)

    for image_file in glob.glob(glob_pat):
        if inc and not inc.search(image_file):
            continue

        if ex and ex.search(image_file):
            if verbosity > 1:
                print("Exclude %s" % image_file)
            ex_count += 1
            continue

        # split() handles both the single-codepoint and the ligature case.
        codes = image_file[leading:-trailing]
        u = "".join([unichr(int(code, 16)) for code in codes.split("_")])
        image_files[u] = image_file

    if verbosity and ex_count:
        print("Excluded %d files." % ex_count)
    if not image_files:
        raise Exception("No image files matching '%s'." % glob_pat)
    if verbosity:
        print("Included %s files." % len(image_files))
    # Return a real list: callers sort the result in place.
    return list(image_files.items())


def sort_glyphstr_tuples(glyphstr_tuples):
    """Sort the list in place. It contains tuples whose first element is a
    string representing a character or ligature. It is sorted with shorter
    glyphstrs first, then alphabetically. This ensures that ligature
    components are added to the font before any ligatures that contain
    them."""
    glyphstr_tuples.sort(key=lambda t: (len(t[0]), t[0]))


def add_image_glyphs(in_file, out_file, pairs, verbosity=1):
    """Add images from pairs (glyphstr, filename) to .ttx file in_file and
    write to .ttx file out_file. The pairs list is sorted in place."""
    quiet = verbosity < 2
    font = ttx.TTFont(quiet=quiet)
    font.importXML(in_file, quiet=quiet)

    sort_glyphstr_tuples(pairs)

    font_builder = FontBuilder(font)
    # we've already sorted by length, so the longest glyphstrs are at the
    # end. To see if we have ligatures, we just need to check the last one.
    # An empty list has no ligatures.
    if pairs and len(pairs[-1][0]) > 1:
        font_builder.init_gsub()

    img_builder = svg_builder.SvgBuilder(font_builder)
    for glyphstr, filename in pairs:
        if verbosity > 1:
            print("Adding glyph for U+%s" %
                  ",".join(["%04X" % ord(char) for char in glyphstr]))
        img_builder.add_from_filename(glyphstr, filename)

    font.saveXML(out_file, quiet=quiet)
    if verbosity:
        print("added %s images to %s" % (len(pairs), out_file))


def main(argv):
    """Parse argv (without the program name) and add the matching svg images
    to the input ttx file."""
    usage = """This will search for files that have image_prefix followed by one or more
    hex numbers (separated by underscore if more than one), and end in ".svg".
    For example, if image_prefix is "icons/u", then files with names like
    "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded. The script
    then adds cmap, hmtx, and potentially GSUB entries for the Unicode
    characters found. The advance width will be chosen based on image aspect
    ratio. If Unicode values outside the BMP are desired, the existing cmap
    table should be of the appropriate (format 12) type. Only the first cmap
    table and the first GSUB lookup (if existing) are modified."""

    parser = argparse.ArgumentParser(
        description="Update cmap, glyf, GSUB, and hmtx tables from image glyphs.",
        epilog=usage)
    parser.add_argument('in_file', help="Input ttx file name.")
    parser.add_argument('out_file', help="Output ttx file name.")
    parser.add_argument('image_prefix', help="Location and prefix of image files.")
    parser.add_argument('-i', '--include',
                        help='include files whose name matches this regex')
    parser.add_argument('-e', '--exclude',
                        help='exclude files whose name matches this regex')
    # -q and -v share one destination: 0 is quiet, 1 (default) is normal,
    # 2 is verbose.
    parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.",
                        default=1, action='store_const', const=0)
    parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.",
                        action='store_const', const=2)
    args = parser.parse_args(argv)

    pairs = collect_glyphstr_file_pairs(
        args.image_prefix, 'svg', include=args.include, exclude=args.exclude,
        verbosity=args.v)
    add_image_glyphs(args.in_file, args.out_file, pairs, verbosity=args.v)


if __name__ == '__main__':
    main(sys.argv[1:])

# diff --git a/generate_test_html.py b/generate_test_html.py new file mode 100755 index 000000000..d4df53713 --- /dev/null +++ b/generate_test_html.py @@ -0,0 +1,196 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt

import argparse
import os
import os.path
import re
import sys

from fontTools import ttx

import add_svg_glyphs


def do_generate_test_html(font_basename, pairs, glyph=None, verbosity=1):
    """Write <font_basename>_test.html, a page showing every glyph in pairs.

    pairs is a list of (glyphstr, filename) tuples; only glyphstr is used.
    glyph, if given, is the html-encoded text (e.g. '&#x1f4a9;') of the glyph
    to show initially; if it is missing or not found, the first glyph in
    pairs is used.
    """
    # NOTE(review): the markup in the three templates below and in the
    # per-glyph 'text' format appears to have been stripped from this copy of
    # the source (only text content and %(...)s placeholders survive). In
    # particular 'header' no longer contains the placeholder that
    # 'header % font_name' needs. Restore the templates from the original
    # file before relying on this function.
    header = r"""





"""

    body_head = r"""

Test for SVG glyphs in %(font)s. It uses the proposed
SVG-in-OpenType format.
View using Firefox 26 and later.


%(glyph)s

%(glyph_hex)s



"""

    body_tail = r"""



"""

    font_name = font_basename + ".woff"
    html_name = font_basename + "_test.html"

    found_initial_glyph = False
    initial_glyph_str = None
    initial_glyph_hex = None
    text_parts = []
    for glyphstr, _ in pairs:
        name_parts = []
        hex_parts = []
        for cp in glyphstr:
            hex_str = hex(ord(cp))
            # numeric character reference without the leading '0x'
            name_parts.append('&#x%s;' % hex_str[2:])
            hex_parts.append(hex_str)
        glyph_str = ''.join(name_parts)

        if not found_initial_glyph:
            if not glyph or glyph_str == glyph:
                initial_glyph_str = glyph_str
                initial_glyph_hex = ' '.join(hex_parts)
                found_initial_glyph = True
            elif not initial_glyph_str:
                # Remember the first glyph as a fallback in case the
                # requested one never shows up.
                initial_glyph_str = glyph_str
                initial_glyph_hex = ' '.join(hex_parts)

        text = '%s' % glyph_str
        text_parts.append(text)

    if verbosity and glyph and not found_initial_glyph:
        print("Did not find glyph '%s', using initial glyph '%s'" %
              (glyph, initial_glyph_str))
    elif verbosity > 1 and not glyph:
        print("Using initial glyph '%s'" % initial_glyph_str)

    lines = [header % font_name]
    lines.append(body_head % {'font': font_name, 'glyph': initial_glyph_str,
                              'glyph_hex': initial_glyph_hex})
    lines.extend(text_parts)  # we'll end up with space between each emoji
    lines.append(body_tail)
    output = '\n'.join(lines)
    with open(html_name, 'w') as fp:
        fp.write(output)
    if verbosity:
        print('Wrote ' + html_name)


def do_generate_fonts(template_file, font_basename, pairs, reuse=False,
                      verbosity=1):
    """Build <font_basename>.ttx from the template and images, then convert
    it to <font_basename>.woff. If reuse is true and a readable woff already
    exists, nothing is generated."""
    out_woff = font_basename + '.woff'
    if reuse and os.path.isfile(out_woff) and os.access(out_woff, os.R_OK):
        if verbosity:
            print('Reusing ' + out_woff)
        return

    out_ttx = font_basename + '.ttx'
    add_svg_glyphs.add_image_glyphs(template_file, out_ttx, pairs,
                                    verbosity=verbosity)

    quiet = verbosity < 2
    font = ttx.TTFont(flavor='woff', quiet=quiet)
    font.importXML(out_ttx, quiet=quiet)
    font.save(out_woff)
    if verbosity:
        print('Wrote ' + out_woff)


def main(argv):
    """Parse argv (without the program name), then generate the font and the
    html test page."""
    usage = """This will search for files that have image_prefix followed by one or more
    hex numbers (separated by underscore if more than one), and end in ".svg".
    For example, if image_prefix is "icons/u", then files with names like
    "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be found. It generates
    an SVG font from this, converts it to woff, and also generates an html test
    page containing text for all the SVG glyphs."""

    parser = argparse.ArgumentParser(
        description='Generate font and html test file.', epilog=usage)
    parser.add_argument('template_file', help='name of template .ttx file')
    parser.add_argument('image_prefix', help='location and prefix of image files')
    parser.add_argument('-i', '--include',
                        help='include files whose name matches this regex')
    parser.add_argument('-e', '--exclude',
                        help='exclude files whose name matches this regex')
    parser.add_argument('-o', '--out_basename',
                        help='base name of (ttx, woff, html) files to generate, '
                        'defaults to the template base name')
    parser.add_argument('-g', '--glyph',
                        help='set the initial glyph text (html encoded string), '
                        'defaults to first glyph')
    parser.add_argument('-r', '--reuse_font', help='use existing woff font',
                        action='store_true')
    parser.add_argument('-q', '--quiet', dest='v', help='quiet operation',
                        default=1, action='store_const', const=0)
    parser.add_argument('-v', '--verbose', dest='v', help='verbose operation',
                        action='store_const', const=2)
    args = parser.parse_args(argv)

    pairs = add_svg_glyphs.collect_glyphstr_file_pairs(
        args.image_prefix, 'svg', include=args.include, exclude=args.exclude,
        verbosity=args.v)
    add_svg_glyphs.sort_glyphstr_tuples(pairs)

    out_basename = args.out_basename
    if not out_basename:
        # Strip everything after the first dot of the file name only (e.g.
        # '.tmpl.ttx'), so dots in the directory part ('./x.ttx',
        # 'fonts.v2/x.ttx') do not truncate the path.
        dir_name, file_name = os.path.split(args.template_file)
        out_basename = os.path.join(dir_name, file_name.split('.')[0])
        if args.v:
            print("Output basename is %s." % out_basename)
    do_generate_fonts(args.template_file, out_basename, pairs,
                      reuse=args.reuse_font, verbosity=args.v)
    do_generate_test_html(out_basename, pairs, glyph=args.glyph,
                          verbosity=args.v)


if __name__ == '__main__':
    main(sys.argv[1:])

# diff --git a/svg_builder.py b/svg_builder.py new file mode 100755 index 000000000..4684fc963 --- /dev/null +++ b/svg_builder.py @@ -0,0 +1,107 @@
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt

import svg_cleaner


class SvgBuilder(object):
    """Modifies a font to add SVG glyphs from a document or string. Once built you
    can call add_from_filename or add_from_doc multiple times to add SVG
    documents, which should contain a single root svg element representing the glyph.
    This element must have width and height attributes (in px), these are used to
    determine how to scale the glyph. The svg should be designed to fit inside
    this bounds and have its origin at the top left. Adding the svg generates a
    transform to scale and position the glyph, so the svg element should not have
    a transform attribute since it will be overwritten. Any id attribute on the
    glyph is also overwritten.

    Adding a glyph can generate additional default glyphs for components of a
    ligature that are not already present.

    It is possible to add SVG images to a font that already has corresponding
    glyphs. If a glyph exists already, then its hmtx advance is assumed valid.
    Otherwise we will generate an advance based on the image's width and scale
    factor. Callers should ensure that glyphs for components of ligatures are
    added before the ligatures themselves, otherwise glyphs generated for missing
    ligature components will be assigned zero metrics that will not be
    overridden later."""

    def __init__(self, font_builder):
        font_builder.init_svg()

        self.font_builder = font_builder
        self.cleaner = svg_cleaner.SvgCleaner()

        font = font_builder.font
        self.font_ascent = font['hhea'].ascent
        self.font_height = self.font_ascent - font['hhea'].descent
        self.font_upem = font['head'].unitsPerEm

    def add_from_filename(self, ustr, filename):
        """Read the svg document in filename and add it for ustr."""
        with open(filename, "r") as fp:
            return self.add_from_doc(ustr, fp.read())

    def _get_int_px(self, val):
        """Return the integer in a string like '128px'.

        Raises ValueError if val does not end in 'px' (any case) or the rest
        is not an integer.
        """
        if not val.lower().endswith('px'):
            # Raising a plain string is not a valid exception.
            raise ValueError(
                "expected width or height ending in 'px' but got: %s" % val)
        return int(val[:-2])

    def add_from_doc(self, ustr, svgdoc):
        """Cleans the svg doc, tweaks the root svg element's
        attributes, then updates the font. ustr is the character or ligature
        string, svgdoc is the svg document xml. The doc must have a single
        svg root element with width and height attributes."""

        # The svg element must have an id attribute of the form 'glyphNNN' where NNN
        # is the glyph id. We capture the index of the glyph we're adding and write
        # it into the svg.
        #
        # We generate a transform that places the origin at the top left of the
        # ascent and uniformly scales it to fit both the font height (ascent -
        # descent) and glyph advance if it is already present. The width and height
        # attributes are not used by rendering, so they are removed from the element
        # once we're done with them.

        cleaner = self.cleaner
        fbuilder = self.font_builder

        tree = cleaner.tree_from_text(svgdoc)
        cleaner.clean_tree(tree)

        name, index, exists = fbuilder.add_components_and_ligature(ustr)

        tree.attrs['id'] = 'glyph%s' % index

        image_width = self._get_int_px(tree.attrs.pop('width'))
        image_height = self._get_int_px(tree.attrs.pop('height'))
        scale = float(self.font_height) / image_height
        if exists:
            width = fbuilder.hmtx[name][0]
            # Special case for preexisting zero advance, we scale to height.
            if width > 0:
                # Use the smaller of the two scales so the image fits both
                # the font height and the existing advance.
                hscale = float(width) / image_width
                if hscale < scale:
                    scale = hscale

        transform = 'translate(0, -%s) scale(%s)' % (self.font_ascent, scale)
        tree.attrs['transform'] = transform

        svgdoc = cleaner.tree_to_text(tree)

        hmetrics = None
        if not exists:
            # horiz advance and lsb
            hmetrics = [int(round(image_width * scale)), 0]
        fbuilder.add_svg(svgdoc, hmetrics, name, index)

# diff --git a/svg_cleaner.py b/svg_cleaner.py new file mode 100755 index 000000000..aaff92760 --- /dev/null +++ b/svg_cleaner.py @@ -0,0 +1,254 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt

import argparse
import codecs
import os.path
import re
import sys
from xml.parsers import expat
from xml.sax import saxutils

# Expat doesn't allow me to identify empty tags (in particular, with an
# empty tag the parse location for the start and end is not the same) so I
# have to take a dom-like approach if I want to identify them. There are a
# lot of empty tags in svg. This way I can do some other kinds of cleanup
# as well (remove unnecessary 'g' elements, for instance).

# Use nodes instead of tuples and strings because it's easier to mutate
# a tree of these, and cleaner will want to do this.


class _Elem_Node(object):
    """An element: tag name, attribute dict, and a list of child nodes."""

    def __init__(self, name, attrs, contents):
        self.name = name
        self.attrs = attrs
        self.contents = contents

    def __repr__(self):
        line = ["elem(name: '%s'" % self.name]
        if self.attrs:
            line.append(" attrs: '%s'" % self.attrs)
        if self.contents:
            line.append(" contents[%s]: '%s'" % (len(self.contents), self.contents))
        line.append(')')
        return ''.join(line)


class _Text_Node(object):
    """A run of character data."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return "text('%s')" % self.text


class SvgCleaner(object):
    """Strip out unwanted parts of an svg file, primarily the xml declaration and
    doctype lines, comments, and some attributes of svg elements.
    The id will be replaced when it is inserted into the font. viewBox causes
    unwanted scaling when used in a font and its effect is difficult to
    predict. version is unneeded, xml:space is ignored (we're processing spaces
    so a request to maintain them has no effect). enable-background appears to
    have no effect. x and y on the outermost svg element have no effect. We
    keep width and height, and will elsewhere assume these are the dimensions
    used for the character box."""

    def __init__(self):
        self.reader = SvgCleaner._Reader()
        self.cleaner = SvgCleaner._Cleaner()
        self.writer = SvgCleaner._Writer()

    class _Reader(object):
        """Loosely based on fonttools's XMLReader. This generates a tree of nodes,
        either element nodes or text nodes. Successive text content is merged
        into one node, so contents will never contain more than one _Text_Node in
        a row. This drops comments, xml declarations, and doctypes."""

        def _reset(self, parser):
            self._stack = []
            self._textbuf = []

        def _start_element(self, name, attrs):
            self._flush_textbuf()
            node = _Elem_Node(name, attrs, [])
            if self._stack:
                self._stack[-1].contents.append(node)
            self._stack.append(node)

        def _end_element(self, name):
            self._flush_textbuf()
            # Never pop the root: from_text returns it from the stack.
            if len(self._stack) > 1:
                self._stack.pop()

        def _character_data(self, data):
            if self._stack:
                self._textbuf.append(data)

        def _flush_textbuf(self):
            if self._textbuf:
                node = _Text_Node(''.join(self._textbuf))
                self._stack[-1].contents.append(node)
                self._textbuf = []

        def from_text(self, data):
            """Return the root node of a tree representing the svg data.

            Raises xml.parsers.expat.ExpatError if data is not well-formed.
            """
            parser = expat.ParserCreate()
            parser.StartElementHandler = self._start_element
            parser.EndElementHandler = self._end_element
            parser.CharacterDataHandler = self._character_data
            self._reset(parser)
            # Pass the 'final' flag so a truncated or empty document is
            # reported by expat instead of yielding a partial tree.
            parser.Parse(data, True)
            return self._stack[0]

    class _Cleaner(object):
        """Normalizes whitespace and removes unwanted attributes, empty text
        nodes, and redundant 'g' elements from a tree, in place."""

        # Attributes dropped from any element named 'svg'.
        _SVG_DROPPED_ATTRS = frozenset([
            'x', 'y', 'id', 'version', 'viewBox', 'enable-background',
            'xml:space'])

        def _clean_elem(self, node):
            nattrs = {}
            for k, v in node.attrs.items():
                if node.name == 'svg' and k in self._SVG_DROPPED_ATTRS:
                    continue
                nattrs[k] = re.sub(r'\s+', ' ', v)
            node.attrs = nattrs

            # scan contents. remove any empty text nodes, or empty 'g' element
            # nodes. if a 'g' element has no attrs and only one subnode,
            # replace it with the subnode.
            kept = []
            for n in node.contents:
                if isinstance(n, _Text_Node):
                    if not n.text:
                        continue
                elif n.name == 'g':
                    if not n.contents:
                        continue
                    if not n.attrs and len(n.contents) == 1:
                        n = n.contents[0]
                kept.append(n)
            node.contents = kept

        def _clean_text(self, node):
            text = node.text.strip()
            # common case is text is empty (line endings between elements)
            if text:
                text = re.sub(r'\s+', ' ', text)
            node.text = text

        def clean(self, node):
            """Clean node and everything below it, in place."""
            if isinstance(node, _Text_Node):
                self._clean_text(node)
            else:
                # do contents first, so we can check for empty subnodes after
                for n in node.contents:
                    self.clean(n)
                self._clean_elem(node)

    class _Writer(object):
        """Serializes a tree to text, one line per start tag, end tag, and
        text node. Elements are indented by depth, attributes are written in
        sorted order, and elements without contents become empty tags."""

        def _write_node(self, node, lines, indent):
            """Node is a node generated by _Reader, either a TextNode or an
            ElementNode. Lines is a list to collect the lines of output. Indent is
            the indentation level for this node."""

            if isinstance(node, _Text_Node):
                if node.text:
                    lines.append(node.text)
                return

            margin = ' ' * indent
            line = [margin, '<%s' % node.name]
            for k in sorted(node.attrs.keys()):
                line.append(' %s=%s' % (k, saxutils.quoteattr(node.attrs[k])))
            if node.contents:
                line.append('>')
                lines.append(''.join(line))
                for elem in node.contents:
                    self._write_node(elem, lines, indent + 1)
                # closing tag, at the same indentation as the opening tag
                lines.append('%s</%s>' % (margin, node.name))
            else:
                line.append('/>')
                lines.append(''.join(line))

        def to_text(self, root):
            """Return the tree under root as a newline-joined string."""
            # set up lines for recursive calls, let them append lines, then
            # return the result.
            lines = []
            self._write_node(root, lines, 0)
            return '\n'.join(lines)

    def tree_from_text(self, svg_text):
        """Parse svg_text and return the root node of its tree."""
        return self.reader.from_text(svg_text)

    def clean_tree(self, svg_tree):
        """Clean svg_tree in place."""
        self.cleaner.clean(svg_tree)

    def tree_to_text(self, svg_tree):
        """Return svg_tree serialized as text."""
        return self.writer.to_text(svg_tree)

    def clean_svg(self, svg_text):
        """Return the cleaned svg_text."""
        tree = self.tree_from_text(svg_text)
        self.clean_tree(tree)
        return self.tree_to_text(tree)


def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
    """Clean every file in in_dir whose name matches match_pat (all files if
    match_pat is None) and write the result, utf-8 encoded, under the same
    name in out_dir. out_dir is created if needed. Subdirectories of in_dir
    are skipped."""
    regex = re.compile(match_pat) if match_pat else None
    count = 0
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        if not quiet:
            print('created output directory: %s' % out_dir)
    cleaner = SvgCleaner()
    for file_name in os.listdir(in_dir):
        if regex and not regex.match(file_name):
            continue
        in_path = os.path.join(in_dir, file_name)
        if not os.path.isfile(in_path):
            continue
        # Read bytes so expat applies the encoding declared in the document.
        with open(in_path, 'rb') as in_fp:
            result = cleaner.clean_svg(in_fp.read())
        out_path = os.path.join(out_dir, file_name)
        with codecs.open(out_path, 'w', 'utf-8') as out_fp:
            if not quiet:
                print('wrote: %s' % out_path)
            out_fp.write(result)
        count += 1
    if not count:
        print('failed to match any files')
    else:
        print('processed %s files to %s' % (count, out_dir))


def main():
    """Command line entry point: clean the svg files in a directory."""
    parser = argparse.ArgumentParser(
        description="Generate 'cleaned' svg files.")
    parser.add_argument('in_dir', help='Input directory.')
    parser.add_argument('out_dir', help='Output directory.')
    # nargs='?' makes the positional optional, as the help text promises.
    parser.add_argument('regex', nargs='?', default=None,
                        help='Regex to select files, default matches all files.')
    parser.add_argument('--quiet', '-q', help='Quiet operation.',
                        action='store_true')
    args = parser.parse_args()
    clean_svg_files(args.in_dir, args.out_dir, match_pat=args.regex,
                    quiet=args.quiet)


if __name__ == '__main__':
    main()