Move the new files out of third_party/color_emoji and into color_emoji.

The third_party subdir is for forks/clones of external projects. The svg code isn't a modification to that project, but an addition to the font-building code in noto/color_emoji. This also fixes a slight bug in the html generation, which set the default large glyph image but forgot to set the hex version of the text below it.
2015-02-20 14:58:34 -08:00 · 2015-02-20 14:58:34 -08:00 · 69913fd3fc
parent d66b41255a
commit 69913fd3fc
4 changed files with 844 additions and 0 deletions
--- a/add_svg_glyphs.py
+++ b/add_svg_glyphs.py
@ -0,0 +1,287 @@
 #!/usr/bin/python
 # Copyright 2015 Google, Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # Google Author(s): Doug Felt
 """Tool to update GSUB, hmtx, cmap, glyf tables with svg image glyphs."""
 import argparse
 import glob
 import os
 import re
 import sys
 # find the noto root, so we can get nototools
 # alternatively we could just define PYTHONPATH or always run this from
 # noto root, but for testing we might not always be doing that.
 _noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
 sys.path.append(_noto_root)
 from fontTools.ttLib.tables import otTables
 from fontTools.ttLib.tables import _g_l_y_f
 from fontTools.ttLib.tables import S_V_G_ as SVG
 from fontTools import ttx
 from nototools import add_emoji_gsub
 import svg_builder
 import svg_cleaner
 class FontBuilder(object):
  """A utility for mutating a ttx font.

  This maintains the glyph_order, cmap, and hmtx tables, and optionally the
  GSUB, glyf, and SVG tables as well.  The optional tables are set up on demand
  by init_gsub, init_glyf, and init_svg.
  """

  def __init__(self, font):
    """Cache the font's glyph order, first cmap subtable, and hmtx metrics.

    Args:
      font: the font object to mutate; it is indexed for its 'cmap' and 'hmtx'
        tables, so both must already be present.
    """
    self.font = font
    self.glyph_order = font.getGlyphOrder()
    self.cmap = font['cmap'].tables[0].cmap
    self.hmtx = font['hmtx'].metrics

  def init_gsub(self):
    """Call this if you are going to add ligatures to the font.

    Creates a GSUB table holding a single ligature lookup if there isn't one
    already.  Otherwise the first lookup of the existing table is used, and it
    is asserted to be a ligature lookup (type 4) with no flags.  Calling this
    more than once has no further effect.
    """
    if hasattr(self, 'ligatures'):
      return
    font = self.font
    if 'GSUB' not in font:
      ligature_subst = otTables.LigatureSubst()
      ligature_subst.ligatures = {}
      lookup = otTables.Lookup()
      lookup.LookupType = 4
      lookup.LookupFlag = 0
      lookup.SubTableCount = 1
      lookup.SubTable = [ligature_subst]
      font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
    else:
      lookup = font['GSUB'].table.LookupList.Lookup[0]
      assert lookup.LookupType == 4
      assert lookup.LookupFlag == 0
    self.ligatures = lookup.SubTable[0].ligatures

  def init_glyf(self):
    """Call this if you need empty glyf entries created when you add a new glyph.

    Creates an empty glyf table if the font has none.  Calling this more than
    once has no further effect.
    """
    if hasattr(self, 'glyphs'):
      return
    font = self.font
    if 'glyf' not in font:
      glyf_table = _g_l_y_f.table__g_l_y_f()
      glyf_table.glyphs = {}
      glyf_table.glyphOrder = self.glyph_order
      font['glyf'] = glyf_table
    self.glyphs = font['glyf'].glyphs

  def init_svg(self):
    """Call this if you expect to add SVG images to the font.

    This calls init_glyf since SVG support currently requires fallback glyf
    records for each SVG image.  Creates an empty 'SVG ' table if the font has
    none.  Calling this more than once has no further effect.
    """
    if hasattr(self, 'svgs'):
      return
    # svg requires glyf
    self.init_glyf()
    font = self.font
    if 'SVG ' not in font:
      svg_table = SVG.table_S_V_G_()
      svg_table.docList = []
      svg_table.colorPalettes = None
      font['SVG '] = svg_table
    self.svgs = font['SVG '].docList

  def glyph_name(self, string):
    """Return the glyph name for a character or ligature string.

    Each character becomes 'u' plus its codepoint as at least four uppercase hex
    digits; the pieces are joined by underscores, e.g. 'AB' -> 'u0041_u0042'.
    """
    return "_".join(["u%04X" % ord(char) for char in string])

  def glyph_name_to_index(self, name):
    """Return the position of name in the glyph order, or -1 if it is absent."""
    try:
      return self.glyph_order.index(name)
    except ValueError:
      return -1

  def glyph_index_to_name(self, glyph_index):
    """Return the glyph name at glyph_index, or '' if the index is out of range.

    Negative indices are out of range.  In particular the -1 that
    glyph_name_to_index returns for a missing glyph must not wrap around to the
    last glyph in the font.
    """
    if 0 <= glyph_index < len(self.glyph_order):
      return self.glyph_order[glyph_index]
    return ''

  def have_glyph(self, name):
    """Return True if a glyph called name is in the glyph order."""
    return self.glyph_name_to_index(name) >= 0

  def _add_ligature(self, glyphstr):
    """Add a ligature record mapping the glyphs for glyphstr's characters to the
    glyph for the whole string.  init_gsub must have been called."""
    lig = otTables.Ligature()
    lig.CompCount = len(glyphstr)
    # The first component is the key of the ligatures dict, so only the rest
    # are listed on the record itself.
    lig.Component = [self.glyph_name(ch) for ch in glyphstr[1:]]
    lig.LigGlyph = self.glyph_name(glyphstr)
    first = self.glyph_name(glyphstr[0])
    self.ligatures.setdefault(first, []).append(lig)

  def _add_empty_glyph(self, glyphstr, name):
    """Create an empty glyph. If glyphstr is not a ligature, add a cmap entry for it."""
    if len(glyphstr) == 1:
      self.cmap[ord(glyphstr)] = name
    self.hmtx[name] = [0, 0]
    self.glyph_order.append(name)
    # An empty glyf record is only added once init_glyf has been called.
    if hasattr(self, 'glyphs'):
      self.glyphs[name] = _g_l_y_f.Glyph()

  def add_components_and_ligature(self, glyphstr):
    """Ensure a glyph for glyphstr exists, creating an empty one if needed.

    Converts glyphstr to a name and checks if it already exists. If not, checks
    if it is a ligature (longer than one codepoint), and if it is, generates
    empty glyphs with cmap entries for any missing ligature components and adds
    a ligature record.  Then generates an empty glyph for the name.

    Returns:
      A tuple (name, index, exists) with the glyph name, its index in the glyph
      order, and a bool indicating whether the glyph already existed.
    """
    name = self.glyph_name(glyphstr)
    index = self.glyph_name_to_index(name)
    exists = index >= 0
    if not exists:
      if len(glyphstr) > 1:
        for char in glyphstr:
          if ord(char) not in self.cmap:
            char_name = self.glyph_name(char)
            self._add_empty_glyph(char, char_name)
        self._add_ligature(glyphstr)
      # The new glyph is appended, so its index is the current length.
      index = len(self.glyph_order)
      self._add_empty_glyph(glyphstr, name)
    return name, index, exists

  def add_svg(self, doc, hmetrics, name, index):
    """Add an svg table entry for a glyph that has already been added.

    Args:
      doc: the svg document text to store.
      hmetrics: if truthy, stored as the hmtx entry for name.
      name: the glyph name.
      index: the glyph's index; asserted to correspond to name.
    """
    # sanity check to make sure name and index correspond.
    assert name == self.glyph_index_to_name(index)
    if hmetrics:
      self.hmtx[name] = hmetrics
    svg_record = (doc, index, index) # startGlyphId, endGlyphId are the same
    self.svgs.append(svg_record)
 def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1):
  """Find image files and pair each with the character or ligature it names.

  Files matching '<prefix>*.<ext>' are scanned.  The part of each filename
  between the prefix and the extension is decoded as hex codepoints separated
  by underscores to form the glyphstr.  If two files decode to the same
  glyphstr, the one seen last wins.

  Args:
    prefix: location and leading part of the image filenames.
    ext: filename extension, without the dot.
    include: optional regex string; only filenames it matches are kept.
    exclude: optional regex string; filenames it matches are dropped.  It is
      applied after include.
    verbosity: 0 prints nothing, non-zero prints progress, greater than 1 also
      prints each excluded file.

  Returns:
    The (glyphstr, filename) items of the collected mapping.

  Raises:
    Exception: if no image files remain after filtering.
  """
  pattern = "%s*.%s" % (prefix, ext)
  if verbosity:
    print("Looking for images matching '%s'." % pattern)
  exclude_re = re.compile(exclude) if exclude else None
  include_re = re.compile(include) if include else None
  if verbosity and include_re:
    print("Including images matching '%s'." % include)
  if verbosity and exclude_re:
    print("Excluding images matching '%s'." % exclude)
  start = len(prefix)
  stop = -(len(ext) + 1) # drop the extension and its dot
  found = {}
  skipped = 0
  for path in glob.glob(pattern):
    if include_re and not include_re.search(path):
      continue
    if exclude_re and exclude_re.search(path):
      if verbosity > 1:
        print("Exclude %s" % path)
      skipped += 1
      continue
    # A name without underscores splits into a single code, so one path
    # handles both single characters and ligatures.
    hex_codes = path[start:stop].split("_")
    glyphstr = "".join([unichr(int(code, 16)) for code in hex_codes])
    found[glyphstr] = path
  if verbosity and skipped:
    print("Excluded %d files." % skipped)
  if not found:
    raise Exception ("No image files matching '%s'." % pattern)
  if verbosity:
    print("Included %s files." % len(found))
  return found.items()
 def sort_glyphstr_tuples(glyphstr_tuples):
  """Sort, in place, a list of tuples whose first element is a glyphstr.

  A glyphstr is a string representing a character or ligature.  Shorter
  glyphstrs sort first, and equal-length ones are ordered by string comparison.
  This ensures that ligature components are added to the font before any
  ligatures that contain them.
  """
  def _length_then_text(entry):
    glyphstr = entry[0]
    return len(glyphstr), glyphstr
  glyphstr_tuples.sort(key=_length_then_text)
 def add_image_glyphs(in_file, out_file, pairs, verbosity=1):
  """Read the .ttx file in_file, add a glyph for each image, and write out_file.

  Args:
    in_file: name of the .ttx file to read.
    out_file: name of the .ttx file to write.
    pairs: list of (glyphstr, filename) tuples; it is sorted in place so that
      shorter glyphstrs come first.
    verbosity: non-zero prints a summary, greater than 1 also prints a line per
      glyph and stops passing quiet=True to the font import and save.
  """
  be_quiet = verbosity < 2
  font = ttx.TTFont(quiet=be_quiet)
  font.importXML(in_file, quiet=be_quiet)
  sort_glyphstr_tuples(pairs)
  builder = FontBuilder(font)
  # The sort puts the longest glyphstrs at the end, so the last one alone tells
  # us whether there are any ligatures and GSUB is needed.
  longest_glyphstr = pairs[-1][0]
  if len(longest_glyphstr) > 1:
    builder.init_gsub()
  svg_adder = svg_builder.SvgBuilder(builder)
  for glyphstr, filename in pairs:
    if verbosity > 1:
      codepoints = ["%04X" % ord(char) for char in glyphstr]
      print("Adding glyph for U+%s" % ",".join(codepoints))
    svg_adder.add_from_filename(glyphstr, filename)
  font.saveXML(out_file, quiet=be_quiet)
  if verbosity:
    print("added %s images to %s" % (len(pairs), out_file))
 def main(argv):
  """Parse command-line arguments and add the matching svg images to a ttx font.

  Args:
    argv: the argument list, without the program name.
  """
  usage = """This will search for files that have image_prefix followed by one or more
      hex numbers (separated by underscore if more than one), and end in ".svg".
      For example, if image_prefix is "icons/u", then files with names like
      "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded.  The script
      then adds cmap, hmtx, and potentially GSUB entries for the Unicode
      characters found.  The advance width will be chosen based on image aspect
      ratio.  If Unicode values outside the BMP are desired, the existing cmap
      table should be of the appropriate (format 12) type.  Only the first cmap
      table and the first GSUB lookup (if existing) are modified."""
  parser = argparse.ArgumentParser(
      description="Update cmap, glyf, GSUB, and hmtx tables from image glyphs.", epilog=usage)
  parser.add_argument('in_file', help="Input ttx file name.")
  parser.add_argument('out_file', help="Output ttx file name.")
  parser.add_argument('image_prefix', help="Location and prefix of image files.")
  parser.add_argument('-i', '--include', help='include files whose name matches this regex')
  parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
  # --quiet and --verbose share one destination: 0 is quiet, 1 the default, 2 verbose.
  parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1,
                      action='store_const', const=0)
  parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.",
                      action='store_const', const=2)
  args = parser.parse_args(argv)
  pairs = collect_glyphstr_file_pairs(args.image_prefix, 'svg', include=args.include,
                                      exclude=args.exclude, verbosity=args.v)
  add_image_glyphs(args.in_file, args.out_file, pairs, verbosity=args.v)
 if __name__ == '__main__':
  main(sys.argv[1:])
--- a/generate_test_html.py
+++ b/generate_test_html.py
@ -0,0 +1,196 @@
 #!/usr/bin/python
 # Copyright 2015 Google, Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # Google Author(s): Doug Felt
 import argparse
 import os
 import os.path
 import re
 import sys
 from fontTools import ttx
 import add_svg_glyphs
 def do_generate_test_html(font_basename, pairs, glyph=None, verbosity=1):
  """Write '<font_basename>_test.html', a page that displays every glyph in pairs.

  The page loads '<font_basename>.woff' as a web font, shows each glyph in a
  span, and shows one glyph enlarged in a panel with its codepoints in hex
  below it.  Hovering over a span updates the panel.

  Args:
    font_basename: path prefix shared by the font file and the html file.
    pairs: sequence of (glyphstr, filename) tuples; only glyphstr is used.
    glyph: the glyph to show in the panel initially, as html character
      references in the '&#x<hex>;' form generated below.  If it is None, or
      matches none of the glyphs, the first glyph in pairs is shown.
    verbosity: non-zero prints the name of the file written and reports a
      requested glyph that was not found; greater than 1 also reports the
      initial glyph when none was requested.
  """
  header = r"""<!DOCTYPE html>
 <html>
 <head>
 <meta charset="utf-8">
 <style type="text/css">
@font-face { font-family: svgfont; src: url("%s") }
 body { font-family: sans-serif; font-size: 24px }
 #emoji span { font-family: svgfont, sans-serif }
 #panel { font-family: svgfont, sans-serif; font-size: 256px }
 #paneltitle { font-family: sans-serif; font-size: 36px }
 </style>
 <script type="text/javascript">
 function hexify(text) {
  var surr_offset = 0x10000 - (0xd800 << 10) - 0xdc00
  var str = new String(text.trim())
  var len = str.length
  var result = ""
  for (var i = 0; i < len; ++i) {
    var cp = str.charCodeAt(i)
    if (cp >= 0xd800 && cp < 0xdc00 && i < len - 1) {
      ncp = str.charCodeAt(i+1)
      if (ncp >= 0xdc00 && ncp < 0xe000) {
        cp = (cp << 10) + ncp + surr_offset
        ++i;
      }
    }
    result += " 0x" + cp.toString(16)
  }
  return result
 };
 function showText(event) {
  var text = event.target.textContent
  var p = document.getElementById('panel')
  p.textContent = text
  p = document.getElementById('paneltitle')
  p.textContent = hexify(text)
 };
 function setup() {
  var t = document.getElementById('emoji')
  var tdlist = t.getElementsByTagName('span')
  for (var i = 0, lim = tdlist.length; i < lim; ++i) {
    var e = tdlist[i]
    e.onmouseover = showText
  }
 };
 </script>
 </head>"""
  body_head = r"""<body onload="setup();">
 <p>Test for SVG glyphs in %(font)s.  It uses the proposed
 <a href="http://lists.w3.org/Archives/Public/public-svgopentype/2013Jul/0003.html">SVG-in-OpenType format</a>.
 View using Firefox&nbsp;26 and later.
 <div style="float:left; text-align:center; margin:0 10px">
 <div id='panel' style="margin-left:auto; margin-right:auto">%(glyph)s</div>
 <div id='paneltitle' style="margin-left:auto; margin-right:auto">%(glyph_hex)s</div>
 </div>
 <div id='emoji'><p>"""
  body_tail = r"""</div>
 </body>
 </html>
 """
  font_name = font_basename + ".woff"
  html_name = font_basename + "_test.html"
  found_initial_glyph = False
  initial_glyph_str = None;
  initial_glyph_hex = None;
  text_parts = []
  for glyphstr, _ in pairs:
    # name_parts collects one html character reference per codepoint, and
    # hex_parts the matching '0x...' strings for the panel title.
    name_parts = []
    hex_parts = []
    for cp in glyphstr:
      hex_str = hex(ord(cp))
      name_parts.append('&#x%s;' % hex_str[2:])
      hex_parts.append(hex_str)
    glyph_str = ''.join(name_parts)
    if not found_initial_glyph:
      if not glyph or glyph_str == glyph:
        # Either no glyph was requested, so the first one is taken, or this is
        # the requested one.
        initial_glyph_str = glyph_str
        initial_glyph_hex = ' '.join(hex_parts)
        found_initial_glyph = True
      elif not initial_glyph_str:
        # Remember the first glyph as a fallback in case the requested glyph
        # never turns up.
        initial_glyph_str = glyph_str
        initial_glyph_hex = ' '.join(hex_parts)
    text = '<span>%s</span>' % glyph_str
    text_parts.append(text)
  if verbosity and glyph and not found_initial_glyph:
    print "Did not find glyph '%s', using initial glyph '%s'" % (glyph, initial_glyph_str)
  elif verbosity > 1 and not glyph:
    print "Using initial glyph '%s'" % initial_glyph_str
  lines = [header % font_name]
  lines.append(body_head % {'font':font_name, 'glyph':initial_glyph_str,
                            'glyph_hex':initial_glyph_hex})
  lines.extend(text_parts) # we'll end up with space between each emoji
  lines.append(body_tail)
  output = '\n'.join(lines)
  with open(html_name, 'w') as fp:
    fp.write(output)
  if verbosity:
    print 'Wrote ' + html_name
 def do_generate_fonts(template_file, font_basename, pairs, reuse=False, verbosity=1):
  """Build '<font_basename>.ttx' and '<font_basename>.woff' from a template font.

  Args:
    template_file: name of the template .ttx file.
    font_basename: path prefix for the generated .ttx and .woff files.
    pairs: list of (glyphstr, filename) tuples naming the images to add.
    reuse: if true and a readable .woff file already exists, nothing is built.
    verbosity: non-zero prints which file was written or reused; it is also
      passed on to the glyph-adding step.
  """
  woff_path = font_basename + '.woff'
  can_reuse = reuse and os.path.isfile(woff_path) and os.access(woff_path, os.R_OK)
  if can_reuse:
    if verbosity:
      print('Reusing ' + woff_path)
    return
  ttx_path = font_basename + '.ttx'
  add_svg_glyphs.add_image_glyphs(template_file, ttx_path, pairs, verbosity=verbosity)
  # Round-trip the generated ttx through a woff-flavored font to get the woff.
  be_quiet = verbosity < 2
  woff_font = ttx.TTFont(flavor='woff', quiet=be_quiet)
  woff_font.importXML(ttx_path, quiet=be_quiet)
  woff_font.save(woff_path)
  if verbosity:
    print('Wrote ' + woff_path)
 def main(argv):
  """Generate the svg font and its html test page from command-line arguments.

  Args:
    argv: the argument list, without the program name.
  """
  usage = """This will search for files that have image_prefix followed by one or more
      hex numbers (separated by underscore if more than one), and end in ".svg".
      For example, if image_prefix is "icons/u", then files with names like
      "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be found. It generates
      an SVG font from this, converts it to woff, and also generates an html test
      page containing text for all the SVG glyphs."""
  parser = argparse.ArgumentParser(
      description='Generate font and html test file.', epilog=usage)
  parser.add_argument('template_file', help='name of template .ttx file')
  parser.add_argument('image_prefix', help='location and prefix of image files')
  parser.add_argument('-i', '--include', help='include files whose name matches this regex')
  parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
  parser.add_argument('-o', '--out_basename', help='base name of (ttx, woff, html) files to generate, '
                      'defaults to the template base name')
  parser.add_argument('-g', '--glyph', help='set the initial glyph text (html encoded string), '
                      'defaults to first glyph')
  parser.add_argument('-r', '--reuse_font', help='use existing woff font', action='store_true')
  # --quiet and --verbose share one destination: 0 is quiet, 1 the default, 2 verbose.
  parser.add_argument('-q', '--quiet', dest='v', help='quiet operation', default=1,
                      action='store_const', const=0)
  parser.add_argument('-v', '--verbose', dest='v', help='verbose operation',
                      action='store_const', const=2)
  args = parser.parse_args(argv)
  pairs = add_svg_glyphs.collect_glyphstr_file_pairs(
    args.image_prefix, 'svg', include=args.include, exclude=args.exclude, verbosity=args.v)
  add_svg_glyphs.sort_glyphstr_tuples(pairs)
  out_basename = args.out_basename
  if not out_basename:
    # Strip the extensions (e.g. '.tmpl.ttx') from the file name only.
    # Splitting the whole path at the first '.' would reduce './font.ttx' or
    # '../fonts/font.ttx' to an empty basename.
    template_dir, template_name = os.path.split(args.template_file)
    out_basename = os.path.join(template_dir, template_name.split('.')[0])
    if args.v:
      print("Output basename is %s." % out_basename)
  do_generate_fonts(args.template_file, out_basename, pairs, reuse=args.reuse_font, verbosity=args.v)
  do_generate_test_html(out_basename, pairs, glyph=args.glyph, verbosity=args.v)
 if __name__ == '__main__':
  main(sys.argv[1:])
--- a/svg_builder.py
+++ b/svg_builder.py
@ -0,0 +1,107 @@
 # Copyright 2015 Google, Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # Google Author(s): Doug Felt
 import svg_cleaner
 class SvgBuilder(object):
  """Modifies a font to add SVG glyphs from a document or string.  Once built you
  can call add_from_filename or add_from_doc multiple times to add SVG
  documents, which should contain a single root svg element representing the glyph.
  This element must have width and height attributes (in px), these are used to
  determine how to scale the glyph.  The svg should be designed to fit inside
  this bounds and have its origin at the top left.  Adding the svg generates a
  transform to scale and position the glyph, so the svg element should not have
  a transform attribute since it will be overwritten.  Any id attribute on the
  glyph is also overwritten.

  Adding a glyph can generate additional default glyphs for components of a
  ligature that are not already present.

  It is possible to add SVG images to a font that already has corresponding
  glyphs.  If a glyph exists already, then its hmtx advance is assumed valid.
  Otherwise we will generate an advance based on the image's width and scale
  factor.  Callers should ensure that glyphs for components of ligatures are
  added before the ligatures themselves, otherwise glyphs generated for missing
  ligature components will be assigned zero metrics that will not be
  overridden later."""

  def __init__(self, font_builder):
    """Enable svg support on font_builder and cache the font's vertical metrics.

    Args:
      font_builder: the builder wrapping the font to modify.  Its init_svg
        method is called here, and its font must have hhea and head tables.
    """
    font_builder.init_svg()
    self.font_builder = font_builder
    self.cleaner = svg_cleaner.SvgCleaner()
    font = font_builder.font
    self.font_ascent = font['hhea'].ascent
    self.font_height = self.font_ascent - font['hhea'].descent
    self.font_upem = font['head'].unitsPerEm

  def add_from_filename(self, ustr, filename):
    """Read the svg document in filename and add it as the glyph for ustr."""
    with open(filename, "r") as fp:
      return self.add_from_doc(ustr, fp.read())

  def _get_int_px(self, val):
    """Return the integer pixel count from a string such as '128px'.

    Raises:
      ValueError: if val does not end in 'px' (in any case), or if the text
        before the unit is not an integer.
    """
    if not val.lower().endswith('px'):
      # Raising a bare string is itself a TypeError, which would hide this message.
      raise ValueError("expected width or height ending in 'px' but got: %s" % val)
    return int(val[:-2])

  def add_from_doc(self, ustr, svgdoc):
    """Cleans the svg doc, tweaks the root svg element's
    attributes, then updates the font.  ustr is the character or ligature
    string, svgdoc is the svg document xml.  The doc must have a single
    svg root element with width and height attributes; a KeyError is raised
    if either is missing, and a ValueError if either is not in px."""
    # The svg element must have an id attribute of the form 'glyphNNN' where NNN
    # is the glyph id.  We capture the index of the glyph we're adding and write
    # it into the svg.
    #
    # We generate a transform that places the origin at the top left of the
    # ascent and uniformly scales it to fit both the font height (ascent -
    # descent) and glyph advance if it is already present.  The width and height
    # attributes are not used by rendering, so they are removed from the element
    # once we're done with them.
    cleaner = self.cleaner
    fbuilder = self.font_builder
    tree = cleaner.tree_from_text(svgdoc)
    cleaner.clean_tree(tree)
    name, index, exists = fbuilder.add_components_and_ligature(ustr)
    tree.attrs['id'] = 'glyph%s' % index
    image_width = self._get_int_px(tree.attrs.pop('width'))
    image_height = self._get_int_px(tree.attrs.pop('height'))
    scale = float(self.font_height) / image_height
    if exists:
      width = fbuilder.hmtx[name][0]
      # Special case for preexisting zero advance, we scale to height.
      if width > 0:
        # Use whichever of the two scales is smaller, so the image fits both
        # the font height and the existing advance.
        hscale = float(width) / image_width
        if hscale < scale:
          scale = hscale
    transform = 'translate(0, -%s) scale(%s)' % (self.font_ascent, scale)
    tree.attrs['transform'] = transform
    svgdoc = cleaner.tree_to_text(tree)
    hmetrics = None
    if not exists:
      # horiz advance and lsb
      hmetrics = [int(round(image_width * scale)), 0]
    fbuilder.add_svg(svgdoc, hmetrics, name, index)
--- a/svg_cleaner.py
+++ b/svg_cleaner.py
@ -0,0 +1,254 @@
 #!/usr/bin/python
 # Copyright 2015 Google, Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # Google Author(s): Doug Felt
 import argparse
 import codecs
 import os.path
 import re
 import sys
 from xml.parsers import expat
 from xml.sax import saxutils
 # Expat doesn't allow me to identify empty tags (in particular, with an
 # empty tag the parse location for the start and end is not the same) so I
 # have to take a dom-like approach if I want to identify them. There are a
 # lot of empty tags in svg.  This way I can do some other kinds of cleanup
 # as well (remove unnecessary 'g' elements, for instance).
 # Use nodes instead of tuples and strings because it's easier to mutate
 # a tree of these, and cleaner will want to do this.
 class _Elem_Node(object):
  """An element in the parsed svg tree.

  name is the tag name, attrs the attribute mapping, and contents the list of
  child nodes.
  """

  def __init__(self, name, attrs, contents):
    self.name = name
    self.attrs = attrs
    self.contents = contents

  def __repr__(self):
    # attrs and contents are only mentioned when they are non-empty.
    text = "elem(name: '%s'" % self.name
    if self.attrs:
      text += " attrs: '%s'" % self.attrs
    if self.contents:
      text += " contents[%s]: '%s'" % (len(self.contents), self.contents)
    return text + ')'
 class _Text_Node(object):
  """A run of character data in the parsed svg tree, held in the text attribute."""

  def __init__(self, text):
    self.text = text

  def __repr__(self):
    template = "text('%s')"
    return template % self.text
 class SvgCleaner(object):
  """Strip out unwanted parts of an svg file, primarily the xml declaration and
  doctype lines, comments, and some attributes of the outermost <svg> element.
  The id will be replaced when it is inserted into the font.  viewBox causes
  unwanted scaling when used in a font and its effect is difficult to
  predict. version is unneeded, xml:space is ignored (we're processing spaces
  so a request to maintain them has no effect).  enable-background appears to
  have no effect.  x and y on the outermost svg element have no effect.  We
  keep width and height, and will elsewhere assume these are the dimensions
  used for the character box."""

  def __init__(self):
    self.reader = SvgCleaner._Reader()
    self.cleaner = SvgCleaner._Cleaner()
    self.writer = SvgCleaner._Writer()

  class _Reader(object):
    """Loosely based on fonttools's XMLReader.  This generates a tree of nodes,
    either element nodes or text nodes.  Successive text content is merged
    into one node, so contents will never contain more than one _Text_Node in
    a row.  This drops comments, xml declarations, and doctypes."""

    def _reset(self, parser):
      """Clear the parse state.  The parser argument is currently unused."""
      self._stack = []
      self._textbuf = []

    def _start_element(self, name, attrs):
      """Attach a new element node to the open element and make it the open one."""
      self._flush_textbuf()
      node = _Elem_Node(name, attrs, [])
      if self._stack:
        self._stack[-1].contents.append(node)
      self._stack.append(node)

    def _end_element(self, name):
      """Close the open element."""
      self._flush_textbuf()
      # The root stays on the stack so that from_text can return it.
      if len(self._stack) > 1:
        self._stack.pop()

    def _character_data(self, data):
      """Buffer text; expat may deliver one run of text in several pieces."""
      # Text that arrives before the root element has opened is dropped.
      if self._stack:
        self._textbuf.append(data)

    def _flush_textbuf(self):
      """Turn any buffered text into a single text node on the open element."""
      if self._textbuf:
        node = _Text_Node(''.join(self._textbuf))
        self._stack[-1].contents.append(node)
        self._textbuf = []

    def from_text(self, data):
      """Return the root node of a tree representing the svg data."""
      parser = expat.ParserCreate()
      parser.StartElementHandler = self._start_element
      parser.EndElementHandler = self._end_element
      parser.CharacterDataHandler = self._character_data
      self._reset(parser)
      parser.Parse(data)
      return self._stack[0]

  class _Cleaner(object):
    """Normalizes whitespace in text and attribute values, removes unwanted
    attributes from <svg> elements, and prunes empty text nodes and
    unnecessary 'g' elements."""

    # Attributes removed from any element named 'svg'.
    _SVG_ATTRS_TO_DROP = frozenset(['x', 'y', 'id', 'version', 'viewBox',
                                    'enable-background', 'xml:space'])

    def _clean_elem(self, node):
      """Clean node's attributes and prune its contents.  The contents are
      expected to have been cleaned already."""
      dropped = self._SVG_ATTRS_TO_DROP if node.name == 'svg' else ()
      nattrs = {}
      for k, v in node.attrs.items():
        if k in dropped:
          continue
        nattrs[k] = re.sub(r'\s+', ' ', v)
      node.attrs = nattrs
      # scan contents. remove any empty text nodes, or empty 'g' element nodes.
      # if a 'g' element has no attrs and only one subnode, replace it with the
      # subnode.
      kept = []
      for n in node.contents:
        if isinstance(n, _Text_Node):
          if not n.text:
            continue
        elif n.name == 'g':
          if not n.contents:
            continue
          if not n.attrs and len(n.contents) == 1:
            n = n.contents[0]
        kept.append(n)
      node.contents = kept

    def _clean_text(self, node):
      """Strip the text and collapse each run of whitespace to one space."""
      text = node.text.strip()
      # common case is text is empty (line endings between elements)
      if text:
        text = re.sub(r'\s+', ' ', text)
      node.text = text

    def clean(self, node):
      """Clean node and everything below it, in place."""
      if isinstance(node, _Text_Node):
        self._clean_text(node)
      else:
        # do contents first, so we can check for empty subnodes after
        for n in node.contents:
          self.clean(n)
        self._clean_elem(node)

  class _Writer(object):
    """Serializes a tree of nodes as xml text, one line per tag or text node.
    Elements are indented two spaces per level and their attributes are
    written in sorted order.  Elements without contents become empty tags."""

    def _write_node(self, node, lines, indent):
      """Node is a node generated by _Reader, either a TextNode or an
      ElementNode. Lines is a list to collect the lines of output.  Indent is
      the indentation level for this node."""
      if isinstance(node, _Text_Node):
        if node.text:
          # The parser hands us decoded text, so '&', '<' and '>' have to be
          # escaped again or the output is not well-formed xml.
          lines.append(saxutils.escape(node.text))
      else:
        margin = '  ' * indent
        line = [margin]
        line.append('<%s' % node.name)
        for k in sorted(node.attrs.keys()):
          v = node.attrs[k]
          line.append(' %s=%s' % (k, saxutils.quoteattr(v)))
        if node.contents:
          line.append('>')
          lines.append(''.join(line))
          for elem in node.contents:
            self._write_node(elem, lines, indent + 1)
          line = [margin]
          line.append('</%s>' % node.name)
          lines.append(''.join(line))
        else:
          line.append('/>')
          lines.append(''.join(line))

    def to_text(self, root):
      """Return the xml text for the tree rooted at root."""
      # set up lines for recursive calls, let them append lines, then return
      # the result.
      lines = []
      self._write_node(root, lines, 0)
      return '\n'.join(lines)

  def tree_from_text(self, svg_text):
    """Parse svg_text and return the root node of its tree."""
    return self.reader.from_text(svg_text)

  def clean_tree(self, svg_tree):
    """Clean svg_tree in place."""
    self.cleaner.clean(svg_tree)

  def tree_to_text(self, svg_tree):
    """Return svg_tree serialized as xml text."""
    return self.writer.to_text(svg_tree)

  def clean_svg(self, svg_text):
    """Return the cleaned svg_text."""
    tree = self.tree_from_text(svg_text)
    self.clean_tree(tree)
    return self.tree_to_text(tree)
 def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
  """Clean every selected file in in_dir and write the result to out_dir.

  Args:
    in_dir: directory whose files are read.
    out_dir: directory the cleaned files are written to under the same names;
      it is created if it does not exist.
    match_pat: optional regex string; only file names it matches from the start
      are processed.
    quiet: if true, the per-file and directory-creation messages are
      suppressed.  The final summary is printed regardless.
  """
  name_filter = re.compile(match_pat) if match_pat else None
  if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
    if not quiet:
      print('created output directory: %s' % out_dir)
  tidy = SvgCleaner()
  written = 0
  for file_name in os.listdir(in_dir):
    if name_filter and not name_filter.match(file_name):
      continue
    with open(os.path.join(in_dir, file_name)) as in_fp:
      cleaned = tidy.clean_svg(in_fp.read())
    out_path = os.path.join(out_dir, file_name)
    with codecs.open(out_path, 'w', 'utf-8') as out_fp:
      if not quiet:
        print('wrote: %s' % out_path)
      out_fp.write(cleaned)
      written += 1
  if written:
    print('processed %s files to %s' % (written, out_dir))
  else:
    print('failed to match any files')
 def main():
  """Parse the command line and clean the svg files in a directory.

  Arguments are read from sys.argv.  in_dir and out_dir are required.  regex is
  an optional third positional argument; when it is omitted every file in
  in_dir is processed.
  """
  parser = argparse.ArgumentParser(
      description="Generate 'cleaned' svg files.")
  parser.add_argument('in_dir', help='Input directory.')
  parser.add_argument('out_dir', help='Output directory.')
  # A positional is only optional with nargs='?'.  Without it argparse demands
  # a value and the default is never used.
  parser.add_argument('regex', nargs='?', default=None,
                      help='Regex to select files, default matches all files.')
  parser.add_argument('--quiet', '-q', help='Quiet operation.', action='store_true')
  args = parser.parse_args()
  clean_svg_files(args.in_dir, args.out_dir, match_pat=args.regex, quiet=args.quiet)
 if __name__ == '__main__':
  main()