diff --git a/add_svg_glyphs.py b/add_svg_glyphs.py
new file mode 100755
index 000000000..413f0d398
--- /dev/null
+++ b/add_svg_glyphs.py
@@ -0,0 +1,287 @@
+#!/usr/bin/python
+# Copyright 2015 Google, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Google Author(s): Doug Felt
+
+"""Tool to update GSUB, hmtx, cmap, glyf tables with svg image glyphs."""
+
+import argparse
+import glob
+import os
+import re
+import sys
+
+# Find the noto root, two levels above this file's directory, and add it to sys.path
+# so that nototools can be imported.  Alternatively we could define PYTHONPATH or always
+# run this from noto root, but for testing we might not always be doing that.
+_noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
+sys.path.append(_noto_root)
+
+from fontTools.ttLib.tables import otTables
+from fontTools.ttLib.tables import _g_l_y_f
+from fontTools.ttLib.tables import S_V_G_ as SVG
+from fontTools import ttx
+from nototools import add_emoji_gsub
+
+import svg_builder
+import svg_cleaner
+
+class FontBuilder(object):
+  """A utility for mutating a ttx font.  This maintains glyph_order, cmap, and hmtx tables,
+  and optionally GSUB, glyf, and SVG tables as well."""
+
+  def __init__(self, font):
+    """Cache the font's glyph order, first cmap subtable mapping, and hmtx metrics."""
+    self.font = font
+    self.glyph_order = font.getGlyphOrder()
+    self.cmap = font['cmap'].tables[0].cmap
+    self.hmtx = font['hmtx'].metrics
+
+  def init_gsub(self):
+    """Call this if you are going to add ligatures to the font.  Creates a GSUB table
+    if there isn't one already."""
+    if hasattr(self, 'ligatures'):
+      return
+    font = self.font
+    if 'GSUB' not in font:
+      ligature_subst = otTables.LigatureSubst()
+      ligature_subst.ligatures = {}
+
+      lookup = otTables.Lookup()
+      lookup.LookupType = 4
+      lookup.LookupFlag = 0
+      lookup.SubTableCount = 1
+      lookup.SubTable = [ligature_subst]
+
+      font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
+    else:
+      lookup = font['GSUB'].table.LookupList.Lookup[0]
+      assert lookup.LookupType == 4
+      assert lookup.LookupFlag == 0
+    self.ligatures = lookup.SubTable[0].ligatures
+
+  def init_glyf(self):
+    """Call this if you need to create empty glyf entries in the font when you add a new
+    glyph."""
+    if hasattr(self, 'glyphs'):
+      return
+    font = self.font
+    if 'glyf' not in font:
+      glyf_table = _g_l_y_f.table__g_l_y_f()
+      glyf_table.glyphs = {}
+      glyf_table.glyphOrder = self.glyph_order
+      font['glyf'] = glyf_table
+    self.glyphs = font['glyf'].glyphs
+
+  def init_svg(self):
+    """Call this if you expect to add SVG images in the font. This calls init_glyf since SVG
+    support currently requires fallback glyf records for each SVG image."""
+    if hasattr(self, 'svgs'):
+      return
+    # svg requires glyf
+    self.init_glyf()
+    font = self.font
+    if 'SVG ' not in font:
+      svg_table = SVG.table_S_V_G_()
+      svg_table.docList = []
+      svg_table.colorPalettes = None
+      font['SVG '] = svg_table
+    self.svgs = font['SVG '].docList
+
+  def glyph_name(self, string):
+    """Return the glyph name for string: 'uXXXX' for each character, joined by '_'."""
+    return "_".join(["u%04X" % ord(char) for char in string])
+
+  def glyph_name_to_index(self, name):
+    """Return the index of name in the glyph order, or -1 if it is not present."""
+    return self.glyph_order.index(name) if name in self.glyph_order else -1
+
+  def glyph_index_to_name(self, glyph_index):
+    """Return the glyph name at glyph_index, or '' if the index is out of range."""
+    if 0 <= glyph_index < len(self.glyph_order):
+      return self.glyph_order[glyph_index]
+    return ''
+
+  def have_glyph(self, name):
+    """Return True if a glyph called name is in the glyph order."""
+    return self.glyph_name_to_index(name) >= 0
+
+  def _add_ligature(self, glyphstr):
+    """Add a ligature record for glyphstr under its first glyph; init_gsub must have run."""
+    lig = otTables.Ligature()
+    lig.CompCount = len(glyphstr)
+    lig.Component = [self.glyph_name(ch) for ch in glyphstr[1:]]
+    lig.LigGlyph = self.glyph_name(glyphstr)
+
+    first = self.glyph_name(glyphstr[0])
+    try:
+      self.ligatures[first].append(lig)
+    except KeyError:
+      self.ligatures[first] = [lig]
+
+  def _add_empty_glyph(self, glyphstr, name):
+    """Create an empty glyph. If glyphstr is not a ligature, add a cmap entry for it."""
+    if len(glyphstr) == 1:
+      self.cmap[ord(glyphstr)] = name
+    self.hmtx[name] = [0, 0]
+    self.glyph_order.append(name)
+    if hasattr(self, 'glyphs'):
+      self.glyphs[name] = _g_l_y_f.Glyph()
+
+  def add_components_and_ligature(self, glyphstr):
+    """Convert glyphstr to a name and check if it already exists. If not, check if it is a
+    ligature (longer than one codepoint), and if it is, generate empty glyphs with cmap
+    entries for any missing ligature components and add a ligature record.  Then generate
+    an empty glyph for the name.  Return a tuple with the name, index, and a bool
+    indicating whether the glyph already existed."""
+    name = self.glyph_name(glyphstr)
+    index = self.glyph_name_to_index(name)
+    exists = index >= 0
+    if not exists:
+      if len(glyphstr) > 1:
+        for char in glyphstr:
+          if ord(char) not in self.cmap:
+            char_name = self.glyph_name(char)
+            self._add_empty_glyph(char, char_name)
+        self._add_ligature(glyphstr)
+      index = len(self.glyph_order)
+      self._add_empty_glyph(glyphstr, name)
+    return name, index, exists
+
+  def add_svg(self, doc, hmetrics, name, index):
+    """Add an svg table entry. If hmetrics is not None, update the hmtx table. This
+    expects the glyph has already been added."""
+    # sanity check to make sure name and index correspond.
+    assert name == self.glyph_index_to_name(index)
+    if hmetrics:
+      self.hmtx[name] = hmetrics
+    svg_record = (doc, index, index) # startGlyphId, endGlyphId are the same
+    self.svgs.append(svg_record)
+
+
+def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1):
+  """Find image files named <prefix><codes>.<ext> and pair each with its glyph string.
+
+  The codes part of the name is a sequence of hex codepoints separated by underscore;
+  decoding it gives the glyphstr, a single character or a ligature.  include and
+  exclude are optional regex strings searched for in the file name: only matches of
+  include are kept, and matches of exclude are then dropped.  Returns a list of
+  (glyphstr, filename) tuples, and raises Exception if no file was selected."""
+
+  glob_pat = "%s*.%s" % (prefix, ext)
+  # slice bounds that strip the prefix and the '.ext' suffix from a file name
+  codes_start = len(prefix)
+  codes_end = -(len(ext) + 1)
+
+  if verbosity:
+    print "Looking for images matching '%s'." % glob_pat
+  exclude_re = re.compile(exclude) if exclude else None
+  include_re = re.compile(include) if include else None
+  if verbosity and include_re:
+    print "Including images matching '%s'." % include
+  if verbosity and exclude_re:
+    print "Excluding images matching '%s'." % exclude
+
+  files_by_glyphstr = {}
+  num_excluded = 0
+  for path in glob.glob(glob_pat):
+    if include_re and not include_re.search(path):
+      continue
+    if exclude_re and exclude_re.search(path):
+      if verbosity > 1:
+        print "Exclude %s" % path
+      num_excluded += 1
+      continue
+
+    # split returns a single piece when there is no underscore, so single
+    # characters and ligatures are decoded the same way.
+    hex_codes = path[codes_start:codes_end].split("_")
+    glyphstr = "".join(unichr(int(code, 16)) for code in hex_codes)
+    files_by_glyphstr[glyphstr] = path
+
+  if verbosity and num_excluded:
+    print "Excluded %d files." % num_excluded
+  if not files_by_glyphstr:
+    raise Exception ("No image files matching '%s'." % glob_pat)
+  if verbosity:
+    print "Included %s files." % len(files_by_glyphstr)
+  return files_by_glyphstr.items()
+
+
+def sort_glyphstr_tuples(glyphstr_tuples):
+  """Sort the tuples in place by their first element (a character or ligature string),
+  shortest first and then alphabetically, so that ligature components are added to the
+  font before any ligatures that contain them."""
+  glyphstr_tuples.sort(key=lambda pair: (len(pair[0]), pair[0]))
+
+
+def add_image_glyphs(in_file, out_file, pairs, verbosity=1):
+  """Add images from pairs (glyphstr, filename) to .ttx file in_file and write
+  to .ttx file out_file.  pairs is sorted in place and may be empty."""
+
+  quiet = verbosity < 2
+  font = ttx.TTFont(quiet=quiet)
+  font.importXML(in_file, quiet=quiet)
+
+  sort_glyphstr_tuples(pairs)
+
+  font_builder = FontBuilder(font)
+  # GSUB is only needed for ligatures (glyphstrs longer than one character).  Scan all
+  # pairs instead of indexing the last one, so that an empty list does not raise.
+  if any(len(glyphstr) > 1 for glyphstr, _ in pairs):
+    font_builder.init_gsub()
+
+  img_builder = svg_builder.SvgBuilder(font_builder)
+  for glyphstr, filename in pairs:
+    if verbosity > 1:
+      print "Adding glyph for U+%s" % ",".join(["%04X" % ord(char) for char in glyphstr])
+    img_builder.add_from_filename(glyphstr, filename)
+
+  font.saveXML(out_file, quiet=quiet)
+  if verbosity:
+    print "added %s images to %s" % (len(pairs), out_file)
+
+
+def main(argv):
+  """Parse argv (without the program name), collect the svg images, and update the font."""
+  usage = """This will search for files that have image_prefix followed by one or more
+      hex numbers (separated by underscore if more than one), and end in ".svg".
+      For example, if image_prefix is "icons/u", then files with names like
+      "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded.  The script
+      then adds cmap, hmtx, and potentially GSUB entries for the Unicode
+      characters found.  The advance width will be chosen based on image aspect
+      ratio.  If Unicode values outside the BMP are desired, the existing cmap
+      table should be of the appropriate (format 12) type.  Only the first cmap
+      table and the first GSUB lookup (if existing) are modified."""
+
+  parser = argparse.ArgumentParser(
+      description="Update cmap, glyf, GSUB, and hmtx tables from image glyphs.", epilog=usage)
+  parser.add_argument('in_file', help="Input ttx file name.")
+  parser.add_argument('out_file', help="Output ttx file name.")
+  parser.add_argument('image_prefix', help="Location and prefix of image files.")
+  parser.add_argument('-i', '--include', help='include files whose name matches this regex')
+  parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
+  parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1,
+                      action='store_const', const=0)
+  parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.", action='store_const', const=2)
+  args = parser.parse_args(argv)
+
+  pairs = collect_glyphstr_file_pairs(args.image_prefix, 'svg', include=args.include,
+                                      exclude=args.exclude, verbosity=args.v)
+  add_image_glyphs(args.in_file, args.out_file, pairs, verbosity=args.v)
+
+
+if __name__ == '__main__':
+  main(sys.argv[1:]) # pass only the arguments, without the program name
diff --git a/generate_test_html.py b/generate_test_html.py
new file mode 100755
index 000000000..d4df53713
--- /dev/null
+++ b/generate_test_html.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python
+# Copyright 2015 Google, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Google Author(s): Doug Felt
+
+import argparse
+import os
+import os.path
+import re
+import sys
+
+from fontTools import ttx
+
+import add_svg_glyphs
+
+def do_generate_test_html(font_basename, pairs, glyph=None, verbosity=1):
+  """Write <font_basename>_test.html, a page with a span for every glyphstr in pairs.
+  glyph (html-encoded, e.g. '&#x41;') picks the panel glyph, else the first is used."""
+  header = r"""<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<style type="text/css">
+@font-face { font-family: svgfont; src: url("%s") }
+body { font-family: sans-serif; font-size: 24px }
+#emoji span { font-family: svgfont, sans-serif }
+#panel { font-family: svgfont, sans-serif; font-size: 256px }
+#paneltitle { font-family: sans-serif; font-size: 36px }
+</style>
+<script type="text/javascript">
+function hexify(text) {
+  var surr_offset = 0x10000 - (0xd800 << 10) - 0xdc00
+  var str = new String(text.trim())
+  var len = str.length
+  var result = ""
+  for (var i = 0; i < len; ++i) {
+    var cp = str.charCodeAt(i)
+    if (cp >= 0xd800 && cp < 0xdc00 && i < len - 1) {
+      ncp = str.charCodeAt(i+1)
+      if (ncp >= 0xdc00 && ncp < 0xe000) {
+        cp = (cp << 10) + ncp + surr_offset
+        ++i;
+      }
+    }
+    result += " 0x" + cp.toString(16)
+  }
+  return result
+};
+
+function showText(event) {
+  var text = event.target.textContent
+  var p = document.getElementById('panel')
+  p.textContent = text
+  p = document.getElementById('paneltitle')
+  p.textContent = hexify(text)
+};
+
+function setup() {
+  var t = document.getElementById('emoji')
+  var tdlist = t.getElementsByTagName('span')
+  for (var i = 0, lim = tdlist.length; i < lim; ++i) {
+    var e = tdlist[i]
+    e.onmouseover = showText
+  }
+};
+</script>
+</head>"""
+
+  body_head = r"""<body onload="setup();">
+<p>Test for SVG glyphs in %(font)s.  It uses the proposed
+<a href="http://lists.w3.org/Archives/Public/public-svgopentype/2013Jul/0003.html">SVG-in-OpenType format</a>.
+View using Firefox&nbsp;26 and later.
+<div style="float:left; text-align:center; margin:0 10px">
+<div id='panel' style="margin-left:auto; margin-right:auto">%(glyph)s</div>
+<div id='paneltitle' style="margin-left:auto; margin-right:auto">%(glyph_hex)s</div>
+</div>
+<div id='emoji'><p>"""
+
+  body_tail = r"""</div>
+</body>
+</html>
+"""
+
+  font_name = os.path.basename(font_basename) + ".woff" # url is relative to the html file
+  html_name = font_basename + "_test.html"
+
+  found_initial_glyph = False
+  initial_glyph_str = initial_glyph_hex = None
+  text_parts = []
+  for glyphstr, _ in pairs:
+    name_parts = []
+    hex_parts = []
+    for cp in glyphstr:
+      hex_str = hex(ord(cp))
+      name_parts.append('&#x%s;' % hex_str[2:])
+      hex_parts.append(hex_str)
+    glyph_str = ''.join(name_parts)
+
+    if not found_initial_glyph:
+      if not glyph or glyph_str == glyph:
+        initial_glyph_str = glyph_str
+        initial_glyph_hex = ' '.join(hex_parts)
+        found_initial_glyph = True
+      elif not initial_glyph_str:
+        initial_glyph_str = glyph_str
+        initial_glyph_hex = ' '.join(hex_parts)
+
+    text = '<span>%s</span>' % glyph_str
+    text_parts.append(text)
+
+  if verbosity and glyph and not found_initial_glyph:
+    print "Did not find glyph '%s', using initial glyph '%s'" % (glyph, initial_glyph_str)
+  elif verbosity > 1 and not glyph:
+    print "Using initial glyph '%s'" % initial_glyph_str
+
+  lines = [header % font_name]
+  lines.append(body_head % {'font':font_name, 'glyph':initial_glyph_str,
+                            'glyph_hex':initial_glyph_hex})
+  lines.extend(text_parts) # we'll end up with space between each emoji
+  lines.append(body_tail)
+  output = '\n'.join(lines)
+  with open(html_name, 'w') as fp:
+    fp.write(output)
+  if verbosity:
+    print 'Wrote ' + html_name
+
+
+def do_generate_fonts(template_file, font_basename, pairs, reuse=False, verbosity=1):
+  """Build <font_basename>.ttx from the template and images, then convert it to woff.
+  If reuse is set and a readable woff file already exists, nothing is rebuilt."""
+  woff_path = font_basename + '.woff'
+  if reuse and os.path.isfile(woff_path) and os.access(woff_path, os.R_OK):
+    if verbosity:
+      print 'Reusing ' + woff_path
+    return
+  ttx_path = font_basename + '.ttx'
+  add_svg_glyphs.add_image_glyphs(template_file, ttx_path, pairs, verbosity=verbosity)
+  be_quiet = verbosity < 2
+  woff_font = ttx.TTFont(flavor='woff', quiet=be_quiet)
+  woff_font.importXML(ttx_path, quiet=be_quiet)
+  woff_font.save(woff_path)
+  if verbosity:
+    print 'Wrote ' + woff_path
+
+
+def main(argv):
+  """Parse argv (without the program name), then generate the woff font and test page."""
+  usage = """This will search for files that have image_prefix followed by one or more
+      hex numbers (separated by underscore if more than one), and end in ".svg".
+      For example, if image_prefix is "icons/u", then files with names like
+      "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be found. It generates
+      an SVG font from this, converts it to woff, and also generates an html test
+      page containing text for all the SVG glyphs."""
+
+  parser = argparse.ArgumentParser(
+      description='Generate font and html test file.', epilog=usage)
+  parser.add_argument('template_file', help='name of template .ttx file')
+  parser.add_argument('image_prefix', help='location and prefix of image files')
+  parser.add_argument('-i', '--include', help='include files whose name matches this regex')
+  parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
+  parser.add_argument('-o', '--out_basename', help='base name of (ttx, woff, html) files to generate, '
+                      'defaults to the template base name')
+  parser.add_argument('-g', '--glyph', help='set the initial glyph text (html encoded string), '
+                      'defaults to first glyph')
+  parser.add_argument('-r', '--reuse_font', help='use existing woff font', action='store_true')
+  parser.add_argument('-q', '--quiet', dest='v', help='quiet operation', default=1, action='store_const', const=0)
+  parser.add_argument('-v', '--verbose', dest='v', help='verbose operation', action='store_const', const=2)
+  args = parser.parse_args(argv)
+
+  pairs = add_svg_glyphs.collect_glyphstr_file_pairs(
+    args.image_prefix, 'svg', include=args.include, exclude=args.exclude, verbosity=args.v)
+  add_svg_glyphs.sort_glyphstr_tuples(pairs)
+
+  out_basename = args.out_basename
+  if not out_basename:
+    head, tail = os.path.split(args.template_file) # dots in directory names must be kept
+    out_basename = os.path.join(head, tail.split('.')[0]) # exclude e.g. '.tmpl.ttx'
+    if args.v:
+      print "Output basename is %s." % out_basename
+  do_generate_fonts(args.template_file, out_basename, pairs, reuse=args.reuse_font, verbosity=args.v)
+  do_generate_test_html(out_basename, pairs, glyph=args.glyph, verbosity=args.v)
+
+if __name__ == '__main__':
+  main(sys.argv[1:]) # pass only the arguments, without the program name
diff --git a/svg_builder.py b/svg_builder.py
new file mode 100755
index 000000000..4684fc963
--- /dev/null
+++ b/svg_builder.py
@@ -0,0 +1,107 @@
+# Copyright 2015 Google, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Google Author(s): Doug Felt
+
+import svg_cleaner
+
+class SvgBuilder(object):
+  """Modifies a font to add SVG glyphs from a document or string.  Once built you
+  can call add_from_filename or add_from_doc multiple times to add SVG
+  documents, which should contain a single root svg element representing the glyph.
+  This element must have width and height attributes (in px), these are used to
+  determine how to scale the glyph.  The svg should be designed to fit inside
+  this bounds and have its origin at the top left.  Adding the svg generates a
+  transform to scale and position the glyph, so the svg element should not have
+  a transform attribute since it will be overwritten.  Any id attribute on the
+  glyph is also overwritten.
+
+  Adding a glyph can generate additional default glyphs for components of a
+  ligature that are not already present.
+
+  It is possible to add SVG images to a font that already has corresponding
+  glyphs.  If a glyph exists already, then its hmtx advance is assumed valid.
+  Otherwise we will generate an advance based on the image's width and scale
+  factor.  Callers should ensure that glyphs for components of ligatures are
+  added before the ligatures themselves, otherwise glyphs generated for missing
+  ligature components will be assigned zero metrics that will not be
+  overridden later."""
+
+  def __init__(self, font_builder):
+    font_builder.init_svg()
+    self.font_builder = font_builder
+    self.cleaner = svg_cleaner.SvgCleaner()
+
+    font = font_builder.font
+    self.font_ascent = font['hhea'].ascent
+    self.font_height = self.font_ascent - font['hhea'].descent
+    self.font_upem = font['head'].unitsPerEm
+
+  def add_from_filename(self, ustr, filename):
+    """Read the svg document in filename and add it as the glyph for ustr."""
+    with open(filename, "r") as fp:
+      return self.add_from_doc(ustr, fp.read())
+
+  def _get_int_px(self, val):
+    """Return the integer in val, a string such as '64px'; raise ValueError otherwise."""
+    if not val.lower().endswith('px'):
+      raise ValueError("expected width or height ending in 'px' but got: %s" % val)
+    return int(val[:-2])
+
+  def add_from_doc(self, ustr, svgdoc):
+    """Cleans the svg doc, tweaks the root svg element's
+    attributes, then updates the font.  ustr is the character or ligature
+    string, svgdoc is the svg document xml.  The doc must have a single
+    svg root element."""
+    # The svg element must have an id attribute of the form 'glyphNNN' where NNN
+    # is the glyph id.  We capture the index of the glyph we're adding and write
+    # it into the svg.
+    #
+    # We generate a transform that places the origin at the top left of the
+    # ascent and uniformly scales it to fit both the font height (ascent -
+    # descent) and glyph advance if it is already present.  The width and height
+    # attributes are not used by rendering, so they are removed from the element
+    # once we're done with them.
+
+    cleaner = self.cleaner
+    fbuilder = self.font_builder
+
+    tree = cleaner.tree_from_text(svgdoc)
+    cleaner.clean_tree(tree)
+
+    name, index, exists = fbuilder.add_components_and_ligature(ustr)
+
+    tree.attrs['id'] = 'glyph%s' % index
+
+    image_width = self._get_int_px(tree.attrs.pop('width'))
+    image_height = self._get_int_px(tree.attrs.pop('height'))
+    scale = float(self.font_height) / image_height
+    if exists:
+      width = fbuilder.hmtx[name][0]
+      # Special case for preexisting zero advance, we scale to height.
+      if width > 0:
+        hscale = float(width) / image_width
+        if hscale < scale:
+          scale = hscale
+
+    transform = 'translate(0, -%s) scale(%s)' % (self.font_ascent, scale)
+    tree.attrs['transform'] = transform
+
+    svgdoc = cleaner.tree_to_text(tree)
+
+    hmetrics = None
+    if not exists:
+      # horiz advance and lsb
+      hmetrics = [int(round(image_width * scale)), 0]
+    fbuilder.add_svg(svgdoc, hmetrics, name, index)
diff --git a/svg_cleaner.py b/svg_cleaner.py
new file mode 100755
index 000000000..aaff92760
--- /dev/null
+++ b/svg_cleaner.py
@@ -0,0 +1,254 @@
+#!/usr/bin/python
+# Copyright 2015 Google, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Google Author(s): Doug Felt
+
+import argparse
+import codecs
+import os.path
+import re
+import sys
+from xml.parsers import expat
+from xml.sax import saxutils
+
+# Expat doesn't allow me to identify empty tags (in particular, with an
+# empty tag the parse location for the start and end is not the same) so I
+# have to take a dom-like approach if I want to identify them. There are a
+# lot of empty tags in svg.  This way I can do some other kinds of cleanup
+# as well (remove unnecessary 'g' elements, for instance).
+
+# Use nodes instead of tuples and strings because it's easier to mutate
+# a tree of these, and cleaner will want to do this.
+
+class _Elem_Node(object):
+  """An element node: a tag name, its attributes, and a list of child nodes."""
+  def __init__(self, name, attrs, contents):
+    self.name = name
+    self.attrs = attrs
+    self.contents = contents
+
+  def __repr__(self):
+    # attrs and contents are only shown when they are non-empty
+    text = "elem(name: '%s'" % self.name
+    if self.attrs:
+      text += " attrs: '%s'" % self.attrs
+    if self.contents:
+      text += " contents[%s]: '%s'" % (len(self.contents), self.contents)
+    return text + ')'
+
+class _Text_Node(object):
+  """A text node holding the character data found between tags."""
+  def __init__(self, text):
+    self.text = text
+  def __repr__(self):
+    return "text('%s')" % self.text
+
+class SvgCleaner(object):
+  """Strip out unwanted parts of an svg file, primarily the xml declaration and
+  doctype lines, comments, and some attributes of the outermost <svg> element.
+  The id will be replaced when it is inserted into the font.  viewBox causes
+  unwanted scaling when used in a font and its effect is difficult to
+  predict. version is unneeded, xml:space is ignored (we're processing spaces
+  so a request to maintain them has no effect).  enable-background appears to
+  have no effect.  x and y on the outermost svg element have no effect.  We
+  keep width and height, and will elsewhere assume these are the dimensions
+  used for the character box."""
+
+  def __init__(self):
+    self.reader = SvgCleaner._Reader()
+    self.cleaner = SvgCleaner._Cleaner()
+    self.writer = SvgCleaner._Writer()
+
+  class _Reader(object):
+    """Loosely based on fonttools's XMLReader.  This generates a tree of nodes,
+    either element nodes or text nodes.  Successive text content is merged
+    into one node, so contents will never contain more than one _Text_Node in
+    a row.  This drops comments, xml declarations, and doctypes."""
+
+    def _reset(self, parser):
+      self._stack = []
+      self._textbuf = []
+
+    def _start_element(self, name, attrs):
+      self._flush_textbuf()
+      node = _Elem_Node(name, attrs, [])
+      if len(self._stack):
+        self._stack[-1].contents.append(node)
+      self._stack.append(node)
+
+    def _end_element(self, name):
+      self._flush_textbuf()
+      if len(self._stack) > 1:
+        self._stack = self._stack[:-1]
+
+    def _character_data(self, data):
+      if len(self._stack):
+        self._textbuf.append(data)
+
+    def _flush_textbuf(self):
+      if self._textbuf:
+        node = _Text_Node(''.join(self._textbuf))
+        self._stack[-1].contents.append(node)
+        self._textbuf = []
+
+    def from_text(self, data):
+      """Return the root node of a tree representing the svg data."""
+      parser = expat.ParserCreate()
+      parser.StartElementHandler = self._start_element
+      parser.EndElementHandler = self._end_element
+      parser.CharacterDataHandler = self._character_data
+      self._reset(parser)
+      parser.Parse(data, True) # isfinal, so that truncated documents are reported
+      return self._stack[0]
+
+  class _Cleaner(object):
+    """Normalizes whitespace, drops unwanted <svg> attributes, and prunes empty nodes."""
+    def _clean_elem(self, node):
+      nattrs = {}
+      for k, v in node.attrs.items():
+        if node.name == 'svg' and k in ['x', 'y', 'id', 'version', 'viewBox',
+                                        'enable-background', 'xml:space']:
+          continue
+        v = re.sub(r'\s+', ' ', v)
+        nattrs[k] = v
+      node.attrs = nattrs
+
+      # scan contents. remove any empty text nodes, or empty 'g' element nodes.
+      # if a 'g' element has no attrs and only one subnode, replace it with the
+      # subnode.
+      wpos = 0
+      for n in node.contents:
+        if isinstance(n, _Text_Node):
+          if not n.text:
+            continue
+        elif n.name == 'g':
+          if not n.contents:
+            continue
+          if not n.attrs and len(n.contents) == 1:
+            n = n.contents[0]
+        node.contents[wpos] = n
+        wpos += 1
+      if wpos < len(node.contents):
+        node.contents = node.contents[:wpos]
+
+    def _clean_text(self, node):
+      text = node.text.strip()
+      # common case is text is empty (line endings between elements)
+      if text:
+        text = re.sub(r'\s+', ' ', text)
+      node.text = text
+
+    def clean(self, node):
+      if isinstance(node, _Text_Node):
+        self._clean_text(node)
+      else:
+        # do contents first, so we can check for empty subnodes after
+        for n in node.contents:
+          self.clean(n)
+        self._clean_elem(node)
+
+  class _Writer(object):
+    """Serializes a node tree as text.  Each tag and each non-empty text node goes on
+    its own line; tags are indented two spaces per nesting level, attributes are
+    written in sorted order, and elements without contents become empty tags."""
+
+    def _write_node(self, node, lines, indent):
+      """Node is a node generated by _Reader, either a TextNode or an
+      ElementNode. Lines is a list to collect the lines of output.  Indent is
+      the indentation level for this node."""
+      if isinstance(node, _Text_Node):
+        if node.text:
+          # the parser decoded entities such as &amp;, so escape them again
+          lines.append(saxutils.escape(node.text))
+      else:
+        margin = '  ' * indent
+        line = [margin]
+        line.append('<%s' % node.name)
+        for k in sorted(node.attrs.keys()):
+          v = node.attrs[k]
+          line.append(' %s=%s' % (k, saxutils.quoteattr(v)))
+        if node.contents:
+          line.append('>')
+          lines.append(''.join(line))
+          for elem in node.contents:
+            self._write_node(elem, lines, indent + 1)
+          line = [margin]
+          line.append('</%s>' % node.name)
+          lines.append(''.join(line))
+        else:
+          line.append('/>')
+          lines.append(''.join(line))
+
+    def to_text(self, root):
+      # set up lines for recursive calls, let them append lines, then return
+      # the result.
+      lines = []
+      self._write_node(root, lines, 0)
+      return '\n'.join(lines)
+
+  def tree_from_text(self, svg_text):
+    return self.reader.from_text(svg_text)
+
+  def clean_tree(self, svg_tree):
+    self.cleaner.clean(svg_tree)
+
+  def tree_to_text(self, svg_tree):
+    return self.writer.to_text(svg_tree)
+
+  def clean_svg(self, svg_text):
+    """Return the cleaned svg_text."""
+    tree = self.tree_from_text(svg_text)
+    self.clean_tree(tree)
+    return self.tree_to_text(tree)
+
+
+def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
+  """Clean the files in in_dir whose names match regex match_pat and write them to out_dir."""
+  regex = re.compile(match_pat) if match_pat else None
+  if not os.path.isdir(out_dir):
+    os.makedirs(out_dir)
+    if not quiet:
+      print 'created output directory: %s' % out_dir
+  cleaner, count = SvgCleaner(), 0
+  for file_name in os.listdir(in_dir):
+    in_path = os.path.join(in_dir, file_name)
+    if (regex and not regex.match(file_name)) or not os.path.isfile(in_path):
+      continue # not selected, or a subdirectory that cannot be read as a file
+    with open(in_path) as in_fp:
+      result = cleaner.clean_svg(in_fp.read())
+    out_path = os.path.join(out_dir, file_name)
+    with codecs.open(out_path, 'w', 'utf-8') as out_fp:
+      if not quiet:
+        print 'wrote: %s' % out_path
+      out_fp.write(result)
+      count += 1
+  if not count: # reported even when quiet, since nothing was done
+    print 'failed to match any files'
+  elif not quiet:
+    print 'processed %s files to %s' % (count, out_dir)
+
+
+def main():
+  """Parse the command line (the regex argument may be omitted) and clean the svg files."""
+  parser = argparse.ArgumentParser(description="Generate 'cleaned' svg files.")
+  parser.add_argument('in_dir', help='Input directory.')
+  parser.add_argument('out_dir', help='Output directory.')
+  parser.add_argument('regex', nargs='?', help='Regex to select files, default matches all files.')
+  parser.add_argument('--quiet', '-q', help='Quiet operation.', action='store_true')
+  args = parser.parse_args()
+  clean_svg_files(args.in_dir, args.out_dir, match_pat=args.regex, quiet=args.quiet)
+
+
+if __name__ == '__main__':
+  main() # the command line arguments are parsed inside main