Move the new files out of third_party/color_emoji and into color_emoji.

The third_party subdir is for forks/clones of external projects.  The svg code
isn't a modification to that project, but an addition to the font-building
code in noto/color_emoji.

This also fixes a slight bug in the html generation, which set the default
large glyph image but forgot to set the hex version of the text below it.
pull/13/head
Doug Felt 2015-02-20 14:58:34 -08:00
parent d66b41255a
commit 69913fd3fc
4 changed files with 844 additions and 0 deletions

287
add_svg_glyphs.py 100755
View File

@ -0,0 +1,287 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
"""Tool to update GSUB, hmtx, cmap, glyf tables with svg image glyphs."""
import argparse
import glob
import os
import re
import sys
# Locate the noto root so that the 'nototools' package can be imported below.
# The root is taken to be two directory levels above the directory holding
# this file.  Alternatively we could just define PYTHONPATH or always run this
# from the noto root, but for testing we might not always be doing that.
_noto_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
# Appended (not prepended), so earlier sys.path entries still take precedence.
sys.path.append(_noto_root)
from fontTools.ttLib.tables import otTables
from fontTools.ttLib.tables import _g_l_y_f
from fontTools.ttLib.tables import S_V_G_ as SVG
from fontTools import ttx
from nototools import add_emoji_gsub
import svg_builder
import svg_cleaner
class FontBuilder(object):
    """A utility for mutating a ttx font.

    This maintains the glyph order, cmap, and hmtx tables, and optionally the
    GSUB, glyf, and SVG tables as well.  The optional tables are only touched
    after the matching init_gsub / init_glyf / init_svg method has been called.
    """

    def __init__(self, font):
        """Capture the tables of 'font' that are always updated.

        Only the first cmap subtable of the font is used.
        """
        self.font = font
        self.glyph_order = font.getGlyphOrder()
        self.cmap = font['cmap'].tables[0].cmap
        self.hmtx = font['hmtx'].metrics

    def init_gsub(self):
        """Call this if you are going to add ligatures to the font.

        Creates a GSUB table holding a single ligature lookup if there isn't one
        already; otherwise the first existing lookup is used and is asserted to
        be a ligature lookup (type 4) with no flags.
        """
        if hasattr(self, 'ligatures'):
            return

        font = self.font
        if 'GSUB' not in font:
            ligature_subst = otTables.LigatureSubst()
            ligature_subst.ligatures = {}

            lookup = otTables.Lookup()
            lookup.LookupType = 4
            lookup.LookupFlag = 0
            lookup.SubTableCount = 1
            lookup.SubTable = [ligature_subst]

            font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
        else:
            lookup = font['GSUB'].table.LookupList.Lookup[0]
            assert lookup.LookupType == 4
            assert lookup.LookupFlag == 0
        self.ligatures = lookup.SubTable[0].ligatures

    def init_glyf(self):
        """Call this if you need to create empty glyf entries in the font when
        you add a new glyph.  Creates an empty glyf table if there is none."""
        if hasattr(self, 'glyphs'):
            return

        font = self.font
        if 'glyf' not in font:
            glyf_table = _g_l_y_f.table__g_l_y_f()
            glyf_table.glyphs = {}
            glyf_table.glyphOrder = self.glyph_order
            font['glyf'] = glyf_table
        self.glyphs = font['glyf'].glyphs

    def init_svg(self):
        """Call this if you expect to add SVG images in the font.

        This calls init_glyf since SVG support currently requires fallback glyf
        records for each SVG image.  Creates an empty 'SVG ' table if there is
        none.
        """
        if hasattr(self, 'svgs'):
            return

        # svg requires glyf
        self.init_glyf()

        font = self.font
        if 'SVG ' not in font:
            svg_table = SVG.table_S_V_G_()
            svg_table.docList = []
            svg_table.colorPalettes = None
            font['SVG '] = svg_table
        self.svgs = font['SVG '].docList

    def glyph_name(self, string):
        """Return the glyph name for a character or ligature string: one
        'uXXXX' part per character, joined by underscores."""
        return "_".join(["u%04X" % ord(char) for char in string])

    def glyph_name_to_index(self, name):
        """Return the index of 'name' in the glyph order, or -1 if absent."""
        try:
            return self.glyph_order.index(name)
        except ValueError:
            return -1

    def glyph_index_to_name(self, glyph_index):
        """Return the glyph name at 'glyph_index', or '' if out of range."""
        # Reject negative indices explicitly: -1 is the 'not found' value of
        # glyph_name_to_index and must not silently map to the last glyph.
        if 0 <= glyph_index < len(self.glyph_order):
            return self.glyph_order[glyph_index]
        return ''

    def have_glyph(self, name):
        """Return True if a glyph called 'name' is in the glyph order."""
        return self.glyph_name_to_index(name) >= 0

    def _add_ligature(self, glyphstr):
        """Add a ligature record mapping the characters of glyphstr to the glyph
        named for the whole string.  Requires init_gsub to have been called."""
        lig = otTables.Ligature()
        lig.CompCount = len(glyphstr)
        # The first component is the key of the ligature dict, so only the
        # remaining components are stored on the record itself.
        lig.Component = [self.glyph_name(ch) for ch in glyphstr[1:]]
        lig.LigGlyph = self.glyph_name(glyphstr)

        first = self.glyph_name(glyphstr[0])
        self.ligatures.setdefault(first, []).append(lig)

    def _add_empty_glyph(self, glyphstr, name):
        """Create an empty glyph.  If glyphstr is not a ligature, add a cmap
        entry for it."""
        if len(glyphstr) == 1:
            self.cmap[ord(glyphstr)] = name
        self.hmtx[name] = [0, 0]
        self.glyph_order.append(name)
        # glyf records are only maintained once init_glyf has been called.
        if hasattr(self, 'glyphs'):
            self.glyphs[name] = _g_l_y_f.Glyph()

    def add_components_and_ligature(self, glyphstr):
        """Convert glyphstr to a name and check if it already exists.

        If not, check if it is a ligature (longer than one codepoint), and if it
        is, generate empty glyphs with cmap entries for any missing ligature
        components and add a ligature record.  Then generate an empty glyph for
        the name.  Return a tuple with the name, index, and a bool indicating
        whether the glyph already existed.
        """
        name = self.glyph_name(glyphstr)
        index = self.glyph_name_to_index(name)
        exists = index >= 0
        if not exists:
            if len(glyphstr) > 1:
                for char in glyphstr:
                    if ord(char) not in self.cmap:
                        char_name = self.glyph_name(char)
                        self._add_empty_glyph(char, char_name)
                self._add_ligature(glyphstr)
            # The new glyph is appended, so its index is the current length.
            index = len(self.glyph_order)
            self._add_empty_glyph(glyphstr, name)
        return name, index, exists

    def add_svg(self, doc, hmetrics, name, index):
        """Add an svg table entry.  If hmetrics is not None, update the hmtx
        table.  This expects the glyph has already been added."""
        # sanity check to make sure name and index correspond.
        assert name == self.glyph_index_to_name(index)
        if hmetrics:
            self.hmtx[name] = hmetrics
        svg_record = (doc, index, index)  # startGlyphId, endGlyphId are the same
        self.svgs.append(svg_record)
def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1):
    """Find image files and pair each with the glyph string its name encodes.

    Files matching '<prefix>*.<ext>' are scanned.  The part of each file name
    between the prefix and the extension is decoded as one or more hex
    codepoints separated by underscores, giving the character or ligature
    string (glyphstr) for that image.  If 'include' is given, only file names
    the regex finds a match in are kept; if 'exclude' is given, file names it
    finds a match in are dropped (applied after include).  Returns the
    (glyphstr, filename) items collected; raises Exception if there are none.
    """
    glob_pat = "%s*.%s" % (prefix, ext)
    if verbosity:
        print("Looking for images matching '%s'." % glob_pat)

    exclude_re = re.compile(exclude) if exclude else None
    include_re = re.compile(include) if include else None
    if verbosity and include_re:
        print("Including images matching '%s'." % include)
    if verbosity and exclude_re:
        print("Excluding images matching '%s'." % exclude)

    # Slice bounds that strip the prefix and the '.ext' suffix from a path.
    start = len(prefix)
    stop = -(len(ext) + 1)

    found = {}
    skipped = 0
    for path in glob.glob(glob_pat):
        if include_re and not include_re.search(path):
            continue
        if exclude_re and exclude_re.search(path):
            if verbosity > 1:
                print("Exclude %s" % path)
            skipped += 1
            continue

        # A name without underscores splits into a single code, so one path
        # handles both single characters and ligatures.
        hex_codes = path[start:stop].split("_")
        glyphstr = "".join(unichr(int(code, 16)) for code in hex_codes)
        found[glyphstr] = path

    if verbosity and skipped:
        print("Excluded %d files." % skipped)
    if not found:
        raise Exception ("No image files matching '%s'." % glob_pat)
    if verbosity:
        print("Included %s files." % len(found))
    return found.items()
def sort_glyphstr_tuples(glyphstr_tuples):
    """Sort, in place, a list of tuples whose first element is a glyph string.

    Entries are ordered by the length of the glyph string first and by the
    string itself second, so that ligature components are added to the font
    before any ligatures that contain them.
    """
    def length_then_text(entry):
        glyphstr = entry[0]
        return len(glyphstr), glyphstr

    glyphstr_tuples.sort(key=length_then_text)
def add_image_glyphs(in_file, out_file, pairs, verbosity=1):
    """Add images from pairs (glyphstr, filename) to .ttx file in_file and write
    to .ttx file out_file.

    'pairs' is sorted in place (shortest glyph strings first).  An empty list
    is accepted; the font is then written out with no images added.
    """
    quiet = verbosity < 2
    font = ttx.TTFont(quiet=quiet)
    font.importXML(in_file, quiet=quiet)

    sort_glyphstr_tuples(pairs)

    font_builder = FontBuilder(font)
    # we've already sorted by length, so the longest glyphstrs are at the end.
    # To see if we have ligatures, we just need to check the last one.  Guard
    # against an empty list so we don't fail with an IndexError.
    if pairs and len(pairs[-1][0]) > 1:
        font_builder.init_gsub()

    img_builder = svg_builder.SvgBuilder(font_builder)
    for glyphstr, filename in pairs:
        if verbosity > 1:
            print("Adding glyph for U+%s" % ",".join(["%04X" % ord(char) for char in glyphstr]))
        img_builder.add_from_filename(glyphstr, filename)

    font.saveXML(out_file, quiet=quiet)
    if verbosity:
        print("added %s images to %s" % (len(pairs), out_file))
def main(argv):
    """Command-line entry point: parse argv (without the program name), collect
    the svg images matching the image prefix, and add them to the input ttx
    file, writing the result to the output ttx file."""
    usage = """This will search for files that have image_prefix followed by one or more
hex numbers (separated by underscore if more than one), and end in ".svg".
For example, if image_prefix is "icons/u", then files with names like
"icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded. The script
then adds cmap, hmtx, and potentially GSUB entries for the Unicode
characters found. The advance width will be chosen based on image aspect
ratio. If Unicode values outside the BMP are desired, the existing cmap
table should be of the appropriate (format 12) type. Only the first cmap
table and the first GSUB lookup (if existing) are modified."""

    parser = argparse.ArgumentParser(
        description="Update cmap, glyf, GSUB, and hmtx tables from image glyphs.", epilog=usage)
    parser.add_argument('in_file', help="Input ttx file name.")
    parser.add_argument('out_file', help="Output ttx file name.")
    parser.add_argument('image_prefix', help="Location and prefix of image files.")
    parser.add_argument('-i', '--include', help='include files whose name matches this regex')
    parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
    # --quiet and --verbose share dest 'v'; the default of 1 (normal output)
    # is supplied by the first of the two actions.
    parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1,
                        action='store_const', const=0)
    parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.",
                        action='store_const', const=2)
    args = parser.parse_args(argv)

    pairs = collect_glyphstr_file_pairs(args.image_prefix, 'svg', include=args.include,
                                        exclude=args.exclude, verbosity=args.v)
    add_image_glyphs(args.in_file, args.out_file, pairs, verbosity=args.v)


if __name__ == '__main__':
    main(sys.argv[1:])

View File

@ -0,0 +1,196 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
import argparse
import os
import os.path
import re
import sys
from fontTools import ttx
import add_svg_glyphs
def do_generate_test_html(font_basename, pairs, glyph=None, verbosity=1):
    """Write '<font_basename>_test.html', a page showing every glyph in pairs.

    The page loads '<font_basename>.woff' and lists one <span> per
    (glyphstr, filename) pair, written as html numeric character references.
    A large panel initially shows the glyph whose html-encoded text equals
    'glyph'; if 'glyph' is not given, or no glyph matches it, the first glyph
    is shown instead.  The hex codepoints of the panel glyph appear below it.
    """
    header = r"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
@font-face { font-family: svgfont; src: url("%s") }
body { font-family: sans-serif; font-size: 24px }
#emoji span { font-family: svgfont, sans-serif }
#panel { font-family: svgfont, sans-serif; font-size: 256px }
#paneltitle { font-family: sans-serif; font-size: 36px }
</style>
<script type="text/javascript">
function hexify(text) {
var surr_offset = 0x10000 - (0xd800 << 10) - 0xdc00
var str = new String(text.trim())
var len = str.length
var result = ""
for (var i = 0; i < len; ++i) {
var cp = str.charCodeAt(i)
if (cp >= 0xd800 && cp < 0xdc00 && i < len - 1) {
ncp = str.charCodeAt(i+1)
if (ncp >= 0xdc00 && ncp < 0xe000) {
cp = (cp << 10) + ncp + surr_offset
++i;
}
}
result += " 0x" + cp.toString(16)
}
return result
};
function showText(event) {
var text = event.target.textContent
var p = document.getElementById('panel')
p.textContent = text
p = document.getElementById('paneltitle')
p.textContent = hexify(text)
};
function setup() {
var t = document.getElementById('emoji')
var tdlist = t.getElementsByTagName('span')
for (var i = 0, lim = tdlist.length; i < lim; ++i) {
var e = tdlist[i]
e.onmouseover = showText
}
};
</script>
</head>"""

    body_head = r"""<body onload="setup();">
<p>Test for SVG glyphs in %(font)s. It uses the proposed
<a href="http://lists.w3.org/Archives/Public/public-svgopentype/2013Jul/0003.html">SVG-in-OpenType format</a>.
View using Firefox&nbsp;26 and later.
<div style="float:left; text-align:center; margin:0 10px">
<div id='panel' style="margin-left:auto; margin-right:auto">%(glyph)s</div>
<div id='paneltitle' style="margin-left:auto; margin-right:auto">%(glyph_hex)s</div>
</div>
<div id='emoji'><p>"""

    body_tail = r"""</div>
</body>
</html>
"""

    font_name = font_basename + ".woff"
    html_name = font_basename + "_test.html"

    matched = False       # True once the requested (or default) glyph is chosen
    panel_entity = None   # html text of the glyph shown in the large panel
    panel_hex = None      # space separated hex codepoints of that glyph
    spans = []
    for glyphstr, _ in pairs:
        hex_codes = [hex(ord(cp)) for cp in glyphstr]
        # hex() yields '0x...'; drop the prefix to form '&#x...;' references.
        entity = ''.join(['&#x%s;' % code[2:] for code in hex_codes])

        if not matched:
            wanted = not glyph or entity == glyph
            # Until the wanted glyph turns up, remember the first glyph seen
            # as the fallback for the panel.
            if wanted or not panel_entity:
                panel_entity = entity
                panel_hex = ' '.join(hex_codes)
            if wanted:
                matched = True

        spans.append('<span>%s</span>' % entity)

    if verbosity and glyph and not matched:
        print("Did not find glyph '%s', using initial glyph '%s'" % (glyph, panel_entity))
    elif verbosity > 1 and not glyph:
        print("Using initial glyph '%s'" % panel_entity)

    panel_values = {'font': font_name, 'glyph': panel_entity, 'glyph_hex': panel_hex}
    # Joining with newlines leaves whitespace between each emoji span.
    pieces = [header % font_name, body_head % panel_values] + spans + [body_tail]
    output = '\n'.join(pieces)

    with open(html_name, 'w') as fp:
        fp.write(output)
    if verbosity:
        print('Wrote ' + html_name)
def do_generate_fonts(template_file, font_basename, pairs, reuse=False, verbosity=1):
    """Build '<font_basename>.ttx' and '<font_basename>.woff' from a template.

    The svg images in pairs are added to the template ttx file, the result is
    written as a ttx file, and that file is then converted to woff.  If
    'reuse' is set and a readable woff file already exists, nothing is built.
    """
    out_woff = font_basename + '.woff'
    have_usable_woff = reuse and os.path.isfile(out_woff) and os.access(out_woff, os.R_OK)
    if have_usable_woff:
        if verbosity:
            print('Reusing ' + out_woff)
        return

    out_ttx = font_basename + '.ttx'
    add_svg_glyphs.add_image_glyphs(template_file, out_ttx, pairs, verbosity=verbosity)

    # Re-read the generated ttx and save it in the woff flavor.
    suppress_output = verbosity < 2
    woff_font = ttx.TTFont(flavor='woff', quiet=suppress_output)
    woff_font.importXML(out_ttx, quiet=suppress_output)
    woff_font.save(out_woff)
    if verbosity:
        print('Wrote ' + out_woff)
def main(argv):
    """Command-line entry point: parse argv (without the program name), build
    the svg font (ttx and woff) from the template and images, and write an
    html test page for it."""
    usage = """This will search for files that have image_prefix followed by one or more
hex numbers (separated by underscore if more than one), and end in ".svg".
For example, if image_prefix is "icons/u", then files with names like
"icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be found. It generates
an SVG font from this, converts it to woff, and also generates an html test
page containing text for all the SVG glyphs."""

    parser = argparse.ArgumentParser(
        description='Generate font and html test file.', epilog=usage)
    parser.add_argument('template_file', help='name of template .ttx file')
    parser.add_argument('image_prefix', help='location and prefix of image files')
    parser.add_argument('-i', '--include', help='include files whose name matches this regex')
    parser.add_argument('-e', '--exclude', help='exclude files whose name matches this regex')
    parser.add_argument('-o', '--out_basename', help='base name of (ttx, woff, html) files to generate, '
                        'defaults to the template base name')
    parser.add_argument('-g', '--glyph', help='set the initial glyph text (html encoded string), '
                        'defaults to first glyph')
    parser.add_argument('-r', '--reuse_font', help='use existing woff font', action='store_true')
    # --quiet and --verbose share dest 'v'; the default of 1 (normal output)
    # is supplied by the first of the two actions.
    parser.add_argument('-q', '--quiet', dest='v', help='quiet operation', default=1,
                        action='store_const', const=0)
    parser.add_argument('-v', '--verbose', dest='v', help='verbose operation',
                        action='store_const', const=2)
    args = parser.parse_args(argv)

    pairs = add_svg_glyphs.collect_glyphstr_file_pairs(
        args.image_prefix, 'svg', include=args.include, exclude=args.exclude, verbosity=args.v)
    add_svg_glyphs.sort_glyphstr_tuples(pairs)

    out_basename = args.out_basename
    if not out_basename:
        # Strip all extensions (e.g. '.tmpl.ttx') from the file name only.
        # Splitting the whole path on '.' would truncate paths such as
        # './font.ttx' or '../fonts/font.ttx' to an empty or wrong basename.
        template_dir, template_name = os.path.split(args.template_file)
        out_basename = os.path.join(template_dir, template_name.split('.')[0])
        if args.v:
            print("Output basename is %s." % out_basename)
    do_generate_fonts(args.template_file, out_basename, pairs, reuse=args.reuse_font, verbosity=args.v)
    do_generate_test_html(out_basename, pairs, glyph=args.glyph, verbosity=args.v)


if __name__ == '__main__':
    main(sys.argv[1:])

107
svg_builder.py 100755
View File

@ -0,0 +1,107 @@
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
import svg_cleaner
class SvgBuilder(object):
    """Modifies a font to add SVG glyphs from a document or string.  Once built you
    can call add_from_filename or add_from_doc multiple times to add SVG
    documents, which should contain a single root svg element representing the glyph.
    This element must have width and height attributes (in px), these are used to
    determine how to scale the glyph.  The svg should be designed to fit inside
    this bounds and have its origin at the top left.  Adding the svg generates a
    transform to scale and position the glyph, so the svg element should not have
    a transform attribute since it will be overwritten.  Any id attribute on the
    glyph is also overwritten.

    Adding a glyph can generate additional default glyphs for components of a
    ligature that are not already present.

    It is possible to add SVG images to a font that already has corresponding
    glyphs.  If a glyph exists already, then its hmtx advance is assumed valid.
    Otherwise we will generate an advance based on the image's width and scale
    factor.  Callers should ensure that glyphs for components of ligatures are
    added before the ligatures themselves, otherwise glyphs generated for missing
    ligature components will be assigned zero metrics that will not be
    overridden later."""

    def __init__(self, font_builder):
        """Prepare font_builder for svg glyphs and cache the font's vertical
        metrics (from hhea) and units per em (from head)."""
        font_builder.init_svg()

        self.font_builder = font_builder
        self.cleaner = svg_cleaner.SvgCleaner()

        font = font_builder.font
        self.font_ascent = font['hhea'].ascent
        self.font_height = self.font_ascent - font['hhea'].descent
        self.font_upem = font['head'].unitsPerEm

    def add_from_filename(self, ustr, filename):
        """Read the svg document in filename and add it as the glyph for ustr."""
        with open(filename, "r") as fp:
            return self.add_from_doc(ustr, fp.read())

    def _get_int_px(self, val):
        """Return the integer in a string like '128px'.

        Raises ValueError if val does not end in 'px' (case-insensitive) or the
        part before it is not an integer.
        """
        if not val.lower().endswith('px'):
            # Raising a bare string is itself a TypeError; raise a real exception
            # so the caller sees the intended message.
            raise ValueError("expected width or height ending in 'px' but got: %s" % val)
        return int(val[:-2])

    def add_from_doc(self, ustr, svgdoc):
        """Cleans the svg doc, tweaks the root svg element's
        attributes, then updates the font.  ustr is the character or ligature
        string, svgdoc is the svg document xml.  The doc must have a single
        svg root element."""

        # The svg element must have an id attribute of the form 'glyphNNN' where NNN
        # is the glyph id.  We capture the index of the glyph we're adding and write
        # it into the svg.
        #
        # We generate a transform that places the origin at the top left of the
        # ascent and uniformly scales it to fit both the font height (ascent -
        # descent) and glyph advance if it is already present.  The width and height
        # attributes are not used by rendering, so they are removed from the element
        # once we're done with them.

        cleaner = self.cleaner
        fbuilder = self.font_builder

        tree = cleaner.tree_from_text(svgdoc)
        cleaner.clean_tree(tree)

        name, index, exists = fbuilder.add_components_and_ligature(ustr)

        tree.attrs['id'] = 'glyph%s' % index

        image_width = self._get_int_px(tree.attrs.pop('width'))
        image_height = self._get_int_px(tree.attrs.pop('height'))
        scale = float(self.font_height) / image_height
        if exists:
            width = fbuilder.hmtx[name][0]
            # Special case for preexisting zero advance, we scale to height.
            if width > 0:
                hscale = float(width) / image_width
                # Use the smaller factor so the image fits both dimensions.
                if hscale < scale:
                    scale = hscale

        transform = 'translate(0, -%s) scale(%s)' % (self.font_ascent, scale)
        tree.attrs['transform'] = transform

        svgdoc = cleaner.tree_to_text(tree)

        hmetrics = None
        if not exists:
            # horiz advance and lsb
            hmetrics = [int(round(image_width * scale)), 0]

        fbuilder.add_svg(svgdoc, hmetrics, name, index)

254
svg_cleaner.py 100755
View File

@ -0,0 +1,254 @@
#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
import argparse
import codecs
import os.path
import re
import sys
from xml.parsers import expat
from xml.sax import saxutils
# Expat doesn't allow me to identify empty tags (in particular, with an
# empty tag the parse location for the start and end is not the same) so I
# have to take a dom-like approach if I want to identify them. There are a
# lot of empty tags in svg. This way I can do some other kinds of cleanup
# as well (remove unnecessary 'g' elements, for instance).
# Use nodes instead of tuples and strings because it's easier to mutate
# a tree of these, and cleaner will want to do this.
class _Elem_Node(object):
def __init__(self, name, attrs, contents):
self.name = name
self.attrs = attrs
self.contents = contents
def __repr__(self):
line = ["elem(name: '%s'" % self.name]
if self.attrs:
line.append(" attrs: '%s'" % self.attrs)
if self.contents:
line.append(" contents[%s]: '%s'" % (len(self.contents), self.contents))
line.append(')')
return ''.join(line)
class _Text_Node(object):
def __init__(self, text):
self.text = text
def __repr__(self):
return "text('%s')" % self.text
class SvgCleaner(object):
    """Strip out unwanted parts of an svg file, primarily the xml declaration and
    doctype lines, comments, and some attributes of the outermost <svg> element.
    The id will be replaced when it is inserted into the font.  viewBox causes
    unwanted scaling when used in a font and its effect is difficult to
    predict.  version is unneeded, xml:space is ignored (we're processing spaces
    so a request to maintain them has no effect).  enable-background appears to
    have no effect.  x and y on the outermost svg element have no effect.  We
    keep width and height, and will elsewhere assume these are the dimensions
    used for the character box."""

    def __init__(self):
        self.reader = SvgCleaner._Reader()
        self.cleaner = SvgCleaner._Cleaner()
        self.writer = SvgCleaner._Writer()

    class _Reader(object):
        """Loosely based on fonttools's XMLReader.  This generates a tree of nodes,
        either element nodes or text nodes.  Successive text content is merged
        into one node, so contents will never contain more than one _Text_Node in
        a row.  This drops comments, xml declarations, and doctypes."""

        def _reset(self, parser):
            """Clear the parse state.  'parser' is accepted but not used."""
            self._stack = []
            self._textbuf = []

        def _start_element(self, name, attrs):
            self._flush_textbuf()
            node = _Elem_Node(name, attrs, [])
            if len(self._stack):
                self._stack[-1].contents.append(node)
            self._stack.append(node)

        def _end_element(self, name):
            self._flush_textbuf()
            # Never pop the root, so it is still available when parsing ends.
            if len(self._stack) > 1:
                self._stack.pop()

        def _character_data(self, data):
            # Text outside the root element is dropped.
            if len(self._stack):
                self._textbuf.append(data)

        def _flush_textbuf(self):
            if self._textbuf:
                node = _Text_Node(''.join(self._textbuf))
                self._stack[-1].contents.append(node)
                self._textbuf = []

        def from_text(self, data):
            """Return the root node of a tree representing the svg data.

            Raises expat.ExpatError if data is not a complete, well-formed
            document."""
            parser = expat.ParserCreate()
            parser.StartElementHandler = self._start_element
            parser.EndElementHandler = self._end_element
            parser.CharacterDataHandler = self._character_data
            self._reset(parser)
            # All the data is passed in one call, so mark it as final.  Without
            # this expat does not report truncated documents.
            parser.Parse(data, True)
            return self._stack[0]

    class _Cleaner(object):
        """Normalizes whitespace in text and attribute values, removes unwanted
        attributes from <svg> elements, and prunes empty text and 'g' nodes."""

        # Attributes dropped from elements named 'svg'.
        _UNWANTED_SVG_ATTRS = ('x', 'y', 'id', 'version', 'viewBox',
                               'enable-background', 'xml:space')

        def _clean_elem(self, node):
            nattrs = {}
            for k, v in node.attrs.items():
                if node.name == 'svg' and k in self._UNWANTED_SVG_ATTRS:
                    continue
                nattrs[k] = re.sub(r'\s+', ' ', v)
            node.attrs = nattrs

            # scan contents. remove any empty text nodes, or empty 'g' element
            # nodes.  if a 'g' element has no attrs and only one subnode, replace
            # it with the subnode.
            kept = []
            for n in node.contents:
                if isinstance(n, _Text_Node):
                    if not n.text:
                        continue
                elif n.name == 'g':
                    if not n.contents:
                        continue
                    if not n.attrs and len(n.contents) == 1:
                        n = n.contents[0]
                kept.append(n)
            node.contents = kept

        def _clean_text(self, node):
            text = node.text.strip()
            # common case is text is empty (line endings between elements)
            if text:
                text = re.sub(r'\s+', ' ', text)
            node.text = text

        def clean(self, node):
            """Clean the tree rooted at node in place."""
            if isinstance(node, _Text_Node):
                self._clean_text(node)
            else:
                # do contents first, so we can check for empty subnodes after
                for n in node.contents:
                    self.clean(n)
                self._clean_elem(node)

    class _Writer(object):
        """Serializes a tree of nodes to xml text.  Each element start tag, end
        tag, and text node goes on its own line; element lines are indented by
        nesting level and attributes are written in sorted order."""

        def _write_node(self, node, lines, indent):
            """Node is a node generated by _Reader, either a TextNode or an
            ElementNode.  Lines is a list to collect the lines of output.  Indent is
            the indentation level for this node."""
            if isinstance(node, _Text_Node):
                if node.text:
                    # The parser hands us unescaped text, so '&', '<' and '>'
                    # must be escaped again to produce well-formed xml.
                    lines.append(saxutils.escape(node.text))
                return

            margin = ' ' * indent
            line = [margin, '<%s' % node.name]
            for k in sorted(node.attrs.keys()):
                v = node.attrs[k]
                line.append(' %s=%s' % (k, saxutils.quoteattr(v)))
            if node.contents:
                line.append('>')
                lines.append(''.join(line))
                for elem in node.contents:
                    self._write_node(elem, lines, indent + 1)
                lines.append('%s</%s>' % (margin, node.name))
            else:
                # No contents: write an empty-element tag.
                line.append('/>')
                lines.append(''.join(line))

        def to_text(self, root):
            """Return the xml text for the tree rooted at root."""
            # set up lines for recursive calls, let them append lines, then
            # return the result.
            lines = []
            self._write_node(root, lines, 0)
            return '\n'.join(lines)

    def tree_from_text(self, svg_text):
        """Parse svg_text and return the root node of its tree."""
        return self.reader.from_text(svg_text)

    def clean_tree(self, svg_tree):
        """Clean svg_tree in place."""
        self.cleaner.clean(svg_tree)

    def tree_to_text(self, svg_tree):
        """Return svg_tree serialized as xml text."""
        return self.writer.to_text(svg_tree)

    def clean_svg(self, svg_text):
        """Return the cleaned svg_text."""
        tree = self.tree_from_text(svg_text)
        self.clean_tree(tree)
        return self.tree_to_text(tree)
def clean_svg_files(in_dir, out_dir, match_pat=None, quiet=False):
    """Clean the svg files in in_dir and write the results into out_dir.

    Each file in in_dir whose name matches the regex match_pat (all files if
    match_pat is not given) is cleaned and written, utf-8 encoded, under the
    same name in out_dir.  out_dir is created if it does not exist.  Unless
    quiet is set, a line is printed for the created directory and for each
    file written; the final summary line is always printed.
    """
    name_re = re.compile(match_pat) if match_pat else None

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        if not quiet:
            print('created output directory: %s' % out_dir)

    svg_cleaner = SvgCleaner()
    processed = 0
    for file_name in os.listdir(in_dir):
        # match() anchors the pattern at the start of the file name.
        if name_re and not name_re.match(file_name):
            continue

        source_path = os.path.join(in_dir, file_name)
        with open(source_path) as in_fp:
            cleaned = svg_cleaner.clean_svg(in_fp.read())

        dest_path = os.path.join(out_dir, file_name)
        with codecs.open(dest_path, 'w', 'utf-8') as out_fp:
            if not quiet:
                print('wrote: %s' % dest_path)
            out_fp.write(cleaned)
        processed += 1

    if processed:
        print('processed %s files to %s' % (processed, out_dir))
    else:
        print('failed to match any files')
def main():
    """Command-line entry point: clean the svg files in one directory into
    another, optionally restricted to file names matching a regex."""
    parser = argparse.ArgumentParser(
        description="Generate 'cleaned' svg files.")
    parser.add_argument('in_dir', help='Input directory.')
    parser.add_argument('out_dir', help='Output directory.')
    # nargs='?' makes this positional optional; without it argparse requires
    # the argument and the documented default could never apply.
    parser.add_argument('regex', help='Regex to select files, default matches all files.',
                        nargs='?', default=None)
    parser.add_argument('--quiet', '-q', help='Quiet operation.', action='store_true')
    args = parser.parse_args()
    clean_svg_files(args.in_dir, args.out_dir, match_pat=args.regex, quiet=args.quiet)


if __name__ == '__main__':
    main()