Update svg_cleaner.

Some of the SVGs we're getting aren't clean, so tweak the cleaner
to handle this.
pull/135/head
Doug Felt 2017-05-23 11:29:15 -07:00
parent ca22132c45
commit 0d603f5af5
1 changed files with 46 additions and 4 deletions

View File

@ -15,6 +15,16 @@
# #
# Google Author(s): Doug Felt # Google Author(s): Doug Felt
"""Clean SVG.
svgo could do this, but we're fussy. Also, emacs doesn't understand
that 'style' defaults to 'text/css' and svgo strips this out by
default.
The files we're getting that are exported from AI contain lots of extra
data so that it can reimport the svg, and we don't need it."""
import argparse import argparse
import codecs import codecs
import logging import logging
@ -125,13 +135,19 @@ class SvgCleaner(object):
for k, v in node.attrs.items(): for k, v in node.attrs.items():
if node.name == 'svg' and k in [ if node.name == 'svg' and k in [
'x', 'y', 'id', 'version', 'viewBox', 'width', 'height', 'x', 'y', 'id', 'version', 'viewBox', 'width', 'height',
'enable-background', 'xml:space']: 'enable-background', 'xml:space', 'xmlns:graph', 'xmlns:i',
'xmlns:x']:
if k == 'viewBox': if k == 'viewBox':
viewBox = v viewBox = v
elif k == 'width': elif k == 'width':
width = v width = v
elif k == 'height': elif k == 'height':
height = v height = v
elif k.startswith('xmlns:') and 'ns.adobe.com' not in v:
# keep if not an adobe namespace
logging.debug('keep "%s" = "%s"' % (k, v))
nattrs[k] = v
logging.debug('removing %s=%s' % (k, v))
continue continue
v = re.sub('\s+', ' ', v) v = re.sub('\s+', ' ', v)
nattrs[k] = v nattrs[k] = v
@ -145,7 +161,6 @@ class SvgCleaner(object):
nattrs['height'] = height nattrs['height'] = height
node.attrs = nattrs node.attrs = nattrs
# scan contents. remove any empty text nodes, or empty 'g' element nodes. # scan contents. remove any empty text nodes, or empty 'g' element nodes.
# if a 'g' element has no attrs and only one subnode, replace it with the # if a 'g' element has no attrs and only one subnode, replace it with the
# subnode. # subnode.
@ -157,8 +172,19 @@ class SvgCleaner(object):
elif n.name == 'g': elif n.name == 'g':
if not n.contents: if not n.contents:
continue continue
if 'i:extraneous' in n.attrs:
del n.attrs['i:extraneous']
if not n.attrs and len(n.contents) == 1: if not n.attrs and len(n.contents) == 1:
n = n.contents[0] n = n.contents[0]
elif n.name == 'i:pgf' or n.name == 'foreignObject':
continue
elif n.name =='switch' and len(n.contents) == 1:
n = n.contents[0]
elif n.name == 'style':
# some emacsen don't default 'style' properly, so leave this in.
if False and n.attrs.get('type') == 'text/css':
del n.attrs['type']
node.contents[wpos] = n node.contents[wpos] = n
wpos += 1 wpos += 1
if wpos < len(node.contents): if wpos < len(node.contents):
@ -168,7 +194,9 @@ class SvgCleaner(object):
text = node.text.strip() text = node.text.strip()
# common case is text is empty (line endings between elements) # common case is text is empty (line endings between elements)
if text: if text:
text = re.sub(r'\s+', ' ', text) # main goal here is to leave linefeeds in for style elements
text = re.sub(r'[ \t]*\n+[ \t]*', '\n', text)
text = re.sub(r'[ \t]+', ' ', text)
node.text = text node.text = text
def clean(self, node): def clean(self, node):
@ -197,7 +225,20 @@ class SvgCleaner(object):
margin = ' ' * indent margin = ' ' * indent
line = [margin] line = [margin]
line.append('<%s' % node.name) line.append('<%s' % node.name)
for k in sorted(node.attrs.keys()): # custom sort attributes of svg, yes this is a hack
if node.name == 'svg':
def svgsort(k):
if k == 'width': return (0, None)
elif k == 'height': return (1, None)
else: return (2, k)
ks = sorted(node.attrs.keys(), key=svgsort)
else:
def defsort(k):
if k == 'id': return (0, None)
elif k == 'class': return (1, None)
else: return (2, k)
ks = sorted(node.attrs.keys(), key=defsort)
for k in ks:
v = node.attrs[k] v = node.attrs[k]
line.append(' %s=%s' % (k, saxutils.quoteattr(v))) line.append(' %s=%s' % (k, saxutils.quoteattr(v)))
if node.contents: if node.contents:
@ -250,6 +291,7 @@ def clean_svg_files(in_dir, out_dir, match_pat=None, clean=False):
if regex and not regex.match(file_name): if regex and not regex.match(file_name):
continue continue
in_path = os.path.join(in_dir, file_name) in_path = os.path.join(in_dir, file_name)
logging.debug('read: %s', in_path)
with open(in_path) as in_fp: with open(in_path) as in_fp:
result = cleaner.clean_svg(in_fp.read()) result = cleaner.clean_svg(in_fp.read())
out_path = os.path.join(out_dir, file_name) out_path = os.path.join(out_dir, file_name)