#!/usr/bin/python
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt

import argparse
import codecs
import os.path
import re
import sys
from xml.parsers import expat
from xml.sax import saxutils

# Expat doesn't allow me to identify empty tags (in particular, with an
# empty tag the parse location for the start and end is not the same) so I
# have to take a dom-like approach if I want to identify them. There are a
# lot of empty tags in svg. This way I can do some other kinds of cleanup
# as well (remove unnecessary 'g' elements, for instance).

# Use nodes instead of tuples and strings because it's easier to mutate
# a tree of these, and cleaner will want to do this.


class _Elem_Node(object):
    """Mutable tree node holding one element.

    Attributes:
      name: the element name.
      attrs: the element's attributes, stored exactly as passed in.
      contents: the element's child nodes, stored exactly as passed in.
    """

    def __init__(self, name, attrs, contents):
        self.name = name
        self.attrs = attrs
        self.contents = contents

    def __repr__(self):
        """Return a debug string; falsy attrs/contents are left out."""
        # Every '%' operand is wrapped in an explicit tuple.  With a bare
        # operand, a value that happens to be a tuple is unpacked as the
        # argument list, which raises TypeError or silently misformats.
        parts = ["elem(name: '%s'" % (self.name,)]
        if self.attrs:
            parts.append(" attrs: '%s'" % (self.attrs,))
        if self.contents:
            parts.append(
                " contents[%s]: '%s'" % (len(self.contents), self.contents))
        parts.append(')')
        return ''.join(parts)


class _Text_Node(object):
    """Mutable tree node holding a run of text in its `text` attribute."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        """Return a debug string of the form text('...')."""
        # Explicit 1-tuple so a tuple-valued text cannot break formatting.
        return "text('%s')" % (self.text,)