From 1a9a94525b1067b5845359bd6e2da81cdae7eb79 Mon Sep 17 00:00:00 2001
From: Doug Felt <dougfelt@google.com>
Date: Thu, 26 Feb 2015 15:01:30 -0800
Subject: [PATCH] Support generation of SVG glyphs using region-flags data.

collect_emoji_svg takes the region-flags data in third_party
and copies/renames it into a target dir, then overlays this with
the data in noto/color_emoji/svg (which also has a few flags in
it).

generate_test_html changes the html to make room for the flag glyphs,
which are a lot wider. It also adds an option (--reuse_ttx_font) so we
can skip regenerating the .ttx file in case we want to muck with it
directly and regenerate the .woff from that.

svg_builder has a number of changes needed to deal with the much
more varied svg in the region-flags data.  See comments in the file
for some of the changes that needed to be made.  Some of these
changes are provisional pending clarification of the spec, which
doesn't provide enough guidance.

svg_cleaner is changed slightly to strip width and height; this step
now happens after svg_builder extracts this information from the svg,
instead of before.
---
 collect_emoji_svg.py  | 140 ++++++++++++++++++++++++++++++++++++++++++
 generate_test_html.py |  17 +++--
 svg_builder.py        | 119 ++++++++++++++++++++++++++++-------
 svg_cleaner.py        |   5 +-
 4 files changed, 250 insertions(+), 31 deletions(-)
 create mode 100755 collect_emoji_svg.py

diff --git a/collect_emoji_svg.py b/collect_emoji_svg.py
new file mode 100755
index 000000000..fb56352be
--- /dev/null
+++ b/collect_emoji_svg.py
@@ -0,0 +1,140 @@
+#!/usr/bin/python
+# Copyright 2015 Google, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Google Author(s): Doug Felt
+
+"""Tool to collect emoji svg glyphs into one directory for processing
+by add_svg_glyphs.  There are two sources, noto/color_emoji/svg and
+noto/third_party/region-flags/svg.  The add_svg_glyphs tool expects
+each file name to contain the character string the glyph represents,
+written as a sequence of hex-encoded codepoints separated by
+underscores.  The files in noto/color_emoji/svg do this, and have the
+prefix 'emoji_u', but the files in region-flags/svg just have the
+two-letter code.
+
+We create a directory and copy the files into it with the required
+naming convention. First we do this for region-flags/svg, converting
+the names, and then we do this for color_emoji/svg, so any duplicates
+will be overwritten by what we assume are the preferred svg.  We use
+copies instead of symlinks so we can continue to optimize or modify
+the files without messing with the originals."""
+
+import argparse
+import glob
+import os
+import os.path
+import re
+import shutil
+import sys
+
+def _is_svg(f):
+  return f.endswith('.svg')
+
+
+def _is_svg_and_startswith_emoji(f):
+  return f.endswith('.svg') and f.startswith('emoji_u')
+
+
+def _flag_rename(f):
+  """Converts file names from region-flags files (upper-case ASCII) to our expected
+  'encoded-codepoint-ligature' form, mapping each character to the corresponding
+  regional indicator symbol."""
+
+  cp_strs = []
+  name, ext = os.path.splitext(f)
+  for cp in name:
+    ncp = 0x1f1e6 - 0x41 + ord(cp)
+    cp_strs.append("%04x" % ncp)
+  return 'emoji_u%s%s' % ('_'.join(cp_strs), ext)
+
+
+def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None, verbosity=1):
+  """Copy files from src_dir to dst_dir that match accept_pred (all if None) and rename
+  using rename (if not None), replacing existing files.  accept_pred takes the filename
+  and returns True if the file should be copied, rename takes the filename and returns a
+  new file name."""
+
+  count = 0
+  replace_count = 0
+  for src_filename in os.listdir(src_dir):
+    if accept_pred and not accept_pred(src_filename):
+      continue
+    dst_filename = rename(src_filename) if rename else src_filename
+    src = os.path.join(src_dir, src_filename)
+    dst = os.path.join(dst_dir, dst_filename)
+    if os.path.exists(dst):
+      if verbosity > 1:
+        print "Replacing existing file " + dst
+      os.unlink(dst)
+      replace_count += 1
+    shutil.copy2(src, dst)
+    if verbosity > 1:
+      print "cp -p %s %s" % (src, dst)
+    count += 1
+  if verbosity:
+    print "Copied/renamed %d files from %s to %s" % (count, src_dir, dst_dir)
+  return count, replace_count
+
+
+def build_svg_dir(dst_dir, clean=False, flags_only=False, verbosity=1):
+  """Copies/renames files from noto/third_party/region-flags/svg and then noto/color_emoji/svg,
+  giving them the standard format and prefix ('emoji_u' followed by codepoints expressed
+  in hex separated by underscore).  If clean, removes the target dir before proceeding.
+  If flags_only, only does the region-flags."""
+
+  if not os.path.isdir(dst_dir):
+    os.makedirs(dst_dir)
+  elif clean:
+    shutil.rmtree(dst_dir)
+    os.makedirs(dst_dir)
+
+  # get files from path relative to noto
+  notopath = re.match("^.*/noto/", os.path.realpath(__file__)).group()
+
+  # copy region flags, generating new names based on the two-letter codes.
+  flag_dir = os.path.join(notopath, "third_party/region-flags/svg")
+  count, replace_count = copy_with_rename(
+    flag_dir, dst_dir, accept_pred=_is_svg, rename=_flag_rename, verbosity=verbosity)
+
+  # copy the 'good' svg
+  if not flags_only:
+    svg_dir = os.path.join(notopath, "color_emoji/svg")
+    temp_count, temp_replace_count = copy_with_rename(
+      svg_dir, dst_dir, accept_pred=_is_svg_and_startswith_emoji, verbosity=verbosity)
+    count += temp_count
+    replace_count += temp_replace_count
+
+  if verbosity:
+    if replace_count:
+      print "Replaced %d existing files" % replace_count
+    print "Created %d total files" % (count - replace_count)
+
+
+def main(argv):
+  parser = argparse.ArgumentParser(
+      description="Collect svg files into target directory with prefix.")
+  parser.add_argument('dst_dir', help="Directory to hold symlinks to files.")
+  parser.add_argument('--clean', '-c', help="Replace target directory", action='store_true')
+  parser.add_argument('--flags_only', '-fo', help="Only copy region-flags", action='store_true')
+  parser.add_argument('--quiet', '-q', dest='v', help="quiet operation.", default=1,
+                      action='store_const', const=0)
+  parser.add_argument('--verbose', '-v', dest='v', help="verbose operation.",
+                      action='store_const', const=2)
+  args = parser.parse_args(argv)
+
+  build_svg_dir(args.dst_dir, clean=args.clean, flags_only=args.flags_only, verbosity=args.v)
+
+if __name__ == '__main__':
+  main(sys.argv[1:])
diff --git a/generate_test_html.py b/generate_test_html.py
index eef174535..37c76e4e3 100755
--- a/generate_test_html.py
+++ b/generate_test_html.py
@@ -80,13 +80,12 @@ function setup() {
 <p>Test for SVG glyphs in %(font)s.  It uses the proposed
 <a href="http://lists.w3.org/Archives/Public/public-svgopentype/2013Jul/0003.html">SVG-in-OpenType format</a>.
 View using Firefox&nbsp;26 and later.
-<div style="float:left; text-align:center; margin:0 10px">
+<div style="float:left; text-align:center; margin:0 10px; width:40%%">
 <div id='panel' style="margin-left:auto; margin-right:auto">%(glyph)s</div>
 <div id='paneltitle' style="margin-left:auto; margin-right:auto">%(glyph_hex)s</div>
 </div>
 <div id='emoji'><p>"""
 
-
   body_tail = r"""</div>
 </body>
 </html>
@@ -137,15 +136,18 @@ View using Firefox&nbsp;26 and later.
     print 'Wrote ' + html_name
 
 
-def do_generate_fonts(template_file, font_basename, pairs, reuse=False, verbosity=1):
+def do_generate_fonts(template_file, font_basename, pairs, reuse=0, verbosity=1):
   out_woff = font_basename + '.woff'
-  if reuse and os.path.isfile(out_woff) and os.access(out_woff, os.R_OK):
+  if reuse > 1 and os.path.isfile(out_woff) and os.access(out_woff, os.R_OK):
     if verbosity:
       print 'Reusing ' + out_woff
     return
 
   out_ttx = font_basename + '.ttx'
-  add_svg_glyphs.add_image_glyphs(template_file, out_ttx, pairs, verbosity=verbosity)
+  if reuse == 0:
+    add_svg_glyphs.add_image_glyphs(template_file, out_ttx, pairs, verbosity=verbosity)
+  elif verbosity:
+    print 'Reusing ' + out_ttx
 
   quiet=verbosity < 2
   font = ttx.TTFont(flavor='woff', quiet=quiet)
@@ -173,7 +175,10 @@ def main(argv):
                       'defaults to the template base name')
   parser.add_argument('-g', '--glyph', help='set the initial glyph text (html encoded string), '
                       'defaults to first glyph')
-  parser.add_argument('-r', '--reuse_font', help='use existing woff font', action='store_true')
+  parser.add_argument('-rt', '--reuse_ttx_font', dest='reuse_font', help='use existing ttx font',
+                      default=0, const=1, action='store_const')
+  parser.add_argument('-r', '--reuse_font', dest='reuse_font', help='use existing woff font',
+                      const=2, action='store_const')
   parser.add_argument('-q', '--quiet', dest='v', help='quiet operation', default=1,
                       action='store_const', const=0)
   parser.add_argument('-v', '--verbose', dest='v', help='verbose operation',
diff --git a/svg_builder.py b/svg_builder.py
index 4684fc963..720b7624b 100755
--- a/svg_builder.py
+++ b/svg_builder.py
@@ -14,6 +14,11 @@
 #
 # Google Author(s): Doug Felt
 
+import math
+import random
+import re
+import string
+
 import svg_cleaner
 
 class SvgBuilder(object):
@@ -51,14 +56,12 @@ class SvgBuilder(object):
 
   def add_from_filename(self, ustr, filename):
     with open(filename, "r") as fp:
-      return self.add_from_doc(ustr, fp.read())
+      return self.add_from_doc(ustr, fp.read(), filename=filename)
 
-  def _get_int_px(self, val):
-    if not val.lower().endswith('px'):
-      raise "expected width or height ending in 'px' but got: %s" % val
-    return int(val[:-2])
+  def _strip_px(self, val):
+    return float(val[:-2] if val.endswith('px') else val)
 
-  def add_from_doc(self, ustr, svgdoc):
+  def add_from_doc(self, ustr, svgdoc, filename=None):
     """Cleans the svg doc, tweaks the root svg element's
     attributes, then updates the font.  ustr is the character or ligature
     string, svgdoc is the svg document xml.  The doc must have a single
@@ -70,38 +73,108 @@ class SvgBuilder(object):
     #
     # We generate a transform that places the origin at the top left of the
     # ascent and uniformly scales it to fit both the font height (ascent -
-    # descent) and glyph advance if it is already present.  The width and height
-    # attributes are not used by rendering, so they are removed from the element
-    # once we're done with them.
+    # descent) and glyph advance if it is already present.  The initial viewport
+    # is 1000x1000. When present, viewBox scales to fit this and uses default
+    # values for preserveAspectRatio that center the viewBox in this viewport
+    # ('xMidYMid meet'), and ignores the width and height.  If viewBox is not
+    # present, width and height cause a (possibly non-uniform) scale to be
+    # applied that maps the extent to the viewport.  This is unfortunate for us,
+    # since we want to preserve the aspect ratio, and the image is likely
+    # designed for a viewport with the width and height it requested.
+    #
+    # If we have an advance, we want to replicate the behavior of viewBox,
+    # except using a 'viewport' of advance, ascent+descent. If we don't have
+    # an advance, we scale the height and compute the advance from the scaled
+    # width.
+    #
+    # Lengths using percentage units map 100% to the width/height/diagonal
+    # of the viewBox, or if it is not defined, the viewport.  Since we can't
+    # define the viewport, we must always have a viewBox.
 
     cleaner = self.cleaner
     fbuilder = self.font_builder
 
     tree = cleaner.tree_from_text(svgdoc)
-    cleaner.clean_tree(tree)
 
     name, index, exists = fbuilder.add_components_and_ligature(ustr)
 
+    advance = 0
+    if exists:
+      advance = fbuilder.hmtx[name][0]
+
+    vb = tree.attrs.get('viewBox')
+    if vb:
+      x, y, w, h = map(self._strip_px, re.split('\s*,\s*|\s+', vb))
+    else:
+      wid = tree.attrs.get('width')
+      ht = tree.attrs.get('height')
+      if not (wid and ht):
+        raise "missing viewBox and width or height attrs"
+      x, y, w, h = 0, 0, self._strip_px(wid), self._strip_px(ht)
+
+    # We're going to assume default values for preserveAspectRatio for now,
+    # this preserves aspect ratio and centers in the viewport.
+    # First scale to the viewport, which is 1000x1000. compute the scaled
+    # extent and translations that center it in the viewport. We won't try
+    # to optimize this, it's clearer what we're doing this way.
+    if w > h:
+        s2vp = 1000.0/w
+        sh = s2vp * h
+        sty = (1000 - sh) / 2
+        sw = 1000.0
+        stx = 0.0
+    else:
+        s2vp = 1000.0/h
+        sh = 1000.0
+        sty = 0.0
+        sw = s2vp * w
+        stx = (1000 - sw) / 2
+
+    # now, compute the scale. we scale to the height, unless the result would
+    # be wider than an existing advance, in which case we scale to the width
+    scale = self.font_height / sh
+    fit_height = True
+    if advance and scale * sw > advance:
+      scale = advance / sw
+      fit_height = False
+
+    ty = -self.font_ascent - scale * sty
+    tx = scale * stx
+
+    if fit_height and advance:
+      tx += (advance - scale * sw) / 2
+    else:
+      ty += (self.font_height - scale * sh) / 2
+
+    cleaner.clean_tree(tree)
+
     tree.attrs['id'] = 'glyph%s' % index
 
-    image_width = self._get_int_px(tree.attrs.pop('width'))
-    image_height = self._get_int_px(tree.attrs.pop('height'))
-    scale = float(self.font_height) / image_height;
-    if exists:
-      width = fbuilder.hmtx[name][0]
-      # Special case for preexisting zero advance, we scale to height.
-      if width > 0:
-        hscale = float(width) / image_width;
-        if hscale < scale:
-          scale = hscale
-
-    transform = 'translate(0, -%s) scale(%s)' % (self.font_ascent, scale)
+    transform = 'translate(%g, %g) scale(%g)' % (tx, ty, scale)
     tree.attrs['transform'] = transform
 
+    tree.attrs['viewBox'] = '%g %g %g %g' % (x, y, w, h)
+
+    # In order to clip, we need to create a path and reference it.  You'd think
+    # establishing a rectangular clip would be simpler...  Aaaaand... as it
+    # turns out, in FF the clip on the outer svg element is only relative to the
+    # initial viewport, and is not affected by the viewBox or transform on the
+    # svg element.  Unlike chrome. So either we apply an inverse transform, or
+    # insert a group with the clip between the svg and its children.  The latter
+    # seems cleaner, ultimately.
+    clip_id = 'clip_' + ''.join(random.choice(string.ascii_lowercase) for i in range(8))
+    clip_text = """<g clip-path="url(#%s)"><clipPath id="%s">
+      <path d="M%g %gh%gv%gh%gz"/></clipPath></g>""" % (clip_id, clip_id, x, y, w, h, -w)
+    clip_tree = cleaner.tree_from_text(clip_text)
+    clip_tree.contents.extend(tree.contents)
+    tree.contents = [clip_tree];
+
     svgdoc = cleaner.tree_to_text(tree)
 
     hmetrics = None
     if not exists:
       # horiz advance and lsb
-      hmetrics = [int(round(image_width * scale)), 0]
+      advance = scale * sw
+      hmetrics = [int(round(advance)), 0]
+
     fbuilder.add_svg(svgdoc, hmetrics, name, index)
diff --git a/svg_cleaner.py b/svg_cleaner.py
index aaff92760..6743d9715 100755
--- a/svg_cleaner.py
+++ b/svg_cleaner.py
@@ -117,8 +117,9 @@ class SvgCleaner(object):
     def _clean_elem(self, node):
       nattrs = {}
       for k, v in node.attrs.items():
-        if node.name == 'svg' and k in ['x', 'y', 'id', 'version', 'viewBox',
-                                        'enable-background', 'xml:space']:
+        if node.name == 'svg' and k in [
+            'x', 'y', 'id', 'version', 'viewBox', 'width', 'height',
+            'enable-background', 'xml:space']:
           continue
         v = re.sub('\s+', ' ', v)
         nattrs[k] = v