Merge pull request #46 from dougfelt/emoji_html_fix

Emoji html fix
2016-03-16 16:49:00 -07:00 · 2016-03-16 16:49:00 -07:00 · c6379827aa
parent 8fba2e60fc 2a6be68841
commit c6379827aa
6 changed files with 360 additions and 744 deletions
--- a/10
+++ b/10
@ -43,8 +43,10 @@ RENAMED_FLAGS_DIR := $(BUILD_DIR)/renamed_flags
 QUANTIZED_DIR := $(BUILD_DIR)/quantized_pngs
 COMPRESSED_DIR := $(BUILD_DIR)/compressed_pngs

+# Unknown flag is PUA fe82b
+
 LIMITED_FLAGS = CN DE ES FR GB IT JP KR RU US
-SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
+SELECTED_FLAGS = AC AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ \
 	BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BW BY BZ \
 	CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CX CY CZ \
 	DE DJ DK DM DO DZ \
@ -52,7 +54,7 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
 	FI FJ FM FO FR \
 	GA GB GD GE GG GH GI GL GM GN GQ GR GT GU GW GY \
 	HK HN HR HT HU \
-	ID IE IL IM IN IO IQ IR IS IT \
+	IC ID IE IL IM IN IO IQ IR IS IT \
 	JE JM JO JP \
 	KE KG KH KI KM KN KP KR KW KY KZ \
 	LA LB LC LI LK LR LS LT LU LV LY \
@ -62,8 +64,8 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
 	PA PE PF PG PH PK PL PN PR PS PT PW PY \
 	QA \
 	RO RS RU RW \
-	SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ \
-	TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
+	SA SB SC SD SE SG SH SI SK SL SM SN SO SR SS ST SV SX SY SZ \
+	TA TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
 	UA UG US UY UZ \
 	VA VC VE VG VI VN VU \
 	WS \
--- a/generate_emoji_html.py
+++ b/generate_emoji_html.py
@ -0,0 +1,287 @@
+#!/usr/bin/python
+#
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Build an html page showing emoji images.
+
+This takes a list of directories containing emoji image files, and
+builds an html page presenting the images along with their composition
+(for sequences) and unicode names (for individual emoji)."""
+
+import argparse
+import codecs
+import collections
+import glob
+from os import path
+import re
+import sys
+from nototools import unicode_data
+
+# Values used when the command line does not supply its own.
+_default_dir = 'png/128'
+_default_ext = 'png'
+_default_prefix = 'emoji_u'
+_default_title = 'Emoji List'
+
+# DirInfo describes one directory of image files:
+# - directory: path of the directory
+# - title: heading shown for this directory's column
+# - filemap: dict from a tuple of codepoints to the name of the file
+#   in the directory that depicts that sequence
+DirInfo = collections.namedtuple(
+    'DirInfo', ['directory', 'title', 'filemap'])
+
+
+def _merge_keys(dicts):
+  """Return a frozenset of every key found in any dict of the list.
+
+  An empty list yields an empty frozenset."""
+  # Iterating a dict yields its keys, so union() can consume the dicts directly.
+  return frozenset().union(*dicts)
+
+def _generate_row_cells(key, dir_infos):
+  """Return one '<td>' cell string per dir info for the sequence key.
+
+  A cell holds an img tag pointing at the file the directory has for the
+  key, or the text '-missing-' when the directory has no such file."""
+  CELL_PREFIX = '<td>'
+  cells = []
+  for info in dir_infos:
+    if key not in info.filemap:
+      cells.append(CELL_PREFIX + '-missing-')
+      continue
+    image_path = path.join(info.directory, info.filemap[key])
+    cells.append(CELL_PREFIX + '<img src="%s">' % image_path)
+  return cells
+
+
+def _get_desc(key_tuple, dir_infos):
+  """Return the description cell for a codepoint sequence.
+
+  A single codepoint is shown as 'U+XXXX'.  For a longer sequence each
+  codepoint becomes one space-separated part: zwj is shown as '+', the
+  emoji variation selector is omitted, a codepoint that has its own image
+  in one of the dir infos is shown as that image (first match wins), and
+  anything else is shown as its hex value."""
+  CELL_PREFIX = '<td class="desc">'
+
+  def _image_for(cp):
+    # Path of the first single-codepoint image found for cp, else None.
+    single = (cp,)
+    for info in dir_infos:
+      if single in info.filemap:
+        return path.join(info.directory, info.filemap[single])
+    return None
+
+  if len(key_tuple) == 1:
+    return CELL_PREFIX + 'U+%04X' % key_tuple
+
+  parts = []
+  for cp in key_tuple:
+    if cp == 0xfe0f:  # emoji variation selector, we ignore it
+      continue
+    if cp == 0x200d:  # zwj, common so replace with '+'
+      parts.append('+')
+      continue
+    image_path = _image_for(cp)
+    if image_path:
+      parts.append('<img src="%s">' % image_path)
+    else:
+      parts.append('%04X' % cp)
+  return CELL_PREFIX + ' '.join(parts)
+
+
+def _get_name(key_tuple):
+  """Return the name cell for a codepoint sequence.
+
+  Only single codepoints get a name; sequences get an empty cell.  A
+  proposed emoji is labelled '(proposed) ' plus its proposed name,
+  otherwise the unicode name is used, with '(error)' passed to
+  unicode_data.name as the fallback."""
+  CELL_PREFIX = '<td class="name">'
+  if len(key_tuple) != 1:
+    return CELL_PREFIX
+
+  cp = key_tuple[0]
+  if cp in unicode_data.proposed_emoji_cps():
+    label = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
+  else:
+    label = unicode_data.name(cp, '(error)')
+  return CELL_PREFIX + label
+
+
+def _generate_content(dir_infos):
+  """Return an html table with one row per codepoint sequence.
+
+  The header row has an empty first cell, then one column per dir info
+  title, then 'Description' and 'Name'.  Rows cover the union of the
+  sequences of all dir infos, in sorted order; each row holds the image
+  cells followed by the description and name cells."""
+  header_cells = [''] + [info.title for info in dir_infos]
+  header_cells += ['Description', 'Name']
+  lines = ['<table>', '<th>'.join(header_cells)]
+
+  filemaps = [info.filemap for info in dir_infos]
+  for key in sorted(_merge_keys(filemaps)):
+    cells = list(_generate_row_cells(key, dir_infos))
+    cells.append(_get_desc(key, dir_infos))
+    cells.append(_get_name(key))
+    lines.append(''.join(cells))
+  # Every line after '<table>' starts a new table row.
+  return '\n  <tr>'.join(lines) + '\n</table>'
+
+
+def _get_image_data(image_dir, ext, prefix):
+  """Return a map from a tuple of codepoints to a filename.
+
+  Only files in image_dir with the given extension are considered.  Each
+  name must be the prefix, then a sequence of hex codepoints separated by
+  underscores, then '.' and the extension.  Files starting with
+  'unknown_flag.' are skipped silently.
+
+  Raises ValueError, after listing every problem on stderr, when a file
+  name does not match, a codepoint is empty or not hex, a codepoint is
+  above 0x10ffff, or two files denote the same sequence (for example
+  because they differ only in hex casing)."""
+
+  fails = []
+  result = {}
+  # prefix and ext are literal text, so escape them along with the '.'.
+  # match() only anchors the start, so '$' is needed to reject names with
+  # trailing text after the extension.
+  expect_re = re.compile(
+      r'%s([0-9A-Fa-f_]+)\.%s$' % (re.escape(prefix), re.escape(ext)))
+  for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
+    filename = path.basename(f)
+    m = expect_re.match(filename)
+    if not m:
+      if filename.startswith('unknown_flag.'):
+        continue
+      fails.append('"%s" did not match: "%s"' % (expect_re.pattern, filename))
+      continue
+    try:
+      cps = tuple(int(s, 16) for s in m.group(1).split('_'))
+    except ValueError:
+      # an empty segment, e.g. from a doubled or trailing underscore
+      fails.append('bad cp sequence: ' + filename)
+      continue
+    if any(cp > 0x10ffff for cp in cps):
+      fails.append('cp out of range: ' + filename)
+      continue
+    if cps in result:
+      fails.append('duplicate sequence: %s and %s' % (result[cps], filename))
+      continue
+    result[cps] = filename
+  if fails:
+    sys.stderr.write('get_image_data failed (%s, %s, %s):\n  %s\n' % (
+        image_dir, ext, prefix, '\n  '.join(fails)))
+    raise ValueError('get image data failed')
+  return result
+
+
+def _get_dir_infos(
+    image_dirs, exts=None, prefixes=None, titles=None,
+    default_ext=_default_ext, default_prefix=_default_prefix):
+  """Return a list with one DirInfo per entry of image_dirs.
+
+  exts, prefixes and titles, when given, must each have as many entries
+  as image_dirs, otherwise ValueError is raised.  A missing or empty
+  title falls back to the last segment of the image dir; a missing or
+  empty ext or prefix falls back to default_ext or default_prefix."""
+
+  count = len(image_dirs)
+
+  def _per_dir(values, fill, mismatch_msg):
+    # One value per image dir: 'fill' repeated when nothing was given.
+    if not values:
+      return [fill] * count
+    if len(values) != count:
+      raise ValueError(mismatch_msg % (count, len(values)))
+    return values
+
+  titles = _per_dir(titles, None, 'have %d image dirs but %d titles')
+  exts = _per_dir(exts, default_ext, 'have %d image dirs but %d extensions')
+  prefixes = _per_dir(
+      prefixes, default_prefix, 'have %d image dirs but %d prefixes')
+
+  infos = []
+  for image_dir, title, ext, prefix in zip(image_dirs, titles, exts, prefixes):
+    title = title or path.basename(path.normpath(image_dir))
+    filemap = _get_image_data(
+        image_dir, ext or default_ext, prefix or default_prefix)
+    infos.append(DirInfo(image_dir, title, filemap))
+  return infos
+
+
+def _instantiate_template(template, arg_dict):
+  """Return template with each {{name}} replaced by arg_dict[name].
+
+  Names consist of ascii letters, digits and underscore.  Values are
+  inserted verbatim.  Args that the template does not use are reported
+  on stderr.
+
+  Raises ValueError when the template uses a name that has no arg."""
+  id_regex = re.compile(r'{{([a-zA-Z0-9_]+)}}')
+  ids = set(m.group(1) for m in id_regex.finditer(template))
+  keyset = set(arg_dict)
+  extra_args = keyset - ids
+  if extra_args:
+    sys.stderr.write(
+        'the following %d args are unused:\n%s\n' %
+        (len(extra_args), ', '.join(sorted(extra_args))))
+  missing_ids = ids - keyset
+  if missing_ids:
+    raise ValueError(
+        'the following %d ids in the template have no args:\n%s' %
+        (len(missing_ids), ', '.join(sorted(missing_ids))))
+  # A function replacement keeps backslashes in the values literal (a
+  # string replacement would be parsed for escapes and group references),
+  # and the single pass never re-expands a placeholder that happens to
+  # occur inside a substituted value.
+  return id_regex.sub(lambda m: arg_dict[m.group(1)], template)
+
+
+# Skeleton of the generated page.  write_html_page fills the {{title}},
+# {{style}} and {{content}} placeholders via _instantiate_template.
+TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>{{title}}</title>
+    <style>{{style}}</style>
+  </head>
+  <body>
+  {{content}}
+  </body>
+</html>
+"""
+
+# CSS substituted for {{style}}.  The 'desc' and 'name' classes are the ones
+# put on the description and name cells by _get_desc and _get_name.
+STYLE = """
+      tbody { background-color: rgb(210, 210, 210) }
+      tbody img { width: 64px; height: 64px }
+      tbody .desc { font-size: 20pt; font-weight: bold }
+      tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
+      tbody .name { background-color: white }
+"""
+
+def write_html_page(filename, page_title, dir_infos):
+  """Write an html page showing the images of dir_infos to filename.
+
+  The page is TEMPLATE filled in with page_title, STYLE and the table
+  generated from dir_infos, written as utf-8."""
+  template_args = {
+      'title': page_title,
+      'style': STYLE,
+      'content': _generate_content(dir_infos),
+  }
+  html = _instantiate_template(TEMPLATE, template_args)
+  with codecs.open(filename, 'w', 'utf-8') as out:
+    out.write(html)
+
+
+def main():
+  """Parse the command line and write the emoji html page.
+
+  The output filename gets a '.html' extension when it does not already
+  have one.  When no image directories are given, _default_dir is used.
+  Extensions, prefixes and titles are matched to the image directories
+  by position."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      'filename', help='path to output file', metavar='filename')
+  parser.add_argument(
+      '--page_title', help='page title', metavar='title', default='Emoji Table')
+  # Without a default, omitting -d would hand None to _get_dir_infos.
+  parser.add_argument(
+      '-d', '--image_dirs', help='image directories', metavar='dir',
+      nargs='+', default=[_default_dir])
+  parser.add_argument(
+      '-e', '--exts', help='file extension, one per image dir', metavar='ext',
+      nargs='*')
+  parser.add_argument(
+      '-p', '--prefixes', help='file name prefix, one per image dir',
+      metavar='prefix', nargs='*')
+  parser.add_argument(
+      '-t', '--titles', help='title, one per image dir', metavar='title',
+      nargs='*')
+  parser.add_argument(
+      '-de', '--default_ext', help='default extension', metavar='ext',
+      default=_default_ext)
+  parser.add_argument(
+      '-dp', '--default_prefix', help='default prefix', metavar='prefix',
+      default=_default_prefix)
+
+  args = parser.parse_args()
+  file_parts = path.splitext(args.filename)
+  # splitext keeps the leading dot on the extension.
+  if file_parts[1] != '.html':
+    args.filename = file_parts[0] + '.html'
+    print('added .html extension to filename:\n%s' % args.filename)
+
+  dir_infos = _get_dir_infos(
+      args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext,
+      args.default_prefix)
+
+  write_html_page(args.filename, args.page_title, dir_infos)
+
+
+if __name__ == "__main__":
+  main()
--- a/png/128/emoji_ufe82b.png
+++ b/png/128/emoji_ufe82b.png
--- a/third_party/color_emoji/add_glyphs.py
+++ b/third_party/color_emoji/add_glyphs.py
@ -1,10 +1,14 @@
 #!/usr/bin/env python

-import collections, glob, os, sys
+import collections, glob, os, re, sys
 from fontTools import ttx
 from fontTools.ttLib.tables import otTables
 from png import PNG

+# Glyph name of the PUA character (U+FE82B) used for the unknown flag.  This
+# avoids the legacy emoji PUA values, but is in the same area.
+UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
+
 sys.path.append(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
 import add_emoji_gsub
@ -78,6 +82,15 @@ EXTRA_SEQUENCES = {
    'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
 }

+# Flag aliases - from: to.  The regional indicator sequence of each key is
+# mapped to the flag glyph of its value.
+FLAG_ALIASES = {
+    'BV': 'NO',
+    # This entry was a second 'UM' key, which the 'UM': 'US' entry below
+    # silently overrode.  NOTE(review): CP (Clipperton Island) is presumably
+    # the region that was meant to share the French flag - confirm.
+    'CP': 'FR',
+    'HM': 'AU',
+    'SJ': 'NO',
+    'UM': 'US',
+}
+
 if len (sys.argv) < 4:
 	print >>sys.stderr, """
 Usage:
@ -154,9 +167,9 @@ def add_lig_sequence(ligatures, seq, n):


 for (u, filename) in img_pairs:
-	# print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
 	n = glyph_name (u)
        glyph_names.add(n)
+	# print "Adding glyph for %s" % n

 	g.append (n)
 	for char in u:
@ -180,6 +193,53 @@ for n in EXTRA_SEQUENCES:
        else:
                print 'extras: no glyph for %s' % n

+# Add missing regional indicator sequences and flag aliases
+# if we support any.
+
+# Glyph names of the 26 regional indicator symbols, U+1F1E6..U+1F1FF.
+regional_names = frozenset(['u%X' % (0x1F1E6 + i) for i in range(26)])
+
+def _is_flag_sequence(t):
+  """Return True if t is a pair of regional indicator glyph names."""
+  if len(t) != 2:
+    return False
+  first, second = t
+  return first in regional_names and second in regional_names
+
+# True as soon as one existing ligature is a flag sequence.
+have_flags = any(_is_flag_sequence(seq) for seq in ligatures)
+
+if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
+  raise ValueError(
+      'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
+
+# These helpers are copied from add_emoji_gsub; sigh, too many separate
+# files with the same code.
+def _reg_indicator(letter):
+  """Returns the regional indicator codepoint for an uppercase ascii letter."""
+  assert 'A' <= letter <= 'Z'
+  offset = ord(letter) - ord('A')
+  return 0x1F1E6 + offset
+
+def _reg_lig_sequence(flag_name):
+  """Returns a tuple of strings naming the codepoints that form the ligature,
+  one 'u' + hex name per letter of the two-letter flag name."""
+  assert len(flag_name) == 2
+  return tuple(['u%X' % _reg_indicator(letter) for letter in flag_name])
+
+def _reg_lig_name(flag_name):
+  """Returns a glyph name for the flag name: the names of the ligature
+  sequence joined with underscore."""
+  return '_'.join(_reg_lig_sequence(flag_name))
+
+# Aliases and unknown-flag fallbacks are only added when the font already
+# has at least one flag ligature.
+if have_flags:
+  print 'Adding flag aliases.'
+  for flag_from, flag_to in FLAG_ALIASES.iteritems():
+    # The indicator pair of the aliased region maps to the glyph name
+    # derived from the target region.
+    # NOTE(review): nothing here checks that the target glyph is in
+    # glyph_names - confirm add_lig_sequence copes with that.
+    seq = _reg_lig_sequence(flag_from)
+    name = _reg_lig_name(flag_to)
+    add_lig_sequence(ligatures, seq, name)
+
+  print 'Adding unused flag sequences'
+  # every flag sequence we don't have gets the missing flag glyph
+  for first in regional_names:
+    for second in regional_names:
+      seq = (first, second)
+      # This runs after the aliases were added, so presumably aliased
+      # pairs are already in ligatures and keep their alias - verify
+      # against add_lig_sequence.
+      if seq not in ligatures:
+        add_lig_sequence(ligatures, seq, UNKNOWN_FLAG_GLYPH_NAME)
+

 keyed_ligatures = collections.defaultdict(list)
 for k, v in ligatures.iteritems():
--- a/third_party/region-flags/IC.png
+++ b/third_party/region-flags/IC.png
--- a/third_party/region-flags/svg/IC.svg
+++ b/third_party/region-flags/svg/IC.svg