commit
c6379827aa
10
Makefile
10
Makefile
|
@ -43,8 +43,10 @@ RENAMED_FLAGS_DIR := $(BUILD_DIR)/renamed_flags
|
||||||
QUANTIZED_DIR := $(BUILD_DIR)/quantized_pngs
|
QUANTIZED_DIR := $(BUILD_DIR)/quantized_pngs
|
||||||
COMPRESSED_DIR := $(BUILD_DIR)/compressed_pngs
|
COMPRESSED_DIR := $(BUILD_DIR)/compressed_pngs
|
||||||
|
|
||||||
|
# Unknown flag is PUA fe82b
|
||||||
|
|
||||||
LIMITED_FLAGS = CN DE ES FR GB IT JP KR RU US
|
LIMITED_FLAGS = CN DE ES FR GB IT JP KR RU US
|
||||||
SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
|
SELECTED_FLAGS = AC AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ \
|
||||||
BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BW BY BZ \
|
BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BW BY BZ \
|
||||||
CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CX CY CZ \
|
CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CX CY CZ \
|
||||||
DE DJ DK DM DO DZ \
|
DE DJ DK DM DO DZ \
|
||||||
|
@ -52,7 +54,7 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
|
||||||
FI FJ FM FO FR \
|
FI FJ FM FO FR \
|
||||||
GA GB GD GE GG GH GI GL GM GN GQ GR GT GU GW GY \
|
GA GB GD GE GG GH GI GL GM GN GQ GR GT GU GW GY \
|
||||||
HK HN HR HT HU \
|
HK HN HR HT HU \
|
||||||
ID IE IL IM IN IO IQ IR IS IT \
|
IC ID IE IL IM IN IO IQ IR IS IT \
|
||||||
JE JM JO JP \
|
JE JM JO JP \
|
||||||
KE KG KH KI KM KN KP KR KW KY KZ \
|
KE KG KH KI KM KN KP KR KW KY KZ \
|
||||||
LA LB LC LI LK LR LS LT LU LV LY \
|
LA LB LC LI LK LR LS LT LU LV LY \
|
||||||
|
@ -62,8 +64,8 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
|
||||||
PA PE PF PG PH PK PL PN PR PS PT PW PY \
|
PA PE PF PG PH PK PL PN PR PS PT PW PY \
|
||||||
QA \
|
QA \
|
||||||
RO RS RU RW \
|
RO RS RU RW \
|
||||||
SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ \
|
SA SB SC SD SE SG SH SI SK SL SM SN SO SR SS ST SV SX SY SZ \
|
||||||
TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
|
TA TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
|
||||||
UA UG US UY UZ \
|
UA UG US UY UZ \
|
||||||
VA VC VE VG VI VN VU \
|
VA VC VE VG VI VN VU \
|
||||||
WS \
|
WS \
|
||||||
|
|
|
@ -0,0 +1,287 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
#
|
||||||
|
# Copyright 2016 Google Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""Build an html page showing emoji images.
|
||||||
|
|
||||||
|
This takes a list of directories containing emoji image files, and
|
||||||
|
builds an html page presenting the images along with their composition
|
||||||
|
(for sequences) and unicode names (for individual emoji)."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import codecs
|
||||||
|
import collections
|
||||||
|
import glob
|
||||||
|
from os import path
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from nototools import unicode_data
|
||||||
|
|
||||||
|
_default_dir = 'png/128'
|
||||||
|
_default_ext = 'png'
|
||||||
|
_default_prefix = 'emoji_u'
|
||||||
|
_default_title = 'Emoji List'
|
||||||
|
|
||||||
|
# DirInfo represents information about a directory of file names.
|
||||||
|
# - directory is the directory path
|
||||||
|
# - title is the title to use for this directory
|
||||||
|
# - filemap is a dict mapping from a tuple of codepoints to the name of
|
||||||
|
# a file in the directory.
|
||||||
|
DirInfo = collections.namedtuple('DirInfo', 'directory, title, filemap')
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_keys(dicts):
|
||||||
|
"""Return the union of the keys in the list of dicts."""
|
||||||
|
keys = []
|
||||||
|
for d in dicts:
|
||||||
|
keys.extend(d.keys())
|
||||||
|
return frozenset(keys)
|
||||||
|
|
||||||
|
def _generate_row_cells(key, dir_infos):
|
||||||
|
CELL_PREFIX = '<td>'
|
||||||
|
def _cell(key, info):
|
||||||
|
if key in info.filemap:
|
||||||
|
return '<img src="%s">' % path.join(
|
||||||
|
info.directory, info.filemap[key])
|
||||||
|
return '-missing-'
|
||||||
|
return [CELL_PREFIX + _cell(key, info) for info in dir_infos]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_desc(key_tuple, dir_infos):
|
||||||
|
CELL_PREFIX = '<td class="desc">'
|
||||||
|
def _get_filepath(cp):
|
||||||
|
cp_key = tuple([cp])
|
||||||
|
for info in dir_infos:
|
||||||
|
if cp_key in info.filemap:
|
||||||
|
return path.join(info.directory, info.filemap[cp_key])
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_part(cp):
|
||||||
|
if cp == 0x200d: # zwj, common so replace with '+'
|
||||||
|
return '+'
|
||||||
|
if cp == 0xfe0f: # emoji variation selector, we ignore it
|
||||||
|
return None
|
||||||
|
fname = _get_filepath(cp)
|
||||||
|
if fname:
|
||||||
|
return '<img src="%s">' % fname
|
||||||
|
return '%04X' % cp
|
||||||
|
|
||||||
|
if len(key_tuple) == 1:
|
||||||
|
desc = 'U+%04X' % key_tuple
|
||||||
|
else:
|
||||||
|
desc = ' '.join(filter(None, [_get_part(cp) for cp in key_tuple]))
|
||||||
|
return CELL_PREFIX + desc
|
||||||
|
|
||||||
|
|
||||||
|
def _get_name(key_tuple):
  """Return a <td> cell with the unicode name of a single-codepoint key.

  Multi-codepoint sequences have no single name and get an empty cell."""
  CELL_PREFIX = '<td class="name">'
  name = ''
  if len(key_tuple) == 1:
    cp = key_tuple[0]
    if cp in unicode_data.proposed_emoji_cps():
      name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
    else:
      name = unicode_data.name(cp, '(error)')
  return CELL_PREFIX + name
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_content(dir_infos):
  """Generate an html table for the infos."""
  # Header: empty corner cell, one title per directory, then the two
  # trailing description/name columns.
  header_row = ['']
  header_row.extend(info.title for info in dir_infos)
  header_row.extend(['Description', 'Name'])

  lines = ['<table>', '<th>'.join(header_row)]
  for key in sorted(_merge_keys([info.filemap for info in dir_infos])):
    cells = _generate_row_cells(key, dir_infos)
    cells.append(_get_desc(key, dir_infos))
    cells.append(_get_name(key))
    lines.append(''.join(cells))
  return '\n <tr>'.join(lines) + '\n</table>'
|
||||||
|
|
||||||
|
|
||||||
|
def _get_image_data(image_dir, ext, prefix):
|
||||||
|
"""Return a map from a tuple of cp sequences to a filename.
|
||||||
|
|
||||||
|
This filters by file extension, and expects the rest of the files
|
||||||
|
to match the prefix followed by a sequence of hex codepoints separated
|
||||||
|
by underscore. Files that don't match, duplicate sequences (because
|
||||||
|
of casing), and out_of_range or empty codepoints raise an error."""
|
||||||
|
|
||||||
|
fails = []
|
||||||
|
result = {}
|
||||||
|
expect_re = re.compile(r'%s([0-9A-Fa-f_]+).%s' % (prefix, ext))
|
||||||
|
for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
|
||||||
|
filename = path.basename(f)
|
||||||
|
m = expect_re.match(filename)
|
||||||
|
if not m:
|
||||||
|
if filename.startswith('unknown_flag.'):
|
||||||
|
continue
|
||||||
|
fails.append('"%s" did not match: "%s"' % (expect_re.pattern, filename))
|
||||||
|
continue
|
||||||
|
seq = m.group(1)
|
||||||
|
try:
|
||||||
|
cps = tuple(int(s, 16) for s in seq.split('_'))
|
||||||
|
except:
|
||||||
|
fails.append('bad cp sequence: ' + filename)
|
||||||
|
continue
|
||||||
|
this_failed = False
|
||||||
|
for cp in cps:
|
||||||
|
if (cp > 0x10ffff):
|
||||||
|
fails.append('cp out of range: ' + filename)
|
||||||
|
this_failed = True
|
||||||
|
break
|
||||||
|
if this_failed:
|
||||||
|
continue
|
||||||
|
if cps in result:
|
||||||
|
fails.append('duplicate sequence: %s and %s' (result[cps], filename))
|
||||||
|
continue
|
||||||
|
result[cps] = filename
|
||||||
|
if fails:
|
||||||
|
print >> sys.stderr, 'get_image_data failed (%s, %s, %s):\n %s' % (
|
||||||
|
image_dir, ext, prefix, '\n '.join(fails))
|
||||||
|
raise ValueError('get image data failed')
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _get_dir_infos(
    image_dirs, exts=None, prefixes=None, titles=None,
    default_ext=_default_ext, default_prefix=_default_prefix):
  """Return a list of DirInfos for the image_dirs. When defined,
  exts, prefixes, and titles should be the same length as image_dirs.
  Titles default to using the last segments of the image_dirs,
  exts and prefixes default to the corresponding default values.

  Raises ValueError when a provided list's length does not match
  image_dirs, or (via _get_image_data) when a directory's contents
  are malformed."""

  count = len(image_dirs)

  def _per_dir_list(values, what, default):
    # Normalize an optional per-directory list: default it when absent,
    # otherwise require exactly one entry per image dir.
    if not values:
      return [default] * count
    if len(values) != count:
      raise ValueError('have %d image dirs but %d %s' % (
          count, len(values), what))
    return values

  titles = _per_dir_list(titles, 'titles', None)
  exts = _per_dir_list(exts, 'extensions', default_ext)
  prefixes = _per_dir_list(prefixes, 'prefixes', default_prefix)

  infos = []
  for i in range(count):
    image_dir = image_dirs[i]
    # A per-dir entry of None/'' falls back to the defaults.
    title = titles[i] or path.basename(path.normpath(image_dir))
    ext = exts[i] or default_ext
    prefix = prefixes[i] or default_prefix
    filemap = _get_image_data(image_dir, ext, prefix)
    infos.append(DirInfo(image_dir, title, filemap))
  return infos
|
||||||
|
|
||||||
|
|
||||||
|
def _instantiate_template(template, arg_dict):
|
||||||
|
id_regex = re.compile('{{([a-zA-Z0-9_]+)}}')
|
||||||
|
ids = set(m.group(1) for m in id_regex.finditer(template))
|
||||||
|
keyset = set(arg_dict.keys())
|
||||||
|
missing_ids = ids - keyset
|
||||||
|
extra_args = keyset - ids
|
||||||
|
if extra_args:
|
||||||
|
print >> sys.stderr, (
|
||||||
|
'the following %d args are unused:\n%s' %
|
||||||
|
(len(extra_args), ', '.join(sorted(extra_args))))
|
||||||
|
text = template
|
||||||
|
if missing_ids:
|
||||||
|
raise ValueError(
|
||||||
|
'the following %d ids in the template have no args:\n%s' %
|
||||||
|
(len(missing_ids), ', '.join(sorted(missing_ids))))
|
||||||
|
for arg in ids:
|
||||||
|
text = re.sub('{{%s}}' % arg, arg_dict[arg], text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
TEMPLATE = """<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>{{title}}</title>
|
||||||
|
<style>{{style}}</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
{{content}}
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
STYLE = """
|
||||||
|
tbody { background-color: rgb(210, 210, 210) }
|
||||||
|
tbody img { width: 64px; height: 64px }
|
||||||
|
tbody .desc { font-size: 20pt; font-weight: bold }
|
||||||
|
tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
|
||||||
|
tbody .name { background-color: white }
|
||||||
|
"""
|
||||||
|
|
||||||
|
def write_html_page(filename, page_title, dir_infos):
  """Render the emoji table for dir_infos and write it to filename as
  utf-8 html."""
  body = _generate_content(dir_infos)
  page = _instantiate_template(
      TEMPLATE, {'title': page_title, 'style': STYLE, 'content': body})
  with codecs.open(filename, 'w', 'utf-8') as f:
    f.write(page)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
  """Parse command-line args, build the DirInfos, and write the page."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      'filename', help='path to output file', metavar='filename')
  parser.add_argument(
      '--page_title', help='page title', metavar='title', default='Emoji Table')
  parser.add_argument(
      '-d', '--image_dirs', help='image directories', metavar='dir',
      nargs='+')
  parser.add_argument(
      '-e', '--exts', help='file extension, one per image dir', metavar='ext',
      nargs='*')
  parser.add_argument(
      '-p', '--prefixes', help='file name prefix, one per image dir',
      metavar='prefix', nargs='*')
  parser.add_argument(
      '-t', '--titles', help='title, one per image dir', metavar='title',
      nargs='*')
  parser.add_argument(
      '-de', '--default_ext', help='default extension', metavar='ext',
      default=_default_ext)
  parser.add_argument(
      '-dp', '--default_prefix', help='default prefix', metavar='prefix',
      default=_default_prefix)

  args = parser.parse_args()
  file_parts = path.splitext(args.filename)
  # Fixed: splitext returns the extension with its leading dot ('.html'),
  # so the original comparison against 'html' never matched and '.html'
  # was re-appended even when the filename already had it.
  if file_parts[1] != '.html':
    args.filename = file_parts[0] + '.html'
    print('added .html extension to filename:\n%s' % args.filename)

  dir_infos = _get_dir_infos(
      args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext,
      args.default_prefix)

  write_html_page(args.filename, args.page_title, dir_infos)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Binary file not shown.
After Width: | Height: | Size: 2.2 KiB |
|
@ -1,10 +1,14 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import collections, glob, os, sys
|
import collections, glob, os, re, sys
|
||||||
from fontTools import ttx
|
from fontTools import ttx
|
||||||
from fontTools.ttLib.tables import otTables
|
from fontTools.ttLib.tables import otTables
|
||||||
from png import PNG
|
from png import PNG
|
||||||
|
|
||||||
|
# PUA character for unknown flag. This avoids the legacy emoji pua values, but
|
||||||
|
# is in the same area.
|
||||||
|
UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
|
||||||
|
|
||||||
sys.path.append(
|
sys.path.append(
|
||||||
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
|
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
|
||||||
import add_emoji_gsub
|
import add_emoji_gsub
|
||||||
|
@ -78,6 +82,15 @@ EXTRA_SEQUENCES = {
|
||||||
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
|
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Flag aliases - from: to
# NOTE(review): the original literal listed the key 'UM' twice ('FR',
# then 'US'); with duplicate dict keys only the last entry ('US') ever
# took effect, so the shadowed 'FR' mapping is dropped here.  Confirm
# whether that entry was meant to be a different territory code.
FLAG_ALIASES = {
    'BV': 'NO',
    'SJ': 'NO',
    'HM': 'AU',
    'UM': 'US',
}
|
||||||
|
|
||||||
if len (sys.argv) < 4:
|
if len (sys.argv) < 4:
|
||||||
print >>sys.stderr, """
|
print >>sys.stderr, """
|
||||||
Usage:
|
Usage:
|
||||||
|
@ -154,9 +167,9 @@ def add_lig_sequence(ligatures, seq, n):
|
||||||
|
|
||||||
|
|
||||||
for (u, filename) in img_pairs:
|
for (u, filename) in img_pairs:
|
||||||
# print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
|
|
||||||
n = glyph_name (u)
|
n = glyph_name (u)
|
||||||
glyph_names.add(n)
|
glyph_names.add(n)
|
||||||
|
# print "Adding glyph for %s" % n
|
||||||
|
|
||||||
g.append (n)
|
g.append (n)
|
||||||
for char in u:
|
for char in u:
|
||||||
|
@ -180,6 +193,53 @@ for n in EXTRA_SEQUENCES:
|
||||||
else:
|
else:
|
||||||
print 'extras: no glyph for %s' % n
|
print 'extras: no glyph for %s' % n
|
||||||
|
|
||||||
|
# Add missing regional indicator sequences and flag aliases
|
||||||
|
# if we support any.
|
||||||
|
regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200))
|
||||||
|
|
||||||
|
def _is_flag_sequence(t):
  """True if t is a pair of regional-indicator glyph names."""
  if len(t) != 2:
    return False
  return t[0] in regional_names and t[1] in regional_names
|
||||||
|
|
||||||
|
have_flags = False
|
||||||
|
for k in ligatures:
|
||||||
|
if _is_flag_sequence(k):
|
||||||
|
have_flags = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
|
||||||
|
raise ValueError(
|
||||||
|
'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
|
||||||
|
|
||||||
|
# sigh, too many separate files with the same code.
|
||||||
|
# copied from add_emoji_gsub.
|
||||||
|
def _reg_indicator(letter):
|
||||||
|
assert 'A' <= letter <= 'Z'
|
||||||
|
return 0x1F1E6 + ord(letter) - ord('A')
|
||||||
|
|
||||||
|
def _reg_lig_sequence(flag_name):
  """Returns a tuple of strings naming the codepoints that form the ligature."""
  assert len(flag_name) == 2
  parts = []
  for ch in flag_name:
    parts.append('u%X' % _reg_indicator(ch))
  return tuple(parts)
|
||||||
|
|
||||||
|
def _reg_lig_name(flag_name):
  """Returns a glyph name for the flag name."""
  sequence = _reg_lig_sequence(flag_name)
  return '_'.join(sequence)
|
||||||
|
|
||||||
|
if have_flags:
|
||||||
|
print 'Adding flag aliases.'
|
||||||
|
for flag_from, flag_to in FLAG_ALIASES.iteritems():
|
||||||
|
seq = _reg_lig_sequence(flag_from)
|
||||||
|
name = _reg_lig_name(flag_to)
|
||||||
|
add_lig_sequence(ligatures, seq, name)
|
||||||
|
|
||||||
|
print 'Adding unused flag sequences'
|
||||||
|
# every flag sequence we don't have gets the missing flag glyph
|
||||||
|
for first in regional_names:
|
||||||
|
for second in regional_names:
|
||||||
|
seq = (first, second)
|
||||||
|
if seq not in ligatures:
|
||||||
|
add_lig_sequence(ligatures, seq, UNKNOWN_FLAG_GLYPH_NAME)
|
||||||
|
|
||||||
|
|
||||||
keyed_ligatures = collections.defaultdict(list)
|
keyed_ligatures = collections.defaultdict(list)
|
||||||
for k, v in ligatures.iteritems():
|
for k, v in ligatures.iteritems():
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 356 B |
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 140 KiB After Width: | Height: | Size: 250 B |
Loading…
Reference in New Issue