Merge branch 'master' into subregion_flags

pull/87/head
Doug Felt 2017-01-25 17:01:41 -08:00
commit 042a05f19a
5 changed files with 620 additions and 8 deletions

133
add_aliases.py 100755
View File

@ -0,0 +1,133 @@
#!/usr/bin/env python
#
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import os
from os import path
import sys
"""Create aliases in target directory.
The target files should not contain the emoji variation selector
codepoint in their names."""
# Directory containing this script; the alias data file lives next to it.
DATA_ROOT = path.dirname(path.abspath(__file__))


def str_to_seq(seq_str):
  """Parse an underscore-separated hex string into a codepoint tuple."""
  return tuple(int(part, 16) for part in seq_str.split('_'))


def seq_to_str(seq):
  """Format a codepoint tuple as an underscore-separated lowercase hex string."""
  return '_'.join(['%04x' % codepoint for codepoint in seq])
def read_emoji_aliases(filename=None):
  """Read an emoji alias table and return a dict from alias sequence to
  target sequence (both tuples of int codepoints).

  filename defaults to 'emoji_aliases.txt' next to this script.  Each
  non-blank line has the form 'alias_seq;target_seq' where sequences are
  underscore-separated hex codepoints; '#' starts a comment.  Aliases whose
  target is not a codepoint sequence (e.g. 'fe82b;unknown_flag') are
  reported to stdout and skipped.
  """
  if filename is None:
    filename = path.join(DATA_ROOT, 'emoji_aliases.txt')
  result = {}
  with open(filename, 'r') as f:
    for line in f:
      # strip trailing comment and surrounding whitespace; skip blank lines
      ix = line.find('#')
      if ix > -1:
        line = line[:ix]
      line = line.strip()
      if not line:
        continue
      als, trg = (s.strip() for s in line.split(';'))
      als_seq = tuple(int(x, 16) for x in als.split('_'))
      try:
        trg_seq = tuple(int(x, 16) for x in trg.split('_'))
      except ValueError:
        # target is a name, not a codepoint sequence -- skip it
        # (narrowed from a bare except, which also hid real bugs)
        print('cannot process alias %s -> %s' % (als, trg))
        continue
      result[als_seq] = trg_seq
  return result
def add_aliases(filedir, prefix, ext, replace=False, dry_run=False):
  """Create symlinks in filedir for the aliases in emoji_aliases.txt.

  Scans filedir for files named <prefix><seq>.<ext>.  For each alias whose
  target file exists, creates a symlink named for the alias sequence that
  points at the target file.  If replace is true, existing alias files are
  removed and re-created; otherwise existing aliases are reported and
  skipped.  If dry_run is true, reports what would happen without touching
  the filesystem.
  """
  if not path.isdir(filedir):
    sys.stderr.write('%s is not a directory\n' % filedir)
    return

  prefix_len = len(prefix)
  suffix_len = len(ext) + 1  # extension plus the dot
  filenames = [
      path.basename(f)
      for f in glob.glob(path.join(filedir, '%s*.%s' % (prefix, ext)))]
  seq_to_file = {
      str_to_seq(name[prefix_len:-suffix_len]): name
      for name in filenames}

  aliases = read_emoji_aliases()
  aliases_to_create = {}
  aliases_to_replace = []
  for als, trg in sorted(aliases.items()):
    if trg not in seq_to_file:
      sys.stderr.write('target %s for %s does not exist\n' % (
          seq_to_str(trg), seq_to_str(als)))
      continue
    if als in seq_to_file:
      if replace:
        aliases_to_replace.append(seq_to_file[als])
      else:
        sys.stderr.write('alias %s exists\n' % seq_to_str(als))
        continue
    target_file = seq_to_file[trg]
    alias_name = '%s%s.%s' % (prefix, seq_to_str(als), ext)
    aliases_to_create[alias_name] = target_file

  if replace:
    if not dry_run:
      for k in sorted(aliases_to_replace):
        os.remove(path.join(filedir, k))
      # only report removals that actually happened (dry_run lists them
      # individually with a 'replace ' prefix below)
      print('replacing %d files' % len(aliases_to_replace))

  for k, v in sorted(aliases_to_create.items()):
    if dry_run:
      msg = 'replace ' if k in aliases_to_replace else ''
      print('%s%s -> %s' % (msg, k, v))
    else:
      try:
        os.symlink(v, path.join(filedir, k))
      except OSError:
        # report which link failed, then propagate the real error instead
        # of masking it behind a generic Exception('oops')
        sys.stderr.write('failed to create %s -> %s\n' % (k, v))
        raise
  if not dry_run:
    print('created %d symlinks' % len(aliases_to_create))
def main():
  """Command-line entry point: parse flags and create alias symlinks."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-d', '--filedir', help='directory containing files to alias',
      required=True, metavar='dir')
  parser.add_argument(
      '-p', '--prefix', help='file name prefix (default emoji_u)',
      metavar='pfx', default='emoji_u')
  parser.add_argument(
      '-e', '--ext', help='file name extension (default png)',
      # fixed typo: was 'sgv'; 'svg' matches the sibling rename script
      choices=['ai', 'png', 'svg'], default='png')
  parser.add_argument(
      '-r', '--replace', help='replace existing files/aliases',
      action='store_true')
  parser.add_argument(
      '-n', '--dry_run', help='print out aliases to create only',
      action='store_true')
  args = parser.parse_args()
  add_aliases(args.filedir, args.prefix, args.ext, args.replace, args.dry_run)


if __name__ == '__main__':
  main()

View File

@ -26,6 +26,11 @@ import sys
from nototools import unicode_data
DATA_ROOT = path.dirname(path.abspath(__file__))
ZWJ = 0x200d
EMOJI_VS = 0xfe0f
def _is_regional_indicator(cp):
return 0x1f1e6 <= cp <= 0x1f1ff
@ -37,6 +42,40 @@ def _is_skintone_modifier(cp):
def _seq_string(seq):
return '_'.join('%04x' % cp for cp in seq)
def strip_vs(seq):
return tuple(cp for cp in seq if cp != EMOJI_VS)
# Lazily-built list of sequence->name maps (combining, flag, modifier, zwj
# sequences from unicode_data), each re-keyed by the VS-stripped sequence.
_namedata = None


def seq_name(seq):
  """Return the name for a codepoint sequence, or None if unnamed.

  A single codepoint is looked up via unicode_data.name.  Longer sequences
  are looked up in the emoji sequence tables, first as-is, then with the
  emoji variation selector stripped.
  """
  global _namedata

  if not _namedata:
    def strip_vs_map(seq_map):
      # re-key a sequence->name map by its VS-stripped sequence
      return {
          strip_vs(k): v
          for k, v in seq_map.iteritems()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
        strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
        strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
    ]

  if len(seq) == 1:
    # single codepoint: defer to the UCD name, None when absent
    return unicode_data.name(seq[0], None)

  for data in _namedata:
    if seq in data:
      return data[seq]

  if EMOJI_VS in seq:
    # not found verbatim; retry with the variation selector stripped
    non_vs_seq = strip_vs(seq)
    for data in _namedata:
      if non_vs_seq in data:
        return data[non_vs_seq]

  return None
def _check_valid_emoji(sorted_seq_to_filepath):
"""Ensure all emoji are either valid emoji or specific chars."""
@ -128,11 +167,143 @@ def _check_skintone(sorted_seq_to_filepath):
base_to_modifiers[cp] = set()
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
if len(modifiers) != 5:
print 'emoji base %04x has %d modifiers defined (%s) in %s' % (
print >> sys.stderr, 'emoji base %04x has %d modifiers defined (%s) in %s' % (
cp, len(modifiers),
', '.join('%04x' % cp for cp in sorted(modifiers)), fp)
def _check_zwj_sequences(seq_to_filepath):
  """Verify that zwj sequences are valid.

  Reports (to stderr) any image whose sequence contains ZWJ but is not a
  known zwj sequence, either verbatim or after stripping the emoji
  variation selector from the canonical sequences.
  """
  zwj_sequence_to_name = unicode_data.get_emoji_zwj_sequences()

  # strip emoji variant selectors and add extra mappings
  # (renamed loop variable: it previously shadowed the module-level
  # seq_name function)
  zwj_sequence_without_vs_to_name_canonical = {}
  for seq, name in zwj_sequence_to_name.items():
    if EMOJI_VS in seq:
      stripped_seq = strip_vs(seq)
      zwj_sequence_without_vs_to_name_canonical[stripped_seq] = (name, seq)

  zwj_seq_to_filepath = {
      seq: fp for seq, fp in seq_to_filepath.items()
      if ZWJ in seq}

  for seq, fp in zwj_seq_to_filepath.items():
    if seq not in zwj_sequence_to_name:
      if seq not in zwj_sequence_without_vs_to_name_canonical:
        sys.stderr.write('zwj sequence not defined: %s\n' % fp)
      else:
        _, can = zwj_sequence_without_vs_to_name_canonical[seq]
        # deliberately-suppressed diagnostic, kept for easy re-enabling:
        # print >> sys.stderr, 'canonical sequence %s contains vs: %s' % (
        #     _seq_string(can), fp)
def read_emoji_aliases(filename=None):
  """Read an emoji alias table and return a dict from alias sequence to
  target sequence (both tuples of int codepoints).

  filename defaults to 'emoji_aliases.txt' next to this script.  Each
  non-blank line has the form 'alias_seq;target_seq' where sequences are
  underscore-separated hex codepoints; '#' starts a comment.  Aliases whose
  target is not a codepoint sequence (e.g. 'fe82b;unknown_flag') are
  reported to stdout and skipped.
  """
  if filename is None:
    filename = path.join(DATA_ROOT, 'emoji_aliases.txt')
  result = {}
  with open(filename, 'r') as f:
    for line in f:
      # strip trailing comment and surrounding whitespace; skip blank lines
      ix = line.find('#')
      if ix > -1:
        line = line[:ix]
      line = line.strip()
      if not line:
        continue
      als, trg = (s.strip() for s in line.split(';'))
      als_seq = tuple(int(x, 16) for x in als.split('_'))
      try:
        trg_seq = tuple(int(x, 16) for x in trg.split('_'))
      except ValueError:
        # target is a name, not a codepoint sequence -- skip it
        # (narrowed from a bare except, which also hid real bugs)
        print('cannot process alias %s -> %s' % (als, trg))
        continue
      result[als_seq] = trg_seq
  return result
def _check_coverage(seq_to_filepath):
  """Report emoji defined for Unicode emoji age 9.0 that have no image.

  Mutates seq_to_filepath: alias sequences with a valid target get an
  'alias:<filename>' entry so later checks treat them as covered.
  Missing-coverage reports go to stdout.
  """
  age = 9.0

  # map each VS-stripped sequence back to its canonical key in
  # seq_to_filepath so lookups can match either form
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = strip_vs(k)
      non_vs_to_canonical[non_vs] = k

  # aliases: targets must exist, alias keys must not collide with images
  aliases = read_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      print 'alias %s missing target %s' % (_seq_string(k), _seq_string(v))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      print 'alias %s already exists as %s (%s)' % (
          _seq_string(k), _seq_string(v), seq_name(v))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print 'missing single %04x (%s)' % (cp, unicode_data.name(cp, '<no name>'))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print 'missing special %04x (%s)' % (cp, unicode_data.name(cp))

  # combining sequences
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).iteritems())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = strip_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print 'missing combining sequence %s (%s)' % (_seq_string(seq), name)

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).iteritems())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print 'missing flag sequence %s (%s)' % (_seq_string(seq), name)

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print 'missing modifier sequence %s (%s)' % (
          _seq_string(seq), name)

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences. normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print 'missing (canonical) zwj sequence %s (%s)' % (
          _seq_string(seq), name)

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print 'missing unknown flag PUA fe82b'
def check_sequence_to_filepath(seq_to_filepath):
sorted_seq_to_filepath = collections.OrderedDict(
sorted(seq_to_filepath.items()))
@ -140,7 +311,8 @@ def check_sequence_to_filepath(seq_to_filepath):
_check_zwj(sorted_seq_to_filepath)
_check_flags(sorted_seq_to_filepath)
_check_skintone(sorted_seq_to_filepath)
_check_zwj_sequences(sorted_seq_to_filepath)
_check_coverage(sorted_seq_to_filepath)
def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
"""Check names, and convert name to sequences for names that are ok,

194
emoji_aliases.txt 100644
View File

@ -0,0 +1,194 @@
# alias table
# 'fe0f' is not in these sequences
1f3c3;1f3c3_200d_2642 # RUNNER -> man running
1f3c3_1f3fb;1f3c3_1f3fb_200d_2642 # light skin tone
1f3c3_1f3fc;1f3c3_1f3fc_200d_2642 # medium-light skin tone
1f3c3_1f3fd;1f3c3_1f3fd_200d_2642 # medium skin tone
1f3c3_1f3fe;1f3c3_1f3fe_200d_2642 # medium-dark skin tone
1f3c3_1f3ff;1f3c3_1f3ff_200d_2642 # dark skin tone
1f3c4;1f3c4_200d_2642 # SURFER -> man surfing
1f3c4_1f3fb;1f3c4_1f3fb_200d_2642 # light skin tone
1f3c4_1f3fc;1f3c4_1f3fc_200d_2642 # medium-light skin tone
1f3c4_1f3fd;1f3c4_1f3fd_200d_2642 # medium skin tone
1f3c4_1f3fe;1f3c4_1f3fe_200d_2642 # medium-dark skin tone
1f3c4_1f3ff;1f3c4_1f3ff_200d_2642 # dark skin tone
1f3ca;1f3ca_200d_2642 # SWIMMER -> man swimming
1f3ca_1f3fb;1f3ca_1f3fb_200d_2642 # light skin tone
1f3ca_1f3fc;1f3ca_1f3fc_200d_2642 # medium-light skin tone
1f3ca_1f3fd;1f3ca_1f3fd_200d_2642 # medium skin tone
1f3ca_1f3fe;1f3ca_1f3fe_200d_2642 # medium-dark skin tone
1f3ca_1f3ff;1f3ca_1f3ff_200d_2642 # dark skin tone
1f3cb;1f3cb_200d_2642 # WEIGHT LIFTER -> man lifting weights
1f3cb_1f3fb;1f3cb_1f3fb_200d_2642 # light skin tone
1f3cb_1f3fc;1f3cb_1f3fc_200d_2642 # medium-light skin tone
1f3cb_1f3fd;1f3cb_1f3fd_200d_2642 # medium skin tone
1f3cb_1f3fe;1f3cb_1f3fe_200d_2642 # medium-dark skin tone
1f3cb_1f3ff;1f3cb_1f3ff_200d_2642 # dark skin tone
1f3cc;1f3cc_200d_2642 # GOLFER -> man golfing
1f3cc_1f3fb;1f3cc_1f3fb_200d_2642 # light skin tone
1f3cc_1f3fc;1f3cc_1f3fc_200d_2642 # medium-light skin tone
1f3cc_1f3fd;1f3cc_1f3fd_200d_2642 # medium skin tone
1f3cc_1f3fe;1f3cc_1f3fe_200d_2642 # medium-dark skin tone
1f3cc_1f3ff;1f3cc_1f3ff_200d_2642 # dark skin tone
1f46a;1f468_200d_1f469_200d_1f466 # FAMILY -> family: man, woman, boy
1f46e;1f46e_200d_2642 # POLICE OFFICER -> man police officer
1f46e_1f3fb;1f46e_1f3fb_200d_2642 # light skin tone
1f46e_1f3fc;1f46e_1f3fc_200d_2642 # medium-light skin tone
1f46e_1f3fd;1f46e_1f3fd_200d_2642 # medium skin tone
1f46e_1f3fe;1f46e_1f3fe_200d_2642 # medium-dark skin tone
1f46e_1f3ff;1f46e_1f3ff_200d_2642 # dark skin tone
1f46f;1f46f_200d_2640 # WOMAN WITH BUNNY EARS -> women with bunny ears partying
1f471;1f471_200d_2642 # PERSON WITH BLOND HAIR -> blond-haired man
1f471_1f3fb;1f471_1f3fb_200d_2642 # light skin tone
1f471_1f3fc;1f471_1f3fc_200d_2642 # medium-light skin tone
1f471_1f3fd;1f471_1f3fd_200d_2642 # medium skin tone
1f471_1f3fe;1f471_1f3fe_200d_2642 # medium-dark skin tone
1f471_1f3ff;1f471_1f3ff_200d_2642 # dark skin tone
1f473;1f473_200d_2642 # MAN WITH TURBAN -> man wearing turban
1f473_1f3fb;1f473_1f3fb_200d_2642 # light skin tone
1f473_1f3fc;1f473_1f3fc_200d_2642 # medium-light skin tone
1f473_1f3fd;1f473_1f3fd_200d_2642 # medium skin tone
1f473_1f3fe;1f473_1f3fe_200d_2642 # medium-dark skin tone
1f473_1f3ff;1f473_1f3ff_200d_2642 # dark skin tone
1f477;1f477_200d_2642 # CONSTRUCTION WORKER -> man construction worker
1f477_1f3fb;1f477_1f3fb_200d_2642 # light skin tone
1f477_1f3fc;1f477_1f3fc_200d_2642 # medium-light skin tone
1f477_1f3fd;1f477_1f3fd_200d_2642 # medium skin tone
1f477_1f3fe;1f477_1f3fe_200d_2642 # medium-dark skin tone
1f477_1f3ff;1f477_1f3ff_200d_2642 # dark skin tone
1f481;1f481_200d_2640 # INFORMATION DESK PERSON -> woman tipping hand
1f481_1f3fb;1f481_1f3fb_200d_2640 # light skin tone
1f481_1f3fc;1f481_1f3fc_200d_2640 # medium-light skin tone
1f481_1f3fd;1f481_1f3fd_200d_2640 # medium skin tone
1f481_1f3fe;1f481_1f3fe_200d_2640 # medium-dark skin tone
1f481_1f3ff;1f481_1f3ff_200d_2640 # dark skin tone
1f482;1f482_200d_2642 # GUARDSMAN -> man guard
1f482_1f3fb;1f482_1f3fb_200d_2642 # light skin tone
1f482_1f3fc;1f482_1f3fc_200d_2642 # medium-light skin tone
1f482_1f3fd;1f482_1f3fd_200d_2642 # medium skin tone
1f482_1f3fe;1f482_1f3fe_200d_2642 # medium-dark skin tone
1f482_1f3ff;1f482_1f3ff_200d_2642 # dark skin tone
1f486;1f486_200d_2640 # FACE MASSAGE -> woman getting massage
1f486_1f3fb;1f486_1f3fb_200d_2640 # light skin tone
1f486_1f3fc;1f486_1f3fc_200d_2640 # medium-light skin tone
1f486_1f3fd;1f486_1f3fd_200d_2640 # medium skin tone
1f486_1f3fe;1f486_1f3fe_200d_2640 # medium-dark skin tone
1f486_1f3ff;1f486_1f3ff_200d_2640 # dark skin tone
1f487;1f487_200d_2640 # HAIRCUT -> woman getting haircut
1f487_1f3fb;1f487_1f3fb_200d_2640 # light skin tone
1f487_1f3fc;1f487_1f3fc_200d_2640 # medium-light skin tone
1f487_1f3fd;1f487_1f3fd_200d_2640 # medium skin tone
1f487_1f3fe;1f487_1f3fe_200d_2640 # medium-dark skin tone
1f487_1f3ff;1f487_1f3ff_200d_2640 # dark skin tone
1f48f;1f469_200d_2764_200d_1f48b_200d_1f468 # KISS -> kiss: woman, man
1f491;1f469_200d_2764_200d_1f468 # COUPLE WITH HEART -> couple with heart: woman, man
1f575;1f575_200d_2642 # SLEUTH OR SPY -> man detective
1f575_1f3fb;1f575_1f3fb_200d_2642 # light skin tone
1f575_1f3fc;1f575_1f3fc_200d_2642 # medium-light skin tone
1f575_1f3fd;1f575_1f3fd_200d_2642 # medium skin tone
1f575_1f3fe;1f575_1f3fe_200d_2642 # medium-dark skin tone
1f575_1f3ff;1f575_1f3ff_200d_2642 # dark skin tone
1f645;1f645_200d_2640 # FACE WITH NO GOOD GESTURE -> woman gesturing NO
1f645_1f3fb;1f645_1f3fb_200d_2640 # light skin tone
1f645_1f3fc;1f645_1f3fc_200d_2640 # medium-light skin tone
1f645_1f3fd;1f645_1f3fd_200d_2640 # medium skin tone
1f645_1f3fe;1f645_1f3fe_200d_2640 # medium-dark skin tone
1f645_1f3ff;1f645_1f3ff_200d_2640 # dark skin tone
1f646;1f646_200d_2640 # FACE WITH OK GESTURE -> woman gesturing OK
1f646_1f3fb;1f646_1f3fb_200d_2640 # light skin tone
1f646_1f3fc;1f646_1f3fc_200d_2640 # medium-light skin tone
1f646_1f3fd;1f646_1f3fd_200d_2640 # medium skin tone
1f646_1f3fe;1f646_1f3fe_200d_2640 # medium-dark skin tone
1f646_1f3ff;1f646_1f3ff_200d_2640 # dark skin tone
1f647;1f647_200d_2642 # PERSON BOWING DEEPLY -> man bowing
1f647_1f3fb;1f647_1f3fb_200d_2642 # light skin tone
1f647_1f3fc;1f647_1f3fc_200d_2642 # medium-light skin tone
1f647_1f3fd;1f647_1f3fd_200d_2642 # medium skin tone
1f647_1f3fe;1f647_1f3fe_200d_2642 # medium-dark skin tone
1f647_1f3ff;1f647_1f3ff_200d_2642 # dark skin tone
1f64b;1f64b_200d_2640 # HAPPY PERSON RAISING ONE HAND -> woman raising hand
1f64b_1f3fb;1f64b_1f3fb_200d_2640 # light skin tone
1f64b_1f3fc;1f64b_1f3fc_200d_2640 # medium-light skin tone
1f64b_1f3fd;1f64b_1f3fd_200d_2640 # medium skin tone
1f64b_1f3fe;1f64b_1f3fe_200d_2640 # medium-dark skin tone
1f64b_1f3ff;1f64b_1f3ff_200d_2640 # dark skin tone
1f64d;1f64d_200d_2640 # PERSON FROWNING -> woman frowning
1f64d_1f3fb;1f64d_1f3fb_200d_2640 # light skin tone
1f64d_1f3fc;1f64d_1f3fc_200d_2640 # medium-light skin tone
1f64d_1f3fd;1f64d_1f3fd_200d_2640 # medium skin tone
1f64d_1f3fe;1f64d_1f3fe_200d_2640 # medium-dark skin tone
1f64d_1f3ff;1f64d_1f3ff_200d_2640 # dark skin tone
1f64e;1f64e_200d_2640 # PERSON WITH POUTING FACE -> woman pouting
1f64e_1f3fb;1f64e_1f3fb_200d_2640 # light skin tone
1f64e_1f3fc;1f64e_1f3fc_200d_2640 # medium-light skin tone
1f64e_1f3fd;1f64e_1f3fd_200d_2640 # medium skin tone
1f64e_1f3fe;1f64e_1f3fe_200d_2640 # medium-dark skin tone
1f64e_1f3ff;1f64e_1f3ff_200d_2640 # dark skin tone
1f6a3;1f6a3_200d_2642 # ROWBOAT -> man rowing boat
1f6a3_1f3fb;1f6a3_1f3fb_200d_2642 # light skin tone
1f6a3_1f3fc;1f6a3_1f3fc_200d_2642 # medium-light skin tone
1f6a3_1f3fd;1f6a3_1f3fd_200d_2642 # medium skin tone
1f6a3_1f3fe;1f6a3_1f3fe_200d_2642 # medium-dark skin tone
1f6a3_1f3ff;1f6a3_1f3ff_200d_2642 # dark skin tone
1f6b4;1f6b4_200d_2642 # BICYCLIST -> man biking
1f6b4_1f3fb;1f6b4_1f3fb_200d_2642 # light skin tone
1f6b4_1f3fc;1f6b4_1f3fc_200d_2642 # medium-light skin tone
1f6b4_1f3fd;1f6b4_1f3fd_200d_2642 # medium skin tone
1f6b4_1f3fe;1f6b4_1f3fe_200d_2642 # medium-dark skin tone
1f6b4_1f3ff;1f6b4_1f3ff_200d_2642 # dark skin tone
1f6b5;1f6b5_200d_2642 # MOUNTAIN BICYCLIST -> man mountain biking
1f6b5_1f3fb;1f6b5_1f3fb_200d_2642 # light skin tone
1f6b5_1f3fc;1f6b5_1f3fc_200d_2642 # medium-light skin tone
1f6b5_1f3fd;1f6b5_1f3fd_200d_2642 # medium skin tone
1f6b5_1f3fe;1f6b5_1f3fe_200d_2642 # medium-dark skin tone
1f6b5_1f3ff;1f6b5_1f3ff_200d_2642 # dark skin tone
1f6b6;1f6b6_200d_2642 # PEDESTRIAN -> man walking
1f6b6_1f3fb;1f6b6_1f3fb_200d_2642 # light skin tone
1f6b6_1f3fc;1f6b6_1f3fc_200d_2642 # medium-light skin tone
1f6b6_1f3fd;1f6b6_1f3fd_200d_2642 # medium skin tone
1f6b6_1f3fe;1f6b6_1f3fe_200d_2642 # medium-dark skin tone
1f6b6_1f3ff;1f6b6_1f3ff_200d_2642 # dark skin tone
1f926;1f926_200d_2640 # FACE PALM -> woman facepalming
1f926_1f3fb;1f926_1f3fb_200d_2640 # light skin tone
1f926_1f3fc;1f926_1f3fc_200d_2640 # medium-light skin tone
1f926_1f3fd;1f926_1f3fd_200d_2640 # medium skin tone
1f926_1f3fe;1f926_1f3fe_200d_2640 # medium-dark skin tone
1f926_1f3ff;1f926_1f3ff_200d_2640 # dark skin tone
1f937;1f937_200d_2640 # SHRUG -> woman shrugging
1f937_1f3fb;1f937_1f3fb_200d_2640 # light skin tone
1f937_1f3fc;1f937_1f3fc_200d_2640 # medium-light skin tone
1f937_1f3fd;1f937_1f3fd_200d_2640 # medium skin tone
1f937_1f3fe;1f937_1f3fe_200d_2640 # medium-dark skin tone
1f937_1f3ff;1f937_1f3ff_200d_2640 # dark skin tone
1f938;1f938_200d_2642 # PERSON DOING CARTWHEEL -> man cartwheeling
1f938_1f3fb;1f938_1f3fb_200d_2642 # light skin tone
1f938_1f3fc;1f938_1f3fc_200d_2642 # medium-light skin tone
1f938_1f3fd;1f938_1f3fd_200d_2642 # medium skin tone
1f938_1f3fe;1f938_1f3fe_200d_2642 # medium-dark skin tone
1f938_1f3ff;1f938_1f3ff_200d_2642 # dark skin tone
1f939;1f939_200d_2642 # JUGGLING -> man juggling
1f939_1f3fb;1f939_1f3fb_200d_2642 # light skin tone
1f939_1f3fc;1f939_1f3fc_200d_2642 # medium-light skin tone
1f939_1f3fd;1f939_1f3fd_200d_2642 # medium skin tone
1f939_1f3fe;1f939_1f3fe_200d_2642 # medium-dark skin tone
1f939_1f3ff;1f939_1f3ff_200d_2642 # dark skin tone
1f93c;1f93c_200d_2642 # WRESTLERS -> men wrestling
1f93d;1f93d_200d_2642 # WATER POLO -> man playing water polo
1f93d_1f3fb;1f93d_1f3fb_200d_2642 # light skin tone
1f93d_1f3fc;1f93d_1f3fc_200d_2642 # medium-light skin tone
1f93d_1f3fd;1f93d_1f3fd_200d_2642 # medium skin tone
1f93d_1f3fe;1f93d_1f3fe_200d_2642 # medium-dark skin tone
1f93d_1f3ff;1f93d_1f3ff_200d_2642 # dark skin tone
1f93e;1f93e_200d_2642 # HANDBALL -> man playing handball
1f93e_1f3fb;1f93e_1f3fb_200d_2642 # light skin tone
1f93e_1f3fc;1f93e_1f3fc_200d_2642 # medium-light skin tone
1f93e_1f3fd;1f93e_1f3fd_200d_2642 # medium skin tone
1f93e_1f3fe;1f93e_1f3fe_200d_2642 # medium-dark skin tone
1f93e_1f3ff;1f93e_1f3ff_200d_2642 # dark skin tone
26f9;26f9_200d_2642 # PERSON WITH BALL -> man bouncing ball
26f9_1f3fb;26f9_1f3fb_200d_2642 # light skin tone
26f9_1f3fc;26f9_1f3fc_200d_2642 # medium-light skin tone
26f9_1f3fd;26f9_1f3fd_200d_2642 # medium skin tone
26f9_1f3fe;26f9_1f3fe_200d_2642 # medium-dark skin tone
26f9_1f3ff;26f9_1f3ff_200d_2642 # dark skin tone
fe82b;unknown_flag # no name -> no name

View File

@ -51,7 +51,7 @@ def _merge_keys(dicts):
keys.extend(d.keys())
return frozenset(keys)
def _generate_row_cells(key, font, dir_infos, basepaths):
def _generate_row_cells(key, font, dir_infos, basepaths, colors):
CELL_PREFIX = '<td>'
indices = range(len(basepaths))
def _cell(key, info, basepath):
@ -78,6 +78,10 @@ def _generate_row_cells(key, font, dir_infos, basepaths):
row_cells.extend(
[CELL_PREFIX + _cell(key, dir_infos[i], basepaths[i])
for i in indices])
if len(colors) > 1:
ix = indices[-1]
extension = CELL_PREFIX + _cell(key, dir_infos[ix], basepaths[ix])
row_cells.extend([extension] * (len(colors) - 1))
return row_cells
@ -153,7 +157,8 @@ def _collect_aux_info(dir_infos, all_keys):
return aux_info
def _generate_content(basedir, font, dir_infos, limit, annotate, standalone):
def _generate_content(
basedir, font, dir_infos, limit, annotate, standalone, colors):
"""Generate an html table for the infos. basedir is the parent directory
of the content, filenames will be made relative to this if underneath it,
else absolute. If limit is true and there are multiple dirs, limit the set of
@ -214,12 +219,14 @@ def _generate_content(basedir, font, dir_infos, limit, annotate, standalone):
if font:
header_row.extend(['Emoji ltr', 'Emoji rtl'])
header_row.extend([info.title for info in dir_infos])
if len(colors) > 1:
header_row.extend([dir_infos[-1].title] * (len(colors) - 1))
header_row.extend(['Description', 'Name'])
lines.append('<th>'.join(header_row))
for key in sorted(all_keys):
row = []
row.extend(_generate_row_cells(key, font, dir_infos, basepaths))
row.extend(_generate_row_cells(key, font, dir_infos, basepaths, colors))
row.append(_get_desc(key, dir_infos, basepaths))
row.append(_get_name(key, annotate))
lines.append(''.join(row))
@ -368,9 +375,11 @@ STYLE = """
"""
def write_html_page(
filename, page_title, font, dir_infos, limit, annotate, standalone):
filename, page_title, font, dir_infos, limit, annotate, standalone,
colors):
content = _generate_content(
path.dirname(filename), font, dir_infos, limit, annotate, standalone)
path.dirname(filename), font, dir_infos, limit, annotate, standalone,
colors)
N_STYLE = STYLE
if font:
FONT_FACE_STYLE = """
@ -380,6 +389,13 @@ def write_html_page(
N_STYLE += ' span.efont { font-family: "Emoji"; font-size:32pt }\n'
else:
FONT_FACE_STYLE = ''
num_final_cols = len(colors)
col_colors = ['']
for i, color in enumerate(colors):
col_colors.append(
"""td:nth-last-of-type(%d) { background-color: #%s }\n""" % (
2 + num_final_cols - i, color))
N_STYLE += ' '.join(col_colors)
text = _instantiate_template(
TEMPLATE, {
'title': page_title, 'fontFaceStyle': FONT_FACE_STYLE,
@ -424,6 +440,9 @@ def main():
parser.add_argument(
'-s', '--standalone', help='copy resources used by html under target dir',
action='store_true')
parser.add_argument(
'-c', '--colors', help='list of colors for background', nargs='*',
metavar='hex')
args = parser.parse_args()
file_parts = path.splitext(args.outfile)
@ -436,13 +455,20 @@ def main():
else:
annotations = None
if args.colors == None:
args.colors = ['6e6e6e']
elif not args.colors:
args.colors = """eceff1 f5f5f5 e4e7e9 d9dbdd 080808 263238 21272b 3c474c
4db6ac 80cbc4 5e35b1""".split()
dir_infos = _get_dir_infos(
args.image_dirs, args.exts, args.prefixes, args.titles,
args.default_ext, args.default_prefix)
write_html_page(
args.outfile, args.page_title, args.font, dir_infos, args.limit,
annotations, args.standalone)
annotations, args.standalone, args.colors)
if __name__ == "__main__":

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
#
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import os
from os import path
import sys
"""Rename image files based on codepoints to remove the emoji variation
selector from the name. For our emoji image data, this codepoint is not
relevant."""
EMOJI_VS = 0xfe0f  # emoji (presentation) variation selector


def str_to_seq(seq_str):
  """Parse an underscore-separated hex string into a codepoint tuple."""
  return tuple(int(part, 16) for part in seq_str.split('_'))


def seq_to_str(seq):
  """Format a codepoint tuple as an underscore-separated lowercase hex string."""
  return '_'.join(['%04x' % codepoint for codepoint in seq])


def strip_vs(seq):
  """Return seq as a tuple with all emoji variation selectors removed."""
  return tuple(codepoint for codepoint in seq if codepoint != EMOJI_VS)
def strip_vs_from_filenames(imagedir, prefix, ext, dry_run=False):
  """Rename files in imagedir to drop the emoji variation selector.

  Files are expected to be named <prefix><seq>.<ext> with seq an
  underscore-separated hex codepoint sequence.  Aborts without renaming
  anything if a stripped name would collide with an existing file.  If
  dry_run is true, lists the renames without performing them.
  """
  prefix_len = len(prefix)
  suffix_len = len(ext) + 1  # extension plus the dot
  names = [path.basename(f)
           for f in glob.glob(
               path.join(imagedir, '%s*.%s' % (prefix, ext)))]

  renames = {}
  for name in names:
    seq = str_to_seq(name[prefix_len:-suffix_len])
    if seq and EMOJI_VS in seq:
      newname = '%s%s.%s' % (prefix, seq_to_str(strip_vs(seq)), ext)
      if newname in names:
        sys.stderr.write('%s non-vs name %s already exists.\n' % (
            name, newname))
        return
      renames[name] = newname

  # sort for deterministic order (was arbitrary dict iteration order)
  for k, v in sorted(renames.items()):
    if dry_run:
      print('%s -> %s' % (k, v))
    else:
      os.rename(path.join(imagedir, k), path.join(imagedir, v))
  if not dry_run:
    # only report when renames actually happened
    print('renamed %d files in %s' % (len(renames), imagedir))
def main():
  """Command-line entry point: parse flags and run the rename."""
  argparser = argparse.ArgumentParser()
  argparser.add_argument(
      '-d', '--imagedir', metavar='dir', required=True,
      help='directory containing images to rename')
  argparser.add_argument(
      '-e', '--ext', default='png', choices=['ai', 'png', 'svg'],
      help='image filename extension (default png)')
  argparser.add_argument(
      '-p', '--prefix', default='emoji_u', metavar='pfx',
      help='image filename prefix (default emoji_u)')
  argparser.add_argument(
      '-n', '--dry_run', action='store_true',
      help='compute renames and list only')
  args = argparser.parse_args()
  strip_vs_from_filenames(args.imagedir, args.prefix, args.ext, args.dry_run)


if __name__ == '__main__':
  main()