diff --git a/generate_emoji_html.py b/generate_emoji_html.py
new file mode 100755
index 000000000..5cbd82f60
--- /dev/null
+++ b/generate_emoji_html.py
@@ -0,0 +1,318 @@
+#!/usr/bin/python
+#
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Build an html page showing emoji images.
+
+This takes a list of directories containing emoji image files, and
+builds an html page presenting the images along with their composition
+(for sequences) and unicode names (for individual emoji)."""
+
+import argparse
+import codecs
+import collections
+import glob
+from os import path
+import re
+import sys
+from nototools import unicode_data
+
+_default_dir = 'png/128'
+_default_ext = 'png'
+_default_prefix = 'emoji_u'
+_default_title = 'Emoji List'
+
+# DirInfo represents information about a directory of file names.
+# - directory is the directory path
+# - title is the title to use for this directory
+# - filemap is a dict mapping from a tuple of codepoints to the name of
+# a file in the directory.
+DirInfo = collections.namedtuple('DirInfo', 'directory, title, filemap')
+
+
+def _merge_keys(dicts):
+ """Return the union of the keys in the list of dicts."""
+ keys = []
+ for d in dicts:
+ keys.extend(d.keys())
+ return frozenset(keys)
+
+def _generate_row_cells(key, dir_infos):
+ CELL_PREFIX = '
'
+ def _cell(key, info):
+ if key in info.filemap:
+ return '' % path.join(
+ info.directory, info.filemap[key])
+ return '-missing-'
+ return [CELL_PREFIX + _cell(key, info) for info in dir_infos]
+
+
+def _get_desc(key_tuple, dir_infos):
+ CELL_PREFIX = ' | '
+ def _get_filepath(cp):
+ cp_key = tuple([cp])
+ for info in dir_infos:
+ if cp_key in info.filemap:
+ return path.join(info.directory, info.filemap[cp_key])
+ return None
+
+ def _get_part(cp):
+ if cp == 0x200d: # zwj, common so replace with '+'
+ return '+'
+ if cp == 0xfe0f: # emoji variation selector, we ignore it
+ return None
+ fname = _get_filepath(cp)
+ if fname:
+ return '' % fname
+ return '%04X' % cp
+
+ if len(key_tuple) == 1:
+ desc = 'U+%04X' % key_tuple
+ else:
+ desc = ' '.join(filter(None, [_get_part(cp) for cp in key_tuple]))
+ return CELL_PREFIX + desc
+
+
+def _get_name(key_tuple):
+ CELL_PREFIX = ' | '
+ if len(key_tuple) != 1:
+ name = ''
+ else:
+ cp = key_tuple[0]
+ if cp in unicode_data.proposed_emoji_cps():
+ name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
+ else:
+ name =unicode_data.name(cp, '(error)')
+ return CELL_PREFIX + name
+
+
+def _generate_content(dir_infos):
+ """Generate an html table for the infos."""
+ lines = ['']
+ header_row = ['']
+ header_row.extend([info.title for info in dir_infos])
+ header_row.extend(['Description', 'Name'])
+ lines.append(''.join(header_row))
+
+ all_keys = _merge_keys([info.filemap for info in dir_infos])
+ for key in sorted(all_keys):
+ row = []
+ row.extend(_generate_row_cells(key, dir_infos))
+ row.append(_get_desc(key, dir_infos))
+ row.append(_get_name(key))
+ lines.append(''.join(row))
+ return '\n | '.join(lines) + '\n '
+
+
+"""
+def _generate_content(files, prefix=_default_prefix):
+ key_to_filename = {}
+ for fname in files:
+ filename = path.basename(fname)
+ if not filename.startswith(prefix):
+ print >> sys.stderr, 'bad prefix for filename %s' % fname
+ continue
+ key_string = path.splitext(filename)[0]
+ key_string = key_string[len(prefix):]
+ try:
+ key_tuple = tuple(int(k, 16) for k in key_string.split('_'))
+ except:
+ print 'bad filename: "%s"' % key_string
+ key_to_filename[key_tuple] = fname
+
+ lines = [""]
+ for key_tuple in sorted(key_to_filename):
+ if len(key_tuple) == 1:
+ key_string = 'U+%04X' % key_tuple
+ else:
+ key_string = ' + '.join(
+ '' % key_to_filename[tuple([key])]
+ for key in key_tuple
+ if tuple([key]) in key_to_filename)
+ name = _get_name(key_tuple)
+ lines.append(' | '
+ '%s | '
+ '%s' % (
+ key_to_filename[key_tuple], key_string, name))
+ return '\n '.join(lines) + '\n'
+"""
+
+def _get_image_data(image_dir, ext, prefix):
+ """Return a map from a tuple of cp sequences to a filename.
+
+ This filters by file extension, and expects the rest of the files
+ to match the prefix followed by a sequence of hex codepoints separated
+ by underscore. Files that don't match, duplicate sequences (because
+ of casing), and out_of_range or empty codepoints raise an error."""
+
+ fails = []
+ result = {}
+ expect_re = re.compile(r'%s([0-9A-Fa-f_]+).%s' % (prefix, ext))
+ for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
+ filename = path.basename(f)
+ m = expect_re.match(filename)
+ if not m:
+ fails.add('did not match: ' + filename)
+ continue
+ seq = m.group(1)
+ try:
+ cps = tuple(int(s, 16) for s in seq.split('_'))
+ except:
+ fails.add('bad cp sequence: ' + filename)
+ continue
+ this_failed = False
+ for cp in cps:
+ if (cp > 0x10ffff):
+ fails.add('cp out of range: ' + filename)
+ this_failed = True
+ break
+ if this_failed:
+ continue
+ if cps in result:
+ fails.add('duplicate sequence: %s and %s' (result[cps], filename))
+ continue
+ result[cps] = filename
+ if fails:
+ print >> sys.stderr, 'get_image_data failed (%s, %s, %s):\n %s' % (
+ image_dir, ext, prefix, '\n '.join(fails))
+ raise ValueError('get image data failed')
+ return result
+
+
+def _get_dir_infos(
+ image_dirs, exts=None, prefixes=None, titles=None,
+ default_ext=_default_ext, default_prefix=_default_prefix):
+ """Return a list of DirInfos for the image_dirs. When defined,
+ exts, prefixes, and titles should be the same length as image_dirs.
+ Titles default to using the last segments of the image_dirs,
+ exts and prefixes default to the corresponding default values."""
+
+ count = len(image_dirs)
+ if not titles:
+ titles = [None] * count
+ elif len(titles) != count:
+ raise ValueError('have %d image dirs but %d titles' % (
+ count, len(titles)))
+ if not exts:
+ exts = [default_ext] * count
+ elif len(exts) != count:
+ raise ValueError('have %d image dirs but %d extensions' % (
+ count, len(exts)))
+ if not prefixes:
+ prefixes = [default_prefix] * count
+ elif len(prefixes) != count:
+ raise ValueError('have %d image dirs but %d prefixes' % (
+ count, len(prefixes)))
+
+ infos = []
+ for i in range(count):
+ image_dir = image_dirs[i]
+ title = titles[i] or path.basename(path.normpath(image_dir))
+ ext = exts[i] or default_ext
+ prefix = prefixes[i] or default_prefix
+ filemap = _get_image_data(image_dir, ext, prefix)
+ infos.append(DirInfo(image_dir, title, filemap))
+ return infos
+
+
+def _instantiate_template(template, arg_dict):
+ id_regex = re.compile('{{([a-zA-Z0-9_]+)}}')
+ ids = set(m.group(1) for m in id_regex.finditer(template))
+ keyset = set(arg_dict.keys())
+ missing_ids = ids - keyset
+ extra_args = keyset - ids
+ if extra_args:
+ print >> sys.stderr, (
+ 'the following %d args are unused:\n%s' %
+ (len(extra_args), ', '.join(sorted(extra_args))))
+ text = template
+ if missing_ids:
+ raise ValueError(
+ 'the following %d ids in the template have no args:\n%s' %
+ (len(missing_ids), ', '.join(sorted(missing_ids))))
+ for arg in ids:
+ text = re.sub('{{%s}}' % arg, arg_dict[arg], text)
+ return text
+
+
+TEMPLATE = """
+
+
+
+ {{title}}
+
+
+
+ {{content}}
+
+
+"""
+
+STYLE = """
+ tbody { background-color: rgb(210, 210, 210) }
+ tbody img { width: 64px; height: 64px }
+ tbody .desc { font-size: 20pt; font-weight: bold }
+ tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
+ tbody .name { background-color: white }
+"""
+
+def write_html_page(filename, page_title, dir_infos):
+ content = _generate_content(dir_infos)
+ text = _instantiate_template(
+ TEMPLATE, {'title': page_title, 'style': STYLE, 'content': content})
+ with codecs.open(filename, 'w', 'utf-8') as f:
+ f.write(text)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'filename', help='path to output file', metavar='filename')
+ parser.add_argument(
+ '--page_title', help='page title', metavar='title', default='Emoji Table')
+ parser.add_argument(
+ '-d', '--image_dirs', help='image directories', metavar='dir',
+ nargs='+')
+ parser.add_argument(
+ '-e', '--exts', help='file extension, one per image dir', metavar='ext',
+ nargs='*')
+ parser.add_argument(
+ '-p', '--prefixes', help='file name prefix, one per image dir',
+ metavar='prefix', nargs='*')
+ parser.add_argument(
+ '-t', '--titles', help='title, one per image dir', metavar='title',
+ nargs='*'),
+ parser.add_argument(
+ '-de', '--default_ext', help='default extension', metavar='ext',
+ default=_default_ext)
+ parser.add_argument(
+ '-dp', '--default_prefix', help='default prefix', metavar='prefix',
+ default=_default_prefix)
+
+ args = parser.parse_args()
+ file_parts = path.splitext(args.filename)
+ if file_parts[1] != 'html':
+ args.filename = file_parts[0] + '.html'
+ print 'added .html extension to filename:\n%s' % args.filename
+
+ dir_infos = _get_dir_infos(
+ args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext,
+ args.default_prefix)
+
+ write_html_page(args.filename, args.page_title, dir_infos)
+
+
+if __name__ == "__main__":
+ main()
| |