Add tool that generates an html file comparing emoji images.
This uses nototools to get unicode names. It relies on new api in nototools.unicode_data to get data/names of proposed emoji that are not currently approved and so not in the standard data files.pull/46/head
parent
6caa07aaae
commit
a9b8e7f8e0
|
@ -0,0 +1,318 @@
|
|||
#!/usr/bin/python
|
||||
#
|
||||
# Copyright 2016 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Build an html page showing emoji images.
|
||||
|
||||
This takes a list of directories containing emoji image files, and
|
||||
builds an html page presenting the images along with their composition
|
||||
(for sequences) and unicode names (for individual emoji)."""
|
||||
|
||||
import argparse
|
||||
import codecs
|
||||
import collections
|
||||
import glob
|
||||
from os import path
|
||||
import re
|
||||
import sys
|
||||
from nototools import unicode_data
|
||||
|
||||
_default_dir = 'png/128'
|
||||
_default_ext = 'png'
|
||||
_default_prefix = 'emoji_u'
|
||||
_default_title = 'Emoji List'
|
||||
|
||||
# DirInfo represents information about a directory of file names.
|
||||
# - directory is the directory path
|
||||
# - title is the title to use for this directory
|
||||
# - filemap is a dict mapping from a tuple of codepoints to the name of
|
||||
# a file in the directory.
|
||||
DirInfo = collections.namedtuple('DirInfo', 'directory, title, filemap')
|
||||
|
||||
|
||||
def _merge_keys(dicts):
|
||||
"""Return the union of the keys in the list of dicts."""
|
||||
keys = []
|
||||
for d in dicts:
|
||||
keys.extend(d.keys())
|
||||
return frozenset(keys)
|
||||
|
||||
def _generate_row_cells(key, dir_infos):
|
||||
CELL_PREFIX = '<td>'
|
||||
def _cell(key, info):
|
||||
if key in info.filemap:
|
||||
return '<img src="%s">' % path.join(
|
||||
info.directory, info.filemap[key])
|
||||
return '-missing-'
|
||||
return [CELL_PREFIX + _cell(key, info) for info in dir_infos]
|
||||
|
||||
|
||||
def _get_desc(key_tuple, dir_infos):
|
||||
CELL_PREFIX = '<td class="desc">'
|
||||
def _get_filepath(cp):
|
||||
cp_key = tuple([cp])
|
||||
for info in dir_infos:
|
||||
if cp_key in info.filemap:
|
||||
return path.join(info.directory, info.filemap[cp_key])
|
||||
return None
|
||||
|
||||
def _get_part(cp):
|
||||
if cp == 0x200d: # zwj, common so replace with '+'
|
||||
return '+'
|
||||
if cp == 0xfe0f: # emoji variation selector, we ignore it
|
||||
return None
|
||||
fname = _get_filepath(cp)
|
||||
if fname:
|
||||
return '<img src="%s">' % fname
|
||||
return '%04X' % cp
|
||||
|
||||
if len(key_tuple) == 1:
|
||||
desc = 'U+%04X' % key_tuple
|
||||
else:
|
||||
desc = ' '.join(filter(None, [_get_part(cp) for cp in key_tuple]))
|
||||
return CELL_PREFIX + desc
|
||||
|
||||
|
||||
def _get_name(key_tuple):
|
||||
CELL_PREFIX = '<td class="name">'
|
||||
if len(key_tuple) != 1:
|
||||
name = ''
|
||||
else:
|
||||
cp = key_tuple[0]
|
||||
if cp in unicode_data.proposed_emoji_cps():
|
||||
name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
|
||||
else:
|
||||
name =unicode_data.name(cp, '(error)')
|
||||
return CELL_PREFIX + name
|
||||
|
||||
|
||||
def _generate_content(dir_infos):
|
||||
"""Generate an html table for the infos."""
|
||||
lines = ['<table>']
|
||||
header_row = ['']
|
||||
header_row.extend([info.title for info in dir_infos])
|
||||
header_row.extend(['Description', 'Name'])
|
||||
lines.append('<th>'.join(header_row))
|
||||
|
||||
all_keys = _merge_keys([info.filemap for info in dir_infos])
|
||||
for key in sorted(all_keys):
|
||||
row = []
|
||||
row.extend(_generate_row_cells(key, dir_infos))
|
||||
row.append(_get_desc(key, dir_infos))
|
||||
row.append(_get_name(key))
|
||||
lines.append(''.join(row))
|
||||
return '\n <tr>'.join(lines) + '\n</table>'
|
||||
|
||||
|
||||
"""
|
||||
def _generate_content(files, prefix=_default_prefix):
|
||||
key_to_filename = {}
|
||||
for fname in files:
|
||||
filename = path.basename(fname)
|
||||
if not filename.startswith(prefix):
|
||||
print >> sys.stderr, 'bad prefix for filename %s' % fname
|
||||
continue
|
||||
key_string = path.splitext(filename)[0]
|
||||
key_string = key_string[len(prefix):]
|
||||
try:
|
||||
key_tuple = tuple(int(k, 16) for k in key_string.split('_'))
|
||||
except:
|
||||
print 'bad filename: "%s"' % key_string
|
||||
key_to_filename[key_tuple] = fname
|
||||
|
||||
lines = ["<table>"]
|
||||
for key_tuple in sorted(key_to_filename):
|
||||
if len(key_tuple) == 1:
|
||||
key_string = 'U+%04X' % key_tuple
|
||||
else:
|
||||
key_string = ' + '.join(
|
||||
'<img src="%s">' % key_to_filename[tuple([key])]
|
||||
for key in key_tuple
|
||||
if tuple([key]) in key_to_filename)
|
||||
name = _get_name(key_tuple)
|
||||
lines.append('<tr><td><img src="%s"><td class="desc">'
|
||||
'%s<td class="name">'
|
||||
'%s' % (
|
||||
key_to_filename[key_tuple], key_string, name))
|
||||
return '\n '.join(lines) + '\n<table>'
|
||||
"""
|
||||
|
||||
def _get_image_data(image_dir, ext, prefix):
|
||||
"""Return a map from a tuple of cp sequences to a filename.
|
||||
|
||||
This filters by file extension, and expects the rest of the files
|
||||
to match the prefix followed by a sequence of hex codepoints separated
|
||||
by underscore. Files that don't match, duplicate sequences (because
|
||||
of casing), and out_of_range or empty codepoints raise an error."""
|
||||
|
||||
fails = []
|
||||
result = {}
|
||||
expect_re = re.compile(r'%s([0-9A-Fa-f_]+).%s' % (prefix, ext))
|
||||
for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
|
||||
filename = path.basename(f)
|
||||
m = expect_re.match(filename)
|
||||
if not m:
|
||||
fails.add('did not match: ' + filename)
|
||||
continue
|
||||
seq = m.group(1)
|
||||
try:
|
||||
cps = tuple(int(s, 16) for s in seq.split('_'))
|
||||
except:
|
||||
fails.add('bad cp sequence: ' + filename)
|
||||
continue
|
||||
this_failed = False
|
||||
for cp in cps:
|
||||
if (cp > 0x10ffff):
|
||||
fails.add('cp out of range: ' + filename)
|
||||
this_failed = True
|
||||
break
|
||||
if this_failed:
|
||||
continue
|
||||
if cps in result:
|
||||
fails.add('duplicate sequence: %s and %s' (result[cps], filename))
|
||||
continue
|
||||
result[cps] = filename
|
||||
if fails:
|
||||
print >> sys.stderr, 'get_image_data failed (%s, %s, %s):\n %s' % (
|
||||
image_dir, ext, prefix, '\n '.join(fails))
|
||||
raise ValueError('get image data failed')
|
||||
return result
|
||||
|
||||
|
||||
def _get_dir_infos(
|
||||
image_dirs, exts=None, prefixes=None, titles=None,
|
||||
default_ext=_default_ext, default_prefix=_default_prefix):
|
||||
"""Return a list of DirInfos for the image_dirs. When defined,
|
||||
exts, prefixes, and titles should be the same length as image_dirs.
|
||||
Titles default to using the last segments of the image_dirs,
|
||||
exts and prefixes default to the corresponding default values."""
|
||||
|
||||
count = len(image_dirs)
|
||||
if not titles:
|
||||
titles = [None] * count
|
||||
elif len(titles) != count:
|
||||
raise ValueError('have %d image dirs but %d titles' % (
|
||||
count, len(titles)))
|
||||
if not exts:
|
||||
exts = [default_ext] * count
|
||||
elif len(exts) != count:
|
||||
raise ValueError('have %d image dirs but %d extensions' % (
|
||||
count, len(exts)))
|
||||
if not prefixes:
|
||||
prefixes = [default_prefix] * count
|
||||
elif len(prefixes) != count:
|
||||
raise ValueError('have %d image dirs but %d prefixes' % (
|
||||
count, len(prefixes)))
|
||||
|
||||
infos = []
|
||||
for i in range(count):
|
||||
image_dir = image_dirs[i]
|
||||
title = titles[i] or path.basename(path.normpath(image_dir))
|
||||
ext = exts[i] or default_ext
|
||||
prefix = prefixes[i] or default_prefix
|
||||
filemap = _get_image_data(image_dir, ext, prefix)
|
||||
infos.append(DirInfo(image_dir, title, filemap))
|
||||
return infos
|
||||
|
||||
|
||||
def _instantiate_template(template, arg_dict):
|
||||
id_regex = re.compile('{{([a-zA-Z0-9_]+)}}')
|
||||
ids = set(m.group(1) for m in id_regex.finditer(template))
|
||||
keyset = set(arg_dict.keys())
|
||||
missing_ids = ids - keyset
|
||||
extra_args = keyset - ids
|
||||
if extra_args:
|
||||
print >> sys.stderr, (
|
||||
'the following %d args are unused:\n%s' %
|
||||
(len(extra_args), ', '.join(sorted(extra_args))))
|
||||
text = template
|
||||
if missing_ids:
|
||||
raise ValueError(
|
||||
'the following %d ids in the template have no args:\n%s' %
|
||||
(len(missing_ids), ', '.join(sorted(missing_ids))))
|
||||
for arg in ids:
|
||||
text = re.sub('{{%s}}' % arg, arg_dict[arg], text)
|
||||
return text
|
||||
|
||||
|
||||
TEMPLATE = """<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>{{title}}</title>
|
||||
<style>{{style}}</style>
|
||||
</head>
|
||||
<body>
|
||||
{{content}}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
STYLE = """
|
||||
tbody { background-color: rgb(210, 210, 210) }
|
||||
tbody img { width: 64px; height: 64px }
|
||||
tbody .desc { font-size: 20pt; font-weight: bold }
|
||||
tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
|
||||
tbody .name { background-color: white }
|
||||
"""
|
||||
|
||||
def write_html_page(filename, page_title, dir_infos):
|
||||
content = _generate_content(dir_infos)
|
||||
text = _instantiate_template(
|
||||
TEMPLATE, {'title': page_title, 'style': STYLE, 'content': content})
|
||||
with codecs.open(filename, 'w', 'utf-8') as f:
|
||||
f.write(text)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'filename', help='path to output file', metavar='filename')
|
||||
parser.add_argument(
|
||||
'--page_title', help='page title', metavar='title', default='Emoji Table')
|
||||
parser.add_argument(
|
||||
'-d', '--image_dirs', help='image directories', metavar='dir',
|
||||
nargs='+')
|
||||
parser.add_argument(
|
||||
'-e', '--exts', help='file extension, one per image dir', metavar='ext',
|
||||
nargs='*')
|
||||
parser.add_argument(
|
||||
'-p', '--prefixes', help='file name prefix, one per image dir',
|
||||
metavar='prefix', nargs='*')
|
||||
parser.add_argument(
|
||||
'-t', '--titles', help='title, one per image dir', metavar='title',
|
||||
nargs='*'),
|
||||
parser.add_argument(
|
||||
'-de', '--default_ext', help='default extension', metavar='ext',
|
||||
default=_default_ext)
|
||||
parser.add_argument(
|
||||
'-dp', '--default_prefix', help='default prefix', metavar='prefix',
|
||||
default=_default_prefix)
|
||||
|
||||
args = parser.parse_args()
|
||||
file_parts = path.splitext(args.filename)
|
||||
if file_parts[1] != 'html':
|
||||
args.filename = file_parts[0] + '.html'
|
||||
print 'added .html extension to filename:\n%s' % args.filename
|
||||
|
||||
dir_infos = _get_dir_infos(
|
||||
args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext,
|
||||
args.default_prefix)
|
||||
|
||||
write_html_page(args.filename, args.page_title, dir_infos)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue