diff --git a/generate_emoji_html.py b/generate_emoji_html.py new file mode 100755 index 000000000..5cbd82f60 --- /dev/null +++ b/generate_emoji_html.py @@ -0,0 +1,318 @@ +#!/usr/bin/python +# +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Build an html page showing emoji images. + +This takes a list of directories containing emoji image files, and +builds an html page presenting the images along with their composition +(for sequences) and unicode names (for individual emoji).""" + +import argparse +import codecs +import collections +import glob +from os import path +import re +import sys +from nototools import unicode_data + +_default_dir = 'png/128' +_default_ext = 'png' +_default_prefix = 'emoji_u' +_default_title = 'Emoji List' + +# DirInfo represents information about a directory of file names. +# - directory is the directory path +# - title is the title to use for this directory +# - filemap is a dict mapping from a tuple of codepoints to the name of +# a file in the directory. 
DirInfo = collections.namedtuple('DirInfo', 'directory, title, filemap')


def _merge_keys(dicts):
    """Return the union of the keys in the list of dicts as a frozenset."""
    # Iterating a dict yields its keys, so union() can consume the dicts
    # directly; an empty list yields an empty frozenset.
    return frozenset().union(*dicts)


def _generate_row_cells(key, dir_infos):
    """Return one table cell per DirInfo for the codepoint sequence key.

    Each cell holds an image reference when the directory has a file for
    the sequence, and the text '-missing-' otherwise.
    """
    CELL_PREFIX = '<td>'

    def _cell(info):
        if key in info.filemap:
            return '<img src="%s">' % path.join(
                info.directory, info.filemap[key])
        return '-missing-'

    return [CELL_PREFIX + _cell(info) for info in dir_infos]


def _get_desc(key_tuple, dir_infos):
    """Return the description cell for a codepoint sequence.

    A single codepoint is shown as 'U+XXXX'.  A sequence is shown as its
    parts: an image when some directory has a file for that single
    codepoint, the hex value otherwise, '+' for a zero width joiner, and
    nothing for the emoji variation selector.
    """
    CELL_PREFIX = '<td class="desc">'

    def _get_filepath(cp):
        # The first directory that has an image for this codepoint wins.
        cp_key = (cp,)
        for info in dir_infos:
            if cp_key in info.filemap:
                return path.join(info.directory, info.filemap[cp_key])
        return None

    def _get_part(cp):
        if cp == 0x200d:  # zwj, common so replace with '+'
            return '+'
        if cp == 0xfe0f:  # emoji variation selector, we ignore it
            return None
        fname = _get_filepath(cp)
        if fname:
            return '<img src="%s">' % fname
        return '%04X' % cp

    if len(key_tuple) == 1:
        desc = 'U+%04X' % key_tuple
    else:
        # filter(None, ...) drops the parts we chose to ignore.
        desc = ' '.join(filter(None, [_get_part(cp) for cp in key_tuple]))
    return CELL_PREFIX + desc


def _get_name(key_tuple):
    """Return the name cell for a codepoint sequence.

    Only single codepoints get a name; sequences get an empty cell.
    Names come from unicode_data, with proposed emoji marked as such and
    unknown codepoints shown as '(error)'.
    """
    CELL_PREFIX = '<td class="name">'
    if len(key_tuple) != 1:
        name = ''
    else:
        cp = key_tuple[0]
        if cp in unicode_data.proposed_emoji_cps():
            name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
        else:
            name = unicode_data.name(cp, '(error)')
    return CELL_PREFIX + name


def _generate_content(dir_infos):
    """Generate an html table for the infos.

    The table has one column per DirInfo followed by 'Description' and
    'Name' columns, and one row per codepoint sequence found in any of the
    directories, in sorted order.
    """
    lines = ['<table>']
    # The leading empty string makes join() emit a '<th>' before every
    # header, including the first.
    header_row = ['']
    header_row.extend([info.title for info in dir_infos])
    header_row.extend(['Description', 'Name'])
    lines.append('<th>'.join(header_row))

    all_keys = _merge_keys([info.filemap for info in dir_infos])
    for key in sorted(all_keys):
        row = []
        row.extend(_generate_row_cells(key, dir_infos))
        row.append(_get_desc(key, dir_infos))
        row.append(_get_name(key))
        lines.append(''.join(row))
    return '\n  <tr>'.join(lines) + '\n</table>'


def _get_image_data(image_dir, ext, prefix):
    """Return a map from a tuple of cp sequences to a filename.

    This filters by file extension, and expects the rest of the files
    to match the prefix followed by a sequence of hex codepoints separated
    by underscore.  Files that don't match, duplicate sequences (because
    of casing), and out_of_range or empty codepoints raise an error.

    All problems are collected and reported together on stderr before a
    single ValueError is raised.
    """
    fails = []
    result = {}
    # Escape prefix and ext so they match literally, match the '.' before
    # the extension literally, and anchor at the end of the name.
    expect_re = re.compile(
        r'%s([0-9A-Fa-f_]+)\.%s$' % (re.escape(prefix), re.escape(ext)))
    for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
        filename = path.basename(f)
        m = expect_re.match(filename)
        if not m:
            fails.append('did not match: ' + filename)
            continue
        try:
            # An empty segment (e.g. a doubled underscore) makes int() raise.
            cps = tuple(int(s, 16) for s in m.group(1).split('_'))
        except ValueError:
            fails.append('bad cp sequence: ' + filename)
            continue
        if any(cp > 0x10ffff for cp in cps):
            fails.append('cp out of range: ' + filename)
            continue
        if cps in result:
            fails.append(
                'duplicate sequence: %s and %s' % (result[cps], filename))
            continue
        result[cps] = filename
    if fails:
        sys.stderr.write(
            'get_image_data failed (%s, %s, %s):\n  %s\n' % (
                image_dir, ext, prefix, '\n  '.join(fails)))
        raise ValueError('get image data failed')
    return result


def _get_dir_infos(
        image_dirs, exts=None, prefixes=None, titles=None,
        default_ext=_default_ext, default_prefix=_default_prefix):
    """Return a list of DirInfos for the image_dirs.

    When defined, exts, prefixes, and titles should be the same length as
    image_dirs.  Titles default to using the last segments of the
    image_dirs, exts and prefixes default to the corresponding default
    values.

    Raises ValueError when a supplied list has the wrong length, or when
    the files in a directory cannot be mapped to codepoint sequences.
    """
    count = len(image_dirs)

    def _per_dir(values, default, what):
        # Expand a missing list to one default per directory, and reject
        # a list whose length does not match the directories.
        if not values:
            return [default] * count
        if len(values) != count:
            raise ValueError('have %d image dirs but %d %s' % (
                count, len(values), what))
        return values

    titles = _per_dir(titles, None, 'titles')
    exts = _per_dir(exts, default_ext, 'extensions')
    prefixes = _per_dir(prefixes, default_prefix, 'prefixes')

    infos = []
    for image_dir, title, ext, prefix in zip(
            image_dirs, titles, exts, prefixes):
        # Individual empty entries also fall back to the defaults.
        title = title or path.basename(path.normpath(image_dir))
        filemap = _get_image_data(
            image_dir, ext or default_ext, prefix or default_prefix)
        infos.append(DirInfo(image_dir, title, filemap))
    return infos


def _instantiate_template(template, arg_dict):
    """Return template with each '{{id}}' replaced by arg_dict[id].

    Args that the template does not use are reported on stderr.  Ids in
    the template that have no arg raise ValueError.
    """
    id_regex = re.compile(r'\{\{([a-zA-Z0-9_]+)\}\}')
    ids = set(m.group(1) for m in id_regex.finditer(template))
    keyset = set(arg_dict)
    extra_args = keyset - ids
    if extra_args:
        sys.stderr.write(
            'the following %d args are unused:\n%s\n' %
            (len(extra_args), ', '.join(sorted(extra_args))))
    missing_ids = ids - keyset
    if missing_ids:
        raise ValueError(
            'the following %d ids in the template have no args:\n%s' %
            (len(missing_ids), ', '.join(sorted(missing_ids))))
    # A function replacement inserts the value verbatim (no backslash or
    # group-reference processing), and a single pass never re-expands a
    # placeholder that appears inside a substituted value.
    return id_regex.sub(lambda m: arg_dict[m.group(1)], template)


TEMPLATE = """<!DOCTYPE html>
<html lang='en'>
  <head>
    <meta charset="utf-8">
    <title>{{title}}</title>
    <style>{{style}}</style>
  </head>
  <body>
  {{content}}
  </body>
</html>
"""

STYLE = """
      tbody { background-color: rgb(210, 210, 210) }
      tbody img { width: 64px; height: 64px }
      tbody .desc { font-size: 20pt; font-weight: bold }
      tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
      tbody .name { background-color: white }
"""


def write_html_page(filename, page_title, dir_infos):
    """Write a utf-8 html page showing the images in dir_infos to filename."""
    content = _generate_content(dir_infos)
    text = _instantiate_template(
        TEMPLATE, {'title': page_title, 'style': STYLE, 'content': content})
    with codecs.open(filename, 'w', 'utf-8') as f:
        f.write(text)


def main():
    """Parse the command line and write the emoji html page."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filename', help='path to output file', metavar='filename')
    parser.add_argument(
        '--page_title', help='page title', metavar='title',
        default='Emoji Table')
    # Required: without any directory there is nothing to list, and a
    # missing value would otherwise fail later with an obscure TypeError.
    parser.add_argument(
        '-d', '--image_dirs', help='image directories', metavar='dir',
        nargs='+', required=True)
    parser.add_argument(
        '-e', '--exts', help='file extension, one per image dir',
        metavar='ext', nargs='*')
    parser.add_argument(
        '-p', '--prefixes', help='file name prefix, one per image dir',
        metavar='prefix', nargs='*')
    parser.add_argument(
        '-t', '--titles', help='title, one per image dir', metavar='title',
        nargs='*')
    parser.add_argument(
        '-de', '--default_ext', help='default extension', metavar='ext',
        default=_default_ext)
    parser.add_argument(
        '-dp', '--default_prefix', help='default prefix', metavar='prefix',
        default=_default_prefix)

    args = parser.parse_args()
    file_parts = path.splitext(args.filename)
    # splitext keeps the leading dot on the extension.
    if file_parts[1] != '.html':
        args.filename = file_parts[0] + '.html'
        # Single-argument print() behaves the same on Python 2 and 3.
        print('added .html extension to filename:\n%s' % args.filename)

    dir_infos = _get_dir_infos(
        args.image_dirs, args.exts, args.prefixes, args.titles,
        args.default_ext, args.default_prefix)

    write_html_page(args.filename, args.page_title, dir_infos)


if __name__ == "__main__":
    main()