diff --git a/Makefile b/Makefile
index 64974a823..cbdd6ffa1 100644
--- a/Makefile
+++ b/Makefile
@@ -43,8 +43,10 @@ RENAMED_FLAGS_DIR := $(BUILD_DIR)/renamed_flags
 QUANTIZED_DIR := $(BUILD_DIR)/quantized_pngs
 COMPRESSED_DIR := $(BUILD_DIR)/compressed_pngs
 
+# Unknown flag is PUA fe82b
+
 LIMITED_FLAGS = CN DE ES FR GB IT JP KR RU US
-SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
+SELECTED_FLAGS = AC AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ \
        BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BW BY BZ \
        CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CX CY CZ \
        DE DJ DK DM DO DZ \
@@ -52,7 +54,7 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
        FI FJ FM FO FR \
        GA GB GD GE GG GH GI GL GM GN GQ GR GT GU GW GY \
        HK HN HR HT HU \
-       ID IE IL IM IN IO IQ IR IS IT \
+       IC ID IE IL IM IN IO IQ IR IS IT \
        JE JM JO JP \
        KE KG KH KI KM KN KP KR KW KY KZ \
        LA LB LC LI LK LR LS LT LU LV LY \
@@ -62,8 +64,8 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
        PA PE PF PG PH PK PL PN PR PS PT PW PY \
        QA \
        RO RS RU RW \
-       SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ \
-       TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
+       SA SB SC SD SE SG SH SI SK SL SM SN SO SR SS ST SV SX SY SZ \
+       TA TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
        UA UG US UY UZ \
        VA VC VE VG VI VN VU \
        WS \
diff --git a/generate_emoji_html.py b/generate_emoji_html.py
new file mode 100755
index 000000000..e33bcc1b5
--- /dev/null
+++ b/generate_emoji_html.py
@@ -0,0 +1,287 @@
+#!/usr/bin/python
+#
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Build an html page showing emoji images.
+
+This takes a list of directories containing emoji image files, and
+builds an html page presenting the images along with their composition
+(for sequences) and unicode names (for individual emoji)."""
+
+import argparse
+import codecs
+import collections
+import glob
+from os import path
+import re
+import sys
+from nototools import unicode_data
+
+_default_dir = 'png/128'
+_default_ext = 'png'
+_default_prefix = 'emoji_u'
+_default_title = 'Emoji List'
+
+# DirInfo represents information about a directory of file names.
+# - directory is the directory path
+# - title is the title to use for this directory
+# - filemap is a dict mapping from a tuple of codepoints to the name of
+#   a file in the directory.
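+#
+# For example (illustrative values only, not data from this change), a
+# directory png/128 containing emoji_u1f600.png and emoji_u0023_20e3.png
+# would be described as:
+#   DirInfo('png/128', '128', {(0x1f600,): 'emoji_u1f600.png',
+#                              (0x23, 0x20e3): 'emoji_u0023_20e3.png'})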
+DirInfo = collections.namedtuple('DirInfo', 'directory, title, filemap')
+
+
+def _merge_keys(dicts):
+  """Return the union of the keys in the list of dicts."""
+  keys = []
+  for d in dicts:
+    keys.extend(d.keys())
+  return frozenset(keys)
+
+
+def _generate_row_cells(key, dir_infos):
+  CELL_PREFIX = '<td>'
+  def _cell(key, info):
+    if key in info.filemap:
+      return '<img src="%s">' % path.join(
+          info.directory, info.filemap[key])
+    return '-missing-'
+  return [CELL_PREFIX + _cell(key, info) for info in dir_infos]
+
+
+def _get_desc(key_tuple, dir_infos):
+  CELL_PREFIX = '<td class="desc">'
+  def _get_filepath(cp):
+    cp_key = tuple([cp])
+    for info in dir_infos:
+      if cp_key in info.filemap:
+        return path.join(info.directory, info.filemap[cp_key])
+    return None
+
+  def _get_part(cp):
+    if cp == 0x200d:  # zwj, common so replace with '+'
+      return '+'
+    if cp == 0xfe0f:  # emoji variation selector, we ignore it
+      return None
+    fname = _get_filepath(cp)
+    if fname:
+      return '<img src="%s">' % fname
+    return '%04X' % cp
+
+  if len(key_tuple) == 1:
+    desc = 'U+%04X' % key_tuple
+  else:
+    desc = ' '.join(filter(None, [_get_part(cp) for cp in key_tuple]))
+  return CELL_PREFIX + desc
+
+
+def _get_name(key_tuple):
+  CELL_PREFIX = '<td class="name">'
+  if len(key_tuple) != 1:
+    name = ''
+  else:
+    cp = key_tuple[0]
+    if cp in unicode_data.proposed_emoji_cps():
+      name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
+    else:
+      name = unicode_data.name(cp, '(error)')
+  return CELL_PREFIX + name
+
+
+def _generate_content(dir_infos):
+  """Generate an html table for the infos."""
+  lines = ['<table>']
+  header_row = ['']
+  header_row.extend([info.title for info in dir_infos])
+  header_row.extend(['Description', 'Name'])
+  lines.append('<th>'.join(header_row))
+
+  all_keys = _merge_keys([info.filemap for info in dir_infos])
+  for key in sorted(all_keys):
+    row = []
+    row.extend(_generate_row_cells(key, dir_infos))
+    row.append(_get_desc(key, dir_infos))
+    row.append(_get_name(key))
+    lines.append(''.join(row))
+  return '\n  <tr>'.join(lines) + '\n</table>'
+
+
+def _get_image_data(image_dir, ext, prefix):
+  """Return a map from a tuple of cp sequences to a filename.
+
+  This filters by file extension, and expects the rest of the files
+  to match the prefix followed by a sequence of hex codepoints separated
+  by underscore. Files that don't match, duplicate sequences (because
+  of casing), and out_of_range or empty codepoints raise an error."""
+
+  fails = []
+  result = {}
+  expect_re = re.compile(r'%s([0-9A-Fa-f_]+)\.%s' % (prefix, ext))
+  for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
+    filename = path.basename(f)
+    m = expect_re.match(filename)
+    if not m:
+      if filename.startswith('unknown_flag.'):
+        continue
+      fails.append('"%s" did not match: "%s"' % (expect_re.pattern, filename))
+      continue
+    seq = m.group(1)
+    try:
+      cps = tuple(int(s, 16) for s in seq.split('_'))
+    except:
+      fails.append('bad cp sequence: ' + filename)
+      continue
+    this_failed = False
+    for cp in cps:
+      if (cp > 0x10ffff):
+        fails.append('cp out of range: ' + filename)
+        this_failed = True
+        break
+    if this_failed:
+      continue
+    if cps in result:
+      fails.append('duplicate sequence: %s and %s' % (result[cps], filename))
+      continue
+    result[cps] = filename
+  if fails:
+    print >> sys.stderr, 'get_image_data failed (%s, %s, %s):\n %s' % (
+        image_dir, ext, prefix, '\n '.join(fails))
+    raise ValueError('get image data failed')
+  return result
+
+
+def _get_dir_infos(
+    image_dirs, exts=None, prefixes=None, titles=None,
+    default_ext=_default_ext, default_prefix=_default_prefix):
+  """Return a list of DirInfos for the image_dirs. When defined,
+  exts, prefixes, and titles should be the same length as image_dirs.
+  Titles default to using the last segments of the image_dirs,
+  exts and prefixes default to the corresponding default values."""
+
+  count = len(image_dirs)
+  if not titles:
+    titles = [None] * count
+  elif len(titles) != count:
+    raise ValueError('have %d image dirs but %d titles' % (
+        count, len(titles)))
+  if not exts:
+    exts = [default_ext] * count
+  elif len(exts) != count:
+    raise ValueError('have %d image dirs but %d extensions' % (
+        count, len(exts)))
+  if not prefixes:
+    prefixes = [default_prefix] * count
+  elif len(prefixes) != count:
+    raise ValueError('have %d image dirs but %d prefixes' % (
+        count, len(prefixes)))
+
+  infos = []
+  for i in range(count):
+    image_dir = image_dirs[i]
+    title = titles[i] or path.basename(path.normpath(image_dir))
+    ext = exts[i] or default_ext
+    prefix = prefixes[i] or default_prefix
+    filemap = _get_image_data(image_dir, ext, prefix)
+    infos.append(DirInfo(image_dir, title, filemap))
+  return infos
+
+
+def _instantiate_template(template, arg_dict):
+  id_regex = re.compile('{{([a-zA-Z0-9_]+)}}')
+  ids = set(m.group(1) for m in id_regex.finditer(template))
+  keyset = set(arg_dict.keys())
+  missing_ids = ids - keyset
+  extra_args = keyset - ids
+  if extra_args:
+    print >> sys.stderr, (
+        'the following %d args are unused:\n%s' %
+        (len(extra_args), ', '.join(sorted(extra_args))))
+  text = template
+  if missing_ids:
+    raise ValueError(
+        'the following %d ids in the template have no args:\n%s' %
+        (len(missing_ids), ', '.join(sorted(missing_ids))))
+  for arg in ids:
+    text = re.sub('{{%s}}' % arg, arg_dict[arg], text)
+  return text
+
+
+TEMPLATE = """
+<html>
+  <head>
+    <title>{{title}}</title>
+    <style>{{style}}</style>
+  </head>
+  <body>
+    {{content}}
+  </body>
+</html>
+"""
+
+STYLE = """
+  tbody { background-color: rgb(210, 210, 210) }
+  tbody img { width: 64px; height: 64px }
+  tbody .desc { font-size: 20pt; font-weight: bold }
+  tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
+  tbody .name { background-color: white }
+"""
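+# Sketch of the substitution performed by _instantiate_template (hypothetical
+# input, not part of this change):
+#   _instantiate_template('<title>{{title}}</title>', {'title': 'Emoji List'})
+#   returns '<title>Emoji List</title>'; unused args only warn, but template
+#   ids with no corresponding arg raise ValueError.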
+
+def write_html_page(filename, page_title, dir_infos):
+  content = _generate_content(dir_infos)
+  text = _instantiate_template(
+      TEMPLATE, {'title': page_title, 'style': STYLE, 'content': content})
+  with codecs.open(filename, 'w', 'utf-8') as f:
+    f.write(text)
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      'filename', help='path to output file', metavar='filename')
+  parser.add_argument(
+      '--page_title', help='page title', metavar='title', default='Emoji Table')
+  parser.add_argument(
+      '-d', '--image_dirs', help='image directories', metavar='dir',
+      nargs='+')
+  parser.add_argument(
+      '-e', '--exts', help='file extension, one per image dir', metavar='ext',
+      nargs='*')
+  parser.add_argument(
+      '-p', '--prefixes', help='file name prefix, one per image dir',
+      metavar='prefix', nargs='*')
+  parser.add_argument(
+      '-t', '--titles', help='title, one per image dir', metavar='title',
+      nargs='*')
+  parser.add_argument(
+      '-de', '--default_ext', help='default extension', metavar='ext',
+      default=_default_ext)
+  parser.add_argument(
+      '-dp', '--default_prefix', help='default prefix', metavar='prefix',
+      default=_default_prefix)
+
+  args = parser.parse_args()
+  file_parts = path.splitext(args.filename)
+  if file_parts[1] != '.html':
+    args.filename = file_parts[0] + '.html'
+    print 'added .html extension to filename:\n%s' % args.filename
+
+  dir_infos = _get_dir_infos(
+      args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext,
+      args.default_prefix)
+
+  write_html_page(args.filename, args.page_title, dir_infos)
+
+
+if __name__ == "__main__":
+  main()
diff --git a/png/128/emoji_ufe82b.png b/png/128/emoji_ufe82b.png
new file mode 100644
index 000000000..90ffa25e6
Binary files /dev/null and b/png/128/emoji_ufe82b.png differ
diff --git a/third_party/color_emoji/add_glyphs.py b/third_party/color_emoji/add_glyphs.py
index 0d78a7d2b..77d5c7e89 100644
--- a/third_party/color_emoji/add_glyphs.py
+++ b/third_party/color_emoji/add_glyphs.py
@@ -1,10 +1,14 @@
 #!/usr/bin/env python
 
-import collections, glob, os, sys
+import collections, glob, os, re, sys
 from fontTools import ttx
 from fontTools.ttLib.tables import otTables
 from png import PNG
 
+# PUA character for unknown flag. This avoids the legacy emoji pua values, but
+# is in the same area.
+UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
+
 sys.path.append(
     os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
 import add_emoji_gsub
@@ -78,6 +82,15 @@ EXTRA_SEQUENCES = {
     'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
 }
 
+# Flag aliases - from: to
+FLAG_ALIASES = {
+    'BV': 'NO',
+    'SJ': 'NO',
+    'CP': 'FR',
+    'HM': 'AU',
+    'UM': 'US',
+}
+
 if len (sys.argv) < 4:
     print >>sys.stderr, """
 Usage:
@@ -154,9 +167,9 @@ def add_lig_sequence(ligatures, seq, n):
 
 for (u, filename) in img_pairs:
-    # print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
     n = glyph_name (u)
     glyph_names.add(n)
+    # print "Adding glyph for %s" % n
     g.append (n)
     for char in u:
@@ -180,6 +193,53 @@ for n in EXTRA_SEQUENCES:
   else:
     print 'extras: no glyph for %s' % n
 
+# Add missing regional indicator sequences and flag aliases
+# if we support any.
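+# (Illustrative note: the regional indicator symbols are U+1F1E6..U+1F1FF, so
+# the 'FR' flag, for instance, is the ligature of glyphs u1F1EB and u1F1F7.)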
+regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200))
+
+def _is_flag_sequence(t):
+  return len(t) == 2 and t[0] in regional_names and t[1] in regional_names
+
+have_flags = False
+for k in ligatures:
+  if _is_flag_sequence(k):
+    have_flags = True
+    break
+
+if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
+  raise ValueError(
+      'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
+
+# sigh, too many separate files with the same code.
+# copied from add_emoji_gsub.
+def _reg_indicator(letter):
+  assert 'A' <= letter <= 'Z'
+  return 0x1F1E6 + ord(letter) - ord('A')
+
+def _reg_lig_sequence(flag_name):
+  """Returns a tuple of strings naming the codepoints that form the ligature."""
+  assert len(flag_name) == 2
+  return tuple('u%X' % _reg_indicator(cp) for cp in flag_name)
+
+def _reg_lig_name(flag_name):
+  """Returns a glyph name for the flag name."""
+  return '_'.join(_reg_lig_sequence(flag_name))
+
+if have_flags:
+  print 'Adding flag aliases.'
+  for flag_from, flag_to in FLAG_ALIASES.iteritems():
+    seq = _reg_lig_sequence(flag_from)
+    name = _reg_lig_name(flag_to)
+    add_lig_sequence(ligatures, seq, name)
+
+  print 'Adding unused flag sequences.'
+  # every flag sequence we don't have gets the missing flag glyph
+  for first in regional_names:
+    for second in regional_names:
+      seq = (first, second)
+      if seq not in ligatures:
+        add_lig_sequence(ligatures, seq, UNKNOWN_FLAG_GLYPH_NAME)
+
 keyed_ligatures = collections.defaultdict(list)
 for k, v in ligatures.iteritems():
diff --git a/third_party/region-flags/IC.png b/third_party/region-flags/IC.png
new file mode 100644
index 000000000..cb5d9f2fe
Binary files /dev/null and b/third_party/region-flags/IC.png differ
diff --git a/third_party/region-flags/svg/IC.svg b/third_party/region-flags/svg/IC.svg
index 96d6b1307..87f9e7480 100644
--- a/third_party/region-flags/svg/IC.svg
+++ b/third_party/region-flags/svg/IC.svg
@@ -1,739 +1,6 @@
[SVG markup not reproduced here: the original 739-line IC.svg, including its
image/svg+xml editor metadata, is replaced by a simplified 6-line SVG.]
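For review purposes only (not part of the patch), the alias handling above can be traced
by hand with the helpers added to add_glyphs.py; the codepoints below follow directly
from _reg_indicator:

    _reg_lig_sequence('UM')  # -> ('u1F1FA', 'u1F1F2'), the UM regional-indicator pair
    _reg_lig_name('US')      # -> 'u1F1FA_u1F1F8', the existing US flag glyph
    # add_lig_sequence thus points the UM sequence at the US glyph, and any
    # remaining pair with no ligature falls back to UNKNOWN_FLAG_GLYPH_NAME (uFE82B).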