Merge pull request #46 from dougfelt/emoji_html_fix

Emoji html fix
pull/52/head
dougfelt 2016-03-16 16:49:00 -07:00
commit c6379827aa
6 changed files with 360 additions and 744 deletions

View File

@ -43,8 +43,10 @@ RENAMED_FLAGS_DIR := $(BUILD_DIR)/renamed_flags
QUANTIZED_DIR := $(BUILD_DIR)/quantized_pngs
COMPRESSED_DIR := $(BUILD_DIR)/compressed_pngs
# Unknown flag is PUA fe82b
LIMITED_FLAGS = CN DE ES FR GB IT JP KR RU US
SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
SELECTED_FLAGS = AC AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ \
BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BW BY BZ \
CA CC CD CF CG CH CI CK CL CM CN CO CR CU CV CW CX CY CZ \
DE DJ DK DM DO DZ \
@ -52,7 +54,7 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
FI FJ FM FO FR \
GA GB GD GE GG GH GI GL GM GN GQ GR GT GU GW GY \
HK HN HR HT HU \
ID IE IL IM IN IO IQ IR IS IT \
IC ID IE IL IM IN IO IQ IR IS IT \
JE JM JO JP \
KE KG KH KI KM KN KP KR KW KY KZ \
LA LB LC LI LK LR LS LT LU LV LY \
@ -62,8 +64,8 @@ SELECTED_FLAGS = AD AE AF AG AI AL AM AO AR AS AT AU AW AX AZ \
PA PE PF PG PH PK PL PN PR PS PT PW PY \
QA \
RO RS RU RW \
SA SB SC SD SE SG SI SK SL SM SN SO SR SS ST SV SX SY SZ \
TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
SA SB SC SD SE SG SH SI SK SL SM SN SO SR SS ST SV SX SY SZ \
TA TC TD TG TH TJ TK TL TM TN TO TR TT TV TW TZ \
UA UG US UY UZ \
VA VC VE VG VI VN VU \
WS \

View File

@ -0,0 +1,287 @@
#!/usr/bin/python
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build an html page showing emoji images.
This takes a list of directories containing emoji image files, and
builds an html page presenting the images along with their composition
(for sequences) and unicode names (for individual emoji)."""
import argparse
import codecs
import collections
import glob
from os import path
import re
import sys
from nototools import unicode_data
# Module-level default values.
_default_dir = 'png/128'
_default_ext = 'png'
_default_prefix = 'emoji_u'
_default_title = 'Emoji List'

# DirInfo describes one directory of image files:
#   directory - path of the directory
#   title     - title to use for this directory
#   filemap   - dict mapping a tuple of codepoints to the name of a file
#               in the directory
DirInfo = collections.namedtuple('DirInfo', ['directory', 'title', 'filemap'])
def _merge_keys(dicts):
    """Return a frozenset holding every key found in any of the dicts."""
    return frozenset(key for mapping in dicts for key in mapping.keys())
def _generate_row_cells(key, dir_infos):
    """Return one '<td>' cell per dir info for the given key.

    A cell holds an <img> tag pointing at the file mapped to key in that
    directory, or the text '-missing-' when the directory has no such file.
    """
    CELL_PREFIX = '<td>'
    cells = []
    for info in dir_infos:
        if key in info.filemap:
            image_path = path.join(info.directory, info.filemap[key])
            body = '<img src="%s">' % image_path
        else:
            body = '-missing-'
        cells.append(CELL_PREFIX + body)
    return cells
def _get_desc(key_tuple, dir_infos):
    """Return the description cell for a codepoint sequence.

    A single codepoint is shown as 'U+XXXX'.  A longer sequence is shown as
    its parts separated by spaces: zwj (U+200D) becomes '+', the emoji
    variation selector (U+FE0F) is dropped, and every other codepoint is
    shown as the image of that single codepoint from the first directory
    that has one, or as its hex value when no directory has one.
    """
    CELL_PREFIX = '<td class="desc">'

    def _single_image_path(cp):
        # Path of the image for the lone codepoint, or None if not found.
        single_key = (cp,)
        for info in dir_infos:
            if single_key in info.filemap:
                return path.join(info.directory, info.filemap[single_key])
        return None

    if len(key_tuple) == 1:
        return CELL_PREFIX + 'U+%04X' % key_tuple

    parts = []
    for cp in key_tuple:
        if cp == 0xfe0f:
            # emoji variation selector, not shown
            continue
        if cp == 0x200d:
            # zwj is common, so abbreviate it
            parts.append('+')
            continue
        image_path = _single_image_path(cp)
        if image_path:
            parts.append('<img src="%s">' % image_path)
        else:
            parts.append('%04X' % cp)
    return CELL_PREFIX + ' '.join(parts)
def _get_name(key_tuple):
    """Return the name cell for a codepoint sequence.

    Sequences of any length other than one get an empty name.  A single
    codepoint gets its proposed-emoji name prefixed with '(proposed) ' when
    unicode_data lists it as proposed, otherwise the name reported by
    unicode_data.name with '(error)' passed as its second argument.
    """
    CELL_PREFIX = '<td class="name">'
    if len(key_tuple) != 1:
        return CELL_PREFIX + ''

    cp = key_tuple[0]
    if cp in unicode_data.proposed_emoji_cps():
        return CELL_PREFIX + '(proposed) ' + unicode_data.proposed_emoji_name(cp)
    return CELL_PREFIX + unicode_data.name(cp, '(error)')
def _generate_content(dir_infos):
    """Build the html table text for the given dir infos.

    The header has one column per dir info title followed by 'Description'
    and 'Name'.  There is one row per codepoint sequence present in any of
    the filemaps, in sorted order.
    """
    header_cells = ['']
    header_cells += [info.title for info in dir_infos]
    header_cells += ['Description', 'Name']

    lines = ['<table>', '<th>'.join(header_cells)]

    for key in sorted(_merge_keys([info.filemap for info in dir_infos])):
        cells = list(_generate_row_cells(key, dir_infos))
        cells.append(_get_desc(key, dir_infos))
        cells.append(_get_name(key))
        lines.append(''.join(cells))

    # Every line after '<table>' starts a new table row.
    return '\n <tr>'.join(lines) + '\n</table>'
def _get_image_data(image_dir, ext, prefix):
    """Return a map from a tuple of codepoints to an image file name.

    Files in image_dir are selected by extension.  Each selected name is
    expected to be the prefix followed by hex codepoints separated by
    underscores, then '.' and the extension.  Files whose name starts with
    'unknown_flag.' are skipped without complaint.

    Args:
      image_dir: directory to scan.
      ext: file extension, without the leading dot.
      prefix: literal text that precedes the codepoint sequence.

    Returns:
      A dict mapping each tuple of int codepoints to the file's base name.

    Raises:
      ValueError: if any file name does not match the expected form, has an
        empty or non-hex codepoint, has a codepoint above 0x10ffff, or
        yields the same sequence as another file (for example names that
        differ only in case or zero padding).  Every problem found is
        written to stderr before the exception is raised.
    """
    fails = []
    result = {}
    # prefix and ext are matched literally, and the pattern is anchored at
    # the end so nothing may follow the extension.
    expect_re = re.compile(
        r'%s([0-9A-Fa-f_]+)\.%s$' % (re.escape(prefix), re.escape(ext)))
    for f in sorted(glob.glob(path.join(image_dir, '*.%s' % ext))):
        filename = path.basename(f)
        m = expect_re.match(filename)
        if not m:
            if filename.startswith('unknown_flag.'):
                continue
            fails.append(
                '"%s" did not match: "%s"' % (expect_re.pattern, filename))
            continue

        try:
            # int() raises ValueError for empty segments such as in 'a__b'.
            cps = tuple(int(s, 16) for s in m.group(1).split('_'))
        except ValueError:
            fails.append('bad cp sequence: ' + filename)
            continue

        if any(cp > 0x10ffff for cp in cps):
            fails.append('cp out of range: ' + filename)
            continue

        if cps in result:
            fails.append(
                'duplicate sequence: %s and %s' % (result[cps], filename))
            continue

        result[cps] = filename

    if fails:
        # sys.stderr.write works the same under Python 2 and Python 3.
        sys.stderr.write(
            'get_image_data failed (%s, %s, %s):\n %s\n' % (
                image_dir, ext, prefix, '\n '.join(fails)))
        raise ValueError('get image data failed')
    return result
def _get_dir_infos(
        image_dirs, exts=None, prefixes=None, titles=None,
        default_ext=_default_ext, default_prefix=_default_prefix):
    """Return a list of DirInfos, one for each of the image_dirs.

    When supplied, exts, prefixes, and titles must each have the same
    length as image_dirs, otherwise ValueError is raised.  A missing title
    defaults to the last segment of its image dir; a missing ext or prefix
    defaults to default_ext or default_prefix.
    """
    count = len(image_dirs)

    def _per_dir(values, fallback, mismatch_msg):
        # Return values if given (checking its length), else count fallbacks.
        if not values:
            return [fallback] * count
        if len(values) != count:
            raise ValueError(mismatch_msg % (count, len(values)))
        return values

    titles = _per_dir(titles, None, 'have %d image dirs but %d titles')
    exts = _per_dir(exts, default_ext, 'have %d image dirs but %d extensions')
    prefixes = _per_dir(
        prefixes, default_prefix, 'have %d image dirs but %d prefixes')

    infos = []
    for image_dir, title, ext, prefix in zip(
            image_dirs, titles, exts, prefixes):
        if not title:
            title = path.basename(path.normpath(image_dir))
        filemap = _get_image_data(
            image_dir, ext or default_ext, prefix or default_prefix)
        infos.append(DirInfo(image_dir, title, filemap))
    return infos
def _instantiate_template(template, arg_dict):
    """Return template with each '{{id}}' replaced by arg_dict['id'].

    Substitution happens in a single pass and the values are inserted
    verbatim: backslashes in a value are not interpreted, and '{{id}}' text
    inside a value is not expanded again.

    Args:
      template: text containing '{{id}}' placeholders, where id consists of
        ASCII letters, digits, and underscores.
      arg_dict: dict mapping placeholder ids to replacement strings.

    Returns:
      The instantiated text.

    Raises:
      ValueError: if the template uses an id that is not in arg_dict.
        Unused entries in arg_dict only produce a warning on stderr.
    """
    id_regex = re.compile(r'\{\{([a-zA-Z0-9_]+)\}\}')
    ids = set(m.group(1) for m in id_regex.finditer(template))
    keyset = set(arg_dict)

    extra_args = keyset - ids
    if extra_args:
        # sys.stderr.write works the same under Python 2 and Python 3.
        sys.stderr.write(
            'the following %d args are unused:\n%s\n' %
            (len(extra_args), ', '.join(sorted(extra_args))))

    missing_ids = ids - keyset
    if missing_ids:
        raise ValueError(
            'the following %d ids in the template have no args:\n%s' %
            (len(missing_ids), ', '.join(sorted(missing_ids))))

    # A callable replacement is used so the value is taken literally; a
    # string replacement would have its backslash escapes processed.
    return id_regex.sub(lambda m: arg_dict[m.group(1)], template)
# Skeleton of the generated page.  The {{title}}, {{style}}, and {{content}}
# placeholders are filled in by write_html_page via _instantiate_template.
TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{title}}</title>
<style>{{style}}</style>
</head>
<body>
{{content}}
</body>
</html>
"""
# CSS substituted for {{style}}.  The .desc and .name selectors correspond to
# the classes on the cells produced by _get_desc and _get_name.
STYLE = """
tbody { background-color: rgb(210, 210, 210) }
tbody img { width: 64px; height: 64px }
tbody .desc { font-size: 20pt; font-weight: bold }
tbody .desc img { vertical-align: middle; width: 32px; height: 32px }
tbody .name { background-color: white }
"""
def write_html_page(filename, page_title, dir_infos):
    """Write an html page showing the images in dir_infos to filename.

    The page is TEMPLATE instantiated with page_title, STYLE, and the table
    generated from dir_infos, written as utf-8.
    """
    template_args = {
        'title': page_title,
        'style': STYLE,
        'content': _generate_content(dir_infos),
    }
    html = _instantiate_template(TEMPLATE, template_args)
    with codecs.open(filename, 'w', 'utf-8') as out:
        out.write(html)
def main():
    """Parse the command line and write the emoji html page.

    The output file name is given an '.html' extension if it does not
    already have one (any other extension is replaced).  When no image
    directories are passed, the single directory _default_dir is used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filename', help='path to output file', metavar='filename')
    parser.add_argument(
        '--page_title', help='page title', metavar='title',
        default='Emoji Table')
    parser.add_argument(
        '-d', '--image_dirs', help='image directories', metavar='dir',
        nargs='+', default=[_default_dir])
    parser.add_argument(
        '-e', '--exts', help='file extension, one per image dir',
        metavar='ext', nargs='*')
    parser.add_argument(
        '-p', '--prefixes', help='file name prefix, one per image dir',
        metavar='prefix', nargs='*')
    parser.add_argument(
        '-t', '--titles', help='title, one per image dir', metavar='title',
        nargs='*')
    parser.add_argument(
        '-de', '--default_ext', help='default extension', metavar='ext',
        default=_default_ext)
    parser.add_argument(
        '-dp', '--default_prefix', help='default prefix', metavar='prefix',
        default=_default_prefix)
    args = parser.parse_args()

    # splitext keeps the leading dot on the extension, so compare with
    # '.html'; comparing with 'html' would never be equal.
    file_parts = path.splitext(args.filename)
    if file_parts[1] != '.html':
        args.filename = file_parts[0] + '.html'
        # Single-argument print(...) behaves the same in Python 2 and 3.
        print('added .html extension to filename:\n%s' % args.filename)

    dir_infos = _get_dir_infos(
        args.image_dirs, args.exts, args.prefixes, args.titles,
        args.default_ext, args.default_prefix)

    write_html_page(args.filename, args.page_title, dir_infos)


if __name__ == "__main__":
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@ -1,10 +1,14 @@
#!/usr/bin/env python
import collections, glob, os, sys
import collections, glob, os, re, sys
from fontTools import ttx
from fontTools.ttLib.tables import otTables
from png import PNG
# PUA character for unknown flag. This avoids the legacy emoji pua values, but
# is in the same area.
# This is the glyph name that unused regional indicator pairs are mapped to.
UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
# Add the directory two levels above this file's directory to the module
# search path; this must happen before the import below.
# NOTE(review): presumably add_emoji_gsub lives in that directory - confirm.
sys.path.append(
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
import add_emoji_gsub
@ -78,6 +82,15 @@ EXTRA_SEQUENCES = {
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
}
# Flag aliases - from: to
# The regional indicator sequence for each key is added as a ligature for
# the flag glyph of its value.  Keys must be unique: a repeated key in a
# dict literal silently discards the earlier entry.
FLAG_ALIASES = {
    'BV': 'NO',
    'CP': 'FR',
    'HM': 'AU',
    'SJ': 'NO',
    'UM': 'US',
}
if len (sys.argv) < 4:
print >>sys.stderr, """
Usage:
@ -154,9 +167,9 @@ def add_lig_sequence(ligatures, seq, n):
for (u, filename) in img_pairs:
# print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
n = glyph_name (u)
glyph_names.add(n)
# print "Adding glyph for %s" % n
g.append (n)
for char in u:
@ -180,6 +193,53 @@ for n in EXTRA_SEQUENCES:
else:
print 'extras: no glyph for %s' % n
# Add missing regional indicator sequences and flag aliases
# if we support any.
# Glyph names ('u' + uppercase hex) of the 26 regional indicator symbols,
# U+1F1E6 through U+1F1FF.
regional_names = frozenset(
    ['u%X' % cp for cp in range(0x1F1E6, 0x1F1FF + 1)])
def _is_flag_sequence(t):
    """Return True if t is a pair of regional indicator glyph names."""
    if len(t) != 2:
        return False
    first, second = t[0], t[1]
    return first in regional_names and second in regional_names
# True if any ligature sequence collected so far is a flag sequence.
have_flags = any(_is_flag_sequence(lig_key) for lig_key in ligatures)

# Flags require the unknown flag glyph, which is used for every regional
# indicator pair that has no flag of its own.
if have_flags:
    if UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
        raise ValueError(
            'Have flags but no unknown flag glyph "%s"' %
            UNKNOWN_FLAG_GLYPH_NAME)
# The helpers below are copied from add_emoji_gsub; the same code is
# duplicated across too many separate files.
def _reg_indicator(letter):
    """Return the regional indicator codepoint for an uppercase letter A-Z."""
    assert 'A' <= letter <= 'Z'
    offset = ord(letter) - ord('A')
    return 0x1F1E6 + offset
def _reg_lig_sequence(flag_name):
    """Return the tuple of glyph names for the two letters of flag_name.

    Each name is 'u' followed by the uppercase hex regional indicator
    codepoint of the corresponding letter.
    """
    assert len(flag_name) == 2
    names = ['u%X' % _reg_indicator(letter) for letter in flag_name]
    return tuple(names)
def _reg_lig_name(flag_name):
    """Return the glyph name for a flag: its sequence names joined by '_'."""
    sequence = _reg_lig_sequence(flag_name)
    return '_'.join(sequence)
if have_flags:
    # Single-argument print(...) behaves the same in Python 2 and 3.
    print('Adding flag aliases.')
    for alias_code, target_code in FLAG_ALIASES.items():
        add_lig_sequence(
            ligatures,
            _reg_lig_sequence(alias_code),
            _reg_lig_name(target_code))

    print('Adding unused flag sequences')
    # Every regional indicator pair that still has no ligature gets the
    # unknown flag glyph.
    for first_name in regional_names:
        for second_name in regional_names:
            pair = (first_name, second_name)
            if pair in ligatures:
                continue
            add_lig_sequence(ligatures, pair, UNKNOWN_FLAG_GLYPH_NAME)
keyed_ligatures = collections.defaultdict(list)
for k, v in ligatures.iteritems():

BIN
third_party/region-flags/IC.png vendored 100644

Binary file not shown.

After

Width:  |  Height:  |  Size: 356 B

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 140 KiB

After

Width:  |  Height:  |  Size: 250 B