Add check for emoji sequence coverage.

pull/86/head
Doug Felt 2017-01-19 10:50:40 -08:00
parent 2a6c35a1bd
commit dd2244a9b8
1 changed files with 41 additions and 1 deletions

View File

@ -26,6 +26,9 @@ import sys
from nototools import unicode_data
ZWJ = 0x200d
EMOJI_VS = 0xfe0f
def _is_regional_indicator(cp):
return 0x1f1e6 <= cp <= 0x1f1ff
@ -128,11 +131,47 @@ def _check_skintone(sorted_seq_to_filepath):
base_to_modifiers[cp] = set()
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
if len(modifiers) != 5:
print 'emoji base %04x has %d modifiers defined (%s) in %s' % (
print >> sys.stderr, 'emoji base %04x has %d modifiers defined (%s) in %s' % (
cp, len(modifiers),
', '.join('%04x' % cp for cp in sorted(modifiers)), fp)
def _check_zwj_sequences(seq_to_filepath):
"""Verify that zwj sequences are valid."""
zwj_sequence_to_type = unicode_data.get_emoji_zwj_sequences()
# strip emoji variant selectors and add these back in
zwj_sequence_without_vs_to_type_canonical = {}
for seq, seq_type in zwj_sequence_to_type.iteritems():
if EMOJI_VS in seq:
stripped_seq = tuple(s for s in seq if s != EMOJI_VS)
zwj_sequence_without_vs_to_type_canonical[stripped_seq] = (seq_type, seq)
zwj_seq_to_filepath = {
seq: fp for seq, fp in seq_to_filepath.iteritems()
if ZWJ in seq}
for seq, fp in zwj_seq_to_filepath.iteritems():
if seq not in zwj_sequence_to_type:
if seq not in zwj_sequence_without_vs_to_type_canonical:
print >> sys.stderr, 'zwj sequence not defined: %s' % fp
else:
_, can = zwj_sequence_without_vs_to_type_canonical[seq]
print >> sys.stderr, 'canonical sequence %s contains vs: %s' % (
_seq_string(can), fp)
# check that all zwj sequences are covered
for seq in zwj_seq_to_filepath:
if seq in zwj_sequence_to_type:
del zwj_sequence_to_type[seq]
elif seq in zwj_sequence_without_vs_to_type_canonical:
canon_seq = zwj_sequence_without_vs_to_type_canonical[seq][1]
del zwj_sequence_to_type[canon_seq]
if zwj_sequence_to_type:
print >> sys.stderr, 'missing %d zwj sequences' % len(zwj_sequence_to_type)
for seq, seq_type in sorted(zwj_sequence_to_type.items()):
print >> sys.stderr, ' %s: %s' % (_seq_string(seq), seq_type)
def check_sequence_to_filepath(seq_to_filepath):
sorted_seq_to_filepath = collections.OrderedDict(
sorted(seq_to_filepath.items()))
@ -140,6 +179,7 @@ def check_sequence_to_filepath(seq_to_filepath):
_check_zwj(sorted_seq_to_filepath)
_check_flags(sorted_seq_to_filepath)
_check_skintone(sorted_seq_to_filepath)
_check_zwj_sequences(sorted_seq_to_filepath)
def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):