diff --git a/Makefile b/Makefile index 9760d6721..ca6b7e34b 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,8 @@ VS_ADDER = add_vs_cmap.py # from nototools EMOJI_SRC_DIR ?= png/128 FLAGS_SRC_DIR := third_party/region-flags/png +SEQUENCE_CHECK_PY = check_emoji_sequences.py + BUILD_DIR := build EMOJI_DIR := $(BUILD_DIR)/emoji FLAGS_DIR := $(BUILD_DIR)/flags @@ -204,14 +206,22 @@ $(COMPRESSED_DIR)/%.png: $(QUANTIZED_DIR)/%.png | check_tools $(COMPRESSED_DIR) @rm -f "$@" ttx "$<" -$(EMOJI).ttf: $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \ +$(EMOJI).ttf: check_sequence $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \ $(ALL_COMPRESSED_FILES) | check_tools + @$(PYTHON) $(EMOJI_BUILDER) $(SMALL_METRICS) -V $< "$@" "$(COMPRESSED_DIR)/emoji_u" @$(PYTHON) $(PUA_ADDER) "$@" "$@-with-pua" @$(VS_ADDER) -vs 2640 2642 2695 --dstdir '.' -o "$@-with-pua-varsel" "$@-with-pua" @mv "$@-with-pua-varsel" "$@" @rm "$@-with-pua" +check_sequence: +ifdef BYPASS_SEQUENCE_CHECK + @echo Bypassing the emoji sequence checks +else + $(PYTHON) $(SEQUENCE_CHECK_PY) -d $(EMOJI_SRC_DIR) -c +endif + clean: rm -f $(EMOJI).ttf $(EMOJI).tmpl.ttf $(EMOJI).tmpl.ttx rm -f waveflag diff --git a/check_emoji_sequences.py b/check_emoji_sequences.py index dff1b8657..3405f2e62 100755 --- a/check_emoji_sequences.py +++ b/check_emoji_sequences.py @@ -14,7 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Compare emoji image file namings against unicode property data.""" +"""Compare emoji image file namings against unicode property data. +The intent of this script is to check if the resulting font will pass +the Android linter: +https://android.googlesource.com/platform/frameworks/base/+/master/tools/fonts/fontchain_linter.py +""" from __future__ import print_function import argparse @@ -51,7 +55,7 @@ def seq_name(seq): def strip_vs_map(seq_map): return { unicode_data.strip_emoji_vs(k): v - for k, v in seq_map.iteritems()} + for k, v in seq_map.items()} _namedata = [ strip_vs_map(unicode_data.get_emoji_combining_sequences()), strip_vs_map(unicode_data.get_emoji_flag_sequences()), @@ -76,9 +80,9 @@ def seq_name(seq): def _check_no_vs(sorted_seq_to_filepath): """Our image data does not use emoji presentation variation selectors.""" - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): if EMOJI_VS in seq: - print('check no VS: FE0F in path: %s' % fp) + print(f'check no VS: {EMOJI_VS} in path: {fp}') def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version): @@ -99,7 +103,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version): valid_cps |= TAG_SET # used in subregion tag sequences not_emoji = {} - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): for cp in seq: if cp not in valid_cps: if cp not in not_emoji: @@ -108,48 +112,46 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version): if len(not_emoji): print( - 'check valid emoji cps: %d non-emoji cp found' % len(not_emoji), - file=sys.stderr) + f'check valid emoji cps: {len(not_emoji)} non-emoji cp found', file=sys.stderr) for cp in sorted(not_emoji): fps = not_emoji[cp] print( - 'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)), - file=sys.stderr) + f'check valid emoji cps: {cp} (in {len(fps)} sequences)', file=sys.stderr) def _check_zwj(sorted_seq_to_filepath): """Ensure zwj is only between two appropriate emoji. This is a 'pre-check' that reports this specific problem.""" - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): if ZWJ not in seq: continue if seq[0] == ZWJ: - print('check zwj: zwj at head of sequence in %s' % fp, file=sys.stderr) + print(f'check zwj: zwj at head of sequence in {fp}', file=sys.stderr) if len(seq) == 1: continue if seq[-1] == ZWJ: - print('check zwj: zwj at end of sequence in %s' % fp, file=sys.stderr) + print(f'check zwj: zwj at end of sequence in {fp}', file=sys.stderr) for i, cp in enumerate(seq): if cp == ZWJ: if i > 0: pcp = seq[i-1] if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp): print( - 'check zwj: non-emoji %04x preceeds ZWJ in %s' % (pcp, fp), + f'check zwj: non-emoji {pcp} preceeds ZWJ in {fp}', file=sys.stderr) if i < len(seq) - 1: fcp = seq[i+1] if not unicode_data.is_emoji(fcp): print( - 'check zwj: non-emoji %04x follows ZWJ in %s' % (fcp, fp), + f'check zwj: non-emoji {fcp} follows ZWJ in {fp}', file=sys.stderr) def _check_flags(sorted_seq_to_filepath): """Ensure regional indicators are only in sequences of one or two, and never mixed.""" - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): have_reg = None for cp in seq: is_reg = unicode_data.is_regional_indicator(cp) @@ -157,13 +159,13 @@ def _check_flags(sorted_seq_to_filepath): have_reg = is_reg elif have_reg != is_reg: print( - 'check flags: mix of regional and non-regional in %s' % fp, + f'check flags: mix of regional and non-regional in {fp}', file=sys.stderr) if have_reg and len(seq) > 2: # We provide dummy glyphs for regional indicators, so there are sequences # with single regional indicator symbols, the len check handles this. print( - 'check flags: regional indicator sequence length != 2 in %s' % fp, + f'check flags: regional indicator sequence length != 2 in {fp}', file=sys.stderr) def _check_tags(sorted_seq_to_filepath): @@ -173,19 +175,19 @@ def _check_tags(sorted_seq_to_filepath): BLACK_FLAG = 0x1f3f4 BLACK_FLAG_SET = set([BLACK_FLAG]) - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): seq_set = set(cp for cp in seq) overlap_set = seq_set & TAG_SET if not overlap_set: continue if seq[0] != BLACK_FLAG: - print('check tags: bad start tag in %s' % fp) + print(f'check tags: bad start tag in {fp}') elif seq[-1] != END_TAG: - print('check tags: bad end tag in %s' % fp) + print(f'check tags: bad end tag in {fp}') elif len(seq) < 4: - print('check tags: sequence too short in %s' % fp) + print(f'check tags: sequence too short in {fp}') elif seq_set - TAG_SET != BLACK_FLAG_SET: - print('check tags: non-tag items in %s' % fp) + print(f'check tags: non-tag items in {fp}') def _check_skintone(sorted_seq_to_filepath): @@ -193,27 +195,27 @@ def _check_skintone(sorted_seq_to_filepath): to take them. May appear standalone, though. Also check that emoji that take skin tone modifiers have a complete set.""" base_to_modifiers = collections.defaultdict(set) - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): for i, cp in enumerate(seq): if unicode_data.is_skintone_modifier(cp): if i == 0: if len(seq) > 1: print( - 'check skintone: skin color selector first in sequence %s' % fp, + f'check skintone: skin color selector first in sequence {fp}', file=sys.stderr) # standalone are ok continue pcp = seq[i-1] if not unicode_data.is_emoji_modifier_base(pcp): print( - 'check skintone: emoji skintone modifier applied to non-base ' + - 'at %d: %s' % (i, fp), file=sys.stderr) + f'check skintone: emoji skintone modifier applied to non-base at {i}: {fp}', + file=sys.stderr) else: if pcp not in base_to_modifiers: base_to_modifiers[pcp] = set() base_to_modifiers[pcp].add(cp) - for cp, modifiers in sorted(base_to_modifiers.iteritems()): + for cp, modifiers in sorted(base_to_modifiers.items()): if len(modifiers) != 5: print( 'check skintone: base %04x has %d modifiers defined (%s) in %s' % ( @@ -224,27 +226,28 @@ def _check_skintone(sorted_seq_to_filepath): def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version): """Verify that zwj sequences are valid for the given unicode version.""" - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): if ZWJ not in seq: continue age = unicode_data.get_emoji_sequence_age(seq) if age is None or unicode_version is not None and age > unicode_version: - print('check zwj sequences: undefined sequence %s' % fp) + print(f'check zwj sequences: undefined sequence {fp}') def _check_no_alias_sources(sorted_seq_to_filepath): """Check that we don't have sequences that we expect to be aliased to some other sequence.""" aliases = add_aliases.read_default_emoji_aliases() - for seq, fp in sorted_seq_to_filepath.iteritems(): + for seq, fp in sorted_seq_to_filepath.items(): if seq in aliases: - print('check no alias sources: aliased sequence %s' % fp) + print(f'check no alias sources: aliased sequence {fp}') def _check_coverage(seq_to_filepath, unicode_version): """Ensure we have all and only the cps and sequences that we need for the font as of this version.""" + coverage_pass = True age = unicode_version non_vs_to_canonical = {} @@ -258,85 +261,53 @@ def _check_coverage(seq_to_filepath, unicode_version): if v not in seq_to_filepath and v not in non_vs_to_canonical: alias_str = unicode_data.seq_to_string(k) target_str = unicode_data.seq_to_string(v) - print('coverage: alias %s missing target %s' % (alias_str, target_str)) + print(f'coverage: alias {alias_str} missing target {target_str}') + coverage_pass = False continue if k in seq_to_filepath or k in non_vs_to_canonical: alias_str = unicode_data.seq_to_string(k) target_str = unicode_data.seq_to_string(v) - print('coverage: alias %s already exists as %s (%s)' % ( - alias_str, target_str, seq_name(v))) + print(f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})') + coverage_pass = False continue filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]] seq_to_filepath[k] = 'alias:' + filename # check single emoji, this includes most of the special chars - emoji = sorted(unicode_data.get_emoji(age=age)) + emoji = sorted(unicode_data.get_emoji()) for cp in emoji: if tuple([cp]) not in seq_to_filepath: print( - 'coverage: missing single %04x (%s)' % ( - cp, unicode_data.name(cp, ''))) + f'coverage: missing single {cp} ({unicode_data.name(cp)})') + coverage_pass = False # special characters # all but combining enclosing keycap are currently marked as emoji - for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a): + for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)): if cp not in emoji and tuple([cp]) not in seq_to_filepath: - print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp))) + print(f'coverage: missing special {cp} ({unicode_data.name(cp)})') + coverage_pass = False # combining sequences comb_seq_to_name = sorted( - unicode_data.get_emoji_combining_sequences(age=age).iteritems()) + unicode_data._emoji_sequence_data.items()) for seq, name in comb_seq_to_name: if seq not in seq_to_filepath: # strip vs and try again non_vs_seq = unicode_data.strip_emoji_vs(seq) if non_vs_seq not in seq_to_filepath: - print('coverage: missing combining sequence %s (%s)' % - (unicode_data.seq_to_string(seq), name)) - - # flag sequences - flag_seq_to_name = sorted( - unicode_data.get_emoji_flag_sequences(age=age).iteritems()) - for seq, name in flag_seq_to_name: - if seq not in seq_to_filepath: - print('coverage: missing flag sequence %s (%s)' % - (unicode_data.seq_to_string(seq), name)) - - # skin tone modifier sequences - mod_seq_to_name = sorted( - unicode_data.get_emoji_modifier_sequences(age=age).iteritems()) - for seq, name in mod_seq_to_name: - if seq not in seq_to_filepath: - print('coverage: missing modifier sequence %s (%s)' % ( - unicode_data.seq_to_string(seq), name)) - - # zwj sequences - # some of ours include the emoji presentation variation selector and some - # don't, and the same is true for the canonical sequences. normalize all - # of them to omit it to test coverage, but report the canonical sequence. - zwj_seq_without_vs = set() - for seq in seq_to_filepath: - if ZWJ not in seq: - continue - if EMOJI_VS in seq: - seq = tuple(cp for cp in seq if cp != EMOJI_VS) - zwj_seq_without_vs.add(seq) - - for seq, name in sorted( - unicode_data.get_emoji_zwj_sequences(age=age).iteritems()): - if EMOJI_VS in seq: - test_seq = tuple(s for s in seq if s != EMOJI_VS) - else: - test_seq = seq - if test_seq not in zwj_seq_without_vs: - print('coverage: missing (canonical) zwj sequence %s (%s)' % ( - unicode_data.seq_to_string(seq), name)) + print(f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})') + coverage_pass = False # check for 'unknown flag' # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that # don't start with our prefix so 'unknown_flag' would be excluded by default. if tuple([0xfe82b]) not in seq_to_filepath: print('coverage: missing unknown flag PUA fe82b') + coverage_pass = False + + if not coverage_pass: + exit("Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'") def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage): @@ -360,9 +331,9 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix): of a name to stderr.""" segment_re = re.compile(r'^[0-9a-f]{4,6}$') result = {} - for name, dirname in name_to_dirpath.iteritems(): + for name, dirname in name_to_dirpath.items(): if not name.startswith(prefix): - print('expected prefix "%s" for "%s"' % (prefix, name)) + print(f'expected prefix "{prefix}" for "{name}"') continue segments = name[len(prefix): -len(suffix)].split('_') @@ -370,12 +341,12 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix): seq = [] for s in segments: if not segment_re.match(s): - print('bad codepoint name "%s" in %s/%s' % (s, dirname, name)) + print(f'bad codepoint name "{s}" in {dirname}/{name}') segfail = True continue n = int(s, 16) if n > 0x10ffff: - print('codepoint "%s" out of range in %s/%s' % (s, dirname, name)) + print(f'codepoint "{s}" out of range in {dirname}/{name}') segfail = True continue seq.append(n) @@ -422,15 +393,14 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage): msg = '' if unicode_version: msg = ' (%3.1f)' % unicode_version - print('Checking files with prefix "%s" and suffix "%s"%s in:\n %s' % ( - prefix, suffix, msg, '\n '.join(dirs))) + print(f'Checking files with prefix "{prefix}" and suffix "{suffix}"{msg} in: {dirs}') name_to_dirpath = collect_name_to_dirpath_with_override( dirs, prefix=prefix, suffix=suffix, exclude=exclude) - print('checking %d names' % len(name_to_dirpath)) + print(f'checking {len(name_to_dirpath)} names') seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix) - print('checking %d sequences' % len(seq_to_filepath)) + print(f'checking {len(seq_to_filepath)} sequences') check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage) - print('done.') + print('done running checks') def main():