Merge pull request #299 from kabisa/feature/fontlinter

pull/312/head
Dave Crossland 2020-06-25 10:02:56 -04:00 committed by GitHub
commit f8131fc457
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 70 additions and 90 deletions

View File

@ -38,6 +38,8 @@ VS_ADDER = add_vs_cmap.py # from nototools
EMOJI_SRC_DIR ?= png/128 EMOJI_SRC_DIR ?= png/128
FLAGS_SRC_DIR := third_party/region-flags/png FLAGS_SRC_DIR := third_party/region-flags/png
SEQUENCE_CHECK_PY = check_emoji_sequences.py
BUILD_DIR := build BUILD_DIR := build
EMOJI_DIR := $(BUILD_DIR)/emoji EMOJI_DIR := $(BUILD_DIR)/emoji
FLAGS_DIR := $(BUILD_DIR)/flags FLAGS_DIR := $(BUILD_DIR)/flags
@ -204,14 +206,22 @@ $(COMPRESSED_DIR)/%.png: $(QUANTIZED_DIR)/%.png | check_tools $(COMPRESSED_DIR)
@rm -f "$@" @rm -f "$@"
ttx "$<" ttx "$<"
$(EMOJI).ttf: $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \ $(EMOJI).ttf: check_sequence $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \
$(ALL_COMPRESSED_FILES) | check_tools $(ALL_COMPRESSED_FILES) | check_tools
@$(PYTHON) $(EMOJI_BUILDER) $(SMALL_METRICS) -V $< "$@" "$(COMPRESSED_DIR)/emoji_u" @$(PYTHON) $(EMOJI_BUILDER) $(SMALL_METRICS) -V $< "$@" "$(COMPRESSED_DIR)/emoji_u"
@$(PYTHON) $(PUA_ADDER) "$@" "$@-with-pua" @$(PYTHON) $(PUA_ADDER) "$@" "$@-with-pua"
@$(VS_ADDER) -vs 2640 2642 2695 --dstdir '.' -o "$@-with-pua-varsel" "$@-with-pua" @$(VS_ADDER) -vs 2640 2642 2695 --dstdir '.' -o "$@-with-pua-varsel" "$@-with-pua"
@mv "$@-with-pua-varsel" "$@" @mv "$@-with-pua-varsel" "$@"
@rm "$@-with-pua" @rm "$@-with-pua"
check_sequence:
ifdef BYPASS_SEQUENCE_CHECK
@echo Bypassing the emoji sequence checks
else
$(PYTHON) $(SEQUENCE_CHECK_PY) -d $(EMOJI_SRC_DIR) -c
endif
clean: clean:
rm -f $(EMOJI).ttf $(EMOJI).tmpl.ttf $(EMOJI).tmpl.ttx rm -f $(EMOJI).ttf $(EMOJI).tmpl.ttf $(EMOJI).tmpl.ttx
rm -f waveflag rm -f waveflag

View File

@ -14,7 +14,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Compare emoji image file namings against unicode property data.""" """Compare emoji image file namings against unicode property data.
The intent of this script is to check if the resulting font will pass
the Android linter:
https://android.googlesource.com/platform/frameworks/base/+/master/tools/fonts/fontchain_linter.py
"""
from __future__ import print_function from __future__ import print_function
import argparse import argparse
@ -51,7 +55,7 @@ def seq_name(seq):
def strip_vs_map(seq_map): def strip_vs_map(seq_map):
return { return {
unicode_data.strip_emoji_vs(k): v unicode_data.strip_emoji_vs(k): v
for k, v in seq_map.iteritems()} for k, v in seq_map.items()}
_namedata = [ _namedata = [
strip_vs_map(unicode_data.get_emoji_combining_sequences()), strip_vs_map(unicode_data.get_emoji_combining_sequences()),
strip_vs_map(unicode_data.get_emoji_flag_sequences()), strip_vs_map(unicode_data.get_emoji_flag_sequences()),
@ -76,9 +80,9 @@ def seq_name(seq):
def _check_no_vs(sorted_seq_to_filepath): def _check_no_vs(sorted_seq_to_filepath):
"""Our image data does not use emoji presentation variation selectors.""" """Our image data does not use emoji presentation variation selectors."""
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
if EMOJI_VS in seq: if EMOJI_VS in seq:
print('check no VS: FE0F in path: %s' % fp) print(f'check no VS: {EMOJI_VS} in path: {fp}')
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version): def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
@ -99,7 +103,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
valid_cps |= TAG_SET # used in subregion tag sequences valid_cps |= TAG_SET # used in subregion tag sequences
not_emoji = {} not_emoji = {}
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
for cp in seq: for cp in seq:
if cp not in valid_cps: if cp not in valid_cps:
if cp not in not_emoji: if cp not in not_emoji:
@ -108,48 +112,46 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
if len(not_emoji): if len(not_emoji):
print( print(
'check valid emoji cps: %d non-emoji cp found' % len(not_emoji), f'check valid emoji cps: {len(not_emoji)} non-emoji cp found', file=sys.stderr)
file=sys.stderr)
for cp in sorted(not_emoji): for cp in sorted(not_emoji):
fps = not_emoji[cp] fps = not_emoji[cp]
print( print(
'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)), f'check valid emoji cps: {cp} (in {len(fps)} sequences)', file=sys.stderr)
file=sys.stderr)
def _check_zwj(sorted_seq_to_filepath): def _check_zwj(sorted_seq_to_filepath):
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check' """Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
that reports this specific problem.""" that reports this specific problem."""
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
if ZWJ not in seq: if ZWJ not in seq:
continue continue
if seq[0] == ZWJ: if seq[0] == ZWJ:
print('check zwj: zwj at head of sequence in %s' % fp, file=sys.stderr) print(f'check zwj: zwj at head of sequence in {fp}', file=sys.stderr)
if len(seq) == 1: if len(seq) == 1:
continue continue
if seq[-1] == ZWJ: if seq[-1] == ZWJ:
print('check zwj: zwj at end of sequence in %s' % fp, file=sys.stderr) print(f'check zwj: zwj at end of sequence in {fp}', file=sys.stderr)
for i, cp in enumerate(seq): for i, cp in enumerate(seq):
if cp == ZWJ: if cp == ZWJ:
if i > 0: if i > 0:
pcp = seq[i-1] pcp = seq[i-1]
if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp): if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
print( print(
'check zwj: non-emoji %04x precedes ZWJ in %s' % (pcp, fp), f'check zwj: non-emoji {pcp} precedes ZWJ in {fp}',
file=sys.stderr) file=sys.stderr)
if i < len(seq) - 1: if i < len(seq) - 1:
fcp = seq[i+1] fcp = seq[i+1]
if not unicode_data.is_emoji(fcp): if not unicode_data.is_emoji(fcp):
print( print(
'check zwj: non-emoji %04x follows ZWJ in %s' % (fcp, fp), f'check zwj: non-emoji {fcp} follows ZWJ in {fp}',
file=sys.stderr) file=sys.stderr)
def _check_flags(sorted_seq_to_filepath): def _check_flags(sorted_seq_to_filepath):
"""Ensure regional indicators are only in sequences of one or two, and """Ensure regional indicators are only in sequences of one or two, and
never mixed.""" never mixed."""
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
have_reg = None have_reg = None
for cp in seq: for cp in seq:
is_reg = unicode_data.is_regional_indicator(cp) is_reg = unicode_data.is_regional_indicator(cp)
@ -157,13 +159,13 @@ def _check_flags(sorted_seq_to_filepath):
have_reg = is_reg have_reg = is_reg
elif have_reg != is_reg: elif have_reg != is_reg:
print( print(
'check flags: mix of regional and non-regional in %s' % fp, f'check flags: mix of regional and non-regional in {fp}',
file=sys.stderr) file=sys.stderr)
if have_reg and len(seq) > 2: if have_reg and len(seq) > 2:
# We provide dummy glyphs for regional indicators, so there are sequences # We provide dummy glyphs for regional indicators, so there are sequences
# with single regional indicator symbols, the len check handles this. # with single regional indicator symbols, the len check handles this.
print( print(
'check flags: regional indicator sequence length != 2 in %s' % fp, f'check flags: regional indicator sequence length != 2 in {fp}',
file=sys.stderr) file=sys.stderr)
def _check_tags(sorted_seq_to_filepath): def _check_tags(sorted_seq_to_filepath):
@ -173,19 +175,19 @@ def _check_tags(sorted_seq_to_filepath):
BLACK_FLAG = 0x1f3f4 BLACK_FLAG = 0x1f3f4
BLACK_FLAG_SET = set([BLACK_FLAG]) BLACK_FLAG_SET = set([BLACK_FLAG])
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
seq_set = set(cp for cp in seq) seq_set = set(cp for cp in seq)
overlap_set = seq_set & TAG_SET overlap_set = seq_set & TAG_SET
if not overlap_set: if not overlap_set:
continue continue
if seq[0] != BLACK_FLAG: if seq[0] != BLACK_FLAG:
print('check tags: bad start tag in %s' % fp) print(f'check tags: bad start tag in {fp}')
elif seq[-1] != END_TAG: elif seq[-1] != END_TAG:
print('check tags: bad end tag in %s' % fp) print(f'check tags: bad end tag in {fp}')
elif len(seq) < 4: elif len(seq) < 4:
print('check tags: sequence too short in %s' % fp) print(f'check tags: sequence too short in {fp}')
elif seq_set - TAG_SET != BLACK_FLAG_SET: elif seq_set - TAG_SET != BLACK_FLAG_SET:
print('check tags: non-tag items in %s' % fp) print(f'check tags: non-tag items in {fp}')
def _check_skintone(sorted_seq_to_filepath): def _check_skintone(sorted_seq_to_filepath):
@ -193,27 +195,27 @@ def _check_skintone(sorted_seq_to_filepath):
to take them. May appear standalone, though. Also check that emoji that take to take them. May appear standalone, though. Also check that emoji that take
skin tone modifiers have a complete set.""" skin tone modifiers have a complete set."""
base_to_modifiers = collections.defaultdict(set) base_to_modifiers = collections.defaultdict(set)
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
for i, cp in enumerate(seq): for i, cp in enumerate(seq):
if unicode_data.is_skintone_modifier(cp): if unicode_data.is_skintone_modifier(cp):
if i == 0: if i == 0:
if len(seq) > 1: if len(seq) > 1:
print( print(
'check skintone: skin color selector first in sequence %s' % fp, f'check skintone: skin color selector first in sequence {fp}',
file=sys.stderr) file=sys.stderr)
# standalone are ok # standalone are ok
continue continue
pcp = seq[i-1] pcp = seq[i-1]
if not unicode_data.is_emoji_modifier_base(pcp): if not unicode_data.is_emoji_modifier_base(pcp):
print( print(
'check skintone: emoji skintone modifier applied to non-base ' + f'check skintone: emoji skintone modifier applied to non-base at {i}: {fp}',
'at %d: %s' % (i, fp), file=sys.stderr) file=sys.stderr)
else: else:
if pcp not in base_to_modifiers: if pcp not in base_to_modifiers:
base_to_modifiers[pcp] = set() base_to_modifiers[pcp] = set()
base_to_modifiers[pcp].add(cp) base_to_modifiers[pcp].add(cp)
for cp, modifiers in sorted(base_to_modifiers.iteritems()): for cp, modifiers in sorted(base_to_modifiers.items()):
if len(modifiers) != 5: if len(modifiers) != 5:
print( print(
'check skintone: base %04x has %d modifiers defined (%s) in %s' % ( 'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
@ -224,27 +226,28 @@ def _check_skintone(sorted_seq_to_filepath):
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version): def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
"""Verify that zwj sequences are valid for the given unicode version.""" """Verify that zwj sequences are valid for the given unicode version."""
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
if ZWJ not in seq: if ZWJ not in seq:
continue continue
age = unicode_data.get_emoji_sequence_age(seq) age = unicode_data.get_emoji_sequence_age(seq)
if age is None or unicode_version is not None and age > unicode_version: if age is None or unicode_version is not None and age > unicode_version:
print('check zwj sequences: undefined sequence %s' % fp) print(f'check zwj sequences: undefined sequence {fp}')
def _check_no_alias_sources(sorted_seq_to_filepath): def _check_no_alias_sources(sorted_seq_to_filepath):
"""Check that we don't have sequences that we expect to be aliased to """Check that we don't have sequences that we expect to be aliased to
some other sequence.""" some other sequence."""
aliases = add_aliases.read_default_emoji_aliases() aliases = add_aliases.read_default_emoji_aliases()
for seq, fp in sorted_seq_to_filepath.iteritems(): for seq, fp in sorted_seq_to_filepath.items():
if seq in aliases: if seq in aliases:
print('check no alias sources: aliased sequence %s' % fp) print(f'check no alias sources: aliased sequence {fp}')
def _check_coverage(seq_to_filepath, unicode_version): def _check_coverage(seq_to_filepath, unicode_version):
"""Ensure we have all and only the cps and sequences that we need for the """Ensure we have all and only the cps and sequences that we need for the
font as of this version.""" font as of this version."""
coverage_pass = True
age = unicode_version age = unicode_version
non_vs_to_canonical = {} non_vs_to_canonical = {}
@ -258,85 +261,53 @@ def _check_coverage(seq_to_filepath, unicode_version):
if v not in seq_to_filepath and v not in non_vs_to_canonical: if v not in seq_to_filepath and v not in non_vs_to_canonical:
alias_str = unicode_data.seq_to_string(k) alias_str = unicode_data.seq_to_string(k)
target_str = unicode_data.seq_to_string(v) target_str = unicode_data.seq_to_string(v)
print('coverage: alias %s missing target %s' % (alias_str, target_str)) print(f'coverage: alias {alias_str} missing target {target_str}')
coverage_pass = False
continue continue
if k in seq_to_filepath or k in non_vs_to_canonical: if k in seq_to_filepath or k in non_vs_to_canonical:
alias_str = unicode_data.seq_to_string(k) alias_str = unicode_data.seq_to_string(k)
target_str = unicode_data.seq_to_string(v) target_str = unicode_data.seq_to_string(v)
print('coverage: alias %s already exists as %s (%s)' % ( print(f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})')
alias_str, target_str, seq_name(v))) coverage_pass = False
continue continue
filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]] filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
seq_to_filepath[k] = 'alias:' + filename seq_to_filepath[k] = 'alias:' + filename
# check single emoji, this includes most of the special chars # check single emoji, this includes most of the special chars
emoji = sorted(unicode_data.get_emoji(age=age)) emoji = sorted(unicode_data.get_emoji())
for cp in emoji: for cp in emoji:
if tuple([cp]) not in seq_to_filepath: if tuple([cp]) not in seq_to_filepath:
print( print(
'coverage: missing single %04x (%s)' % ( f'coverage: missing single {cp} ({unicode_data.name(cp)})')
cp, unicode_data.name(cp, '<no name>'))) coverage_pass = False
# special characters # special characters
# all but combining enclosing keycap are currently marked as emoji # all but combining enclosing keycap are currently marked as emoji
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a): for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
if cp not in emoji and tuple([cp]) not in seq_to_filepath: if cp not in emoji and tuple([cp]) not in seq_to_filepath:
print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp))) print(f'coverage: missing special {cp} ({unicode_data.name(cp)})')
coverage_pass = False
# combining sequences # combining sequences
comb_seq_to_name = sorted( comb_seq_to_name = sorted(
unicode_data.get_emoji_combining_sequences(age=age).iteritems()) unicode_data._emoji_sequence_data.items())
for seq, name in comb_seq_to_name: for seq, name in comb_seq_to_name:
if seq not in seq_to_filepath: if seq not in seq_to_filepath:
# strip vs and try again # strip vs and try again
non_vs_seq = unicode_data.strip_emoji_vs(seq) non_vs_seq = unicode_data.strip_emoji_vs(seq)
if non_vs_seq not in seq_to_filepath: if non_vs_seq not in seq_to_filepath:
print('coverage: missing combining sequence %s (%s)' % print(f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})')
(unicode_data.seq_to_string(seq), name)) coverage_pass = False
# flag sequences
flag_seq_to_name = sorted(
unicode_data.get_emoji_flag_sequences(age=age).iteritems())
for seq, name in flag_seq_to_name:
if seq not in seq_to_filepath:
print('coverage: missing flag sequence %s (%s)' %
(unicode_data.seq_to_string(seq), name))
# skin tone modifier sequences
mod_seq_to_name = sorted(
unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
for seq, name in mod_seq_to_name:
if seq not in seq_to_filepath:
print('coverage: missing modifier sequence %s (%s)' % (
unicode_data.seq_to_string(seq), name))
# zwj sequences
# some of ours include the emoji presentation variation selector and some
# don't, and the same is true for the canonical sequences. normalize all
# of them to omit it to test coverage, but report the canonical sequence.
zwj_seq_without_vs = set()
for seq in seq_to_filepath:
if ZWJ not in seq:
continue
if EMOJI_VS in seq:
seq = tuple(cp for cp in seq if cp != EMOJI_VS)
zwj_seq_without_vs.add(seq)
for seq, name in sorted(
unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
if EMOJI_VS in seq:
test_seq = tuple(s for s in seq if s != EMOJI_VS)
else:
test_seq = seq
if test_seq not in zwj_seq_without_vs:
print('coverage: missing (canonical) zwj sequence %s (%s)' % (
unicode_data.seq_to_string(seq), name))
# check for 'unknown flag' # check for 'unknown flag'
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
# don't start with our prefix so 'unknown_flag' would be excluded by default. # don't start with our prefix so 'unknown_flag' would be excluded by default.
if tuple([0xfe82b]) not in seq_to_filepath: if tuple([0xfe82b]) not in seq_to_filepath:
print('coverage: missing unknown flag PUA fe82b') print('coverage: missing unknown flag PUA fe82b')
coverage_pass = False
if not coverage_pass:
exit("Please fix the problems mentioned above or run: make BYPASS_SEQUENCE_CHECK='True'")
def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage): def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage):
@ -360,9 +331,9 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
of a name to stderr.""" of a name to stderr."""
segment_re = re.compile(r'^[0-9a-f]{4,6}$') segment_re = re.compile(r'^[0-9a-f]{4,6}$')
result = {} result = {}
for name, dirname in name_to_dirpath.iteritems(): for name, dirname in name_to_dirpath.items():
if not name.startswith(prefix): if not name.startswith(prefix):
print('expected prefix "%s" for "%s"' % (prefix, name)) print(f'expected prefix "{prefix}" for "{name}"')
continue continue
segments = name[len(prefix): -len(suffix)].split('_') segments = name[len(prefix): -len(suffix)].split('_')
@ -370,12 +341,12 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
seq = [] seq = []
for s in segments: for s in segments:
if not segment_re.match(s): if not segment_re.match(s):
print('bad codepoint name "%s" in %s/%s' % (s, dirname, name)) print(f'bad codepoint name "{s}" in {dirname}/{name}')
segfail = True segfail = True
continue continue
n = int(s, 16) n = int(s, 16)
if n > 0x10ffff: if n > 0x10ffff:
print('codepoint "%s" out of range in %s/%s' % (s, dirname, name)) print(f'codepoint "{s}" out of range in {dirname}/{name}')
segfail = True segfail = True
continue continue
seq.append(n) seq.append(n)
@ -422,15 +393,14 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
msg = '' msg = ''
if unicode_version: if unicode_version:
msg = ' (%3.1f)' % unicode_version msg = ' (%3.1f)' % unicode_version
print('Checking files with prefix "%s" and suffix "%s"%s in:\n %s' % ( print(f'Checking files with prefix "{prefix}" and suffix "{suffix}"{msg} in: {dirs}')
prefix, suffix, msg, '\n '.join(dirs)))
name_to_dirpath = collect_name_to_dirpath_with_override( name_to_dirpath = collect_name_to_dirpath_with_override(
dirs, prefix=prefix, suffix=suffix, exclude=exclude) dirs, prefix=prefix, suffix=suffix, exclude=exclude)
print('checking %d names' % len(name_to_dirpath)) print(f'checking {len(name_to_dirpath)} names')
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix) seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
print('checking %d sequences' % len(seq_to_filepath)) print(f'checking {len(seq_to_filepath)} sequences')
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage) check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
print('done.') print('done running checks')
def main(): def main():