Merge pull request #299 from kabisa/feature/fontlinter
commit
f8131fc457
12
Makefile
12
Makefile
|
@ -38,6 +38,8 @@ VS_ADDER = add_vs_cmap.py # from nototools
|
||||||
EMOJI_SRC_DIR ?= png/128
|
EMOJI_SRC_DIR ?= png/128
|
||||||
FLAGS_SRC_DIR := third_party/region-flags/png
|
FLAGS_SRC_DIR := third_party/region-flags/png
|
||||||
|
|
||||||
|
SEQUENCE_CHECK_PY = check_emoji_sequences.py
|
||||||
|
|
||||||
BUILD_DIR := build
|
BUILD_DIR := build
|
||||||
EMOJI_DIR := $(BUILD_DIR)/emoji
|
EMOJI_DIR := $(BUILD_DIR)/emoji
|
||||||
FLAGS_DIR := $(BUILD_DIR)/flags
|
FLAGS_DIR := $(BUILD_DIR)/flags
|
||||||
|
@ -204,14 +206,22 @@ $(COMPRESSED_DIR)/%.png: $(QUANTIZED_DIR)/%.png | check_tools $(COMPRESSED_DIR)
|
||||||
@rm -f "$@"
|
@rm -f "$@"
|
||||||
ttx "$<"
|
ttx "$<"
|
||||||
|
|
||||||
$(EMOJI).ttf: $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \
|
$(EMOJI).ttf: check_sequence $(EMOJI).tmpl.ttf $(EMOJI_BUILDER) $(PUA_ADDER) \
|
||||||
$(ALL_COMPRESSED_FILES) | check_tools
|
$(ALL_COMPRESSED_FILES) | check_tools
|
||||||
|
|
||||||
@$(PYTHON) $(EMOJI_BUILDER) $(SMALL_METRICS) -V $< "$@" "$(COMPRESSED_DIR)/emoji_u"
|
@$(PYTHON) $(EMOJI_BUILDER) $(SMALL_METRICS) -V $< "$@" "$(COMPRESSED_DIR)/emoji_u"
|
||||||
@$(PYTHON) $(PUA_ADDER) "$@" "$@-with-pua"
|
@$(PYTHON) $(PUA_ADDER) "$@" "$@-with-pua"
|
||||||
@$(VS_ADDER) -vs 2640 2642 2695 --dstdir '.' -o "$@-with-pua-varsel" "$@-with-pua"
|
@$(VS_ADDER) -vs 2640 2642 2695 --dstdir '.' -o "$@-with-pua-varsel" "$@-with-pua"
|
||||||
@mv "$@-with-pua-varsel" "$@"
|
@mv "$@-with-pua-varsel" "$@"
|
||||||
@rm "$@-with-pua"
|
@rm "$@-with-pua"
|
||||||
|
|
||||||
|
check_sequence:
|
||||||
|
ifdef BYPASS_SEQUENCE_CHECK
|
||||||
|
@echo Bypassing the emoji sequence checks
|
||||||
|
else
|
||||||
|
$(PYTHON) $(SEQUENCE_CHECK_PY) -d $(EMOJI_SRC_DIR) -c
|
||||||
|
endif
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(EMOJI).ttf $(EMOJI).tmpl.ttf $(EMOJI).tmpl.ttx
|
rm -f $(EMOJI).ttf $(EMOJI).tmpl.ttf $(EMOJI).tmpl.ttx
|
||||||
rm -f waveflag
|
rm -f waveflag
|
||||||
|
|
|
@ -14,7 +14,11 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""Compare emoji image file namings against unicode property data."""
|
"""Compare emoji image file namings against unicode property data.
|
||||||
|
The intent of this script is to check if the resulting font will pass
|
||||||
|
the Android linter:
|
||||||
|
https://android.googlesource.com/platform/frameworks/base/+/master/tools/fonts/fontchain_linter.py
|
||||||
|
"""
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -51,7 +55,7 @@ def seq_name(seq):
|
||||||
def strip_vs_map(seq_map):
|
def strip_vs_map(seq_map):
|
||||||
return {
|
return {
|
||||||
unicode_data.strip_emoji_vs(k): v
|
unicode_data.strip_emoji_vs(k): v
|
||||||
for k, v in seq_map.iteritems()}
|
for k, v in seq_map.items()}
|
||||||
_namedata = [
|
_namedata = [
|
||||||
strip_vs_map(unicode_data.get_emoji_combining_sequences()),
|
strip_vs_map(unicode_data.get_emoji_combining_sequences()),
|
||||||
strip_vs_map(unicode_data.get_emoji_flag_sequences()),
|
strip_vs_map(unicode_data.get_emoji_flag_sequences()),
|
||||||
|
@ -76,9 +80,9 @@ def seq_name(seq):
|
||||||
|
|
||||||
def _check_no_vs(sorted_seq_to_filepath):
|
def _check_no_vs(sorted_seq_to_filepath):
|
||||||
"""Our image data does not use emoji presentation variation selectors."""
|
"""Our image data does not use emoji presentation variation selectors."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if EMOJI_VS in seq:
|
if EMOJI_VS in seq:
|
||||||
print('check no VS: FE0F in path: %s' % fp)
|
print(f'check no VS: {EMOJI_VS} in path: {fp}')
|
||||||
|
|
||||||
|
|
||||||
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
|
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
|
||||||
|
@ -99,7 +103,7 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
|
||||||
valid_cps |= TAG_SET # used in subregion tag sequences
|
valid_cps |= TAG_SET # used in subregion tag sequences
|
||||||
|
|
||||||
not_emoji = {}
|
not_emoji = {}
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
for cp in seq:
|
for cp in seq:
|
||||||
if cp not in valid_cps:
|
if cp not in valid_cps:
|
||||||
if cp not in not_emoji:
|
if cp not in not_emoji:
|
||||||
|
@ -108,48 +112,46 @@ def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
|
||||||
|
|
||||||
if len(not_emoji):
|
if len(not_emoji):
|
||||||
print(
|
print(
|
||||||
'check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
|
f'check valid emoji cps: {len(not_emoji)} non-emoji cp found', file=sys.stderr)
|
||||||
file=sys.stderr)
|
|
||||||
for cp in sorted(not_emoji):
|
for cp in sorted(not_emoji):
|
||||||
fps = not_emoji[cp]
|
fps = not_emoji[cp]
|
||||||
print(
|
print(
|
||||||
'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)),
|
f'check valid emoji cps: {cp} (in {len(fps)} sequences)', file=sys.stderr)
|
||||||
file=sys.stderr)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_zwj(sorted_seq_to_filepath):
|
def _check_zwj(sorted_seq_to_filepath):
|
||||||
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
|
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
|
||||||
that reports this specific problem."""
|
that reports this specific problem."""
|
||||||
|
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if ZWJ not in seq:
|
if ZWJ not in seq:
|
||||||
continue
|
continue
|
||||||
if seq[0] == ZWJ:
|
if seq[0] == ZWJ:
|
||||||
print('check zwj: zwj at head of sequence in %s' % fp, file=sys.stderr)
|
print(f'check zwj: zwj at head of sequence in {fp}', file=sys.stderr)
|
||||||
if len(seq) == 1:
|
if len(seq) == 1:
|
||||||
continue
|
continue
|
||||||
if seq[-1] == ZWJ:
|
if seq[-1] == ZWJ:
|
||||||
print('check zwj: zwj at end of sequence in %s' % fp, file=sys.stderr)
|
print(f'check zwj: zwj at end of sequence in {fp}', file=sys.stderr)
|
||||||
for i, cp in enumerate(seq):
|
for i, cp in enumerate(seq):
|
||||||
if cp == ZWJ:
|
if cp == ZWJ:
|
||||||
if i > 0:
|
if i > 0:
|
||||||
pcp = seq[i-1]
|
pcp = seq[i-1]
|
||||||
if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
|
if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
|
||||||
print(
|
print(
|
||||||
'check zwj: non-emoji %04x preceeds ZWJ in %s' % (pcp, fp),
|
f'check zwj: non-emoji {pcp} preceeds ZWJ in {fp}',
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
if i < len(seq) - 1:
|
if i < len(seq) - 1:
|
||||||
fcp = seq[i+1]
|
fcp = seq[i+1]
|
||||||
if not unicode_data.is_emoji(fcp):
|
if not unicode_data.is_emoji(fcp):
|
||||||
print(
|
print(
|
||||||
'check zwj: non-emoji %04x follows ZWJ in %s' % (fcp, fp),
|
f'check zwj: non-emoji {fcp} follows ZWJ in {fp}',
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def _check_flags(sorted_seq_to_filepath):
|
def _check_flags(sorted_seq_to_filepath):
|
||||||
"""Ensure regional indicators are only in sequences of one or two, and
|
"""Ensure regional indicators are only in sequences of one or two, and
|
||||||
never mixed."""
|
never mixed."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
have_reg = None
|
have_reg = None
|
||||||
for cp in seq:
|
for cp in seq:
|
||||||
is_reg = unicode_data.is_regional_indicator(cp)
|
is_reg = unicode_data.is_regional_indicator(cp)
|
||||||
|
@ -157,13 +159,13 @@ def _check_flags(sorted_seq_to_filepath):
|
||||||
have_reg = is_reg
|
have_reg = is_reg
|
||||||
elif have_reg != is_reg:
|
elif have_reg != is_reg:
|
||||||
print(
|
print(
|
||||||
'check flags: mix of regional and non-regional in %s' % fp,
|
f'check flags: mix of regional and non-regional in {fp}',
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
if have_reg and len(seq) > 2:
|
if have_reg and len(seq) > 2:
|
||||||
# We provide dummy glyphs for regional indicators, so there are sequences
|
# We provide dummy glyphs for regional indicators, so there are sequences
|
||||||
# with single regional indicator symbols, the len check handles this.
|
# with single regional indicator symbols, the len check handles this.
|
||||||
print(
|
print(
|
||||||
'check flags: regional indicator sequence length != 2 in %s' % fp,
|
f'check flags: regional indicator sequence length != 2 in {fp}',
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
|
|
||||||
def _check_tags(sorted_seq_to_filepath):
|
def _check_tags(sorted_seq_to_filepath):
|
||||||
|
@ -173,19 +175,19 @@ def _check_tags(sorted_seq_to_filepath):
|
||||||
|
|
||||||
BLACK_FLAG = 0x1f3f4
|
BLACK_FLAG = 0x1f3f4
|
||||||
BLACK_FLAG_SET = set([BLACK_FLAG])
|
BLACK_FLAG_SET = set([BLACK_FLAG])
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
seq_set = set(cp for cp in seq)
|
seq_set = set(cp for cp in seq)
|
||||||
overlap_set = seq_set & TAG_SET
|
overlap_set = seq_set & TAG_SET
|
||||||
if not overlap_set:
|
if not overlap_set:
|
||||||
continue
|
continue
|
||||||
if seq[0] != BLACK_FLAG:
|
if seq[0] != BLACK_FLAG:
|
||||||
print('check tags: bad start tag in %s' % fp)
|
print(f'check tags: bad start tag in {fp}')
|
||||||
elif seq[-1] != END_TAG:
|
elif seq[-1] != END_TAG:
|
||||||
print('check tags: bad end tag in %s' % fp)
|
print(f'check tags: bad end tag in {fp}')
|
||||||
elif len(seq) < 4:
|
elif len(seq) < 4:
|
||||||
print('check tags: sequence too short in %s' % fp)
|
print(f'check tags: sequence too short in {fp}')
|
||||||
elif seq_set - TAG_SET != BLACK_FLAG_SET:
|
elif seq_set - TAG_SET != BLACK_FLAG_SET:
|
||||||
print('check tags: non-tag items in %s' % fp)
|
print(f'check tags: non-tag items in {fp}')
|
||||||
|
|
||||||
|
|
||||||
def _check_skintone(sorted_seq_to_filepath):
|
def _check_skintone(sorted_seq_to_filepath):
|
||||||
|
@ -193,27 +195,27 @@ def _check_skintone(sorted_seq_to_filepath):
|
||||||
to take them. May appear standalone, though. Also check that emoji that take
|
to take them. May appear standalone, though. Also check that emoji that take
|
||||||
skin tone modifiers have a complete set."""
|
skin tone modifiers have a complete set."""
|
||||||
base_to_modifiers = collections.defaultdict(set)
|
base_to_modifiers = collections.defaultdict(set)
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
for i, cp in enumerate(seq):
|
for i, cp in enumerate(seq):
|
||||||
if unicode_data.is_skintone_modifier(cp):
|
if unicode_data.is_skintone_modifier(cp):
|
||||||
if i == 0:
|
if i == 0:
|
||||||
if len(seq) > 1:
|
if len(seq) > 1:
|
||||||
print(
|
print(
|
||||||
'check skintone: skin color selector first in sequence %s' % fp,
|
f'check skintone: skin color selector first in sequence {fp}',
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
# standalone are ok
|
# standalone are ok
|
||||||
continue
|
continue
|
||||||
pcp = seq[i-1]
|
pcp = seq[i-1]
|
||||||
if not unicode_data.is_emoji_modifier_base(pcp):
|
if not unicode_data.is_emoji_modifier_base(pcp):
|
||||||
print(
|
print(
|
||||||
'check skintone: emoji skintone modifier applied to non-base ' +
|
f'check skintone: emoji skintone modifier applied to non-base at {i}: {fp}',
|
||||||
'at %d: %s' % (i, fp), file=sys.stderr)
|
file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
if pcp not in base_to_modifiers:
|
if pcp not in base_to_modifiers:
|
||||||
base_to_modifiers[pcp] = set()
|
base_to_modifiers[pcp] = set()
|
||||||
base_to_modifiers[pcp].add(cp)
|
base_to_modifiers[pcp].add(cp)
|
||||||
|
|
||||||
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
|
for cp, modifiers in sorted(base_to_modifiers.items()):
|
||||||
if len(modifiers) != 5:
|
if len(modifiers) != 5:
|
||||||
print(
|
print(
|
||||||
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
|
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
|
||||||
|
@ -224,27 +226,28 @@ def _check_skintone(sorted_seq_to_filepath):
|
||||||
|
|
||||||
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
|
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
|
||||||
"""Verify that zwj sequences are valid for the given unicode version."""
|
"""Verify that zwj sequences are valid for the given unicode version."""
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if ZWJ not in seq:
|
if ZWJ not in seq:
|
||||||
continue
|
continue
|
||||||
age = unicode_data.get_emoji_sequence_age(seq)
|
age = unicode_data.get_emoji_sequence_age(seq)
|
||||||
if age is None or unicode_version is not None and age > unicode_version:
|
if age is None or unicode_version is not None and age > unicode_version:
|
||||||
print('check zwj sequences: undefined sequence %s' % fp)
|
print(f'check zwj sequences: undefined sequence {fp}')
|
||||||
|
|
||||||
|
|
||||||
def _check_no_alias_sources(sorted_seq_to_filepath):
|
def _check_no_alias_sources(sorted_seq_to_filepath):
|
||||||
"""Check that we don't have sequences that we expect to be aliased to
|
"""Check that we don't have sequences that we expect to be aliased to
|
||||||
some other sequence."""
|
some other sequence."""
|
||||||
aliases = add_aliases.read_default_emoji_aliases()
|
aliases = add_aliases.read_default_emoji_aliases()
|
||||||
for seq, fp in sorted_seq_to_filepath.iteritems():
|
for seq, fp in sorted_seq_to_filepath.items():
|
||||||
if seq in aliases:
|
if seq in aliases:
|
||||||
print('check no alias sources: aliased sequence %s' % fp)
|
print(f'check no alias sources: aliased sequence {fp}')
|
||||||
|
|
||||||
|
|
||||||
def _check_coverage(seq_to_filepath, unicode_version):
|
def _check_coverage(seq_to_filepath, unicode_version):
|
||||||
"""Ensure we have all and only the cps and sequences that we need for the
|
"""Ensure we have all and only the cps and sequences that we need for the
|
||||||
font as of this version."""
|
font as of this version."""
|
||||||
|
|
||||||
|
coverage_pass = True
|
||||||
age = unicode_version
|
age = unicode_version
|
||||||
|
|
||||||
non_vs_to_canonical = {}
|
non_vs_to_canonical = {}
|
||||||
|
@ -258,85 +261,53 @@ def _check_coverage(seq_to_filepath, unicode_version):
|
||||||
if v not in seq_to_filepath and v not in non_vs_to_canonical:
|
if v not in seq_to_filepath and v not in non_vs_to_canonical:
|
||||||
alias_str = unicode_data.seq_to_string(k)
|
alias_str = unicode_data.seq_to_string(k)
|
||||||
target_str = unicode_data.seq_to_string(v)
|
target_str = unicode_data.seq_to_string(v)
|
||||||
print('coverage: alias %s missing target %s' % (alias_str, target_str))
|
print(f'coverage: alias {alias_str} missing target {target_str}')
|
||||||
|
coverage_pass = False
|
||||||
continue
|
continue
|
||||||
if k in seq_to_filepath or k in non_vs_to_canonical:
|
if k in seq_to_filepath or k in non_vs_to_canonical:
|
||||||
alias_str = unicode_data.seq_to_string(k)
|
alias_str = unicode_data.seq_to_string(k)
|
||||||
target_str = unicode_data.seq_to_string(v)
|
target_str = unicode_data.seq_to_string(v)
|
||||||
print('coverage: alias %s already exists as %s (%s)' % (
|
print(f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})')
|
||||||
alias_str, target_str, seq_name(v)))
|
coverage_pass = False
|
||||||
continue
|
continue
|
||||||
filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
|
filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
|
||||||
seq_to_filepath[k] = 'alias:' + filename
|
seq_to_filepath[k] = 'alias:' + filename
|
||||||
|
|
||||||
# check single emoji, this includes most of the special chars
|
# check single emoji, this includes most of the special chars
|
||||||
emoji = sorted(unicode_data.get_emoji(age=age))
|
emoji = sorted(unicode_data.get_emoji())
|
||||||
for cp in emoji:
|
for cp in emoji:
|
||||||
if tuple([cp]) not in seq_to_filepath:
|
if tuple([cp]) not in seq_to_filepath:
|
||||||
print(
|
print(
|
||||||
'coverage: missing single %04x (%s)' % (
|
f'coverage: missing single {cp} ({unicode_data.name(cp)})')
|
||||||
cp, unicode_data.name(cp, '<no name>')))
|
coverage_pass = False
|
||||||
|
|
||||||
# special characters
|
# special characters
|
||||||
# all but combining enclosing keycap are currently marked as emoji
|
# all but combining enclosing keycap are currently marked as emoji
|
||||||
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
|
for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
|
||||||
if cp not in emoji and tuple([cp]) not in seq_to_filepath:
|
if cp not in emoji and tuple([cp]) not in seq_to_filepath:
|
||||||
print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
|
print(f'coverage: missing special {cp} ({unicode_data.name(cp)})')
|
||||||
|
coverage_pass = False
|
||||||
|
|
||||||
# combining sequences
|
# combining sequences
|
||||||
comb_seq_to_name = sorted(
|
comb_seq_to_name = sorted(
|
||||||
unicode_data.get_emoji_combining_sequences(age=age).iteritems())
|
unicode_data._emoji_sequence_data.items())
|
||||||
for seq, name in comb_seq_to_name:
|
for seq, name in comb_seq_to_name:
|
||||||
if seq not in seq_to_filepath:
|
if seq not in seq_to_filepath:
|
||||||
# strip vs and try again
|
# strip vs and try again
|
||||||
non_vs_seq = unicode_data.strip_emoji_vs(seq)
|
non_vs_seq = unicode_data.strip_emoji_vs(seq)
|
||||||
if non_vs_seq not in seq_to_filepath:
|
if non_vs_seq not in seq_to_filepath:
|
||||||
print('coverage: missing combining sequence %s (%s)' %
|
print(f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})')
|
||||||
(unicode_data.seq_to_string(seq), name))
|
coverage_pass = False
|
||||||
|
|
||||||
# flag sequences
|
|
||||||
flag_seq_to_name = sorted(
|
|
||||||
unicode_data.get_emoji_flag_sequences(age=age).iteritems())
|
|
||||||
for seq, name in flag_seq_to_name:
|
|
||||||
if seq not in seq_to_filepath:
|
|
||||||
print('coverage: missing flag sequence %s (%s)' %
|
|
||||||
(unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# skin tone modifier sequences
|
|
||||||
mod_seq_to_name = sorted(
|
|
||||||
unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
|
|
||||||
for seq, name in mod_seq_to_name:
|
|
||||||
if seq not in seq_to_filepath:
|
|
||||||
print('coverage: missing modifier sequence %s (%s)' % (
|
|
||||||
unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# zwj sequences
|
|
||||||
# some of ours include the emoji presentation variation selector and some
|
|
||||||
# don't, and the same is true for the canonical sequences. normalize all
|
|
||||||
# of them to omit it to test coverage, but report the canonical sequence.
|
|
||||||
zwj_seq_without_vs = set()
|
|
||||||
for seq in seq_to_filepath:
|
|
||||||
if ZWJ not in seq:
|
|
||||||
continue
|
|
||||||
if EMOJI_VS in seq:
|
|
||||||
seq = tuple(cp for cp in seq if cp != EMOJI_VS)
|
|
||||||
zwj_seq_without_vs.add(seq)
|
|
||||||
|
|
||||||
for seq, name in sorted(
|
|
||||||
unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
|
|
||||||
if EMOJI_VS in seq:
|
|
||||||
test_seq = tuple(s for s in seq if s != EMOJI_VS)
|
|
||||||
else:
|
|
||||||
test_seq = seq
|
|
||||||
if test_seq not in zwj_seq_without_vs:
|
|
||||||
print('coverage: missing (canonical) zwj sequence %s (%s)' % (
|
|
||||||
unicode_data.seq_to_string(seq), name))
|
|
||||||
|
|
||||||
# check for 'unknown flag'
|
# check for 'unknown flag'
|
||||||
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
|
# this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
|
||||||
# don't start with our prefix so 'unknown_flag' would be excluded by default.
|
# don't start with our prefix so 'unknown_flag' would be excluded by default.
|
||||||
if tuple([0xfe82b]) not in seq_to_filepath:
|
if tuple([0xfe82b]) not in seq_to_filepath:
|
||||||
print('coverage: missing unknown flag PUA fe82b')
|
print('coverage: missing unknown flag PUA fe82b')
|
||||||
|
coverage_pass = False
|
||||||
|
|
||||||
|
if not coverage_pass:
|
||||||
|
exit("Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'")
|
||||||
|
|
||||||
|
|
||||||
def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage):
|
def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage):
|
||||||
|
@ -360,9 +331,9 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
|
||||||
of a name to stderr."""
|
of a name to stderr."""
|
||||||
segment_re = re.compile(r'^[0-9a-f]{4,6}$')
|
segment_re = re.compile(r'^[0-9a-f]{4,6}$')
|
||||||
result = {}
|
result = {}
|
||||||
for name, dirname in name_to_dirpath.iteritems():
|
for name, dirname in name_to_dirpath.items():
|
||||||
if not name.startswith(prefix):
|
if not name.startswith(prefix):
|
||||||
print('expected prefix "%s" for "%s"' % (prefix, name))
|
print(f'expected prefix "{prefix}" for "{name}"')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
segments = name[len(prefix): -len(suffix)].split('_')
|
segments = name[len(prefix): -len(suffix)].split('_')
|
||||||
|
@ -370,12 +341,12 @@ def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
|
||||||
seq = []
|
seq = []
|
||||||
for s in segments:
|
for s in segments:
|
||||||
if not segment_re.match(s):
|
if not segment_re.match(s):
|
||||||
print('bad codepoint name "%s" in %s/%s' % (s, dirname, name))
|
print(f'bad codepoint name "{s}" in {dirname}/{name}')
|
||||||
segfail = True
|
segfail = True
|
||||||
continue
|
continue
|
||||||
n = int(s, 16)
|
n = int(s, 16)
|
||||||
if n > 0x10ffff:
|
if n > 0x10ffff:
|
||||||
print('codepoint "%s" out of range in %s/%s' % (s, dirname, name))
|
print(f'codepoint "{s}" out of range in {dirname}/{name}')
|
||||||
segfail = True
|
segfail = True
|
||||||
continue
|
continue
|
||||||
seq.append(n)
|
seq.append(n)
|
||||||
|
@ -422,15 +393,14 @@ def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
|
||||||
msg = ''
|
msg = ''
|
||||||
if unicode_version:
|
if unicode_version:
|
||||||
msg = ' (%3.1f)' % unicode_version
|
msg = ' (%3.1f)' % unicode_version
|
||||||
print('Checking files with prefix "%s" and suffix "%s"%s in:\n %s' % (
|
print(f'Checking files with prefix "{prefix}" and suffix "{suffix}"{msg} in: {dirs}')
|
||||||
prefix, suffix, msg, '\n '.join(dirs)))
|
|
||||||
name_to_dirpath = collect_name_to_dirpath_with_override(
|
name_to_dirpath = collect_name_to_dirpath_with_override(
|
||||||
dirs, prefix=prefix, suffix=suffix, exclude=exclude)
|
dirs, prefix=prefix, suffix=suffix, exclude=exclude)
|
||||||
print('checking %d names' % len(name_to_dirpath))
|
print(f'checking {len(name_to_dirpath)} names')
|
||||||
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
|
seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
|
||||||
print('checking %d sequences' % len(seq_to_filepath))
|
print(f'checking {len(seq_to_filepath)} sequences')
|
||||||
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
|
check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
|
||||||
print('done.')
|
print('done running checks')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
Loading…
Reference in New Issue