From f8ec04e4a34796b2de3366d91d3d32faf9f17975 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 3 Feb 2022 12:15:46 +0000 Subject: [PATCH 1/4] colrv1_postproc: add GSUB lookups replacing unsupported flags with the 'unknown flag' Any invalid/unsupported emoji flag sequence (whether using regional indicators or subdivision tags) gets replaced by the 'unknown flag' (PUA 0xFE82B), a flag with a blue question mark in the middle. The logic is adapted from the GSUB table of NotoColorEmoji.tmpl.ttx. We build these extra lookups using feaLib on a temporary empty font (that matches the glyph order of our destination COLRv1 font), then update our font's GSUB with them. The PUA for the unknown flag is then removed from the cmap, as it's no longer needed. It's also deleted from the CBDT NotoColorEmoji (see emoji_builder.py). --- colrv1_postproc.py | 125 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 118 insertions(+), 7 deletions(-) diff --git a/colrv1_postproc.py b/colrv1_postproc.py index 4ebbf82fb..424f4713a 100644 --- a/colrv1_postproc.py +++ b/colrv1_postproc.py @@ -5,11 +5,14 @@ For now substantially based on copying from a correct bitmap build. 
""" from absl import app import functools +from textwrap import dedent +from fontTools.feaLib.builder import addOpenTypeFeaturesFromString from fontTools import ttLib from fontTools.ttLib.tables import _g_l_y_f as glyf from fontTools.ttLib.tables import otTables as ot import map_pua_emoji from nototools import add_vs_cmap +from nototools import font_data from nototools import unicode_data from pathlib import Path @@ -95,12 +98,13 @@ def _add_cmap_entries(colr_font, codepoint, glyph_name): print(f"Map 0x{codepoint:04x} to {glyph_name}, format {table.format}") -def _map_missing_flag_tag_chars_to_empty_glyphs(colr_font): - # Add all tag characters used in flags - tag_cps = set(range(0xE0030, 0xE0039 + 1)) | set(range(0xE0061, 0xE007A + 1)) +FLAG_TAGS = set(range(0xE0030, 0xE0039 + 1)) | set(range(0xE0061, 0xE007A + 1)) +CANCEL_TAG = 0xE007F - # Cancel tag - tag_cps |= {0xE007F} + +def _map_missing_flag_tag_chars_to_empty_glyphs(colr_font): + # Add all tag characters used in flags + cancel tag + tag_cps = FLAG_TAGS | {CANCEL_TAG} # Anything already cmap'd is fine tag_cps -= set(_Cmap(colr_font).keys()) @@ -143,6 +147,9 @@ def _Cmap(ttfont): return functools.reduce(_Reducer, unicode_cmaps, {}) +BLACK_FLAG = 0x1F3F4 + + def _map_empty_flag_tag_to_black_flag(colr_font): # fontchain_lint wants direct support for empty flag tags # so map them to the default flag to match cbdt behavior @@ -150,8 +157,8 @@ def _map_empty_flag_tag_to_black_flag(colr_font): # if the emoji font starts using extensions this code will require revision cmap = _Cmap(colr_font) - black_flag_glyph = cmap[0x1F3F4] - cancel_tag_glyph = cmap[0xE007F] + black_flag_glyph = cmap[BLACK_FLAG] + cancel_tag_glyph = cmap[CANCEL_TAG] lookup_list = colr_font["GSUB"].table.LookupList liga_set = _ligaset_for_glyph(lookup_list, black_flag_glyph) assert liga_set is not None, "There should be existing ligatures using black flag" @@ -201,6 +208,108 @@ def _add_vertical_layout_tables(cbdt_font, colr_font): 
vmtx.metrics[gn] = height, 0 +UNKNOWN_FLAG_PUA = 0xFE82B +REGIONAL_INDICATORS = set(range(0x1F1E6, 0x1F1FF + 1)) + + +def _add_fallback_subs_for_unknown_flags(colr_font): + """Add GSUB lookups to replace unsupported flag sequences with the 'unknown flag'. + + In order to locate the unknown flag, the glyph must be mapped to 0xFE82B PUA code; + the latter is removed from the cmap table after the GSUB has been updated. + """ + cmap = _Cmap(colr_font) + unknown_flag = cmap[UNKNOWN_FLAG_PUA] + black_flag = cmap[BLACK_FLAG] + cancel_tag = cmap[CANCEL_TAG] + flag_tags = sorted(cmap[cp] for cp in FLAG_TAGS) + regional_indicators = sorted(cmap[cp] for cp in REGIONAL_INDICATORS) + + classes = dedent( + f"""\ + @FLAG_TAGS = [{" ".join(flag_tags)}]; + @REGIONAL_INDICATORS = [{" ".join(regional_indicators)}]; + @UNKNOWN_FLAG = [{" ".join([unknown_flag] * len(regional_indicators))}]; + """ + ) + lookups = ( + # the first lookup is a dummy that stands for the emoji sequences ligatures + # from the destination font; we only use it to ensure the lookup indices match. + # We can't leave it empty otherwise feaLib optimizes it away. 
+ dedent( + f"""\ + lookup placeholder {{ + sub {unknown_flag} {unknown_flag} by {unknown_flag}; + }} placeholder; + """ + ) + + "\n".join( + ["lookup delete_glyph {"] + + [f" sub {g} by NULL;" for g in sorted(regional_indicators + flag_tags)] + + ["} delete_glyph;"] + ) + + "\n" + + dedent( + """\ + lookup replace_with_unknown_flag { + sub @REGIONAL_INDICATORS by @UNKNOWN_FLAG; + } replace_with_unknown_flag; + """ + ) + ) + features = ( + "languagesystem DFLT dflt;\n" + + classes + + lookups + + dedent( + f"""\ + feature ccmp {{ + lookup placeholder; + sub {black_flag} @FLAG_TAGS' lookup delete_glyph; + sub {black_flag} {cancel_tag} by {unknown_flag}; + sub @REGIONAL_INDICATORS' lookup replace_with_unknown_flag + @REGIONAL_INDICATORS' lookup delete_glyph; + }} ccmp; + """ + ) + ) + # feaLib always builds a new GSUB table (can't update one in place) so we have to + # use an empty TTFont and then update our GSUB with the newly built lookups + temp_font = ttLib.TTFont() + temp_font.setGlyphOrder(colr_font.getGlyphOrder()) + + addOpenTypeFeaturesFromString(temp_font, features) + + temp_gsub = temp_font["GSUB"].table + # sanity check + assert len(temp_gsub.FeatureList.FeatureRecord) == 1 + assert temp_gsub.FeatureList.FeatureRecord[0].FeatureTag == "ccmp" + temp_ccmp = temp_gsub.FeatureList.FeatureRecord[0].Feature + + colr_gsub = colr_font["GSUB"].table + ccmps = [ + r.Feature for r in colr_gsub.FeatureList.FeatureRecord if r.FeatureTag == "ccmp" + ] + assert len(ccmps) == 1, f"expected only 1 'ccmp' feature record, found {len(ccmps)}" + colr_ccmp = ccmps[0] + + colr_lookups = colr_gsub.LookupList.Lookup + assert ( + len(colr_lookups) == 1 + ), f"expected only 1 lookup in COLRv1's GSUB.LookupList, found {len(colr_lookups)}" + assert ( + colr_lookups[0].LookupType == 4 + ), f"expected Lookup[0] of type 4 in COLRv1, found {colr_lookups[0].LookupType}" + + colr_lookups.extend(temp_gsub.LookupList.Lookup[1:]) + colr_gsub.LookupList.LookupCount = len(colr_lookups) + 
+ colr_ccmp.LookupListIndex = temp_ccmp.LookupListIndex + colr_ccmp.LookupCount = len(colr_ccmp.LookupListIndex) + + # get rid of the Unknown Flag private codepoint as no longer needed + font_data.delete_from_cmap(colr_font, [UNKNOWN_FLAG_PUA]) + + def main(argv): if len(argv) != 3: raise ValueError( @@ -239,6 +348,8 @@ def main(argv): _add_vertical_layout_tables(cbdt_font, colr_font) + _add_fallback_subs_for_unknown_flags(colr_font) + out_file = Path(_OUTPUT_FILE[colr_file.name]).absolute() print("Writing", out_file) colr_font.save(out_file) From 631766222fdcf2b168936b9d80a7a639b6ce7ae1 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 3 Feb 2022 12:28:16 +0000 Subject: [PATCH 2/4] empty subdivision flag (1F3F4+E007F) should be mapped to unknown flag, not the black flag The substitution BLACK_FLAG + CANCEL_TAG => UNKNOWN_FLAG is already handled by the _add_fallback_subs_for_unknown_flags function, added in the previous commit. There's no BLACK_FLAG + CANCEL_TAG => BLACK_FLAG substitution in the current NotoColorEmoji.ttf CBDT font. In fact, it makes much more sense that all unsupported subdivision flag sequences, i.e. BLACK_FLAG + (zero or more non-cancel tags) + CANCEL_TAG, are displayed with the unknown flag. There is no reason to special-case BLACK_FLAG + CANCEL_TAG. I'm not sure where the conclusion about BLACK_FLAG + CANCEL_TAG => BLACK_FLAG came from (got to ask Rod). 
--- colrv1_postproc.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/colrv1_postproc.py b/colrv1_postproc.py index 424f4713a..bb5481231 100644 --- a/colrv1_postproc.py +++ b/colrv1_postproc.py @@ -147,31 +147,6 @@ def _Cmap(ttfont): return functools.reduce(_Reducer, unicode_cmaps, {}) -BLACK_FLAG = 0x1F3F4 - - -def _map_empty_flag_tag_to_black_flag(colr_font): - # fontchain_lint wants direct support for empty flag tags - # so map them to the default flag to match cbdt behavior - - # if the emoji font starts using extensions this code will require revision - - cmap = _Cmap(colr_font) - black_flag_glyph = cmap[BLACK_FLAG] - cancel_tag_glyph = cmap[CANCEL_TAG] - lookup_list = colr_font["GSUB"].table.LookupList - liga_set = _ligaset_for_glyph(lookup_list, black_flag_glyph) - assert liga_set is not None, "There should be existing ligatures using black flag" - - # Map black flag + cancel tag to just black flag - # Since this is the ligature set for black flag, component is just cancel tag - # Since we only have one component its safe to put our rule at the front - liga = ot.Ligature() - liga.Component = [cancel_tag_glyph] - liga.LigGlyph = black_flag_glyph - liga_set.insert(0, liga) - - def _add_vertical_layout_tables(cbdt_font, colr_font): upem_scale = colr_font["head"].unitsPerEm / cbdt_font["head"].unitsPerEm @@ -209,6 +184,7 @@ def _add_vertical_layout_tables(cbdt_font, colr_font): UNKNOWN_FLAG_PUA = 0xFE82B +BLACK_FLAG = 0x1F3F4 REGIONAL_INDICATORS = set(range(0x1F1E6, 0x1F1FF + 1)) @@ -342,8 +318,6 @@ def main(argv): _map_missing_flag_tag_chars_to_empty_glyphs(colr_font) - _map_empty_flag_tag_to_black_flag(colr_font) - add_soft_light_to_flags(colr_font) _add_vertical_layout_tables(cbdt_font, colr_font) From ec10159d1f41ef4c8ce8a06c2ec8d4040a8532ee Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 3 Feb 2022 19:05:13 +0000 Subject: [PATCH 3/4] colrv1/noflags.toml: exclude regional indicator symbols from noflags 
font --- colrv1/noflags.toml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/colrv1/noflags.toml b/colrv1/noflags.toml index b77aaa211..932d6610c 100644 --- a/colrv1/noflags.toml +++ b/colrv1/noflags.toml @@ -54,32 +54,6 @@ srcs = [ "../svg/emoji_u1f198.svg", "../svg/emoji_u1f199.svg", "../svg/emoji_u1f19a.svg", - "../svg/emoji_u1f1e6.svg", - "../svg/emoji_u1f1e7.svg", - "../svg/emoji_u1f1e8.svg", - "../svg/emoji_u1f1e9.svg", - "../svg/emoji_u1f1ea.svg", - "../svg/emoji_u1f1eb.svg", - "../svg/emoji_u1f1ec.svg", - "../svg/emoji_u1f1ed.svg", - "../svg/emoji_u1f1ee.svg", - "../svg/emoji_u1f1ef.svg", - "../svg/emoji_u1f1f0.svg", - "../svg/emoji_u1f1f1.svg", - "../svg/emoji_u1f1f2.svg", - "../svg/emoji_u1f1f3.svg", - "../svg/emoji_u1f1f4.svg", - "../svg/emoji_u1f1f5.svg", - "../svg/emoji_u1f1f6.svg", - "../svg/emoji_u1f1f7.svg", - "../svg/emoji_u1f1f8.svg", - "../svg/emoji_u1f1f9.svg", - "../svg/emoji_u1f1fa.svg", - "../svg/emoji_u1f1fb.svg", - "../svg/emoji_u1f1fc.svg", - "../svg/emoji_u1f1fd.svg", - "../svg/emoji_u1f1fe.svg", - "../svg/emoji_u1f1ff.svg", "../svg/emoji_u1f201.svg", "../svg/emoji_u1f202.svg", "../svg/emoji_u1f21a.svg", From 083b7fcad716ffa3b99b7a78d8e74c1ae733358b Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 3 Feb 2022 19:07:42 +0000 Subject: [PATCH 4/4] colrv1_postproc.py: handle missing regional indicators in noflags font don't bother to do textwrap.dedent, as whitespace doesn't matter in FEA code --- colrv1_postproc.py | 56 ++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/colrv1_postproc.py b/colrv1_postproc.py index bb5481231..cdf2acef0 100644 --- a/colrv1_postproc.py +++ b/colrv1_postproc.py @@ -5,7 +5,6 @@ For now substantially based on copying from a correct bitmap build. 
""" from absl import app import functools -from textwrap import dedent from fontTools.feaLib.builder import addOpenTypeFeaturesFromString from fontTools import ttLib from fontTools.ttLib.tables import _g_l_y_f as glyf @@ -199,55 +198,58 @@ def _add_fallback_subs_for_unknown_flags(colr_font): black_flag = cmap[BLACK_FLAG] cancel_tag = cmap[CANCEL_TAG] flag_tags = sorted(cmap[cp] for cp in FLAG_TAGS) - regional_indicators = sorted(cmap[cp] for cp in REGIONAL_INDICATORS) + # in the *-noflags.ttf font there are no region flags thus this list is empty + regional_indicators = sorted(cmap[cp] for cp in REGIONAL_INDICATORS if cp in cmap) - classes = dedent( - f"""\ - @FLAG_TAGS = [{" ".join(flag_tags)}]; - @REGIONAL_INDICATORS = [{" ".join(regional_indicators)}]; - @UNKNOWN_FLAG = [{" ".join([unknown_flag] * len(regional_indicators))}]; + classes = f'@FLAG_TAGS = [{" ".join(flag_tags)}];\n' + if regional_indicators: + classes += f""" + @REGIONAL_INDICATORS = [{" ".join(regional_indicators)}]; + @UNKNOWN_FLAG = [{" ".join([unknown_flag] * len(regional_indicators))}]; """ - ) lookups = ( # the first lookup is a dummy that stands for the emoji sequences ligatures # from the destination font; we only use it to ensure the lookup indices match. # We can't leave it empty otherwise feaLib optimizes it away. 
- dedent( - f"""\ - lookup placeholder {{ - sub {unknown_flag} {unknown_flag} by {unknown_flag}; - }} placeholder; - """ - ) + f""" + lookup placeholder {{ + sub {unknown_flag} {unknown_flag} by {unknown_flag}; + }} placeholder; + """ + "\n".join( ["lookup delete_glyph {"] + [f" sub {g} by NULL;" for g in sorted(regional_indicators + flag_tags)] + ["} delete_glyph;"] ) - + "\n" - + dedent( - """\ + + ( + """ lookup replace_with_unknown_flag { sub @REGIONAL_INDICATORS by @UNKNOWN_FLAG; } replace_with_unknown_flag; """ + if regional_indicators + else "\n" ) ) features = ( "languagesystem DFLT dflt;\n" + classes + lookups - + dedent( - f"""\ - feature ccmp {{ - lookup placeholder; - sub {black_flag} @FLAG_TAGS' lookup delete_glyph; - sub {black_flag} {cancel_tag} by {unknown_flag}; - sub @REGIONAL_INDICATORS' lookup replace_with_unknown_flag - @REGIONAL_INDICATORS' lookup delete_glyph; - }} ccmp; + + "feature ccmp {" + + f""" + lookup placeholder; + sub {black_flag} @FLAG_TAGS' lookup delete_glyph; + sub {black_flag} {cancel_tag} by {unknown_flag}; + """ + + ( """ + sub @REGIONAL_INDICATORS' lookup replace_with_unknown_flag + @REGIONAL_INDICATORS' lookup delete_glyph; + """ + if regional_indicators + else "" ) + + "} ccmp;" ) # feaLib always builds a new GSUB table (can't update one in place) so we have to # use an empty TTFont and then update our GSUB with the newly built lookups