marseymoji/add_varsel_cmap.py

#!/usr/bin/env python
#
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Modify an Emoji font to populate cmap 14 variation selector subtable.

This uses the unicode variation selector data, finds all characters in the
standard cmap that match, and generates a cmap format 14 subtable indicating
that they have the provided presentation (emoji by default).  No fonts
are processed if any already has a format 14 subtable.  Modified fonts
are written to the provided dir and optionally have a provided suffix
appended (before the extension).  An output file name can be provided
in which case only one input file is allowed."""

import argparse
import os
from os import path
import re
import sys

from fontTools import ttLib
from fontTools.ttLib.tables import _c_m_a_p as CMap

from nototools import font_data

UCD = path.join(path.dirname(__file__), 'third_party', 'ucd')

def get_emoji_data():
  """Parse emoji-data.txt and return a tuple of two sets of characters:
  - those with a default emoji presentation
  - those with a default text presentation"""

  presentation_default_text = set()
  presentation_default_emoji = set()
  emoji_data = path.join(UCD, 'emoji-data.txt')
  line_re = re.compile(r'([0-9A-F]{4,6})\s*;\s*(emoji|text)\s*;.*')
  with open(emoji_data, 'r') as f:
    for line in f.readlines():
      m = line_re.match(line)
      if m:
        cp = int(m.group(1), 16)
        if m.group(2) == 'emoji':
          presentation_default_emoji.add(cp)
        else:
          presentation_default_text.add(cp)
  return presentation_default_emoji, presentation_default_text


def get_unicode_emoji_variants():
  """Parse StandardizedVariants.txt and return a set of characters
  that have a defined emoji variant presentation.  All such characters
  also have a text variant presentation (as of v8.0.0), so a single
  set works for both."""

  emoji_variants = set()
  variants_data = path.join(UCD, 'StandardizedVariants.txt')
  line_re = re.compile(r'([0-9A-F]{4,6})\sFE0F;\s+emoji style;.*')
  with open(variants_data, 'r') as f:
    for line in f.readlines():
      m = line_re.match(line)
      if m:
        emoji_variants.add(int(m.group(1), 16))
  return emoji_variants


def modify_font(font_name, font, presentation, emoji_variants):
  cmap_table = font_data.get_cmap(font)
  emoji = set(cmap_table.keys()) & emoji_variants
  if not emoji:
    print 'no emoji match those in %s' % font_name
    return
  uvs = 0xfe0f if presentation == 'emoji' else 0xfe0e
  cmap14 = CMap.CmapSubtable.newSubtable(14)
  cmap14.cmap = {}
  cmap14.uvsDict = {uvs : [[c, None] for c in sorted(emoji)]}
  cmap14.platformID = 0
  cmap14.platEncID = 5
  cmap14.language = 0xFF
  font['cmap'].tables.append(cmap14)


def modify_fonts(font_names, presentation='emoji', output=None, suffix=None,
                 dst_dir=None):
  assert dst_dir
  if output:
    assert len(font_names) == 1

  for font_name in font_names:
    font = ttLib.TTFont(font_name)
    if font_data.get_variation_sequence_cmap(font):
      # process no font if any already has a var selector cmap
      raise ValueError('font %s already has a format 14 cmap' % font_name)

  if not path.exists(dst_dir):
    os.makedirs(dst_dir)

  emoji_variants = get_unicode_emoji_variants()

  for font_name in font_names:
    font = ttLib.TTFont(font_name)
    modify_font(font_name, font, presentation, emoji_variants)
    if output:
      new_name = output
    else:
      new_name = path.basename(font_name)
      if suffix:
        name, ext = path.splitext(new_name)
        new_name = name + suffix + ext
    font.save(path.join(dst_dir, new_name))


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-d', '--dstdir',
      help='destination directory for modified files, default /tmp/varsub',
      metavar = 'dir',
      default='/tmp/varsub')
  parser.add_argument(
      '-p', '--presentation',
      help='presentation of glyphs in the font, default "emoji"',
      choices=['emoji', 'text'],
      default='emoji')
  parser.add_argument(
      '-s', '--suffix',
      help='suffix to add to file names for output, goes before extension')
  parser.add_argument(
      '-o', '--output',
      help='output file name, requires only one input file')
  parser.add_argument(
      'files',
      help='files to modify',
      metavar='file',
      nargs='+')

  # argparse fails with named arguments that have leading hyphen.  You
  # can work around this by using a short arg and concatenating it and
  # the argument together, e.g. '-s-foo'.
  # Both parse_known_args and inserting '--' between the key and its
  # value fail, though.
  args = parser.parse_args()
  modify_fonts(args.files, presentation=args.presentation, output=args.output,
               suffix=args.suffix, dst_dir=args.dstdir)

if __name__ == '__main__':
  main()
Add tool and data to generate format 14 cmap. emoji-data and the code to process it might be useful, but it not actually used by the tool at present. The tool currently assumes that all emoji glyphs mapped by the default cmap share the same presentation. 2015-10-14 01:16:04 +00:00			`#!/usr/bin/env python`
			`#`
			`# Copyright 2015 Google Inc. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

			`"""Modify an Emoji font to populate cmap 14 variation selector subtable.`

			`This uses the unicode variation selector data, finds all characters in the`
			`standard cmap that match, and generates a cmap format 14 subtable indicating`
			`that they have the provided presentation (emoji by default). No fonts`
			`are processed if any already has a format 14 subtable. Modified fonts`
			`are written to the provided dir and optionally have a provided suffix`
			`appended (before the extension). An output file name can be provided`
			`in which case only one input file is allowed."""`

			`import argparse`
			`import os`
			`from os import path`
			`import re`
			`import sys`

			`from fontTools import ttLib`
			`from fontTools.ttLib.tables import _c_m_a_p as CMap`

			`from nototools import font_data`

			`UCD = path.join(path.dirname(__file__), 'third_party', 'ucd')`

			`def get_emoji_data():`
			`"""Parse emoji-data.txt and return a tuple of two sets of characters:`
			`- those with a default emoji presentation`
			`- those with a default text presentation"""`

			`presentation_default_text = set()`
			`presentation_default_emoji = set()`
			`emoji_data = path.join(UCD, 'emoji-data.txt')`
			`line_re = re.compile(r'([0-9A-F]{4,6})\s;\s(emoji\|text)\s;.')`
			`with open(emoji_data, 'r') as f:`
			`for line in f.readlines():`
			`m = line_re.match(line)`
			`if m:`
			`cp = int(m.group(1), 16)`
			`if m.group(2) == 'emoji':`
			`presentation_default_emoji.add(cp)`
			`else:`
			`presentation_default_text.add(cp)`
			`return presentation_default_emoji, presentation_default_text`


			`def get_unicode_emoji_variants():`
			`"""Parse StandardizedVariants.txt and return a set of characters`
			`that have a defined emoji variant presentation. All such characters`
			`also have a text variant presentation (as of v8.0.0), so a single`
			`set works for both."""`

			`emoji_variants = set()`
			`variants_data = path.join(UCD, 'StandardizedVariants.txt')`
			`line_re = re.compile(r'([0-9A-F]{4,6})\sFE0F;\s+emoji style;.*')`
			`with open(variants_data, 'r') as f:`
			`for line in f.readlines():`
			`m = line_re.match(line)`
			`if m:`
			`emoji_variants.add(int(m.group(1), 16))`
			`return emoji_variants`


			`def modify_font(font_name, font, presentation, emoji_variants):`
			`cmap_table = font_data.get_cmap(font)`
			`emoji = set(cmap_table.keys()) & emoji_variants`
			`if not emoji:`
			`print 'no emoji match those in %s' % font_name`
			`return`
			`uvs = 0xfe0f if presentation == 'emoji' else 0xfe0e`
			`cmap14 = CMap.CmapSubtable.newSubtable(14)`
			`cmap14.cmap = {}`
			`cmap14.uvsDict = {uvs : [[c, None] for c in sorted(emoji)]}`
			`cmap14.platformID = 0`
			`cmap14.platEncID = 5`
			`cmap14.language = 0xFF`
			`font['cmap'].tables.append(cmap14)`


			`def modify_fonts(font_names, presentation='emoji', output=None, suffix=None,`
			`dst_dir=None):`
			`assert dst_dir`
			`if output:`
			`assert len(font_names) == 1`

			`for font_name in font_names:`
			`font = ttLib.TTFont(font_name)`
			`if font_data.get_variation_sequence_cmap(font):`
			`# process no font if any already has a var selector cmap`
			`raise ValueError('font %s already has a format 14 cmap' % font_name)`

			`if not path.exists(dst_dir):`
			`os.makedirs(dst_dir)`

			`emoji_variants = get_unicode_emoji_variants()`

			`for font_name in font_names:`
			`font = ttLib.TTFont(font_name)`
			`modify_font(font_name, font, presentation, emoji_variants)`
			`if output:`
			`new_name = output`
			`else:`
			`new_name = path.basename(font_name)`
			`if suffix:`
			`name, ext = path.splitext(new_name)`
			`new_name = name + suffix + ext`
			`font.save(path.join(dst_dir, new_name))`


			`def main():`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument(`
			`'-d', '--dstdir',`
			`help='destination directory for modified files, default /tmp/varsub',`
			`metavar = 'dir',`
			`default='/tmp/varsub')`
			`parser.add_argument(`
			`'-p', '--presentation',`
			`help='presentation of glyphs in the font, default "emoji"',`
			`choices=['emoji', 'text'],`
			`default='emoji')`
			`parser.add_argument(`
			`'-s', '--suffix',`
			`help='suffix to add to file names for output, goes before extension')`
			`parser.add_argument(`
			`'-o', '--output',`
			`help='output file name, requires only one input file')`
			`parser.add_argument(`
			`'files',`
			`help='files to modify',`
			`metavar='file',`
			`nargs='+')`

			`# argparse fails with named arguments that have leading hyphen. You`
			`# can work around this by using a short arg and concatenating it and`
			`# the argument together, e.g. '-s-foo'.`
			`# Both parse_known_args and inserting '--' between the key and its`
			`# value fail, though.`
			`args = parser.parse_args()`
			`modify_fonts(args.files, presentation=args.presentation, output=args.output,`
			`suffix=args.suffix, dst_dir=args.dstdir)`

			`if __name__ == '__main__':`
			`main()`