Update add_aliases tool to generate canonical names.

Clients that want to find, in text, the sequences for which image files
exist would like to use the image file names to identify those sequences,
and would like to identify canonical sequences the same way.
Add an option to the add_aliases tool that also creates aliases using
the canonical names where they differ from the file names.

Our file naming omits the emoji presentation selector character (U+FE0F),
so by default the image file names do not include it.  The new option
provides a means to restore it.
pull/173/head
Doug Felt 2017-10-09 11:40:52 -07:00
parent 69952d1fd8
commit 512d629dcb
1 changed file with 53 additions and 16 deletions

View File

@ -22,10 +22,13 @@ from os import path
import shutil import shutil
import sys import sys
from nototools import unicode_data
"""Create aliases in target directory. """Create aliases in target directory.
The target files should not contain the emoji variation selector In addition to links/copies named with aliased sequences, this can also
codepoint in their names.""" create canonically named aliases/copies, if requested."""
DATA_ROOT = path.dirname(path.abspath(__file__)) DATA_ROOT = path.dirname(path.abspath(__file__))
@ -75,12 +78,17 @@ def read_emoji_aliases(filename):
def add_aliases( def add_aliases(
srcdir, dstdir, aliasfile, prefix, ext, replace=False, copy=False, srcdir, dstdir, aliasfile, prefix, ext, replace=False, copy=False,
dry_run=False): canonical_names=False, dry_run=False):
"""Use aliasfile to create aliases of files in srcdir matching prefix/ext in """Use aliasfile to create aliases of files in srcdir matching prefix/ext in
dstdir. If dstdir is null, use srcdir as dstdir. If replace is false dstdir. If dstdir is null, use srcdir as dstdir. If replace is false
and a file already exists in dstdir, report and do nothing. If copy is false and a file already exists in dstdir, report and do nothing. If copy is false
create a symlink, else create a copy. If dry_run is true, report what would create a symlink, else create a copy.
be done. Dstdir will be created if necessary, even if dry_run is true."""
If canonical_names is true, check all source files and generate aliases/copies
using the canonical name if different from the existing name.
If dry_run is true, report what would be done. Dstdir will be created if
necessary, even if dry_run is true."""
if not path.isdir(srcdir): if not path.isdir(srcdir):
print('%s is not a directory' % srcdir, file=sys.stderr) print('%s is not a directory' % srcdir, file=sys.stderr)
@ -103,22 +111,48 @@ def add_aliases(
aliases_to_create = {} aliases_to_create = {}
aliases_to_replace = [] aliases_to_replace = []
alias_exists = False alias_exists = False
for als, trg in sorted(aliases.items()):
if trg not in seq_to_file: def check_alias_seq(seq):
print('target %s for %s does not exist' % ( alias_str = seq_to_str(seq)
seq_to_str(trg), seq_to_str(als)), file=sys.stderr) alias_name = '%s%s.%s' % (prefix, alias_str, ext)
continue
alias_name = '%s%s.%s' % (prefix, seq_to_str(als), ext)
alias_path = path.join(dstdir, alias_name) alias_path = path.join(dstdir, alias_name)
if path.exists(alias_path): if path.exists(alias_path):
if replace: if replace:
aliases_to_replace.append(alias_name) aliases_to_replace.append(alias_name)
else: else:
print('alias %s exists' % seq_to_str(als), file=sys.stderr) print('alias %s exists' % alias_str, file=sys.stderr)
alias_exists = True alias_exists = True
continue return None
target_file = seq_to_file[trg] return alias_name
aliases_to_create[alias_name] = target_file
canonical_to_file = {}
for als, trg in sorted(aliases.items()):
if trg not in seq_to_file:
print('target %s for %s does not exist' % (
seq_to_str(trg), seq_to_str(als)), file=sys.stderr)
continue
alias_name = check_alias_seq(als)
if alias_name:
target_file = seq_to_file[trg]
aliases_to_create[alias_name] = target_file
if canonical_names:
canonical_seq = unicode_data.get_canonical_emoji_sequence(als)
if canonical_seq and canonical_seq != als:
canonical_alias_name = check_alias_seq(canonical_seq)
if canonical_alias_name:
canonical_to_file[canonical_alias_name] = target_file
if canonical_names:
print('adding %d canonical aliases' % len(canonical_to_file))
for seq, f in seq_to_file.iteritems():
canonical_seq = unicode_data.get_canonical_emoji_sequence(seq)
if canonical_seq and canonical_seq != seq:
alias_name = check_alias_seq(canonical_seq)
if alias_name:
canonical_to_file[alias_name] = f
print('adding %d total canonical sequences' % len(canonical_to_file))
aliases_to_create.update(canonical_to_file)
if replace: if replace:
if not dry_run: if not dry_run:
@ -173,6 +207,9 @@ def main():
parser.add_argument( parser.add_argument(
'-c', '--copy', help='create a copy of the file, not a symlink', '-c', '--copy', help='create a copy of the file, not a symlink',
action='store_true') action='store_true')
parser.add_argument(
'--canonical_names', help='include extra copies with canonical names '
'(including fe0f emoji presentation character)', action='store_true');
parser.add_argument( parser.add_argument(
'-n', '--dry_run', help='print out aliases to create only', '-n', '--dry_run', help='print out aliases to create only',
action='store_true') action='store_true')
@ -180,7 +217,7 @@ def main():
add_aliases( add_aliases(
args.srcdir, args.dstdir, args.aliasfile, args.prefix, args.ext, args.srcdir, args.dstdir, args.aliasfile, args.prefix, args.ext,
args.replace, args.copy, args.dry_run) args.replace, args.copy, args.canonical_names, args.dry_run)
if __name__ == '__main__': if __name__ == '__main__':