Update add_aliases tool to generate canonical names.
Clients who want to identify emoji sequences in text that have image files would like to use the file names to identify these sequences, and would like to identify canonical sequences the same way. Add an option to the add_aliases tool that also creates aliases using the canonical names where they differ from the file names. Our file naming omits the emoji presentation selector character (FE0F), so by default the image file names do not include it; this option provides a means to restore it.
pull/173/head
parent
69952d1fd8
commit
512d629dcb
|
@ -22,10 +22,13 @@ from os import path
|
|||
import shutil
|
||||
import sys
|
||||
|
||||
from nototools import unicode_data
|
||||
|
||||
"""Create aliases in target directory.
|
||||
|
||||
The target files should not contain the emoji variation selector
|
||||
codepoint in their names."""
|
||||
In addition to links/copies named with aliased sequences, this can also
|
||||
create canonically named aliases/copies, if requested."""
|
||||
|
||||
|
||||
DATA_ROOT = path.dirname(path.abspath(__file__))
|
||||
|
||||
|
@ -75,12 +78,17 @@ def read_emoji_aliases(filename):
|
|||
|
||||
def add_aliases(
|
||||
srcdir, dstdir, aliasfile, prefix, ext, replace=False, copy=False,
|
||||
dry_run=False):
|
||||
canonical_names=False, dry_run=False):
|
||||
"""Use aliasfile to create aliases of files in srcdir matching prefix/ext in
|
||||
dstdir. If dstdir is null, use srcdir as dstdir. If replace is false
|
||||
and a file already exists in dstdir, report and do nothing. If copy is false
|
||||
create a symlink, else create a copy. If dry_run is true, report what would
|
||||
be done. Dstdir will be created if necessary, even if dry_run is true."""
|
||||
create a symlink, else create a copy.
|
||||
|
||||
If canonical_names is true, check all source files and generate aliases/copies
|
||||
using the canonical name if different from the existing name.
|
||||
|
||||
If dry_run is true, report what would be done. Dstdir will be created if
|
||||
necessary, even if dry_run is true."""
|
||||
|
||||
if not path.isdir(srcdir):
|
||||
print('%s is not a directory' % srcdir, file=sys.stderr)
|
||||
|
@ -103,22 +111,48 @@ def add_aliases(
|
|||
aliases_to_create = {}
|
||||
aliases_to_replace = []
|
||||
alias_exists = False
|
||||
for als, trg in sorted(aliases.items()):
|
||||
if trg not in seq_to_file:
|
||||
print('target %s for %s does not exist' % (
|
||||
seq_to_str(trg), seq_to_str(als)), file=sys.stderr)
|
||||
continue
|
||||
alias_name = '%s%s.%s' % (prefix, seq_to_str(als), ext)
|
||||
|
||||
def check_alias_seq(seq):
|
||||
alias_str = seq_to_str(seq)
|
||||
alias_name = '%s%s.%s' % (prefix, alias_str, ext)
|
||||
alias_path = path.join(dstdir, alias_name)
|
||||
if path.exists(alias_path):
|
||||
if replace:
|
||||
aliases_to_replace.append(alias_name)
|
||||
else:
|
||||
print('alias %s exists' % seq_to_str(als), file=sys.stderr)
|
||||
print('alias %s exists' % alias_str, file=sys.stderr)
|
||||
alias_exists = True
|
||||
continue
|
||||
target_file = seq_to_file[trg]
|
||||
aliases_to_create[alias_name] = target_file
|
||||
return None
|
||||
return alias_name
|
||||
|
||||
canonical_to_file = {}
|
||||
for als, trg in sorted(aliases.items()):
|
||||
if trg not in seq_to_file:
|
||||
print('target %s for %s does not exist' % (
|
||||
seq_to_str(trg), seq_to_str(als)), file=sys.stderr)
|
||||
continue
|
||||
alias_name = check_alias_seq(als)
|
||||
if alias_name:
|
||||
target_file = seq_to_file[trg]
|
||||
aliases_to_create[alias_name] = target_file
|
||||
if canonical_names:
|
||||
canonical_seq = unicode_data.get_canonical_emoji_sequence(als)
|
||||
if canonical_seq and canonical_seq != als:
|
||||
canonical_alias_name = check_alias_seq(canonical_seq)
|
||||
if canonical_alias_name:
|
||||
canonical_to_file[canonical_alias_name] = target_file
|
||||
|
||||
if canonical_names:
|
||||
print('adding %d canonical aliases' % len(canonical_to_file))
|
||||
for seq, f in seq_to_file.iteritems():
|
||||
canonical_seq = unicode_data.get_canonical_emoji_sequence(seq)
|
||||
if canonical_seq and canonical_seq != seq:
|
||||
alias_name = check_alias_seq(canonical_seq)
|
||||
if alias_name:
|
||||
canonical_to_file[alias_name] = f
|
||||
|
||||
print('adding %d total canonical sequences' % len(canonical_to_file))
|
||||
aliases_to_create.update(canonical_to_file)
|
||||
|
||||
if replace:
|
||||
if not dry_run:
|
||||
|
@ -173,6 +207,9 @@ def main():
|
|||
parser.add_argument(
|
||||
'-c', '--copy', help='create a copy of the file, not a symlink',
|
||||
action='store_true')
|
||||
parser.add_argument(
|
||||
'--canonical_names', help='include extra copies with canonical names '
|
||||
'(including fe0f emoji presentation character)', action='store_true');
|
||||
parser.add_argument(
|
||||
'-n', '--dry_run', help='print out aliases to create only',
|
||||
action='store_true')
|
||||
|
@ -180,7 +217,7 @@ def main():
|
|||
|
||||
add_aliases(
|
||||
args.srcdir, args.dstdir, args.aliasfile, args.prefix, args.ext,
|
||||
args.replace, args.copy, args.dry_run)
|
||||
args.replace, args.copy, args.canonical_names, args.dry_run)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue