#!/usr/bin/env python3 """ For each argument on the command line, look for it in the set of all Unicode names. Arguments are treated as case-insensitive regular expressions, e.g.: % find-uname 'small letter a$' 'horizontal line' *** small letter a$ matches *** LATIN SMALL LETTER A (97) COMBINING LATIN SMALL LETTER A (867) CYRILLIC SMALL LETTER A (1072) PARENTHESIZED LATIN SMALL LETTER A (9372) CIRCLED LATIN SMALL LETTER A (9424) FULLWIDTH LATIN SMALL LETTER A (65345) *** horizontal line matches *** HORIZONTAL LINE EXTENSION (9135) """ import unicodedata import sys import re def main(args): unicode_names = [] for ix in range(sys.maxunicode+1): try: unicode_names.append((ix, unicodedata.name(chr(ix)))) except ValueError: # no name for the character pass for arg in args: pat = re.compile(arg, re.I) matches = [(y,x) for (x,y) in unicode_names if pat.search(y) is not None] if matches: print("***", arg, "matches", "***") for match in matches: print("%s (%d)" % match) if __name__ == "__main__": main(sys.argv[1:])
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 30809 | C. Thomas Tyler |
Code drop with newer versions of Extensions. Thanks to @jason_gibon. #review-30810 @jabson_gibson |