nerd-fonts/bin/scripts/name_parser/name_parser_test2

#!/usr/bin/env python3
# coding=utf8

import sys
import re
import os.path
import glob
import subprocess
import fontforge

###### Some helpers

def get_sfnt_dict(font):
    """Turn the font's SFNT name records into a plain dict.

    Every record in ``font.sfnt_names`` is accessed by position: element 1
    becomes the key and element 2 the value; element 0 is not used.
    If a key shows up in several records, the last record wins.
    """
    return {record[1]: record[2] for record in font.sfnt_names}

def extract_sfnt_data(sfnt):
    """Pick the five commonly used name entries out of an SFNT dict.

    'Fullname', 'Family' and 'SubFamily' must be present (KeyError otherwise);
    'Preferred Family' and 'Preferred Styles' fall back to '' when missing.

    Returns the tuple
    (fullname, family, subfamily, preferred family, preferred styles).
    """
    mandatory = tuple(sfnt[key] for key in ('Fullname', 'Family', 'SubFamily'))
    optional = tuple(sfnt.get(key, '') for key in ('Preferred Family', 'Preferred Styles'))
    return mandatory + optional

def format_names(header, *stuff):
    """Render twelve values as one fixed-width, '|'-separated table row.

    The values alternate between a one-character marker column and a text
    column; every column except the last is padded and truncated to its
    fixed width, the last one is only truncated.

    With a true ``header`` the row is returned unstripped, followed by a
    newline and a ruler row made of dashes.  Otherwise just the row is
    returned, with trailing whitespace removed.
    """
    row_format = '{:1.1}|{:50.50} |{:1.1}| {:65.65} |{:1.1}| {:55.55} |{:1.1}| {:30.30} |{:1.1}| {:40.40} |{:1.1}| {:.40}'
    row = row_format.format(*stuff)
    if not header:
        return row.rstrip()
    dashes = '------------------------------------------------------------'
    # First marker column stays blank, all other columns are filled with dashes
    ruler = row_format.format('', *([dashes] * 11))
    return row + '\n' + ruler

def _normalize_font_name(name):
    """Bring one font name (part) into a canonical lower-case form.

    Applies, in order, a list of font specific rewrites that usually
    transform 'their' naming into 'our' naming, then collapses runs of
    blanks and strips the result.
    """
    name = name.lower()
    name = re.sub(r'\bdisp\b', 'display', name)                     # Noto
    name = name.replace('ibm 3270', '3270')                         # 3270
    name = name.replace('3270-', '3270 ')                           # 3270
    name = name.replace('lekton-', 'lekton ')                       # Lekton
    name = name.replace('semi-narrow', 'seminarrow')                # 3270
    name = name.replace('bolditalic', 'bold italic')
    name = re.sub(r'\bfor\b', '', name)                             # Meslo, Monofur
    name = re.sub(r'\bpowerline\b', '', name)                       # Meslo, Monofur
    name = name.replace('fira mono', 'fura mono')                   # Obviously someone forgot to rename the fonts in Fira/
    name = name.replace('aurulentsansmono-', 'aurulent sans mono ') # Aurulent fullname oddity
    name = name.replace('mononoki-', 'mononoki ')                   # Mononoki has sometimes a dash
    name = re.sub(r'\br\b', 'regular', name)                        # Nonstandard style in Agave
    name = re.sub(r'(bitstream vera sans mono.*) oblique', r'\1 italic', name) # They call it Oblique but the filename says Italic
    name = re.sub(r'gohufont (uni-)?(11|14)', 'gohufont', name)     # They put the 'name' into the subfamily/weight
    name = name.replace('xltobl', 'extralight oblique')             # Iosevka goes inventing names
    # ProggyClean has no TT in filename.
    # The name is already lower-cased here, so the lookahead has to be
    # lower-case as well, and the replacement has to be a raw string so
    # that \1 is the captured (optional) blank and not the character \x01.
    name = re.sub(r'proggyclean(?!tt)( ?)', r'proggycleantt\1', name)

    return re.sub(r' +', ' ', name).strip()


def lenient_cmp(s1, s2, allow_shuffle_all):
    """Compare two font name (parts) but be a bit lenient ;->

    We do not care about:
    - Case
    - "Display" vs "Disp" (in Noto)
    - the "IBM 3270" name variant, and the other font specific oddities
      handled in _normalize_font_name()
    - the order of the words: with allow_shuffle_all all words may be
      reordered; without it the first word has to stay in place and only
      the remaining words may be reordered.
      NOTE: names of exactly two words are always compared with both words
      sorted, even without allow_shuffle_all (kept as in the original code).

    Returns True if both names are considered equal.
    """
    canonical = []
    for name in (s1, s2):
        words = _normalize_font_name(name).split(' ')
        if not allow_shuffle_all and len(words) > 2:
            # Keep the leading word fixed, order of the rest is irrelevant
            words = [words[0]] + sorted(words[1:])
        elif len(words) > 1:
            words.sort()
        canonical.append(' '.join(words))
    return canonical[0] == canonical[1]

###### Let's go!

if len(sys.argv) < 2:
    print('Usage: {} font_name [font_name ...]\n'.format(sys.argv[0]))
    sys.exit(1)

# font-patcher is expected three directories above the directory of this script
font_patcher = os.path.realpath(os.path.dirname(os.path.realpath(sys.argv[0]))+'/../../../font-patcher')

# The patched fonts are picked up from (and afterwards deleted in) the current
# directory via glob, so fonts that are already there would get mixed up and lost.
existing_font = glob.glob('*.[ot]tf')
if len(existing_font):
    sys.exit('Would overwrite any existing *.ttf and *.otf, bailing out (remove them first)')

# The known_issues file sits next to this script. Entries are written (see below)
# as 4 lines each: one explanation line starting with '#', then the three
# formatted lines for the source font ('>'), and the two patch runs ('+', '-').
try:
    with open(sys.argv[0] + '.known_issues', 'r') as f:
        known_issues = f.read().splitlines()
        # known_issues = [line.rstrip() for line in known_issues]
        print('Found {:.0f} known issues'.format(len(known_issues) / 4)) # approx ;)
except OSError:
    print('Can not open known_issues file')
    known_issues = []
new_issues = open(sys.argv[0] + '.known_issues.new', 'w')

print('Examining {} font files'.format(len(sys.argv) - 1))
all_files = 0
issue_files = 0
known_files = 0

print(format_names(True, '', 'Filename', '', 'Fullname', '', 'Family', '', 'Subfamily', '', 'Typogr. Family', '', 'Typogr. Subfamily'))

for filename in sys.argv[1:]:
    # data collects one tuple (patched filename, fullname, family, subfamily,
    # preferred family, preferred styles) per patcher run:
    # data[0] is the run with '--makegroups', data[1] the run without
    data = []
    fullfile = os.path.basename(filename)
    fname = os.path.splitext(fullfile)[0]
    if fname == 'NotoColorEmoji':
        continue # font is not patchable
    if fname in [ 'iosevka-heavyoblique', 'iosevka-term-heavyoblique', 'iosevka-mediumoblique', 'Lilex-VF' ]:
        continue # Patch resultant font not openable
    # Remember the font currently worked on in the file 'log' (overwritten for each font)
    with open("log", "w") as log:
        log.write(filename)

    for option in ['--makegroups', '']:
        cmd = ['fontforge', '--script', font_patcher, '--powerline', option, filename ]
        cmd = [ c for c in cmd if len(c) ] # drop the empty option of the second run
        ff = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, encoding='utf8')
        #ff = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8')
        if ff.returncode:
            print("\nERROR running command:\n  {}\n\n{}".format(' '.join(cmd), ''))#ff.stdout))
            sys.exit(1)
        new_font = glob.glob('*.[ot]tf')
        if not new_font:
            # Without this the new_font[0] below fails with a bare IndexError
            sys.exit('\nERROR no font file found after running command:\n  {}'.format(' '.join(cmd)))
        font = fontforge.open(new_font[0], 1)
        sfnt = get_sfnt_dict(font)
        font.close()
        # Clean up so that the next run starts without any font in the directory
        for generated in new_font:
            os.remove(generated)
        (sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam) = extract_sfnt_data(sfnt)

        data.append(( os.path.basename(new_font[0]), sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam ))

    all_files += 1

    # tN is True if name field N differs between the two runs.
    # The first word has to stay in place for fullname and the (preferred)
    # family; for the (preferred) subfamily the word order does not matter at all.
    t1 = not lenient_cmp(data[0][1], data[1][1], False)
    t2 = not lenient_cmp(data[0][2], data[1][2], False)
    t3 = not lenient_cmp(data[0][3], data[1][3], True)
    t4 = not lenient_cmp(data[0][4], data[1][4], False)
    t5 = not lenient_cmp(data[0][5], data[1][5], True)

    # Lenience: Allow for dropping unneeded preferred stuff:
    # New (sub)family is same as old preferred sub(family)
    if t4 and data[0][4] == '' and data[1][4].lower() == data[0][2].lower():
        t4 = False
    if t5 and data[0][5] == '' and data[1][5].lower() == data[0][3].lower():
        t5 = False

    # Rows of fonts with differences get marked with '+' / '-' in the first
    # column, and each differing field gets an 'X' in front of it
    if t1 or t2 or t3 or t4 or t5:
        m1 = '+'; m2 = '-'
    else:
        m1 = ''; m2 = ''
    t1_ = 'X' if t1 else ''
    t2_ = 'X' if t2 else ''
    t3_ = 'X' if t3 else ''
    t4_ = 'X' if t4 else ''
    t5_ = 'X' if t5 else ''

    o1 = format_names(False, m1, data[0][0], t1_, data[0][1], t2_, data[0][2], t3_, data[0][3], t4_, data[0][4], t5_, data[0][5])
    o2 = format_names(False, m2, data[1][0], '', data[1][1], '', data[1][2], '', data[1][3], '', data[1][4], '', data[1][5])

    if len(m1):
        issue_files += 1
        # Also show the names of the unpatched source font as reference
        font = fontforge.open(filename, 1)
        sfnt = get_sfnt_dict(font)
        font.close()
        (sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam) = extract_sfnt_data(sfnt)
        o3 = format_names(False, '>', os.path.basename(filename), '', sfnt_full, '', sfnt_fam, '', sfnt_subfam, '', sfnt_pfam, '', sfnt_psubfam)
        if o1 not in known_issues or o2 not in known_issues:
            new_issues.writelines(['#### AUTOGENERATED\n', o3 + '\n', o1 + '\n', o2 + '\n'])
        else:
            known_files += 1
            o1_idx = known_issues.index(o1)
            idx = o1_idx - 2 # should be the index of the explanation line
            # A negative idx would silently address a line counted from the end
            # of the list, an empty line has no first character to look at
            if idx < 0 or not known_issues[idx].startswith('#'):
                sys.exit('Problem with known issues file, line {}'.format(o1_idx))
            new_issues.writelines([known_issues[idx] + '\n', o3 + '\n', o1 + '\n', o2 + '\n'])
            # remove the known_issue entry (explanation line plus the three data lines)
            for _ in range(4):
                known_issues.pop(idx)

    print(o1, o2, sep='\n')

print('Fonts with different name rendering: {}/{} ({}/{} are in known_issues)'.format(issue_files, all_files, known_files, issue_files))

# Whatever is left in known_issues has not been matched by any examined font
if len(known_issues) > 0:
    print('There are {} lines not needed in known_issues, appending commented out in new known_issues'.format(len(known_issues)))
    new_issues.write('\n#### The following lines are not needed anymore\n\n')
    for l in known_issues:
        new_issues.writelines([' ', l, '\n'])

new_issues.close()
name-parser: Specify python version [why] Sometimes scripts can not be run. [how] Depending on installed python versions and 'alternatives' setup the script's shebang needs to point to python3 of course. Also the files need the executable bit set. Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2022-02-06 13:07:02 +01:00			`#!/usr/bin/env python3`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 22:29:54 +01:00			`# coding=utf8`

			`import sys`
			`import re`
			`import os.path`
			`import glob`
			`import subprocess`
			`import fontforge`

			`###### Some helpers`

			`def get_sfnt_dict(font):`
			`"""Extract SFNT table as nice dict"""`
			`d = []`
			`for i, el in enumerate(font.sfnt_names):`
			`d += [(el[1], el[2])]`
			`return dict(d)`

			`def extract_sfnt_data(sfnt):`
			`"""Get the usual names out of the SFNT table"""`
			`sfnt_full = sfnt['Fullname']`
			`sfnt_fam = sfnt['Family']`
			`sfnt_subfam = sfnt['SubFamily']`
			`sfnt_pfam = sfnt['Preferred Family'] if 'Preferred Family' in sfnt else ''`
			`sfnt_psubfam = sfnt['Preferred Styles'] if 'Preferred Styles' in sfnt else ''`
			`return (sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam)`

			`def format_names(header, *stuff):`
			`"""Unify outputs (with header)"""`
			`f = '{:1.1}\|{:50.50} \|{:1.1}\| {:65.65} \|{:1.1}\| {:55.55} \|{:1.1}\| {:30.30} \|{:1.1}\| {:40.40} \|{:1.1}\| {:.40}'`
			`if header:`
			`d = '------------------------------------------------------------'`
			`return f.format(*stuff) + '\n' + f.format('', d, d, d, d, d, d, d, d, d, d, d)`
			`return f.format(*stuff).rstrip()`

			`def lenient_cmp(s1, s2, allow_shuffle_all):`
			`"""Compare two font name (parts) but be a bit lenient ;->"""`
			`# We do not care about:`
			`# - Case`
			`# - "Display" vs "Disp" (in Noto)`
			`# Allow for "IBM 3278" name`
			`s = [ s1, s2 ]`
			`for i in range(2):`
			`# Usually given transform from 'their' to 'our' style`
			`s[i] = s[i].lower()`
			`s[i] = re.sub(r'\bdisp\b', 'display', s[i]) # Noto`
			`s[i] = s[i].replace('ibm 3270', '3270') # 3270`
			`s[i] = s[i].replace('3270-', '3270 ') # 3270`
			`s[i] = s[i].replace('lekton-', 'lekton ') # Lekton`
			`s[i] = s[i].replace('semi-narrow', 'seminarrow') # 3270`
			`s[i] = s[i].replace('bolditalic', 'bold italic')`
			`s[i] = re.sub(r'\bfor\b', '', s[i]) # Meslo, Monofur`
			`s[i] = re.sub(r'\bpowerline\b', '', s[i]) # Meslo, Monofur`
			`s[i] = s[i].replace('fira mono', 'fura mono') # Obviously someone forgot to rename the fonts in Fira/`
			`s[i] = s[i].replace('aurulentsansmono-', 'aurulent sans mono ') # Aurulent fullname oddity`
			`s[i] = s[i].replace('mononoki-', 'mononoki ') # Mononoki has somtimes a dash`
			`s[i] = re.sub(r'\br\b', 'regular', s[i]) # Nonstandard style in Agave`
			`s[i] = re.sub(r'(bitstream vera sans mono.*) oblique', r'\1 italic', s[i]) # They call it Oblique but the filename says Italic`
			`s[i] = re.sub(r'gohufont (uni-)?(11\|14)', 'gohufont', s[i]) # They put the 'name' into the subfamily/weight`
			`s[i] = s[i].replace('xltobl', 'extralight oblique') # Iosevka goes inventing names`
			`s[i] = re.sub(r'proggyclean(?!TT)( ?)', 'proggycleantt\1', s[i]) # ProggyClean has no TT in filename`

			`s[i] = re.sub(r' +', ' ', s[i]).strip()`

			`p = []`
			`for e in s:`
			`parts = e.split(' ')`
			`if not allow_shuffle_all and len(parts) > 2:`
			`tail = parts[1:]`
			`tail.sort()`
			`parts = [parts[0]] + tail`
			`elif len(parts) > 1:`
			`parts.sort()`
			`p.append(' '.join(parts))`
			`return p[0] == p[1]`

			`###### Let's go!`

			`if len(sys.argv) < 2:`
			`print('Usage: {} font_name [font_name ...]\n'.format(sys.argv[0]))`
			`sys.exit(1)`

			`font_patcher = os.path.realpath(os.path.dirname(os.path.realpath(sys.argv[0]))+'/../../../font-patcher')`

			`existing_font = glob.glob('*.[ot]tf')`
			`if len(existing_font):`
			`sys.exit('Would overwrite any existing .ttf and .otf, bailing out (remove them first)')`

			`try:`
			`with open(sys.argv[0] + '.known_issues', 'r') as f:`
			`known_issues = f.read().splitlines()`
			`# known_issues = [line.rstrip() for line in known_issues]`
			`print('Found {:.0f} known issues'.format(len(known_issues) / 4)) # approx ;)`
			`except OSError:`
			`print('Can not open known_issues file')`
			`known_issues = []`
			`new_issues = open(sys.argv[0] + '.known_issues.new', 'w')`

			`print('Examining {} font files'.format(len(sys.argv) - 1))`
			`all_files = 0`
			`issue_files = 0`
			`known_files = 0`

			`print(format_names(True, '', 'Filename', '', 'Fullname', '', 'Family', '', 'Subfamily', '', 'Typogr. Family', '', 'Typogr. Subfamily'))`

			`for filename in sys.argv[1:]:`
			`data = []`
			`fullfile = os.path.basename(filename)`
			`fname = os.path.splitext(fullfile)[0]`
			`if fname == 'NotoColorEmoji':`
			`continue # font is not patchable`
			`if fname in [ 'iosevka-heavyoblique', 'iosevka-term-heavyoblique', 'iosevka-mediumoblique', 'Lilex-VF' ]:`
			`continue # Patch resultant font not openable`
			`log = open("log", "w")`
			`log.write(filename)`
			`log.close()`

name-parser: Rename font-patcher option [why] The option `--parser` instructs `font-patcher` to come up with the font naming by utilizing the FontnameParser object. This sounds logical from a programmers perspective, but the option name is not descriptive for end users of `font-patcher` [how] As usual naming is hard. A short but maybe more descriptive name for the option can be `--makegroups`; as it describes what the option means for the end user: functioning font grouping. Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2022-02-06 12:44:53 +01:00			`for option in ['--makegroups', '']:`
Draft: Introduce a file name parser DO NOT MERGE [why] A lot of the fonts have incorrect naming after patching. A completely different approach can help to come up with a consistent naming scheme. [how] See bin/scripts/name-parser/README.md Signed-off-by: Fini Jastrow <ulf.fini.jastrow@desy.de> 2021-12-02 22:29:54 +01:00			`cmd = ['fontforge', '--script', font_patcher, '--powerline', option, filename ]`
			`cmd = [ c for c in cmd if len(c) ]`
			`ff = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, encoding='utf8')`
			`#ff = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8')`
			`if ff.returncode:`
			`print("\nERROR running command:\n {}\n\n{}".format(' '.join(cmd), ''))#ff.stdout))`
			`sys.exit(1)`
			`new_font = glob.glob('*.[ot]tf')`
			`font = fontforge.open(new_font[0], 1)`
			`sfnt = get_sfnt_dict(font)`
			`font.close()`
			`os.system('rm -f *.[ot]tf')`
			`(sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam) = extract_sfnt_data(sfnt)`

			`data.append(( os.path.basename(new_font[0]), sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam ))`

			`all_files += 1`

			`t1 = not lenient_cmp(data[0][1], data[1][1], False)`
			`t2 = not lenient_cmp(data[0][2], data[1][2], False)`
			`t3 = not lenient_cmp(data[0][3], data[1][3], True)`
			`t4 = not lenient_cmp(data[0][4], data[1][4], False)`
			`t5 = not lenient_cmp(data[0][5], data[1][5], True)`

			`# Lenience: Allow for dropping unneeded prefered stuff:`
			`# New (sub)family is same as old preferred sub(family)`
			`if t4 and data[0][4] == '' and data[1][4].lower() == data[0][2].lower():`
			`t4 = False`
			`if t5 and data[0][5] == '' and data[1][5].lower() == data[0][3].lower():`
			`t5 = False`

			`if t1 or t2 or t3 or t4 or t5:`
			`m1 = '+'; m2 = '-'`
			`else:`
			`m1 = ''; m2 = ''`
			`t1_ = 'X' if t1 else ''`
			`t2_ = 'X' if t2 else ''`
			`t3_ = 'X' if t3 else ''`
			`t4_ = 'X' if t4 else ''`
			`t5_ = 'X' if t5 else ''`

			`o1 = format_names(False, m1, data[0][0], t1_, data[0][1], t2_, data[0][2], t3_, data[0][3], t4_, data[0][4], t5_, data[0][5])`
			`o2 = format_names(False, m2, data[1][0], '', data[1][1], '', data[1][2], '', data[1][3], '', data[1][4], '', data[1][5])`

			`if len(m1):`
			`issue_files += 1`
			`font = fontforge.open(filename, 1)`
			`sfnt = get_sfnt_dict(font)`
			`font.close()`
			`(sfnt_full, sfnt_fam, sfnt_subfam, sfnt_pfam, sfnt_psubfam) = extract_sfnt_data(sfnt)`
			`o3 = format_names(False, '>', os.path.basename(filename), '', sfnt_full, '', sfnt_fam, '', sfnt_subfam, '', sfnt_pfam, '', sfnt_psubfam)`
			`if not o1 in known_issues or not o2 in known_issues:`
			`new_issues.writelines(['#### AUTOGENERATED\n', o3 + '\n', o1 + '\n', o2 + '\n'])`
			`else:`
			`known_files += 1`
			`idx = known_issues.index(o1) - 2 # should be the index of the explanation line`
			`if known_issues[idx][0] != '#':`
			`sys.exit('Problem with known issues file, line', known_issues.index(o1))`
			`new_issues.writelines([known_issues[idx] + '\n', o3 + '\n', o1 + '\n', o2 + '\n'])`
			`# remove known_issue triplet`
			`known_issues.pop(idx)`
			`known_issues.pop(idx)`
			`known_issues.pop(idx)`
			`known_issues.pop(idx)`

			`print(o1, o2, sep='\n')`

			`print('Fonts with different name rendering: {}/{} ({}/{} are in known_issues)'.format(issue_files, all_files, known_files, issue_files))`

			`if len(known_issues) > 0:`
			`print('There are {} lines not needed in known_issues, appending commented out in new known_issues'.format(len(known_issues)))`
			`new_issues.write('\n#### The following lines are not needed anymore\n\n')`
			`for l in known_issues:`
			`new_issues.writelines([' ', l, '\n'])`

			`new_issues.close()`