# User:Huhu9001Bot/04:50, 10 March 2023 (UTC)
#
# From Wiktionary, the free dictionary
# Jump to navigation Jump to search
import pywikibot
import pywikibot.pagegenerators
import pywikibot.textlib
import re

# Build the page generator for English Wiktionary.
wiktionary_site = pywikibot.Site('en', 'wiktionary')
pgf = pywikibot.pagegenerators.GeneratorFactory(site=wiktionary_site)

# Generator options, in pywikibot command-line syntax:
#   -transcludes: pages that transclude Template:ja-pos
#   -grep:        keep only pages whose text contains a ja-pos call with a
#                 first argument followed by a second argument made up
#                 solely of lowercase ASCII letters and spaces.
generator_options = [
    '-transcludes:Template:ja-pos',
    r'-grep:{{\s*ja-pos\s*\|[^}]+\|[a-z ]+\|',
]
pgf.handle_args(generator_options)

# Combined generator over every page that satisfies the options above.
pg = pgf.getCombinedGenerator()

# Matches one {{ja-pos|...}} call whose argument text contains no '}'.
# Because the match stops at the first '}', a ja-pos call that contains a
# nested template is captured only up to the end of that nested template.
r_pat = re.compile(r'{{\s*ja-pos\s*\|[^}]+}}')

# A value consisting only of lowercase ASCII letters and spaces.
_ASCII_WORDS = re.compile(r'[a-z ]+')


def r_rep(m):
    """Replacement callback for ``r_pat.sub``: swap ja-pos parameters 1 and 2.

    The swap is applied only when parameter 2 consists solely of lowercase
    ASCII letters/spaces and parameter 1 does not. In that case the original
    and rewritten template text are printed, the module-level flag
    ``b_changed`` is set to ``True``, and the rewritten text is returned.

    In every other case the matched text is returned unchanged. That includes
    matches where the parser yields no template, where the first parsed
    template is not ``ja-pos``, or where parameter 1 or 2 is missing; these
    used to raise ``IndexError``/``KeyError`` or rewrite the wrong template.

    :param m: match object produced by ``r_pat``.
    :return: the replacement text for the match.
    """
    global b_changed
    original = m.group(0)

    templates = pywikibot.textlib.extract_templates_and_params(original)
    if not templates:
        return original

    ta = templates[0]
    # r_pat can capture a truncated outer call such as
    # "{{ja-pos|{{foo|a|b}}"; the first parsed template is then the inner
    # one, which must not be rewritten in place of the ja-pos call.
    if ta[0].strip() != 'ja-pos':
        return original

    params = ta[1]
    arg1 = params.get('1')
    arg2 = params.get('2')
    if arg1 is None or arg2 is None:
        # Fewer than two positional parameters: nothing to swap.
        return original

    if not _ASCII_WORDS.fullmatch(arg2) or _ASCII_WORDS.fullmatch(arg1):
        return original

    print(original)
    b_changed = True
    params['1'] = arg2
    params['2'] = arg1
    # Collapse the glued template back onto one line and drop the explicit
    # "N=" names directly after a pipe, so the result uses positional
    # parameters again.
    result = re.sub(r'\r|\n|(?<=\|)\d+=', '', pywikibot.textlib.glue_template_and_params(ta))
    print(result)
    return result

# Number of pages saved with at least one rewritten template.
count = 0
for page in pg:
    print(page)
    # r_rep sets this module-level flag when it rewrites a template.
    b_changed = False
    rewritten_text = r_pat.sub(r_rep, page.text)
    page.text = rewritten_text
    if not b_changed:
        # Nothing was rewritten on this page: record its title in gather.txt.
        with open('gather.txt', 'a', encoding="utf-8") as log_file:
            log_file.write(page.title() + '\n')
    else:
        count += 1
        page.save('ja-pos|1=kana|2=pos to ja-pos|1=pos|2=kana')
    # Running total, printed after every page.
    print('count: {0}'.format(count))