# User:Huhu9001Bot/13:45, 4 June 2023 (UTC)
#
# From Wiktionary, the free dictionary
# Jump to navigation Jump to search
import pywikibot
import pywikibot.pagegenerators

import tempParser
import re

# All pages are read from and written to English Wiktionary.
_site = pywikibot.Site('en', 'wiktionary')
pgf = pywikibot.pagegenerators.GeneratorFactory(site=_site)

# Page selection, expressed as pywikibot command-line generator options:
#   -cat:            take pages from the named tracking category
#   -titleregexnot:  drop pages whose title matches the regex; the character
#                    class is negated, so any title containing a character
#                    outside the listed kana ranges is dropped
_generator_options = [
    '-cat:Japanese terms with redundant head parameter',
    r'-titleregexnot:[^ぁ-ゞァ-ヺー-ヾ]',
]
pgf.handle_args(_generator_options)

# Single generator combining the options above; consumed by the edit loop.
pg = pgf.getCombinedGenerator()

# One wikilink inside |head=: group 1 is the opening "[[" plus an optional
# "target|" prefix, group 2 is the displayed text, group 3 is the closing "]]".
_HEAD_LINK_RE = re.compile(r'(\[\[(?:[^\|\]]+\|)?)([^\]]+)(\]\])')

# Characters tolerated between two consecutive characters of a link's text
# when looking for that text in the positional argument.
_KANA_FILLER = r'[-^ \.%]*'

# An opening "[[" followed by another "[" before any "|" or "]", i.e. a link
# that has ended up inside another link.
_NESTED_LINK_RE = re.compile(r'\[\[[^\|\]]*\[')


def r_rep(title, args):
    """Fold a redundant ``head`` argument into the template's positional one.

    The target slot is positional parameter 2 for ``ja-pos`` and 1 for every
    other template name.

    * No ``head``: nothing changes.
    * ``head`` present, slot absent: the ``head`` value is moved to the slot.
    * Both present: every wikilink found in ``head`` is re-applied to the
      first occurrence of its text in the slot value. Between the characters
      of the text, any run of ``-``, ``^``, space, ``.`` or ``%`` is allowed,
      and an occurrence directly followed by ``]`` is skipped. When all links
      were transferred, ``head`` is deleted. When a link's text cannot be
      found, or wrapping it would nest a link inside another link, processing
      stops: ``head`` is kept, and links transferred before that point remain
      in the slot value.

    Args:
        title: Template name.
        args: Mapping of parameter key to value; mutated in place.
            NOTE(review): positional parameters are assumed to be keyed by
            ``int`` -- confirm against ``tempParser``.

    Returns:
        The ``(title, args)`` pair, with ``args`` being the same object that
        was passed in.
    """
    slot = 2 if title == 'ja-pos' else 1

    if 'head' not in args:
        return title, args

    if slot not in args:
        args[slot] = args['head']
        del args['head']
        return title, args

    for link in _HEAD_LINK_RE.finditer(args['head']):
        opener, text, closer = link.groups()

        # Escape each character so link text containing regex metacharacters
        # is matched literally instead of corrupting the pattern.
        pattern = _KANA_FILLER.join(map(re.escape, text)) + r'(?!\])'

        # A callable replacement keeps backslashes in the link prefix from
        # being interpreted as replacement-template escapes.
        linked = re.sub(
            pattern,
            lambda hit: opener + hit.group(0) + closer,
            args[slot],
            count=1,
        )

        if linked == args[slot]:
            # The link text does not occur in the slot value; give up.
            break
        if _NESTED_LINK_RE.search(linked) is not None:
            # The text was found inside an existing link; give up.
            break
        args[slot] = linked
    else:
        # Every link was carried over, so |head= is no longer needed.
        del args['head']

    return title, args

# Walk every selected page, rewrite its Japanese headword templates through
# r_rep, and save the result back to the wiki.
for page in pg:
    print(page)  # progress output: one line per processed page
    # presumably subTemp invokes r_rep(title, args) for each template whose
    # name matches the regex and substitutes the returned template -- confirm
    # against tempParser
    rewritten = tempParser.subTemp(
        'ja-(?:noun|verb|adj|pos|phrase)',
        r_rep,
        page.text,
    )
    page.text = rewritten
    page.save('fix |head= for pure kana entries')