# User:Benwing/blib.py
#
# From Wiktionary, the free dictionary
# (wiki-export page header, kept as a comment so the file parses)
#!/usr/bin/env python
#coding: utf-8

import pywikibot, mwparserfromhell, re, string, sys, codecs, urllib2, datetime, json

# Default site for all operations in this module (the pywikibot user
# configuration determines which wiki this connects to).
site = pywikibot.Site()


def display(page):
	"""Print a numbered wiki list item linking to ``page``."""
	pywikibot.output(u'# [[%s]]' % page.title())

def dump(page):
	"""Print the full wikitext of ``page`` (redirects are followed)."""
	text = page.get(get_redirect=True)
	pywikibot.output(u'Contents of [[{0}]]:\n{1}\n----------'.format(page.title(), text), toStdout = True)

def do_edit(page, func=None, null=False):
	"""Edit ``page``, retrying on transient server errors.

	If ``func`` is given it is called as ``func(page, parsed)`` where
	``parsed`` is the mwparserfromhell parse tree of the page text, and it
	must return ``(new_text, comment)``.  When the text changed, the page is
	saved with ``comment``; when it is unchanged (or ``func`` returned a
	falsy text) and ``null`` is True, the page cache is purged instead;
	otherwise the page is skipped with a message.  With no ``func`` the page
	cache is simply purged.

	Protected pages (LockedPage/NoUsername) are skipped; HTTP 503 responses
	are retried; any other error is logged and re-raised."""
	while True:
		try:
			if func:
				new, comment = func(page, mwparserfromhell.parser.Parser().parse(page.text, skip_style_tags=True))
				
				if new:
					new = unicode(new)
					
					if page.text != new:
						page.text = new
						page.save(comment = comment)
					elif null:
						pywikibot.output(u'Purged page cache for [[{0}]]'.format(page.title()), toStdout = True)
						page.purge(forcelinkupdate = True)
					else:
						pywikibot.output(u'Skipped [[{0}]]: no changes'.format(page.title()), toStdout = True)
				elif null:
					pywikibot.output(u'Purged page cache for [[{0}]]'.format(page.title()), toStdout = True)
					page.purge(forcelinkupdate = True)
				else:
					pywikibot.output(u'Skipped [[{0}]]: {1}'.format(page.title(), comment), toStdout = True)
			else:
				pywikibot.output(u'Purged page cache for [[{0}]]'.format(page.title()), toStdout = True)
				page.purge(forcelinkupdate = True)
		except (pywikibot.LockedPage, pywikibot.NoUsername):
			pywikibot.output(u'Skipped [[{0}]], page is protected'.format(page.title()))
		except urllib2.HTTPError as e:
			if e.code != 503:
				raise
			# 503 = service temporarily unavailable: retry the whole edit.
			# (Without this ``continue``, control fell through to the final
			# ``break`` and the retry loop never actually retried.)
			continue
		except:
			pywikibot.output(u'Error on [[{0}]]'.format(page.title()))
			raise
		
		break

def references(page, startsort = None, endsort = None, namespaces = None, includelinks = False):
	"""Yield the pages that reference ``page`` (a Page object or a title
	string), restricted to the 1-based slice (startsort, endsort], printing
	progress every 50 pages with an estimated finish time when ``endsort``
	is known."""
	if isinstance(page, basestring):
		page = pywikibot.Page(site, page)
	
	count = 0
	last_time = None
	batch = 50
	
	for ref in page.getReferences(onlyTemplateInclusion = not includelinks, namespaces = namespaces):
		count += 1
		
		if endsort is not None and count > endsort:
			break
		if startsort is not None and count <= startsort:
			continue
		if endsort is not None and last_time is None:
			last_time = datetime.datetime.now()
		
		yield ref
		
		if count % batch == 0:
			eta = ""
			if endsort is not None:
				prev_time = last_time
				last_time = datetime.datetime.now()
				batches_left = (endsort - count) / batch
				finish = last_time + (last_time - prev_time) * batches_left
				eta = ", est. " + finish.strftime("%X")
			pywikibot.output(str(count) + "/" + str(endsort) + eta)


def cat_articles(page, startsort = None, endsort = None):
	"""Yield the articles of a category (a Category object or a bare
	category name).  ``startsort``/``endsort`` may be 1-based integer
	indices or title strings (string endsort compares against the title
	without namespace)."""
	if isinstance(page, basestring):
		page = pywikibot.Category(site, "Category:" + page)
	
	# An integer startsort is handled by counting below; a string one is
	# passed straight to the API.
	title_start = None if isinstance(startsort, int) else startsort
	count = 0
	
	for article in page.articles(startsort = title_start):
		count += 1
		
		if isinstance(startsort, int) and count <= startsort:
			continue
		
		if endsort is not None:
			if isinstance(endsort, int):
				if count > endsort:
					break
			elif article.title(withNamespace=False) >= endsort:
				break
		
		yield article


def cat_subcats(page, startsort = None, endsort = None):
	"""Yield the subcategories of a category (a Category object or a bare
	category name).  ``startsort``/``endsort`` may be 1-based integer
	indices or title strings (string endsort compares against the full
	title)."""
	if isinstance(page, basestring):
		page = pywikibot.Category(site, "Category:" + page)
	
	# An integer startsort is handled by counting below; a string one is
	# passed straight to the API.
	title_start = None if isinstance(startsort, int) else startsort
	count = 0
	
	for subcat in page.subcategories(startsort = title_start):
		count += 1
		
		if isinstance(startsort, int) and count <= startsort:
			continue
		
		if endsort is not None:
			if isinstance(endsort, int):
				if count > endsort:
					break
			elif subcat.title() >= endsort:
				break
		
		yield subcat


def prefix(prefix, startsort = None, endsort = None, namespace = None):
	"""Yield pages whose titles begin with ``prefix``, restricted to the
	1-based index slice (startsort, endsort]."""
	for index, current in enumerate(site.prefixindex(prefix, namespace), 1):
		if startsort is not None and index <= startsort:
			continue
		if endsort is not None and index > endsort:
			break
		yield current

def stream(st, startsort = None, endsort = None):
	i = 0
	
	for name in st:
		i += 1
		
		if startsort != None and i <= startsort:
			continue
		if endsort != None and i > endsort:
			break
		
		if type(name) == str:
			name = str.decode(name, "utf-8")
		
		name = re.sub(ur"^[#*] *\[\[(.+)]]$", ur"\1", name, flags=re.UNICODE)
		
		yield pywikibot.Page(site, name)


def get_args():
	"""Read (startsort, endsort) from sys.argv[1:3].

	Each argument becomes an int when numeric, otherwise a UTF-8-decoded
	unicode string; absent arguments come back as None."""
	def parse(arg):
		try:
			return int(arg)
		except ValueError:
			return str.decode(arg, "utf-8")
	
	startsort = parse(sys.argv[1]) if len(sys.argv) >= 2 else None
	endsort = parse(sys.argv[2]) if len(sys.argv) >= 3 else None
	return (startsort, endsort)

# Cached metadata tables, populated on demand by the get*Data() functions
# below.  For each kind there is the raw list decoded from JSON plus two
# lookup dicts: one keyed by "code" and one keyed by "canonicalName".
languages = None
languages_byCode = None
languages_byCanonicalName = None

families = None
families_byCode = None
families_byCanonicalName = None

scripts = None
scripts_byCode = None
scripts_byCanonicalName = None

etym_languages = None
etym_languages_byCode = None
etym_languages_byCanonicalName = None

# NOTE(review): no loader for these is visible in this chunk — confirm a
# getWMLanguageData() exists elsewhere or these stay None.
wm_languages = None
wm_languages_byCode = None
wm_languages_byCanonicalName = None


def getData():
	"""Populate all cached metadata tables (languages, families, scripts
	and etymology languages)."""
	for loader in (getLanguageData, getFamilyData, getScriptData, getEtymLanguageData):
		loader()

def getLanguageData():
	"""Fetch the language table from the wiki and build the by-code and
	by-canonical-name lookup dicts."""
	global languages, languages_byCode, languages_byCanonicalName
	
	languages = json.loads(site.expand_text("{{#invoke:User:MewBot|getLanguageData}}"))
	languages_byCode = dict((lang["code"], lang) for lang in languages)
	languages_byCanonicalName = dict((lang["canonicalName"], lang) for lang in languages)


def getFamilyData():
	"""Fetch the language-family table from the wiki and build the by-code
	and by-canonical-name lookup dicts."""
	global families, families_byCode, families_byCanonicalName
	
	families = json.loads(site.expand_text("{{#invoke:User:MewBot|getFamilyData}}"))
	families_byCode = dict((fam["code"], fam) for fam in families)
	families_byCanonicalName = dict((fam["canonicalName"], fam) for fam in families)


def getScriptData():
	"""Fetch the script table from the wiki and build the by-code and
	by-canonical-name lookup dicts."""
	global scripts, scripts_byCode, scripts_byCanonicalName
	
	scripts = json.loads(site.expand_text("{{#invoke:User:MewBot|getScriptData}}"))
	scripts_byCode = dict((sc["code"], sc) for sc in scripts)
	scripts_byCanonicalName = dict((sc["canonicalName"], sc) for sc in scripts)


def getEtymLanguageData():
	"""Fetch the etymology-language table from the wiki and build the
	by-code and by-canonical-name lookup dicts."""
	global etym_languages, etym_languages_byCode, etym_languages_byCanonicalName
	
	etym_languages = json.loads(site.expand_text("{{#invoke:User:MewBot|getEtymLanguageData}}"))
	etym_languages_byCode = dict((etyl["code"], etyl) for etyl in etym_languages)
	etym_languages_byCanonicalName = dict((etyl["canonicalName"], etyl) for etyl in etym_languages)