User:Ahechtbot/sandbox/transclusioncount.py

transclusioncount.py sandbox

edit
#! ~/venv/bin/python3

# Public domain; bjweeks, MZMcBride, ahecht; 2008, 2016, 2018, 2019

########################### Transclusion count tool ###########################
# Updates subpages of the page specified by the "rootpage" variable with      #
# transclusion counts for all templates and modules on the wiki specified by  #
# the "code" and "family" variables. You can also specify the wiki via the    #
# command line, e.g.: "transclusioncount.py en wikiquote".                    #
#                                                                             #
# This script must be run on toolforge, with both Pywikibot and the toolforge #
# libraries installed. Pywikibot also relies on the requests library and      #
# either mwparserfromhell or wikitextparser.  See:                            #
# //wikitech.wikimedia.org/wiki/Help:Toolforge/My_first_Pywikibot_tool        #
# //wikitech.wikimedia.org/wiki/User:Legoktm/toolforge_library                #
# //www.mediawiki.org/wiki/Manual:Pywikibot/Installation/en                   #
#                                                                             #
# See the following for setting up Toolforge to run python jobs:              #
# //wikitech.wikimedia.org/wiki/Help:Toolforge/Python#Jobs                    #
#                                                                             #
# When creating the bootstrap_venv.sh file, include the following four lines  #
# (without the "#"s at the start of each line):                               #
# pip install toolforge                                                       #
# pip install pywikibot                                                       #
# pip install "mwparserfromhell>=0.5.0"                                       #
# pip install "wikitextparser>=0.47.5"                                        #
# pip install "requests>=2.20.1"                                              #
#                                                                             #
# and, after running chmod ug+x bootstrap_venv.sh, run it using the following #
# command (without the "#"):                                                  #
#                                                                             #
# toolforge jobs run bootstrap-venv --command "cd $PWD && ./bootstrap_venv.sh" --image python3.9 --wait
#                                                                             #
# To run automatically, use the following command:                            #
#                                                                             #
# toolforge-jobs run transclusioncount --command "pyvenv/bin/python scripts/transclusioncount.py" --image python3.9 --schedule "00 12 * * SUN"
#                                                                             #
# I would recommend changing 00 12 to a random time, to avoid running when    #
# lots of other scripts are using server resources                            #
###############################################################################

import toolforge
import pywikibot
import time
import sys
from math import log10, floor

# set debug = "no" to enable writing to wiki
class settings:
	code = 'en'
	family = 'wiki'
	rootpage = "Module:Transclusion count/"
	editsumm = "[[Wikipedia:BOT|Bot]]: Updated page."
	debug = "no"
	sigfigs = 2

report_title = settings.rootpage + 'data/'

report_template = '''\
return {
%s
}
'''

try:
	code = sys.argv[1]
except:
	code = settings.code
	
try:
	family = sys.argv[2]
except:
	family = settings.family
	
if family == 'wikipedia':
	settings.lang = code + 'wiki'
else:
	settings.lang = code + family

if family == 'wiki':
	wiki = pywikibot.Site(code, 'wikipedia')
else:
	wiki = pywikibot.Site(code, family)

query1 = '''
/* transclusioncount.py SLOW_OK */
SELECT
  lt_title,
  COUNT(*)
FROM templatelinks JOIN linktarget ON tl_target_id = lt_id
WHERE lt_namespace = 10
GROUP BY lt_title
HAVING COUNT(*) > 2000
LIMIT 10000;
'''

query2 = '''
/* transclusioncount.py SLOW_OK */
SELECT
  lt_title,
  COUNT(*)
FROM templatelinks JOIN linktarget ON tl_target_id = lt_id
WHERE lt_namespace = 828
GROUP BY lt_title
HAVING COUNT(*) > 2000
LIMIT 10000;
'''

connectSuccess = False
tries = 0

while not connectSuccess:
	try:
		conn = toolforge.connect(settings.lang, 'analytics')
		print("\nExecuting query1 at %s..." % (time.ctime()))
		with conn.cursor() as cursor:
			cursor.execute(query1)
			result1 = cursor.fetchall()
		print("\nExecuting query2 at %s..." % (time.ctime()))
		with conn.cursor() as cursor:
			cursor.execute(query2)
			result2 = cursor.fetchall()
		try:
			conn.close()
		except:
			pass
		connectSuccess = True
		print("Success at %s!" % (time.ctime()))
	except Exception as e:
		try:
			cursor.close()
		except:
			pass
		try:
			conn.close()
		except:
			pass
		print("Error: ", e)
		tries += 1
		if tries > 24:
			print("Script failed after 24 tries at %s."  % (time.ctime()))
			raise SystemExit(e)
		else:
			print("Waiting 1 hour starting at %s..."  % (time.ctime()))
			time.sleep(3600)

if settings.debug != "no":
	import os
	try:
		with open(os.path.join(os.getcwd(), 'result1.txt'), "w") as f:
			f.write(str(result1))
		with open(os.path.join(os.getcwd(), 'result2.txt'), "w") as f:
			f.write(str(result2))
	except Exception as e:
		print("Error writing to file: %s" % (e))
	print("\nBuilding output...")


output = {"A": [], "B": [], "C": [], "D": [], "E": [], "F": [], "G": [], "H": [], "I": [], "J": [], "K": [], "L": [], "M": [], "N": [], "O": [], "P": [], "Q": [], "R": [], "S": [], "T": [], "U": [], "V": [], "W": [], "X": [], "Y": [], "Z": [], "other": []}

for row in result1:
	try:
		lt_title = row[0].decode()
	except:
		lt_title = str(row[0])
	index_letter = lt_title[0]
	if row[1] < 100000: #Use an extra sigfig for very large counts
		sigfigs = settings.sigfigs - 1
	else:
		sigfigs = settings.sigfigs
	uses = round(row[1], -int(floor(log10(row[1])))+sigfigs)
	table_row = '''["%s"] = %i,''' % (lt_title.replace("\\", "\\\\").replace('"', '\\"'), uses)
	try:
		output[index_letter].append(table_row)
	except:
		output["other"].append(table_row)

for row in result2:
	try:
		lt_title = row[0].decode()
	except:
		lt_title = str(row[0])
	index_letter = lt_title[0]
	if row[1] < 100000: #Use an extra sigfig for very large counts
		sigfigs = settings.sigfigs - 1
	else:
		sigfigs = settings.sigfigs
	uses = round(row[1], -int(floor(log10(row[1])))+sigfigs)
	table_row = '''["Module:%s"] = %i,''' % (lt_title.replace("\\", "\\\\").replace('"', '\\"'), uses)
	try:
		output[index_letter].append(table_row)
	except:
		output["other"].append(table_row)

for section in output:
	report = pywikibot.Page(wiki, report_title + section)
	old_text = report.text
	report.text = report_template % ('\n'.join(output[section]))
	if settings.debug == "no":
		# print("Writing " + report_title + section)
		try:
			report.save(settings.editsumm)
		except Exception as e:
			print("Error at %s: %s" % (time.ctime(),e))
	else:
		print("== " + report_title + section + " ==\n\n" + report.text)
		
print("\nDone at %s!" % (time.ctime()))