Source as of 09:35, 28 December 2014 (UTC).
#!/home/sigma/.local/bin/python3
# -*- coding: utf-8 -*-
# LGPLv2+ license, look it up
import re
import builtins
import traceback
from datetime import datetime, timedelta
import ceterach
import passwords
import mwparserfromhell as mwparser
def main():
    """Log in as the bot account and run one protection-template pass.

    The API client is bound to the module-level name ``api`` (via
    ``global``) so that code outside this function can log out after
    the run has finished.

    The run is skipped, with a message printed to stdout, when the bot's
    ``is_allowed`` check reports that the shutoff page does not permit it.
    """
    global api
    api = ceterach.api.MediaWiki("https://en.wikipedia.org/w/api.php")
    api.login("Lowercase sigmabot", passwords.lcsb)
    api.set_token("edit")
    bot = ProtectionTemplateBot(api)
    # Only edit when the shutoff page permits it; an unconditional
    # truthy operand here would make the else branch unreachable.
    if bot.is_allowed:
        bot.run()
    else:
        print("Check the bot shutoff page!")
def allow_bots(text, user):
    """Return True if *text* does not opt out of edits by bot *user*.

    The page text is searched for ``{{nobots}}`` or for a ``{{bots|...}}``
    template carrying ``allow=none``, ``optout=all``, ``deny=all`` or a
    ``deny=`` list that mentions *user*.

    Args:
        text: Raw wikitext of the page.
        user: Bot username to look for in a ``deny=`` list.  It is matched
            literally: regex metacharacters in the name are escaped.

    Returns:
        ``False`` when one of the exclusion templates above is found,
        ``True`` otherwise.
    """
    # Escape the username so characters such as "." or "(" in it are not
    # interpreted as regex syntax.
    pattern = (
        r'\{\{(nobots|bots\|(allow=none|deny=.*?'
        + re.escape(user)
        + r'.*?|optout=all|deny=all))\}\}'
    )
    return not re.search(pattern, text)
#builtins.print = lambda *args, **kwargs: None
class ProtectionTemplateBot:
    """Bot that adds, corrects and removes protection templates on pages.

    Candidate pages are examined one at a time in :meth:`run`; depending
    on the page's protection settings and on which protection templates
    it already transcludes, the bot adds a template, rewrites the
    existing ones, or strips them.
    """

    # Wikitext appended to protected redirects.
    REDIR_TL = "{{r protected}}"
    # Lower-case names of templates that flag edit protection.
    EDIT_TL = {"pp-dispute", "pp-vandalism", "pp-template",
               "pp-semi-sock", "pp-semi-blp", "pp-semi-indef",
               "pp-protected", "pp-office", "pp-reset",
               "pp-semi", "pp-semi-protect", "sprotect",
               "sprotected", "semiprotected", "pp-semi-prot",
               "pp-semi-vandalism", "pp-semi-protected",
               "pp-full", "pp-blp", "pp-sock",
               }
    # Lower-case names of templates that flag move protection.
    MOVE_TL = {"pp-move-dispute", "pp-move-vandalism",
               "pp-move-indef", "pp-move", "mprotect",
               "m-protected", "mprotected2", "mpprotected"
               }
    # Every template name the bot treats as a protection template.
    PROT_TL = EDIT_TL | MOVE_TL | {"r protected", "r semi-protected", "r fully protected"}
    # (level, expiry) pairs describing a page with no protection at all.
    NO_PROTECTION = {'edit': (None,) * 2,
                     'create': (None,) * 2,
                     'move': (None,) * 2,
                     }

    def __init__(self, api, shutoff="User:Lowercase sigmabot/Shutoff"):
        """Store the API client and look up the shutoff page.

        Args:
            api: Client object used for every wiki request; it must offer
                ``page``, ``category``, ``iterator`` and ``call``.
            shutoff: Title of the page whose content enables the bot.
        """
        self.api = api
        self.shutoff_page = api.page(shutoff)

    @staticmethod
    def _template_name(template):
        """Return the template's name stripped of whitespace and lower-cased."""
        # Names parsed from "{{pp-semi\n|small=yes}}" or "{{ pp-semi }}"
        # keep their surrounding whitespace, which would defeat the set
        # lookups against EDIT_TL / MOVE_TL / PROT_TL.
        return str(template.name).strip().lower()

    @property
    def is_allowed(self):
        """True when the shutoff page's content is "true" (any letter case)."""
        return self.shutoff_page.content.lower() == "true"  # or True

    @property
    def protected_pages(self):
        """Yield the pages that should be examined during this run.

        If command-line arguments were given, only the pages they name
        are yielded.  Otherwise the members of the "incorrect protection
        templates" tracking category come first, followed by the pages
        named in protection log entries.  Pages in namespaces 2 and 3
        (User and User talk on MediaWiki) are skipped in both sources.
        """
        import sys
        if len(sys.argv) > 1:
            for arg in sys.argv[1:]:
                yield self.api.page(arg)
            return
        for x in self.api.category("Category:Wikipedia pages with incorrect protection templates").members:
            if x.namespace not in (2, 3):
                yield x
        # NOTE(review): 150 is presumably the number of log entries to
        # fetch -- confirm against the iterator's signature.
        for x in self.api.iterator(150, list='logevents', letype='protect'):
            # The nonexistent page check will be done later.
            if x['ns'] in (2, 3):
                continue
            yield self.api.page(x['title'])

    def check_rev_stamp(self, page):
        """Return True if the latest revision is more than 15 minutes old.

        Args:
            page: Title of the page (a string, not a page object).

        Returns:
            ``True`` when more than 15 minutes separate the current UTC
            time from the revision timestamp returned by the API,
            ``False`` otherwise.
        """
        q = {"action": "query", "prop": "revisions", "rvprop": "timestamp", "titles": page}
        res = self.api.call(**q)["query"]["pages"]
        # Only one title was requested, so take the first page entry.
        info = tuple(res.values())[0]["revisions"][0]["timestamp"]
        stamp = datetime.strptime(info, "%Y-%m-%dT%H:%M:%SZ")
        return (datetime.utcnow() - stamp) > timedelta(seconds=15 * 60)

    def check_if_page_needs_edits(self, page):
        """Return a mismatch score between templates and protection.

        This method is a crappy hack that you should never read
        for inspiration.

        For every protection template found on the page the score is
        incremented once per disagreement with the page's edit/move
        protection; zero means no disagreement was detected.
        """
        prot_info = {k: v[0] for (k, v) in page.protection.items()}
        tls = self.get_templates_on(page)
        # Drop the namespace prefix ("Template:") from each title.
        tl_on_page = [x.title.lower().partition(":")[2] for x in tls]
        pp_tl_on_page = self.PROT_TL.intersection(tl_on_page)
        score = 0
        for tl in pp_tl_on_page:
            if tl in self.EDIT_TL and not prot_info["edit"]:
                score += 1
            elif tl in self.MOVE_TL and not prot_info["move"]:
                score += 1
            if prot_info['edit'] and tl not in self.EDIT_TL:
                score += 1
            if prot_info['move'] and tl not in self.MOVE_TL:
                score += 1
        return score

    def build_template(self, page, **options):
        """Build the substituted protection-template wikitext for *page*.

        Args:
            page: Page object; its ``protection`` mapping of
                ``{type: (level, expiry)}`` and its ``namespace`` are read.
            **options: Extra template parameters added verbatim
                (for example ``addedit='no'``).

        Returns:
            The template wikitext, or ``""`` if the template ended up
            identical to the bare one.
        """
        protection = mwparser.parse("{{subst:User:LikeLakers2/SWP/sync-pp}}")
        untouched_template = str(protection)
        tl = protection.filter_templates()[0]
        tl.add("small", "{{subst:User:Lowercase sigmabot/is not talk}}")
        # An expiry equal to datetime.max is treated as indefinite.
        infinity = datetime.max
        prot_info = page.protection
        prot_expiries = {k: info[1] for (k, info) in prot_info.items()}
        for (k, v) in options.items():
            tl.add(k, v)
        for k in prot_expiries:
            if not prot_expiries[k]:
                continue
            if k == "edit":
                if prot_expiries[k] == infinity:
                    tl.add("expiry", "indef")
                    if page.namespace == 10:
                        tl.add("reason", "template")
                    else:
                        tl.add("reason", "long-term")
                else:
                    tl.add("expiry", str(prot_expiries[k]))
            if k == "move":
                level, expiry = prot_info[k]
                if level == "autoconfirmed":
                    continue  # Do not add pp-move for semiprotected pages
                elif level == "sysop":
                    if expiry == infinity:
                        tl.add("moveexpiry", "indef")
                        tl.add("movereason", "generic")
                    else:
                        tl.add("moveexpiry", str(expiry))
        # NOTE(review): ``small`` is added after the snapshot above is
        # taken, so this comparison looks like it can never be equal --
        # confirm whether the snapshot should be taken later.
        if str(protection) == untouched_template:
            return ""
        return str(protection)

    def selectively_remove(self, page):
        """Fix up a protected page that already carries protection templates.

        Templates for a protection type the page does not have are
        removed, and a fresh template is prepended that is told (through
        ``addedit``/``addmove``) not to duplicate the ones that are kept.
        Redirects are left alone.  Nothing is saved when both kinds of
        template are already present or when the text would not change.
        """
        print("Selective removal on {!r}".format(page.title))
        kw = {}
        prot_info = {k: v[0] for (k, v) in page.protection.items()}
        original_text = page.content
        code = mwparser.parse(page.content)
        templates = code.filter_templates()
        if page.is_redirect:
            # This method is only called if there are templates on the
            # page and the page is protected at all
            return
        names = [self._template_name(template) for template in templates]
        # This is the selective removal part
        if not prot_info['edit']:
            for template, name in zip(templates, names):
                if name in self.EDIT_TL:
                    print("\tRemoving edit templates")
                    code.remove(template)
        if not prot_info['move']:
            for template, name in zip(templates, names):
                if name in self.MOVE_TL:
                    print("\tRemoving move templates")
                    code.remove(template)
        # This is the selective adding part
        if prot_info['edit']:
            if self.EDIT_TL.intersection(names):
                kw['addedit'] = 'no'
                print("\taddedit=no")
        if prot_info['move']:
            if self.MOVE_TL.intersection(names):
                kw['addmove'] = 'no'
                print("\taddmove=no")
        # Both kinds of template are already in place: nothing to add.
        if len(kw) > 1:
            print("Nevermind, {!r} doesn't need edits".format(page.title))
            return
        text = self.build_template(page, **kw) + str(code)
        if text == original_text:
            print("Nevermind, {!r} doesn't need edits".format(page.title))
            return
        page.edit(text, "Correcting protection templates) (bot", minor=True, bot=True)

    def get_templates_on(self, page):
        """Yield a page object for each template transcluded on *page*.

        Only namespace-10 templates are requested.  Nothing is yielded
        when the API returns no result or a result without a
        ``"templates"`` entry.
        """
        tl = tuple(self.api.iterator(1000, prop="templates", tlnamespace=10, titles=page.title, tllimit=1000))
        # Guard against an empty result before indexing into it.
        if not tl or not tl[0].get("templates", None):
            return
        for x in tl[0]["templates"]:
            yield self.api.page(x["title"])

    def rm_templates(self, page):
        """Strip every protection template from *page* and save it.

        A template is removed if its name is in ``PROT_TL`` or starts
        with ``"pp-"``.

        Returns:
            Whatever ``page.edit`` returns.
        """
        text = mwparser.parse(page.content)
        summ = "Removing protection templates) (bot"
        print(page.title)
        for tl in text.filter_templates():
            name = self._template_name(tl)
            if name in self.PROT_TL or name.startswith("pp-"):
                text.remove(tl)
        text = str(text)
        print("Removing templates from {!r}".format(page.title))
        return page.edit(text, summary=summ, minor=True, bot=True)

    def add_to_redir(self, page):
        """Append ``REDIR_TL`` to a redirect unless "r protected" is already transcluded.

        Returns:
            Whatever ``page.append`` returns, or ``None`` when skipped.
        """
        templates_on_page = (x.title.partition(":")[2].lower() for x in self.get_templates_on(page))
        if "r protected" in templates_on_page:
            print("{!r} already has a redirect template".format(page.title))
            return
        print("Adding templates to redirect {!r}".format(page.title))
        return page.append(self.REDIR_TL, "Adding protection template to redirect) (bot", minor=True, bot=True)

    def add_templates(self, page):
        """Add a freshly built protection template to *page*.

        The template is appended in namespace 10 and prepended everywhere
        else.  Nothing is saved when ``build_template`` returns only
        whitespace.

        Returns:
            Whatever the append/prepend call returns, or ``None`` when
            the page is skipped.
        """
        summary = "Adding protection templates) (bot"
        print("Adding templates to {!r}".format(page.title))
        tl = self.build_template(page)
        if not tl.strip():
            print("\tNevermind, skipping {!r}".format(page.title))
            return
        meth = page.append if page.namespace == 10 else page.prepend
        # Table and heading markup is followed by a newline after the
        # template -- presumably because it must start its own line.
        if page.content.startswith(("{|", '=')):
            tl += "\n"
        return meth(tl, summary, minor=True, bot=True)

    def run(self):
        """Examine every candidate page and edit those that need it.

        A page is skipped when it does not exist, is in namespace 10, is
        longer than 150000 characters, is fully edit-protected,
        transcludes "documentation", opts out of bot edits, has
        "wikipedia signpost" in its title, or was edited within the last
        15 minutes.  Exceptions raised while editing a page are printed
        and the run moves on to the next page.
        """
        for page in self.protected_pages:
            if not page.exists:
                print("{!r} doesn't exist.".format(page.title))
                continue
            if page.namespace == 10 or len(page.content) > 150000:
                continue
            protection = {k: v[0] for (k, v) in page.protection.items()}
            if 'sysop' in str(protection['edit']):  # Bad programming :(
                print("{!r} is full protected!".format(page.title))
                continue
            # Template titles with the namespace prefix stripped, lower-cased.
            templates_on_page = [x.title.partition(":")[2].lower() for x in self.get_templates_on(page)]
            has_pp_template = self.PROT_TL.intersection(templates_on_page)
            if "documentation" in templates_on_page:
                print("{!r} has a doc template on it.".format(page.title))
                continue
            if not allow_bots(page.content, "Lowercase sigmabot"):
                print("{!r} does not allow the bot to edit".format(page.title))
                continue
            if page.title.lower().count("wikipedia signpost"):
                print("{!r} is the signpost.".format(page.title))
                continue
            if not self.check_rev_stamp(page.title):
                print("{!r} needs to wait until 15 minutes after most recent revision".format(page.title))
                continue
            try:
                if not has_pp_template:
                    # The names in templates_on_page carry no namespace
                    # prefix, so compare against the bare template name.
                    if page.is_redirect and "r protected" not in templates_on_page:
                        if protection["edit"]:
                            self.add_to_redir(page)
                    else:
                        if len(page.content) > 150000:
                            print("{!r} is too long for us to safely determine the templates on the page.".format(page.title))
                            continue
                        self.add_templates(page)
                else:
                    if any(protection.values()):  # Has protection and pp template
                        self.selectively_remove(page)
                    else:  # No protection, but has_pp_template
                        self.rm_templates(page)
            except Exception:
                # Best effort: report the failure and carry on with the
                # next page rather than aborting the whole run.
                print(repr(page.title), "error")
                traceback.print_exc()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Log out even when main() raises, so the session is not left
        # open.  ``api`` is only bound once main() has created the
        # client, hence the guard.
        if "api" in globals():
            api.logout()