User:BrownHairedGirl/Election links cleanup/AWB custom module

Warning and disclaimer

If you are considering using this module yourself, please note that:

  1. I published this code for the sake of transparency, so that other editors could examine precisely what I am doing. I have given no consideration to others using it, but since it's on a Wikipedia page, it's licensed for re-use. So anyone is entitled to use it and/or hack it as they wish.
  2. The bottom line is that WP:ELLINKS is on the edge of WP:NOTBROKEN. In some cases its use amounts to a minor violation of NOTBROKEN; in other cases it's fine.
  3. So there is always a possibility that someone could object, but I note that the discussions on my talk page have been broadly positive, and I get lots of thanks notifications for my WP:ELLINKS edits. So make your own call on how close to the wind you want to sail.
  4. I have not attempted to perfectly generalise this module or to document its quirks and failings. It's good enough for my own use, and I don't intend to polish it for publication.
  5. As with any code, I recommend that it be run only if you either have confidence that it has been polished and checked (which has not happened here), or you understand what the code is doing and have satisfied yourself that it does what you want, without errors.

Here is the code, with no guarantees that it is fit for any purpose whatsoever. Note that WP:AWBRULES #1 says "You are responsible for every edit made. Do not sacrifice quality for speed, and review all changes before saving."

So ... if you decide to use this module, and it blows up your house, kills all babies within a ten-mile radius, and triggers a new ice-age, that is your responsibility.

Code

// v004 -- now handles plebiscites, and {{Main|foo}}, {{See also|foo}} etc
// Module entry point: rewrites old-style election titles of the form
// "<name> election, [<month>] <yyyy>" to "[<month>] <yyyy> <name> election".
//
// Candidate titles are looked for in three contexts:
//   1. wikilink targets               [[...]]  (excluding Category: links)
//   2. selected infobox parameters    term_start/term_end, next_election,
//                                     previous_election, election_name
//   3. hatnote templates              {{Main|...}}, {{See also|...}} etc
//
// ArticleText   : the wikitext to process.
// ArticleTitle  : not used by this module.
// wikiNamespace : not used by this module.
// Summary (out) : "[[WP:ELLINKS]] (done/matches): [[old]] → [[new]]; ..."
// Skip (out)    : set to true when no replacement was made (never set to true
//                 while the local "diagnose" switch is on).
// Returns the updated wikitext; with "diagnose" on, a diagnostics report is
// appended to the returned text.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
	Skip = false;
	Summary = "[[WP:ELLINKS]]";
	string retval = ArticleText;

	// old title -> new title, for titles that were rewritten / could not be rewritten
	Dictionary<string, string> linksDone = new Dictionary<string, string>();
	Dictionary<string, string> replacementFailed = new Dictionary<string, string>();
	// titles vetoed by processElectionName, in order of first appearance
	List<string> rejectedTitles = new List<string>();

	// bool diagnose = true;
	bool diagnose = false;
	// A StringBuilder avoids re-copying the whole report on every append in the loop.
	System.Text.StringBuilder diagnostics = new System.Text.StringBuilder("\n\n=================== DIAGNOSTICS ===================\n\n");

	Regex rx = new Regex(@"(?<all>((?<=\[\[)(?!Category:) *(?<allbutends>(?<elname>[^\|\]]+\b(referendum|plebiscite|(by-?)?election)s?)(?<comma>,?[ _]+)(?<month>((January|February|March|April|May|June|July|August|September|October|November|December)[_ ]+)?)(?<yyyy>\d\d\d\d))[_ ]*(?=[\|\]#]))|(((?<=\{\{[^\{\}]*[I]nfobox[^\{\}]*\|\s*(term_(start|end)\d*|next_election|previous_election|election_name)\s*=\s*)(?<allbutends2>(?<elname2>[^\|\]\[\}]+\b(referendum|plebiscite|(by-?)?election)s?)(?<comma2>,?[ _]+)(?<month2>((January|February|March|April|May|June|July|August|September|October|November|December)[_ ]+)?)(?<yyyy2>\d\d\d\d))(?=\s*[\|\}]))|((?<=\{\{([mM]ain|[mM]ain[_ ]?[aA]rticle|Further[_ ]?(information|info)?|([mM]ore ?)[dD]etails|[sS]ee ?[aA]lso)\s*\|\s*)(?<allbutends3>(?<elname3>[^\|\]]+\b(referendum|plebiscite|(by-?)?election)s?)(?<comma3>,?[ _]+)(?<month3>((January|February|March|April|May|June|July|August|September|October|November|December)[_ ]+)?)(?<yyyy3>\d\d\d\d))(?=\s*[\|\}]))))", RegexOptions.Singleline);

	// Matches are always taken from the untouched input; replacements accumulate in retval.
	MatchCollection matches = rx.Matches(ArticleText);
	int matchCount = matches.Count;
	diagnostics.AppendFormat("{0} matches found\n\n", matchCount);

	foreach (Match match in matches)
	{
		GroupCollection groups = match.Groups;

		diagnostics.Append("\n# All = ").Append(groups["all"].Value).Append("\n");
		diagnostics.Append("\n# elname = ").Append(groups["elname"].Value).Append("\n");
		diagnostics.Append("\n# elname2 = ").Append(groups["elname2"].Value).Append("\n");
		diagnostics.Append("\n# elname3 = ").Append(groups["elname3"].Value).Append("\n");

		// Exactly one alternative of the pattern matched; its groups carry the
		// suffix "" (wikilink), "2" (infobox parameter) or "3" (hatnote template).
		string groupSuffix;
		if (groups["elname"].Success) {
			groupSuffix = "";
		}
		else if (groups["elname2"].Success) {
			groupSuffix = "2";
		}
		else {
			groupSuffix = "3";
		}

		string thisElectionOldname;
		string thisElectionNewname;
		bool doThisOne = processElectionName(
			groups["elname" + groupSuffix].Value,
			groups["comma" + groupSuffix].Value,
			groups["month" + groupSuffix].Value,
			groups["yyyy" + groupSuffix].Value,
			out thisElectionOldname,
			out thisElectionNewname);

		diagnostics.Append("\n* Checking [[").Append(thisElectionOldname).Append("]]: ").Append(doThisOne ? "VALIDNAME" : "BANNED").Append(" ");

		if (!doThisOne) {
			if (!rejectedTitles.Contains(thisElectionOldname)) {
				rejectedTitles.Add(thisElectionOldname);
			}
			continue;
		}

		// One successful pass rewrites every occurrence of a title, so a title
		// that has already been done needs no further work.
		if (linksDone.ContainsKey(thisElectionOldname)) {
			continue;
		}

		string myReplaced;
		if (!electionRegexReplace(retval, thisElectionOldname, thisElectionNewname, out myReplaced)) {
			if (!replacementFailed.ContainsKey(thisElectionOldname)) {
				replacementFailed.Add(thisElectionOldname, thisElectionNewname);
			}
			diagnostics.Append("REPLACEMENT-FAILED\n");
			continue;
		}

		linksDone.Add(thisElectionOldname, thisElectionNewname);
		retval = myReplaced;
		diagnostics.Append("REPLACEMENT-SUCCEEDED\n");
	}

	diagnostics.Append("\n\n===================\nSUCCESSFUL REPLACEMENTS (").Append(linksDone.Count).Append("):\n").Append(DictionaryToString(linksDone, "* ", "\n"));
	diagnostics.Append("\n===================\nFAILED REPLACEMENTS (").Append(replacementFailed.Count).Append("):\n").Append(DictionaryToString(replacementFailed, "* ", "\n"));
	diagnostics.Append("\n===================\nREJECTED TITLES (").Append(rejectedTitles.Count).Append("):");
	foreach (string aRejected in rejectedTitles) {
		diagnostics.Append("\n* [[").Append(aRejected).Append("]]");
	}

	// DictionaryToString appends the separator after every entry, including the
	// last one; drop that dangling "; " so the edit summary ends cleanly.
	string doneList = DictionaryToString(linksDone, "", "; ");
	if (doneList.EndsWith("; ", System.StringComparison.Ordinal)) {
		doneList = doneList.Substring(0, doneList.Length - 2);
	}
	Summary = Summary + " (" + linksDone.Count + "/" + matchCount + "): " + doneList;

	if (diagnose) {
		return retval + "\n" + diagnostics.ToString();
	}

	// Nothing was rewritten, so tell the caller to skip this page.
	if (linksDone.Count == 0) {
		Skip = true;
	}
	retval = retval.Replace("<br>", "<br />");
	return retval;
}
// Builds the old-style title ("<name><comma> <month> <year>") and the new-style
// title ("<month> <year> <name>") from the captured pieces of a match, with
// runs of spaces/underscores collapsed to one space and the ends trimmed.
//
// bareOldName (out) : normalised old-style title.
// bareNewName (out) : normalised new-style title.
// Returns false when the old-style title contains one of the banned words or
// phrases (matched case-insensitively on word boundaries), true otherwise.
// Both out parameters are populated either way.
public bool processElectionName(string electionName, string electionComma, string electionMonth, string electionYear, out string bareOldName, out string bareNewName)
{
	// index 0 = old-style title, index 1 = new-style title
	string[] titles = new string[] {
		electionName + electionComma + " " + electionMonth + " " + electionYear,
		electionMonth + " " + electionYear + " " + electionName
	};

	for (int i = 0; i < titles.Length; i++)
	{
		// collapse every run of spaces/underscores, then strip the ends
		string collapsed = Regex.Replace(titles[i], @"[_ ]+", " ");
		titles[i] = Regex.Replace(collapsed, @"^\s*(.*?)\s*$", "$1");
	}

	bareOldName = titles[0];
	bareNewName = titles[1];

	// Titles containing any of these words are not plain election articles
	// (lists, results, polling pages and so on), so they are vetoed.
	bool isBanned = Regex.IsMatch(bareOldName, @"\b(Boundary|list|in the|at the|for the|elected|endorsements?|returned|results?|candidates?|selection|selected|polls?|polling|opinion|debates?)\b", RegexOptions.IgnoreCase);
	return !isBanned;
}
// Replaces every reference to the old-style title bareOldName in inputText
// with bareNewName, in three contexts:
//   1. wikilink targets:            [[old]], [[old|label]], [[old#section]]
//   2. hatnote templates:           {{Main|old}}, {{See also|old}} etc
//   3. selected infobox parameters: term_start/term_end (numbered or not),
//                                   next_election, previous_election, election_name
//
// inputText        : wikitext to search.
// bareOldName      : old-style title, normalised to single spaces (as produced
//                    by processElectionName).
// bareNewName      : title to substitute; inserted literally.
// outputText (out) : the rewritten text, or the unchanged input when nothing matched.
// Returns true when at least one replacement changed the text.
public bool electionRegexReplace(string inputText, string bareOldName, string bareNewName, out string outputText)
{
	// Regex.Escape renders each literal space as "\ ". The old title arrives
	// normalised to single spaces, but the page itself may use underscores or
	// repeated spaces, so let every gap match any run of spaces/underscores.
	string encodedOldName = Regex.Escape(bareOldName).Replace(@"\ ", "[_ ]+");

	// "$" starts a substitution in a .NET replacement pattern; double it so
	// the new title is always inserted verbatim.
	string literalNewName = bareNewName.Replace("$", "$$");

	string fixedText = inputText;

	// 1. wikilink targets
	fixedText = Regex.Replace(fixedText, @"(?<=\[\[)" + encodedOldName + @"[_ ]*(?=[\|\]#])", literalNewName);

	// 2. hatnote templates
	fixedText = Regex.Replace(fixedText, @"(?<=\{\{([mM]ain|[mM]ain[_ ]?[aA]rticle|Further[_ ]?(information|info)?|([mM]ore ?)[dD]etails|[sS]ee ?[aA]lso)\s*\|\s*)" + encodedOldName + @"[_ ]*(?=[\|\}])", literalNewName);

	// 3. infobox parameters. The digit suffix on term_start/term_end is
	//    optional (\d*), matching the pattern ProcessArticle uses to find
	//    candidates; requiring a digit here made unnumbered parameters fail.
	fixedText = Regex.Replace(fixedText, @"(?<=\{\{[^\{\}]*[I]nfobox[^\{\}]*\|\s*(term_(start|end)\d*|next_election|previous_election|election_name)\s*=\s*)" + encodedOldName + @"(?=\s*[\|\}])", literalNewName, RegexOptions.Singleline);

	// successful replacement if and only if the text changed
	outputText = fixedText;
	return fixedText != inputText;
}
// Renders each entry of myDic as
//     prefix + "[[key]] → [[value]]" + suffix
// and returns the pieces joined in the dictionary's enumeration order.
// An empty dictionary yields an empty string.
public string DictionaryToString(Dictionary<string,string> myDic, string prefix, string suffix)
{
	System.Text.StringBuilder rendered = new System.Text.StringBuilder();
	foreach (KeyValuePair<string, string> entry in myDic)
	{
		rendered.Append(prefix);
		rendered.Append("[[");
		rendered.Append(entry.Key);
		rendered.Append("]] → [[");
		rendered.Append(entry.Value);
		rendered.Append("]]");
		rendered.Append(suffix);
	}
	return rendered.ToString();
}