User:AnomieBOT/source/tasks/AccidentalLangLinkFixer.pm

package tasks::AccidentalLangLinkFixer;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    AccidentalLangLinkFixer
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 43
Status:  Approved 2011-01-02
Created: 2010-09-11

Periodically checks pages in [[:Category:Pages automatically checked for
incorrect links]] for categories and language links seemingly in
running text, and adds the necessary ":" to make them wikilinks instead.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Categories whose member pages are scanned by run(); the list is passed as
# the gcmtitle values of the categorymembers generator.
my @categories = (
    'Category:Pages automatically checked for accidental language links',
    'Category:Pages automatically checked for incorrect links',
);
# Interval between the start of one full scan and the earliest start of the
# next: run() stores time()+$frequency when it creates a new iterator.
# NOTE(review): because the value is added directly to time(), it is used as
# SECONDS (600 s = 10 minutes), although the original comment said "minutes".
# Confirm which interval was intended before changing the number.
my $frequency=600; # used as seconds (see note above); originally labelled "minutes"

# Constructor. Builds the object via the parent class constructor, then
# initialises the task's own state:
#   iter - the category-member iterator currently in use (undef = none yet)
#   next - timestamp used by run() to compute when the next scan is due
# Returns the object blessed into the invoking class.
sub new {
    my ($class)=@_;
    my $self=$class->SUPER::new();
    @{$self}{qw/iter next/}=(undef, 0);
    return bless $self, $class;
}

=pod

=for info
Approved 2011-01-02.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 43]]

=cut

# Reports this task's approval value; always 3.
# NOTE(review): the meaning of the number is defined by the AnomieBOT
# framework and is not visible in this file -- confirm against
# AnomieBOT::Task before changing it.
sub approved {
    my $approval=3;
    return $approval;
}

# Main task entry point.
#
# Walks the members of @categories. For every page whose language links or
# categories differ from the values saved in $api->store, the page text is
# fetched and a ":" is inserted after the "[[" of every category or
# interlanguage link that was not protected beforehand (links alone on a
# line, category links with an empty title, and whatever strip_nowiki
# protects). If nothing needed fixing, the current link lists are saved so
# the page is skipped until they change again.
#
# Parameters:
#   $self - this task object (uses $self->{'iter'} and $self->{'next'})
#   $api  - the bot API object supplied by the framework
#
# Returns:
#   60   if the interwiki/namespace maps could not be loaded, or the page
#        list could not be retrieved
#   300  if the edit token reports the task as shut off
#   0    if the bot is halting, or the 5-minute budget has been used up
#   $self->{'next'}-time() once the whole list has been processed
# NOTE(review): presumably the framework interprets the return value as the
# number of seconds until run() should be called again -- confirm.
sub run {
    my ($self, $api)=@_;

    $api->task('AccidentalLangLinkFixer', 0, 10, qw/d::IWNS d::Nowiki/);

    # Page linked from every edit summary this task writes.
    my $help='User:'.$api->user.'/docs/AccidentalLangLinkFixer';

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Get regular expressions
    return 60 unless $api->load_IWNS_maps();
    my $llre=$api->interlanguage_re();
    my $clre=$api->namespace_re(14);

    # The iterator is kept in $self so that a scan interrupted by the time
    # budget resumes where it left off on the next call. The deadline for
    # the following scan is fixed when a new scan starts.
    if(!defined($self->{'iter'})){
        $self->{'iter'}=$api->iterator(
            generator => 'categorymembers',
            gcmtitle  => [ @categories ],
            gcmlimit  => 100,
            prop      => 'langlinks|categories',
            lllimit   => 'max',
            cllimit   => 'max',
        );
        $self->{'next'}=time()+$frequency;
    }
    while(my $pg=$self->{'iter'}->next){
        my $category = $self->{'iter'}->iterval;
        if(!$pg->{'_ok_'}){
            $api->warn("Failed to retrieve page list for $category: ".$pg->{'error'}."\n");
            return 60;
        }

        return 0 if $api->halting;

        my $page=$pg->{'title'};

        # Get list of langlinks and categories in a standardized format
        my $ll=join '|', sort map $_->{'lang'}.':'.$_->{'*'}, @{$pg->{'langlinks'}};
        my $cl=join '|', sort map $_->{'title'}, @{$pg->{'categories'}};

        # If they haven't changed, we need do nothing more here
        next if(($api->store->{"$page#ll"} // '') eq $ll && ($api->store->{"$page#cl"} // '') eq $cl);

        # Ugh, we need to check the page.
        my $tok=$api->edittoken($page, EditRedir => 1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
            next;
        }
        if(exists($tok->{'missing'})){
            $api->warn("WTF? $page does not exist?\n");
            next;
        }

        # Fix any bad links
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
        # Protect links that sit alone on a line (optionally surrounded by
        # whitespace, HTML comments or noinclude/includeonly/onlyinclude
        # tags): those are treated as intentional.
        # NOTE(review): strip_regex presumably swaps each match for a
        # placeholder recorded in $nowiki, restored later by
        # replace_stripped -- confirm against the d::Nowiki decorator.
        my ($outtxt,$nowiki)=$api->strip_regex(qr/^(?:\s|<!--.*?-->|<\/?(?:noinclude|includeonly|onlyinclude)>)*\[\[\s*(?:$llre|$clre)\s*:[^]]*\]\](?:\s|<!--.*?-->|<\/?(?:noinclude|includeonly|onlyinclude)>)*$/m, $intxt);
        # Also protect category links with an empty title, e.g.
        # "[[Category:]]" or "[[Category:|x]]".
        ($outtxt,$nowiki)=$api->strip_regex(qr/\[\[\s*(?:$clre)\s*:\s*(?:\|[^]]*)?\]\]/, $outtxt, $nowiki);
        ($outtxt,$nowiki)=$api->strip_nowiki($outtxt,$nowiki);
        my @summary=();
        # Whatever is left is taken to be in running text: add the leading
        # ":". The prefixes are wrapped in (?:...) exactly as in the strip
        # patterns above, so that "\s*:" applies to the whole prefix even if
        # the value interpolated is a plain string containing a top-level
        # alternation.
        push @summary, 'category' if $outtxt=~s/(\[\[\s*)((?:$clre)\s*:)/$1:$2/g;
        push @summary, 'language' if $outtxt=~s/(\[\[\s*)((?:$llre)\s*:)/$1:$2/g;
        $outtxt=$api->replace_stripped($outtxt,$nowiki);

        if(@summary){
            my $summary="Fixing accidental ".join(' and ', @summary)." links";
            $api->log("$summary in $page");
            my $r=$api->edit($tok, $outtxt, "[[$help|$summary]]", 1, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                next;
            }
            # The saved link lists are deliberately not updated here, so the
            # page is looked at again on a later pass.
        } else {
            $api->log("Updating saved category and language links for $page");
            $api->store->{"$page#ll"}=$ll;
            $api->store->{"$page#cl"}=$cl;
        }

        # If we've been at it long enough, let another task have a go.
        return 0 if time()>=$endtime;
    }

    # Whole list processed: drop the iterator so the next call starts a
    # fresh scan, and report the time remaining until the stored deadline.
    $self->{'iter'}=undef;
    return $self->{'next'}-time();
}

1;