User:AnomieBOT/source/tasks/NobotsHallOfShame.pm

package tasks::NobotsHallOfShame;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    NobotsHallOfShame
BRFA:    N/A
Status:  Begun 2011-09-11
Created: 2011-09-10

Update a list of {{tl|bots}} usage.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use Data::Dumper;

# Title of the wiki page holding the combined report. makepage() writes the
# per-namespace reports to subpages named "$basepage/<namespace number>".
my $basepage='User:AnomieBOT/Nobots Hall of Shame';

# Constructor.  Delegates object creation to the parent class, initialises
# the 'iter' slot to undef, and (re-)blesses the result into the requested
# class before handing it back.
sub new {
    my ($class)=@_;
    my $task=$class->SUPER::new;
    $task->{'iter'}=undef;
    return bless $task, $class;
}

=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that
affects only the operator's user and talk pages (or subpages thereof),
and which is not otherwise disruptive, may be run without prior
approval.

=cut

# Approval status reported for this task.  Always the constant 999; the
# meaning of that value is defined by the bot framework, not in this file
# (NOTE(review): presumably a marker for "runs without a BRFA" -- confirm).
sub approved {
    my $approval_code=999;
    return $approval_code;
}

# Main task body.
#
# 1. Returns early if the stored 'nextrun' timestamp is still in the future.
# 2. Lists every page transcluding Template:Bots (skipping subpages in
#    namespaces 2 and 3), noting which of them also transclude
#    Template:Copyvio/core.
# 3. For pages whose latest revision id differs from the one remembered in
#    the store, fetches the wikitext, strips nowiki/pre/source/
#    syntaxhighlight sections and HTML comments, and stores the
#    {{bots}}/{{nobots}} invocations found.
# 4. Calls makepage() for the combined report and then once per
#    non-negative namespace.
#
# Return value: seconds until 'nextrun' / the next multiple of 86400 epoch
# seconds on success, 0 when halting or when the 5-minute budget is used up,
# 300 after an API failure, or whatever defined value makepage() returned.
# (Presumably the framework treats this as a delay before the next call --
# TODO confirm.)
sub run {
    my ($self, $api)=@_;

    $api->task('NobotsHallOfShame', 0, 10, qw(d::IWNS));

    # Nothing to do until the scheduled time has arrived
    my $delay=($api->store->{'nextrun'}//0)-time();
    if($delay>0){
        return $delay;
    }

    # Namespace number => name map
    my %nsnames=$api->namespace_reverse_map;

    # Time budget: stop after 5 minutes and pick up again on the next call
    my $deadline=time()+300;

    # Everything that transcludes {{tl|bots}}
    my $embedded=$api->iterator(
        generator    => 'embeddedin',
        geititle     => 'Template:Bots',
        geilimit     => 'max',
        prop         => 'info|templates',
        tltemplates  => 'Template:Copyvio/core',
        tllimit      => 'max',
    );
    my (@allids, %idsbyns, %hascopyvio, @stale);
    while(my $page=$embedded->next){
        return 0 if $api->halting;
        unless($page->{'_ok_'}){
            $api->warn("Failed to retrieve embeddedin list for Template:Bots: ".$page->{'error'}."\n");
            return 300;
        }

        # Subpages in the User (2) and User talk (3) namespaces are ignored
        my $nsnum=$page->{'ns'};
        next if(($nsnum == 2 || $nsnum == 3) && $page->{'title'}=~m{/});

        my $id=$page->{'pageid'};
        push @allids, $id;
        push @{$idsbyns{$nsnum}}, $id;

        # Only re-read pages whose latest revision is not the one we stored
        my $knownrev=$api->store->{"revid $id"}//0;
        push @stale, $id if $knownrev != $page->{'lastrevid'};

        # Number of Template:Copyvio/core entries among the page's templates
        $hascopyvio{$id}=grep { $_->{"title"} eq 'Template:Copyvio/core' } @{$page->{'templates'}//[]};
    }

    # Refresh the stored info for every page edited since the last check
    if(@stale){
        # The page ids are requested in '|'-joined batches of at most 50
        my @batches=();
        push @batches, join('|', splice(@stale, 0, 50)) while @stale;

        my $content=$api->iterator(
            pageids => \@batches,
            prop    => 'revisions',
            rvprop  => 'ids|content',
            rvslots => 'main',
        );
        while(my $page=$content->next){
            return 0 if $api->halting;
            unless($page->{'_ok_'}){
                $api->warn("Failed to retrieve content list for Template:Bots transclusions: ".$page->{'error'}."\n");
                return 300;
            }

            my $id=$page->{'pageid'};
            my $name=$page->{'title'};
            my $rev=$page->{'revisions'}[0]{'revid'};
            my $wikitext=$page->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # Drop text that is not really transcluded: nowiki-like tags
            # and HTML comments (unclosed ones run to the end of the text)
            $wikitext=~s{<(nowiki|pre|source|syntaxhighlight)(?:\s[^>]*)?(?:/>|(?<!/)>.*?(?:</\g{-1}(?:\s*)>|$))|<!--.*?(?:-->|$)}{}siog;

            # Every remaining {{bots...}} / {{nobots...}} invocation, verbatim
            my @found=($wikitext=~/\{\{(?:[nN]obots|[bB]ots)(?:\s*\|[^\}]*)?\}\}/gs);

            $api->store->{"revid $id"}=$rev;
            $api->store->{"info $id"}=[ $name, @found ];

            # Out of time; progress so far is already in the store
            return 0 if time()>$deadline;
        }
    }

    # Combined report first, then one report per real namespace
    my $status=$self->makepage($api, undef, \@allids, \%hascopyvio);
    return $status if defined($status);
    for my $nsnum (sort { $a <=> $b } grep { $_ >= 0 } keys %nsnames){
        $status=$self->makepage($api, $nsnum, $idsbyns{$nsnum}//[], \%hascopyvio);
        return $status if defined($status);
    }

    # Schedule the next run for the next multiple of 86400 epoch seconds
    my $untilnext=86400-(time()%86400);
    $api->store->{'nextrun'}=time()+$untilnext;
    return $untilnext;
}

# Build one report page and save it if its text changed.
#
# Parameters:
#   $self    - the task object (not otherwise used here)
#   $api     - bot API object; used for the persistent store, the namespace
#              map, the edit token, logging and the edit itself
#   $ns      - namespace number to report on, or undef for the combined
#              report (which is written to $basepage itself)
#   $pages   - arrayref of page ids to include
#   $copyvio - hashref keyed by page id; a true value files the page's
#              blanket allow/deny under "<all+copyvio>" instead of "<all>"
#
# Returns undef when the page is up to date or was saved, 60 if no edit
# token could be obtained, 300 if the write failed.  run() tests the result
# with defined(), so the explicit undef is part of the contract.
sub makepage {
    my ($self, $api, $ns, $pages, $copyvio)=@_;
    my $title=$basepage;
    my %nslist=$api->namespace_reverse_map;
    $nslist{0}='Main';
    $title.="/$ns" if defined($ns);
    my $inns=defined($ns)?' in the '.$nslist{$ns}.' namespace':'';

    # Now process all pages
    my %denybybot=();
    my %allowbybot=();
    my @broken=();
    my @multiple=();
    my $all='&lt;all&gt;';
    my $allcv='&lt;all+copyvio&gt;';
    for my $pageid (@{$pages//[]}){
        # A page id without stored info would otherwise be a fatal
        # dereference of undef under strict; treat it as "no templates".
        my ($pagetitle, @exclusion)=@{$api->store->{"info $pageid"}//[]};
        next unless @exclusion;

        my $ct1=0;   # allow/deny style templates seen on this page
        my $ct2=0;   # optout style templates seen on this page
        my $xall=$copyvio->{$pageid}?$allcv:$all;
        for my $c (@exclusion){
            if($c=~/\{\{[nN]obots\}\}/){ $ct1++; $denybybot{$xall}{$pagetitle}=1; next; }
            if($c=~/\{\{[bB]ots\}\}/){ $ct1++; $allowbybot{$xall}{$pagetitle}=1; next; }
            if($c=~/\{\{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*\}\}/s){
                my $list=$1;   # copy now; later matches overwrite $1
                $ct1++;
                if($list eq 'all'){ $allowbybot{$xall}{$pagetitle}=1; next; }
                if($list eq 'none'){ $denybybot{$xall}{$pagetitle}=1; next; }
                # Empty names (e.g. from "allow=,Foo") are not bots
                for my $bot (grep { length } split(/\s*,\s*/, $list)){
                    $allowbybot{$bot}{$pagetitle}=1;
                }
                next;
            }
            if($c=~/\{\{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*\}\}/s){
                my $list=$1;
                $ct1++;
                if($list eq 'all'){ $denybybot{$xall}{$pagetitle}=1; next; }
                if($list eq 'none'){ $allowbybot{$xall}{$pagetitle}=1; next; }
                for my $bot (grep { length } split(/\s*,\s*/, $list)){
                    $denybybot{$bot}{$pagetitle}=1;
                }
                next;
            }
            if($c=~/\{\{[bB]ots\s*\|\s*optout\s*=\s*(.*?)\s*\}\}/s){
                $ct2++;
                # Ignore
                next;
            }

            # If we got here, the template is unparsable
            push @broken, $pagetitle;
            $ct1++;
            $ct2++;
        }
        push @multiple, $pagetitle if $ct1>1 || $ct2>1;
    }

    # Get edit token
    my $tok=$api->edittoken($title, EditRedirect=>1);
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for $title: ".$tok->{'error'});
        return 60;
    }

    # Prepare the output page
    my $txt=qq(This page lists uses of {{tl|bots}} or {{tl|nobots}}$inns, organized by the bot being allowed/excluded. If a page allows/excludes multiple bots, it will be listed multiple times.\n\n);
    $txt.=qq(<center style="font-size:smaller;margin:0 4ex">[ [[$basepage|All]]);
    for my $n (sort { $a <=> $b } keys %nslist){
        next if $n < 0;
        $txt.=qq( | [[$basepage/$n|).$nslist{$n}.qq(]]);
    }
    $txt.=qq( ]</center>\n\n);

    if(%denybybot){
        $txt.=qq(==Pages$inns excluding certain bots==\n);
        $txt.=qq({|class="wikitable sortable" style="width:100%"\n);
        $txt.=qq(!Bot!!Exclusions!!class="unsortable"|Page list\n);
        $txt.=_bot_table_rows(\%denybybot, $all, $allcv);
        $txt.=qq(|}\n\n);
    }

    if(%allowbybot){
        $txt.=qq(==Pages$inns allowing certain bots==\n);
        $txt.=qq({|class="wikitable sortable" style="width:100%"\n);
        $txt.=qq(!Bot!!Allowances!!class="unsortable"|Page list\n);
        $txt.=_bot_table_rows(\%allowbybot, $all, $allcv);
        $txt.=qq(|}\n\n);
    }

    if(@multiple){
        $txt.=qq(==Pages$inns with multiple exclusion templates==\n);
        $txt.="*[[:".join("]]\n*[[:", sort @multiple)."]]\n\n";
    }

    if(@broken){
        $txt.=qq(==Pages$inns with broken exclusion templates==\n);
        $txt.="*[[:".join("]]\n*[[:", sort @broken)."]]\n\n";
    }

    # Compare ignoring trailing whitespace so an unchanged report is not re-saved
    $txt=~s/\s*$//;
    my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'}//''; $intxt=~s/\s*$//;

    if($txt ne $intxt){
        $api->log("Updating $title");
        my $r=$api->edit($tok, $txt, "Updating Hall of Shame", 0, 0);
        if($r->{'code'} ne 'success'){
            $api->warn("Write failed on $title: ".$r->{'error'}."\n");
            return 300;
        }
    }

    return undef;
}

# Render the body rows of one "bot => { page title => 1 }" table.
#
# $bybot is the hashref to render; $all and $allcv are the two pseudo-bot
# labels, which sort first and are printed as-is.  Other rows are ordered by
# descending page count, then case-insensitively, then case-sensitively by
# name.  Used for both the deny and the allow table so that bot names are
# escaped the same way in each.
sub _bot_table_rows {
    my ($bybot, $all, $allcv)=@_;

    my @bots=sort {
        (($a ne $all) <=> ($b ne $all)) ||
        (($a ne $allcv) <=> ($b ne $allcv)) ||
        ((scalar keys %{$bybot->{$b}}) <=> (scalar keys %{$bybot->{$a}})) ||
        lc($a) cmp lc($b) || $a cmp $b
    } keys %$bybot;

    my $rows='';
    for my $bot (@bots){
        my @titles=sort keys %{$bybot->{$bot}};
        my $ct=@titles;

        my $cell;
        if($bot eq $all || $bot eq $allcv){
            $cell=$bot;
        } elsif($bot=~/\|/){
            # A pipe would split the {{user}} parameters and the table
            # cell, so show the name as plain text with the pipe encoded.
            ($cell=$bot)=~s/\|/&#x7C;/g;
        } else {
            # A bare "=" would turn the name into a named template parameter
            (my $safe=$bot)=~s/=/{{=}}/g;
            $cell=qq({{user|$safe}});
        }

        my $list="\n*[[:".join("]]\n*[[:", @titles)."]]";
        # Long lists are collapsed
        $list=qq({{hidden begin}}).$list.qq(\n{{hidden end}}) if $ct>5;

        $rows.=qq(|-\n);
        $rows.=qq(|$cell\n);
        $rows.=qq(|$ct\n);
        $rows.=qq(|$list\n);
    }
    return $rows;
}

1;