# User:Polbot/source/stats.pl

use strict;
use Perlwikipedia;
use LWP::UserAgent;

# The single command-line argument is the last-name prefix to look up on
# fjc.gov (normally one letter). It is also quoted in the final edit summary,
# so refuse to run without it instead of silently searching for "".
my $firstletter = shift;
die "Usage: $0 <first letter(s) of last name>\n"
	unless defined $firstletter && length $firstletter;

print "\nStarting polbot\n" ;

# Wiki client; used at the end of the run to read and rewrite the stats page.
my $pw=Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "Logging in\n";
# NOTE(review): '(bot password)' is a placeholder, not a usable credential;
# the real password has to be supplied here before the bot can run.
my $login_status=$pw->login('Polbot','(bot password)');
# The script treats a return value of 0 as a successful login.
die "I can't log in." unless ($login_status eq 0);

# Pull from FJC: fetch the search-results page for the requested last-name
# prefix and collect the numeric judge id (jid) of every tGetInfo link on it.
print "Getting list of all judges starting with $firstletter\n";
my $url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;
print " $url\n";
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");
my $res = $ua->get($url);
# Report which URL failed and the HTTP status so a failure can be diagnosed.
die "could not connect to $url: " . $res->status_line unless ($res->is_success);
my $html = $res->content;

# A global match in list context returns every captured jid, in page order.
# The link text after the id is matched but not needed, so it is not captured.
my @judge_ids = $html =~ m/<A HREF=\"\/servlet\/tGetInfo\?jid=(\d+)\">[^<]*</g;

print scalar(@judge_ids) . " judges found.\n\n";

# Accumulates the wiki table rows for all judges; appended to the stats page
# once every judge has been processed.
my $wiki_out = '';

# For each judge id: fetch the biography page, extract name, gender, race and
# every listed federal judgeship, and append one wiki table row to $wiki_out
# for each judgeship whose court could be recognised.
foreach my $jid (@judge_ids) {
	my $judge_url = "http://www.fjc.gov/servlet/tGetInfo?jid=$jid";
	print "\n$jid: ";
	my $judge_res = $ua->get($judge_url);
	# Report which page failed and the HTTP status so the run can be resumed.
	die "could not connect to $judge_url: " . $judge_res->status_line
		unless ($judge_res->is_success);
	my $page = $judge_res->content;
	# Normalise backticks to plain apostrophes before any text is extracted.
	$page =~ s/\`/'/g;

	# Output variables ("-" marks a value that could not be extracted)
	my $name = "-";
	my $race = "-";
	my $gender = "-";

	# extract name: the page header holds "Last, First [Suffix]".
	# Every match is checked, because a failed match leaves $1..$3 holding
	# values from an earlier successful match.
	if ($page =~ m/\<FONT SIZE\=\+1 COLOR\=BLACK\>\<B\>([^\n]*?) *\<\/B\>\<\/FONT\>/m) {
		my $rev_name = $1;
		$rev_name =~ s/ +/ /g;
		# Square brackets would break the [[...]] wiki link built below.
		$rev_name =~ tr/[]//d;
		if ($rev_name =~ m/^(.*?)\, (.*?)( Jr\.| Sr\.| II| III| IV)?$/) {
			my ($last_name, $first_names, $suffix) = ($1, $2, $3);
			# The suffix group is optional and is undef when absent.
			$suffix = '' unless defined $suffix;
			$name = "[[$first_names $last_name$suffix]]";
		} elsif (length $rev_name) {
			# No "Last, First" comma: link the header text unchanged.
			$name = "[[$rev_name]]";
		}
	}
	print "$name\n";

	# extract gender (trailing whitespace is dropped so the value cannot
	# split the single-line table row)
	if ($page =~ m/<BR><B>Gender:<\/B> *([^<]+)</) {
		(my $value = $1) =~ s/\s+$//;
		$gender = $value if length $value;
	}

	# extract race (same whitespace handling as gender)
	if ($page =~ m/<BR><BR><B>Race or Ethnicity:<\/B> *([^<]+)</) {
		(my $value = $1) =~ s/\s+$//;
		$race = $value if length $value;
	}

	# Extract judgeships: the section runs from its heading to the next
	# blank-line-separated bold heading; entries are separated by <BR><BR>.
	if ($page =~ m/<B>Federal Judicial Service:<\/B><BR>(.*?)<BR>\s*<BR>\s*<B>/si) {
		my $ju_string = $1;
		my @jus = split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string);
		foreach my $ju (@jus) {

			# Output variables for this judgeship ("-" = not stated)
			my $court = "-";
			my $president = "-";
			my $nominated_date = "-";
			my $recess_date = "-";
			my $confirmed_date = "-";
			my $commission_date = "-";
			my $senior_date = "-";
			my $end_date = "-";
			my $end_reason = "-";

			# Map the FJC court description to a wiki link. Order matters:
			# the D.C. special cases are tested before the generic
			# Court of Appeals pattern.
			if ($ju =~ m/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/) {
				$court = "[[United States District Court for the $1]]";
			} elsif ($ju =~ m/U\. S\. District Court for the District of Columbia/) {
				$court = "[[United States District Court for the District of Columbia]]";
			} elsif ($ju =~ m/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/) {
				$court = "[[United States circuit court]] $1";
			} elsif ($ju =~ m/Judge, Circuit Court for the District of Columbia([^<]*)<[Bb][Rr]>/) {
				$court = "[[United States circuit court]] for the D.C. Circuit";
			} elsif ($ju =~ m/U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/) {
				$court = "[[United States Court of Appeals for the D.C. Circuit]]";
			} elsif ($ju =~ m/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/) {
				$court = "[[United States Court of Appeals $1]]";
			} elsif ($ju =~ m/Supreme Court of the United States/) {
				$court = "[[Supreme Court of the United States]]";
			}

			# Appointing president, plus either the nomination date or, for a
			# recess appointment, the recess date and any later nomination.
			if ($ju =~ m/Nominated by (.*?) on (\w+ \d+, \d+), to/) {
				$president = "[[$1]]";
				$nominated_date = $2;
			} elsif ($ju =~ m/Received a recess appointment from (.*?) on (\w+ \d+, \d+), to/) {
				$president = "[[$1]]";
				$recess_date = $2;
				if ($ju =~ m/; nominated on (\w+ \d+, \d+);/) {
					$nominated_date = $1;
				}
			}

			if ($ju =~ m/Confirmed by the Senate on (\w+ \d+, \d+), and received commission on (\w+ \d+, \d+)\./) {
				$confirmed_date = $1;
				$commission_date = $2;
			}

			if ($ju =~ m/Assumed senior status on (\w+ \d+, \d+)\./) {
				$senior_date = $1;
			}

			if ($ju =~ m/Service terminated on (\w+ \d+, \d+), due to (.*?)\./) {
				$end_date = $1;
				$end_reason = $2;
				# Shorten the FJC wording for the table.
				$end_reason =~ s/appointment to another judicial position/reappointment/;
			}

			# Entries whose court was not recognised are left out of the table.
			if ($court ne "-") {
				$wiki_out .= "|-\n| $name || $court || $president || $recess_date || $nominated_date || $confirmed_date || $commission_date || $senior_date || $end_date || $end_reason || $race || $gender \n";
			}
		}
	}
}

# Append the collected rows to the stats page. The page is read in full and
# written back in full, so the existing text must be obtained successfully.
print "Writing... ";
my $stats_page = "Wikipedia:WikiProject United States courts and judges/judgestats";
if (defined $wiki_out && length $wiki_out) {
	my $listsofar = $pw->get_text($stats_page);
	# A failed read must not be mistaken for an empty page: continuing would
	# replace the whole page with only the new rows.
	# NOTE(review): assumes get_text yields undef on failure -- confirm
	# against the installed Perlwikipedia version, which may instead return
	# an error code.
	die "could not read existing text of $stats_page; refusing to overwrite it"
		unless defined $listsofar;
	$listsofar .= $wiki_out;
	$pw->edit($stats_page, $listsofar, "Adding judges that start with $firstletter");
	print "done.\n";
} else {
	# No rows were produced, so there is nothing to append and no edit to make.
	print "nothing to add.\n";
}