User:Fl/scripts/achewoodscriptc.php

<?php
/*********************************************************************\
* Achewood script c: converts previously made date-based references
* to "[Cartoon name], Date, ''Achewood''" references.
*
* This file is part of a collection of scripts, created by Foxy Loxy
* <http://en.wikipedia.org/wiki/User:Foxy_Loxy> and stored at
* <http://en.wikipedia.org/wiki/User:Foxy_Loxy/scripts>.
*
* These scripts are free software: you can redistribute them and/or modify
* them under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* 
* These scripts are distributed in the hope that they will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with these scripts.  If not, see <http://www.gnu.org/licenses/>.
\*********************************************************************/

// Load the PWiki class and create the instance this script uses to read
// the article (getPage) and to terminate (_exit).
require_once 'pwiki.class.php';
$pw = new PWiki();

/**
 * Download the body of a URL and return it as a string.
 *
 * Uses cURL (following redirects, without response headers) when the
 * extension is loaded, otherwise falls back to file_get_contents().
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or FALSE when the transfer fails
 *                      (both curl_exec() and file_get_contents() report
 *                      failure that way).
 */
function source_code ($url) {
  if (function_exists ('curl_init')) {
    // $_SERVER['HTTP_USER_AGENT'] only exists when PHP is serving a web
    // request; from the command line it is undefined, so fall back to a
    // fixed identifier instead of raising a notice and sending no agent.
    $useragent = isset ($_SERVER['HTTP_USER_AGENT'])
      ? $_SERVER['HTTP_USER_AGENT']
      : 'AchewoodScript/1.0 (PHP cURL)';

    $curl = curl_init ($url);
    curl_setopt ($curl, CURLOPT_HEADER, FALSE);
    curl_setopt ($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt ($curl, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt ($curl, CURLOPT_USERAGENT, $useragent);
    $source = curl_exec ($curl);
    curl_close ($curl);
    return $source;
  } else {
    return file_get_contents ($url);
  }
}


// Fetch the wikitext of the Achewood article.
$page = $pw->getPage('Achewood');

// Locate every old-style reference of the form
//   <ref[ name=...]>[http://[www.]achewood.com/[index.php]?date=NNNNNNNN Text]. ''Achewood''</ref>
// Capture groups used below:
//   1 = remainder of the opening <ref> tag (" name=...>" or ">")
//   3 = the full achewood.com strip URL
//   7 = the existing link text, reused as the date in the new reference
preg_match_all("/<ref( name=(.*?)>|>)\[(http:\/\/(www\.a|a)chewood\.com\/(index\.php\?|\?)date=([0-9]{8})) (.*?)\]\. ''Achewood''<\/ref>/i", $page, $matches);

// Always defined, so the later print_r()/str_replace() calls are safe even
// when the page contains no matching references.
$find = array();
$replace = array();

$runs = count($matches[0]);
foreach ($matches[0] as $i => $match) {
  $run = $i + 1;
  echo "Run $run of $runs started\n";
  $link = $matches[3][$i];
  $date = $matches[7][$i];
  $refend = $matches[1][$i];

  // The strip page wraps the comic in a link to a second page; follow it.
  $achewood = source_code($link);
  if (!is_string($achewood) || !preg_match('/<p id="comic_body">\s*<a href="(.*?)">(.*?)<\/p>/si', $achewood, $titlelink)) {
    echo "Run $run of $runs skipped: no comic link found at $link\n";
    continue;
  }

  // The linked page's <title> is used as the cartoon name.
  $othersite = html_entity_decode($titlelink[1]);
  $othersitesource = source_code($othersite);
  if (!is_string($othersitesource) || !preg_match('/<title>(.*?)<\/title>/si', $othersitesource, $titlearray)) {
    echo "Run $run of $runs skipped: no title found at $othersite\n";
    continue;
  }

  // Collapse whitespace: a line break inside the title would split the
  // single-line [url text] external link.
  $title = trim(preg_replace('/\s+/', ' ', $titlearray[1]));
  if ($title === '') {
    echo "Run $run of $runs skipped: empty title at $othersite\n";
    continue;
  }

  // Append to both lists together so they stay aligned for str_replace(),
  // leaving references that could not be resolved untouched.
  $find[] = $match;
  $replace[] = "<ref".$refend."[".$link." ".$title."], ".$date.", ''Achewood''</ref>";
  echo "Run $run of $runs complete\n";
}

// Fall back to empty lists when no reference was collected, so the dump
// and the replacement below never touch undefined variables.
$find = isset($find) ? $find : array();
$replace = isset($replace) ? $replace : array();

// Dump the find/replace pairs for manual inspection.
// print_r() needs its second argument set to true to RETURN the text;
// without it the arrays are echoed to stdout and "1" is written to the file.
$far = "AchewoodFindAndReplace.txt";
$fh = fopen($far, 'w') or die("can't open file");
fwrite($fh, print_r($find, true)."\n\n".print_r($replace, true));
fclose($fh);

// Apply every replacement to the page text.
$newpage = str_replace($find, $replace, $page);

// Save the rewritten wikitext to a local file.
$wiki = "AchewoodWikitext.txt";
$fh = fopen($wiki, 'w') or die("can't open file");
fwrite($fh, $newpage);
fclose($fh);

$pw->_exit("End of script.");
?>