User:Boothby (bot)/Source code

<?php
/**
 * @copyright	Philip Withnall 2006
 * @package	Boothby
 * @version	2.0.0
 * @license	http://tecnocode.co.uk/links/sourcecode-license.html
 * @filesource
 */
#!/usr/bin/php

// TODO: Standard cURL functions in class
// TODO: curl_errno etc.
// TODO: Perhaps move to curl_multi_* instead of threading?
// TODO: Perhaps use curl_setopt_array instead of lots of curl_setopts?
// TODO: Clean up memory usage
// TODO: Task queues (from text files, command line, etc.)
// TODO: Functionality to go through and convert all images to PNG (and change links to them)
// TODO: Functionality to go through all images and produce a list of ones with likely bad names (e.g. only one word in the name, etc.)
// TODO: Functionality to go through all pages, find the uncategorised main pages and add wikifications to them
// TODO: Make it update its user page to say its status when it's working
// TODO: Logout
// TODO: Make it OS-independent

// Human-readable name of the bot.
define('NAME', 'Boothby bot');

// Release number; the class appends it to the HTTP User-Agent string.
define('VERSION', '2.0.0');

// Entry point: create the bot and run the single task configured for this invocation,
// which re-points every page using the old template at the new template.
$boothby = new boothby();
$boothby->rereference_templates(
	'Final Fantasy VII Nav',
	'Final Fantasy VII/Nav'
);

/**
 * Boothby: a command-line maintenance bot for a MediaWiki site.
 *
 * The bot works through the wiki's ordinary HTML interface: pages and forms are
 * fetched with cURL, form tokens and page lists are scraped out of the returned
 * HTML with regular expressions, and edits, moves, deletions and uploads are sent
 * back as form POSTs. Session state is kept in a cookie file shared by all requests.
 *
 * Every action is written to the log file and echoed to standard output. When
 * $test_mode is set, pages are still downloaded but nothing is posted to the wiki.
 *
 * NOTE(review): several regular expressions in this class had the HTML they contained
 * stripped out of the only available copy of the source. They have been reconstructed
 * and are individually marked below; verify them against the live wiki markup.
 */
class boothby
{
	/** @var string Root URL of the wiki, ending in a slash. */
	public $base_url;
	/** @var string Account name the bot logs in with. */
	public $bot_username;
	/** @var string Password for the bot account (placeholder; supply the real one before running). */
	public $bot_password;
	/** @var string HTTP User-Agent sent with every request. */
	public $bot_useragent;
	/** @var bool Whether edited/uploaded pages are added to the bot's watchlist. */
	public $watch_articles;
	/** @var bool Set once login() has been called. */
	public $logged_in;
	/** @var string Path of the cURL cookie jar. */
	public $cookie_file;
	/** @var string Path of the log file that every message is appended to. */
	public $log_file;
	/** @var int|float Minimum gap between two uploads; 0 disables throttling. Presumably seconds, the unit microtime_float() returns. */
	public $upload_delay;
	/** @var float|null Timestamp (microtime_float()) of the most recent upload, or NULL before the first one. */
	public $last_upload;
	/** @var bool When true, nothing is posted to the wiki. */
	public $test_mode;
	/** @var bool When true, each page is processed in a forked child process. */
	public $thread_mode;
	/** @var int Number of child processes currently accounted for. */
	public $children;
	/** @var int Upper bound on $children before new forks are delayed. */
	public $max_children;

	/**
	 * Constructor: set up the configuration and, in thread mode, the signal handler.
	 */
	public function __construct()
	{
		// Site details
		$this->base_url = 'http://strategywiki.net/';
		$this->bot_username = 'Boothby (bot)';
		$this->bot_password = 'password';
		$this->bot_useragent = 'Boothby bot ' . VERSION;
		$this->watch_articles = true;

		$this->logged_in = false;

		// System details
		$this->cookie_file = '/var/tmp/boothby_cookies.txt';
		$this->log_file = '/var/log/boothby.txt';

		// Upload limits
		$this->upload_delay = 0;
		$this->last_upload = NULL;

		// Testing
		$this->test_mode = false;

		// Threading limits
		$this->thread_mode = false;
		$this->children = 0;
		$this->max_children = 10;

		if ($this->thread_mode) {
			declare(ticks=1);
			// Plain array callable; the PHP 4 style "& $this" reference is not needed for objects.
			pcntl_signal(SIGCHLD, array($this, 'sig_handler'));
			// TODO: Sort this out
			//pcntl_signal(SIGINT, array($this, 'sig_handler'));
		}

		$this->log_message('Starting up.');
	}

	/**
	 * Destructor: record that the process (parent or forked child) is ending.
	 */
	public function __destruct()
	{
		$this->log_message('Shutting down.');
	}

	//==================================================================================================
	// Utility functions
	//==================================================================================================

	/**
	 * Log in to the wiki. Does nothing if login() has already been called.
	 *
	 * @return string|false|null Raw HTTP response (headers included), false if the request
	 *                           failed, or NULL when already logged in or in test mode.
	 */
	public function login()
	{
		if ($this->logged_in) {
			return;
		}
		// The flag is set before the request is made, so a failed login is not retried.
		$this->logged_in = true;

		$this->log_message('Logging in.');

		if ($this->test_mode) {
			return;
		}

		$post_vars = array(
			'wpName' => $this->bot_username,
			'wpPassword' => $this->bot_password,
			'wpLoginattempt' => 'Log in',
			'wpRemember' => true,
			'wpRetype' => '',
			'wpEmail' => '',
		);

		return $this->post_form(
			$this->base_url . 'w/index.php?title=Special:Userlogin&action=submitlogin&type=login',
			$post_vars,
			$this->base_url . 'wiki/Special:Userlogin'
		);
	}

	/**
	 * Send a form POST with the bot's cookies and user agent.
	 *
	 * @param string $url       Form action URL.
	 * @param array  $post_vars Field name => value; passed to cURL as an array.
	 * @param string $referer   Value of the Referer header.
	 * @return string|false Response including headers, or false on transport failure (which is logged).
	 */
	private function post_form($url, $post_vars, $referer)
	{
		$ch = curl_init($url);
		curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookie_file);
		curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookie_file);
		curl_setopt($ch, CURLOPT_POST, true);
		curl_setopt($ch, CURLOPT_HEADER, true);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_POSTFIELDS, $post_vars);
		curl_setopt($ch, CURLOPT_REFERER, $referer);
		curl_setopt($ch, CURLOPT_USERAGENT, $this->bot_useragent);
		$response = curl_exec($ch);
		if ($response === false) {
			$this->log_error('POST to ' . $url . ' failed: ' . curl_error($ch));
		}
		curl_close($ch);
		return $response;
	}

	/**
	 * Download a URL with the bot's cookies and user agent.
	 *
	 * @param string $url
	 * @return string|false Response body, or false on transport failure (which is logged).
	 */
	public function download_url($url)
	{
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_URL, $url);
		curl_setopt($ch, CURLOPT_HEADER, false);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookie_file);
		curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookie_file);
		curl_setopt($ch, CURLOPT_USERAGENT, $this->bot_useragent);
		$response = curl_exec($ch);
		if ($response === false) {
			$this->log_error('GET of ' . $url . ' failed: ' . curl_error($ch));
		}
		curl_close($ch);
		return $response;
	}

	/**
	 * Download the rendered view of an article.
	 */
	public function download_page($article_name)
	{
		return $this->download_url($this->base_url . 'wiki/' . $this->spaces_to_underscores($article_name));
	}

	/**
	 * Download the raw file behind an Image: page.
	 *
	 * @param string $file_name File name without the "Image:" prefix.
	 * @return string|false File contents, or false if the file link could not be found or fetched.
	 */
	public function download_file($file_name)
	{
		$file_page = $this->download_page('Image:' . $this->spaces_to_underscores($file_name));

		// NOTE(review): the HTML at the start and end of this pattern was lost from the source.
		// Only the  title="<name with spaces>"><name>  middle survived; the anchor tag around it
		// is a reconstruction. The captured path omits its leading slash because base_url
		// already ends with one.
		$pattern = '#<a href="/([^"]+)"[^>]*title="'
			. preg_quote($this->underscores_to_spaces($file_name), '#')
			. '"[^>]*>' . preg_quote($file_name, '#') . '</a>#';

		$matches = array();
		if (!preg_match($pattern, (string) $file_page, $matches)) {
			$this->log_error('Could not find the file link on Image:' . $file_name . '.');
			return false;
		}
		return $this->download_url($this->base_url . $matches[1]);
	}

	/**
	 * Download the edit form of an article.
	 */
	public function download_edit_page($article_name)
	{
		return $this->download_url($this->base_url . 'w/index.php?title=' . $this->spaces_to_underscores($article_name) . '&action=edit');
	}

	/**
	 * Download the move form of an article.
	 */
	public function download_move_page($article_name)
	{
		return $this->download_url($this->base_url . 'wiki/Special:Movepage/' . $this->spaces_to_underscores($article_name));
	}

	/**
	 * Download the delete confirmation form of an article.
	 */
	public function download_delete_page($article_name)
	{
		return $this->download_url($this->base_url . 'w/index.php?title=' . $this->spaces_to_underscores($article_name) . '&action=delete');
	}

	/**
	 * Download a Special:Allpages listing.
	 *
	 * @param string|null $from      Title to start the listing from, or NULL for the beginning.
	 * @param int         $namespace Namespace number.
	 */
	public function download_allpages_page($from = NULL, $namespace = 0)
	{
		$url = $this->base_url . 'w/index.php?title=Special:Allpages&namespace=' . intval($namespace);
		if (!is_null($from)) {
			$url .= '&from=' . $this->spaces_to_underscores($from);
		}
		return $this->download_url($url);
	}

	/**
	 * Download the Special:Whatlinkshere listing for an article.
	 */
	public function download_whatlinkshere_page($article_name)
	{
		return $this->download_url($this->base_url . 'wiki/Special:Whatlinkshere/' . $this->spaces_to_underscores($article_name));
	}

	/**
	 * Return the part of a page that precedes the print footer, or the whole page
	 * when no print footer is present.
	 *
	 * NOTE(review): assumes the skin places site navigation after the print footer — confirm.
	 */
	private function content_region($page)
	{
		$page = (string) $page;
		$end = strpos($page, '<div class="printfooter"');
		return ($end === false) ? $page : substr($page, 0, $end);
	}

	/**
	 * Extract member article names from a category page.
	 *
	 * @param string $category_page HTML of the category page.
	 * @return array Names as they appear after /wiki/ in the links (URL form); empty if none found.
	 */
	public function extract_category_articles($category_page)
	{
		$matches = array();
		if (!preg_match('/<a name="articles"(.+)<div class="printfooter"/smU', (string) $category_page, $matches)) {
			return array();
		}
		$matches2 = array();
		preg_match_all('|href="/wiki/(.+)"|U', $matches[1], $matches2);
		return $matches2[1];
	}

	/**
	 * Extract page names from a Special:Allpages listing.
	 *
	 * NOTE(review): the original pattern was lost from the source. This reconstruction looks
	 * for the listing table and falls back to everything before the print footer; callers
	 * filter the result by name, so extra links are tolerated.
	 *
	 * @param string $allpages_page HTML of the listing.
	 * @return array Names in URL form; empty if none found.
	 */
	public function extract_allpages_articles($allpages_page)
	{
		$matches = array();
		if (preg_match('|<table style="background: inherit;" border="0" width="100%">(.+)</table>|smU', (string) $allpages_page, $matches)) {
			$region = $matches[1];
		} else {
			$region = $this->content_region($allpages_page);
		}
		$matches2 = array();
		preg_match_all('|href="/wiki/(.+)"|U', $region, $matches2);
		return $matches2[1];
	}

	/**
	 * Extract the names of the pages listed on a Special:Whatlinkshere page.
	 *
	 * NOTE(review): this method's header and most of its pattern were lost from the source
	 * (it was fused into extract_allpages_articles). The reconstruction collects the links
	 * that open a list item before the print footer, which skips the paging links.
	 *
	 * @param string $whatlinkshere_page HTML of the listing.
	 * @return array Names in URL form; empty if none found.
	 */
	public function extract_whatlinkshere_articles($whatlinkshere_page)
	{
		$matches2 = array();
		preg_match_all('|<li><a href="/wiki/(.+)"|U', $this->content_region($whatlinkshere_page), $matches2);
		return $matches2[1];
	}

	/**
	 * Extract the hidden form values needed to submit an edit.
	 *
	 * @param string $edit_page HTML of the edit form.
	 * @return array [wpStarttime, wpEdittime, wpEditToken, wpAutoSummary], or an empty array
	 *               when the form fields were not found.
	 */
	public function extract_edit_keys($edit_page)
	{
		$matches = array();
		preg_match('/value="([0-9]+)" name="wpStarttime".+value="([0-9]+)" name="wpEdittime".+value="([a-zA-Z0-9]+)" name="wpEditToken".+name="wpAutoSummary" value="([a-zA-Z0-9]+)"/sm', (string) $edit_page, $matches);
		array_shift($matches);
		return $matches;
	}

	/**
	 * Extract the article wikimarkup from an edit form.
	 *
	 * NOTE(review): the closing </textarea> of the pattern was lost from the source and has been restored.
	 *
	 * @param string $edit_page HTML of the edit form.
	 * @return string Decoded wikimarkup, or '' when no text area was found.
	 */
	public function extract_edit_content($edit_page)
	{
		$matches = array();
		if (!preg_match('|cols=\'80\' >(.*)</textarea>|smU', (string) $edit_page, $matches)) {
			return '';
		}
		return htmlspecialchars_decode($matches[1]);
	}

	/**
	 * Extract the edit token from a move form.
	 *
	 * @return string|null Token, or NULL when the form does not contain one.
	 */
	public function extract_move_keys($move_page)
	{
		return $this->extract_edit_token($move_page);
	}

	/**
	 * Extract the edit token from a delete form.
	 *
	 * @return string|null Token, or NULL when the form does not contain one.
	 */
	public function extract_delete_keys($delete_page)
	{
		return $this->extract_edit_token($delete_page);
	}

	/**
	 * Shared token scraper for the move and delete forms.
	 */
	private function extract_edit_token($form_page)
	{
		$matches = array();
		if (!preg_match('/name=\'wpEditToken\' value="([a-zA-Z0-9]*)"/', (string) $form_page, $matches)) {
			return NULL;
		}
		return $matches[1];
	}

	/**
	 * Enforce the minimum gap between uploads, then record the time of this one.
	 *
	 * The previous code only recorded the time inside a branch that required a time to be
	 * recorded already, so the delay never took effect; it also slept for a negative,
	 * wrongly scaled interval. The time is now always recorded and the remaining wait is
	 * converted from seconds to the microseconds usleep() expects.
	 */
	private function wait_for_upload_slot()
	{
		if (($this->upload_delay > 0) && (!is_null($this->last_upload))) {
			$remaining = ($this->last_upload + $this->upload_delay) - $this->microtime_float();
			if ($remaining > 0) {
				usleep((int) round($remaining * 1000000));
			}
		}
		$this->last_upload = $this->microtime_float();
	}

	/**
	 * Upload a local file to the wiki.
	 *
	 * @param string $file_name     Path of the local file.
	 * @param string $new_file_name Destination name on the wiki (without "Image:").
	 * @param string $summary       Upload description.
	 * @return string|false|null Raw response, false on transport failure, NULL in test mode.
	 */
	public function upload_file($file_name, $new_file_name, $summary)
	{
		$this->wait_for_upload_slot();

		$this->log_message('Uploading file Image:' . $new_file_name . ', file summary "' . $summary . '".');

		if ($this->test_mode) {
			return;
		}

		// The "@path" upload syntax was removed from PHP's cURL binding in PHP 7;
		// use CURLFile where it exists and keep the old form for older interpreters.
		$upload = class_exists('CURLFile') ? new CURLFile($file_name) : '@' . $file_name;

		$post_vars = array(
			'wpUploadFile' => $upload,
			'wpDestFile' => $new_file_name,
			'wpUploadDescription' => $summary,
			'wpIgnorewarning' => 'CHECKED',
			'wpUpload' => 'Upload file',
		);
		// MediaWiki reads checkbox fields by presence, so an unchecked box must be left out
		// rather than sent empty.
		if ($this->watch_articles) {
			$post_vars['wpWatchthis'] = 'CHECKED';
		}

		return $this->post_form(
			$this->base_url . 'wiki/Special:Upload',
			$post_vars,
			$this->base_url . 'wiki/Special:Upload'
		);
	}

	/**
	 * Submit the edited text of a page.
	 *
	 * @param string $article_name
	 * @param array  $edit_keys    Result of extract_edit_keys().
	 * @param string $edit_content New wikimarkup.
	 * @param string $summary      Edit summary.
	 * @param bool   $minor_edit   Whether to flag the edit as minor.
	 * @return string|false|null Raw response; false on transport failure or when the edit
	 *                           keys are incomplete; NULL in test mode.
	 */
	public function upload_edit_page($article_name, $edit_keys, $edit_content, $summary, $minor_edit)
	{
		$this->wait_for_upload_slot();

		$this->log_message('Uploading page ' . $article_name . ', edit summary "' . $summary . '"' . (($minor_edit) ? ' (minor)' : '') . '.');

		if ($this->test_mode) {
			return;
		}

		// Without all four keys the edit form was not scraped successfully; submitting
		// would at best fail and at worst save the wrong text.
		if (!is_array($edit_keys) || (count($edit_keys) < 4)) {
			$this->log_error('Not saving ' . $article_name . ': the edit form could not be read.');
			return false;
		}

		$post_vars = array(
			'wpSection' => '',
			'wpStarttime' => $edit_keys[0],
			'wpEdittime' => $edit_keys[1],
			'wpScrolltop' => '',
			'wpTextbox1' => $edit_content,
			'wpSummary' => $summary,
			'wpSave' => 'Save page',
			'wpEditToken' => $edit_keys[2],
			'wpAutoSummary' => $edit_keys[3],
		);
		// Checkbox fields count as checked when present, so only add them when wanted.
		if ($minor_edit) {
			$post_vars['wpMinoredit'] = 'CHECKED';
		}
		if ($this->watch_articles) {
			$post_vars['wpWatchthis'] = 'CHECKED';
		}

		$title = $this->spaces_to_underscores($article_name);
		return $this->post_form(
			$this->base_url . 'w/index.php?title=' . $title . '&action=submit',
			$post_vars,
			$this->base_url . 'w/index.php?title=' . $title . '&action=edit'
		);
	}

	/**
	 * Submit the delete confirmation form of a page.
	 *
	 * @param string $article_name
	 * @param string $edit_token   Result of extract_delete_keys().
	 * @param string $summary      Deletion reason.
	 * @return string|false|null Raw response, false on transport failure, NULL in test mode.
	 */
	public function upload_delete_page($article_name, $edit_token, $summary)
	{
		$this->wait_for_upload_slot();

		$this->log_message('Deleting page ' . $article_name . ', delete summary "' . $summary . '".');

		if ($this->test_mode) {
			return;
		}

		$post_vars = array(
			'wpReason' => $summary,
			'wpConfirmB' => 'Delete page',
			'wpEditToken' => $edit_token,
		);

		$url = $this->base_url . 'w/index.php?title=' . $this->spaces_to_underscores($article_name) . '&action=delete';
		return $this->post_form($url, $post_vars, $url);
	}

	/**
	 * Submit the move form of a page.
	 *
	 * @param string $article_name     Current title.
	 * @param string $new_article_name New title.
	 * @param string $edit_token       Result of extract_move_keys().
	 * @param string $summary          Move reason.
	 * @return string|false|null Raw response, false on transport failure, NULL in test mode.
	 */
	public function upload_move_page($article_name, $new_article_name, $edit_token, $summary)
	{
		$this->wait_for_upload_slot();

		$this->log_message('Moving page ' . $article_name . ' to ' . $new_article_name . ', edit summary "' . $summary . '".');

		if ($this->test_mode) {
			return;
		}

		$post_vars = array(
			'wpNewTitle' => $new_article_name,
			'wpOldTitle' => $article_name,
			'wpReason' => $summary,
			'wpEditToken' => $edit_token,
			// Previously filled from an undefined variable, i.e. always sent empty.
			'wpTextbox1' => '',
			'wpMove' => 'Move page',
		);

		return $this->post_form(
			$this->base_url . 'w/index.php?title=Special:Movepage&action=submit',
			$post_vars,
			$this->base_url . 'wiki/Special:Movepage/' . $article_name
		);
	}

	/**
	 * Append one line to the log file.
	 *
	 * @param string $label Text placed between the process tag and the message, e.g. "Error: ".
	 * @param string $text  The message itself.
	 */
	private function write_log($label, $text)
	{
		// posix_getpid() needs the POSIX extension; getmypid() is always available.
		$pid = function_exists('posix_getpid') ? posix_getpid() : getmypid();
		$fh = fopen($this->log_file, 'a');
		if ($fh === false) {
			// Logging must never stop the bot; the message is still echoed by the caller.
			return;
		}
		fwrite($fh, date('M j G:i:s') . ' localhost boothby.php[' . $pid . ']: ' . $label . $text . "\n");
		fclose($fh);
	}

	/**
	 * Log an informational message and echo it.
	 */
	public function log_message($message)
	{
		$this->write_log('Message: ', $message);
		echo $message . "\n";
	}

	/**
	 * Log a non-fatal error and echo it.
	 */
	public function log_error($error)
	{
		$this->write_log('Error: ', $error);
		echo 'Error: ' . $error . "\n";
	}

	/**
	 * Log a fatal error, echo it and terminate the process with a non-zero status.
	 */
	public function log_fatal($error)
	{
		$this->write_log('Fatal error: ', $error);
		echo 'Fatal error: ' . $error . "\n";
		exit(1);
	}

	/**
	 * Current Unix time in seconds, with microsecond precision.
	 *
	 * @return float
	 */
	public function microtime_float()
	{
		return microtime(true);
	}

	/**
	 * Signal handler installed in thread mode.
	 *
	 * @param int $sig_number Signal that was delivered.
	 */
	public function sig_handler($sig_number)
	{
		switch ($sig_number) {
			case SIGINT:
				// Ctrl+C
				$this->log_error('Terminated in-process.');
				exit(0);
				break;
			case SIGCHLD:
				// A child process has terminated
				$this->children--;
				break;
			default:
				break;
		}
	}

	/**
	 * Replace every underscore with a space.
	 */
	public function underscores_to_spaces($text)
	{
		return str_replace('_', ' ', $text);
	}

	/**
	 * Replace every space with an underscore.
	 */
	public function spaces_to_underscores($text)
	{
		return str_replace(' ', '_', $text);
	}

	/**
	 * Escape text for literal use inside a preg_replace() replacement string.
	 */
	private function escape_replacement($text)
	{
		return addcslashes($text, '\\$');
	}

	/**
	 * Start one per-page task, forking first when thread mode is on.
	 *
	 * In thread mode this blocks while the child limit is reached, forks, and in the
	 * parent waits for a child to exit before returning.
	 *
	 * @return bool True if the caller should do the work itself (it is the child, or
	 *              threading is off); false if it is the parent and should move on.
	 */
	private function begin_task()
	{
		if (!$this->thread_mode) {
			return true;
		}

		while ($this->children >= $this->max_children) {
			sleep(5);
		}
		$this->children++;

		$pid = pcntl_fork();
		if ($pid == -1) {
			$this->log_fatal('Failed to fork process.');
		}
		if ($pid) {
			// We're the parent
			pcntl_wait($status);
			return false;
		}
		// We're the child
		return true;
	}

	/**
	 * Finish one per-page task: a forked child exits here, otherwise nothing happens.
	 */
	private function end_task()
	{
		if ($this->thread_mode) {
			exit(0);
		}
	}

	/**
	 * Download a page's edit form, apply one regex replacement to its text and save it.
	 */
	private function rewrite_page($page_name, $pattern, $replacement, $summary)
	{
		$page_content = $this->download_edit_page($page_name);
		$new_text = preg_replace($pattern, $replacement, $this->extract_edit_content($page_content));
		$this->upload_edit_page($page_name, $this->extract_edit_keys($page_content), $new_text, $summary, false);
	}

	//==================================================================================================
	// Process functions
	//==================================================================================================

	/**
	 * Move all the pages in a category to a new category.
	 *
	 * NOTE(review): the start of the pattern and the brackets of the replacement were lost
	 * from the source; "[[Category:name|sort key]]" -> "[[Category:new name|sort key]]" is
	 * the reconstructed intent.
	 *
	 * @param string $category_name     Current category, without the "Category:" prefix.
	 * @param string $new_category_name New category, without the prefix.
	 */
	public function recategorise_pages($category_name, $new_category_name)
	{
		// TODO: Support for moving the category page as well
		// TODO: Support for sub-categories and test it with categories other than image categories
		$this->login();
		$category_content = $this->download_page('Category:' . $category_name);
		$page_list = $this->extract_category_articles($category_content);

		$this->log_message('Moving ' . count($page_list) . ' pages from Category:' . $category_name . ' to Category:' . $new_category_name . '.');

		$pattern = '/\[\[(:?Category:)' . preg_quote($category_name, '/') . '(\|[^\]]*)?\]\]/';
		// ${1} rather than $1 so a new name starting with a digit is not read as part of the group number.
		$replacement = '[[${1}' . $this->escape_replacement($new_category_name) . '${2}]]';
		$summary = 'Moved to Category:' . $new_category_name . ' (bot edit)';

		foreach ($page_list as $page_name) {
			if (!$this->begin_task()) {
				continue;
			}
			$this->rewrite_page($page_name, $pattern, $replacement, $summary);
			$this->end_task();
		}
	}

	/**
	 * Change the links in pages which link to the old article to point to the new article.
	 *
	 * NOTE(review): the start of the pattern and the brackets of the replacement were lost
	 * from the source; "[[old|label]]" -> "[[new|label]]" (with an optional leading colon
	 * kept) is the reconstructed intent.
	 */
	public function relink_pages($article_name, $new_article_name)
	{
		$this->login();
		$whatlinkshere_content = $this->download_whatlinkshere_page($article_name);
		$page_list = $this->extract_whatlinkshere_articles($whatlinkshere_content);

		$this->log_message('Re-linking ' . count($page_list) . ' pages from ' . $article_name . ' to ' . $new_article_name . '.');

		$pattern = '/\[\[(:?)' . preg_quote($article_name, '/') . '(\|[^\]]*)?\]\]/';
		$replacement = '[[${1}' . $this->escape_replacement($new_article_name) . '${2}]]';
		$summary = 'Re-linked to ' . $new_article_name . ' (bot edit)';

		foreach ($page_list as $page_name) {
			if (!$this->begin_task()) {
				continue;
			}
			$this->rewrite_page($page_name, $pattern, $replacement, $summary);
			$this->end_task();
		}
	}

	/**
	 * Change the references in pages which reference the old template to the new template.
	 *
	 * NOTE(review): the replacement string was lost from the source (it appeared as an empty
	 * string, which would delete the template call); "{{old|args}}" -> "{{new|args}}" is the
	 * reconstructed intent, matching the edit summary.
	 *
	 * @param string $template_name     Current template, without the "Template:" prefix.
	 * @param string $new_template_name New template, without the prefix.
	 */
	public function rereference_templates($template_name, $new_template_name)
	{
		// TODO: Support for moving the category page as well
		$this->login();
		$whatlinkshere_content = $this->download_whatlinkshere_page('Template:' . $template_name);
		$page_list = $this->extract_whatlinkshere_articles($whatlinkshere_content);

		$this->log_message('Re-referencing ' . count($page_list) . ' pages with Template:' . $template_name . ' to Template:' . $new_template_name . '.');

		$pattern = '/\{\{' . preg_quote($template_name, '/') . '(\|[^\}]*)?\}\}/i';
		$replacement = '{{' . $this->escape_replacement($new_template_name) . '${1}}}';
		$summary = 'Re-referenced to Template:' . $new_template_name . ' (bot edit)';

		foreach ($page_list as $page_name) {
			if (!$this->begin_task()) {
				continue;
			}
			$this->rewrite_page($page_name, $pattern, $replacement, $summary);
			$this->end_task();
		}
	}

	/**
	 * Move a page (and, optionally, its sub-pages), then re-link the pages pointing at it.
	 *
	 * @param string $page_name     Current title of the root page.
	 * @param string $new_page_name New title of the root page.
	 * @param bool   $move_subpages Whether "root/…" pages are moved as well.
	 */
	public function move_page($page_name, $new_page_name, $move_subpages = true)
	{
		$this->login();
		$allpages_content = $this->download_allpages_page($page_name, 0);
		$page_list = $this->extract_allpages_articles($allpages_content);

		$this->log_message('Moving ' . $page_name . ' ' . (($move_subpages) ? 'and its sub-pages ' : '') . '(' . count($page_list) . ' pages in total) to ' . $new_page_name . '.');

		// Listed names come from link targets, which use underscores; compare in that form.
		$root = $this->spaces_to_underscores($page_name);
		$root_length = strlen($root);

		foreach ($page_list as $_page_name) {
			if (!$this->begin_task()) {
				continue;
			}

			$is_root = ($_page_name == $root);
			// Prefix test on "root/" so a root title that itself contains a slash still matches.
			$is_subpage = $move_subpages && (strncmp($_page_name, $root . '/', $root_length + 1) === 0);

			if ($is_root || $is_subpage) {
				// Swap only the leading root title; a later repeat of it in the name is left alone.
				$_new_page_name = $new_page_name . substr($_page_name, $root_length);

				// The token lives in the downloaded form, not in the page name.
				$page_content = $this->download_move_page($_page_name);
				$edit_token = $this->extract_move_keys($page_content);

				if (is_null($edit_token) && !$this->test_mode) {
					$this->log_error('Not moving ' . $_page_name . ': the move form could not be read.');
				} else {
					$this->upload_move_page($_page_name, $_new_page_name, $edit_token, 'Moved to ' . $_new_page_name . ' (bot edit)');
					$this->relink_pages($_page_name, $_new_page_name);
				}
			}

			$this->end_task();
		}
	}

	/**
	 * Move a file: re-upload it under the new name with the old description page text,
	 * delete the old file page and re-link the pages that used it.
	 *
	 * @param string $file_name     Current file name, without the "Image:" prefix.
	 * @param string $new_file_name New file name, without the prefix.
	 */
	public function move_file($file_name, $new_file_name)
	{
		$this->login();
		$this->log_message('Moving Image:' . $file_name . ' to ' . $new_file_name . '.');

		$file_data = $this->download_file($file_name);
		if ($file_data === false) {
			// Never delete the original when its contents could not be fetched.
			$this->log_error('Not moving Image:' . $file_name . ': the file could not be downloaded.');
			return;
		}

		$temp_file_name = tempnam('./', 'temp_');
		if (($temp_file_name === false) || (file_put_contents($temp_file_name, $file_data) === false)) {
			$this->log_error('Not moving Image:' . $file_name . ': could not write a temporary copy.');
			if ($temp_file_name !== false) {
				unlink($temp_file_name);
			}
			return;
		}

		$description = $this->extract_edit_content($this->download_edit_page('Image:' . $file_name)); //'Moved from Image:'.$file_name.' (bot edit)'
		$this->upload_file($temp_file_name, $new_file_name, $description);

		unlink($temp_file_name);

		$this->upload_delete_page(
			'Image:' . $file_name,
			$this->extract_delete_keys($this->download_delete_page('Image:' . $file_name)),
			'Moved to Image:' . $new_file_name . ' (bot edit)'
		);
		$this->relink_pages('Image:' . $file_name, 'Image:' . $new_file_name);
	}
}