#!/usr/bin/php
<?php
/**
 * User:Boothby (bot)/Source code
 *
 * @copyright	Philip Withnall 2006
 * @package	Boothby
 * @version	2.0.0
 * @license	http://tecnocode.co.uk/links/sourcecode-license.html
 * @filesource
 */

/*
TODO:
	Core functionality:
		- Perhaps move to curl_multi_* instead of threading?
		- Perhaps use curl_setopt_array instead of lots of curl_setopts?
		- Clean up memory usage
		- Task queues (from text files, command line, etc.)
		- Make it OS-independent
		- Colour console output
		- Stop it editing pages in the StrategyWiki, MediaWiki, *_talk namespaces unless explicitly told to
	Processes:
		- Functionality to go through and convert all images to PNG (and change links to them)
		- Functionality to go through all images and produce a list of ones with likely bad names (e.g. only one word in the name, etc.)
		- Functionality to go through all pages, find the uncategorised main pages and add wikifications to them
*/

// Global identification constants for the bot.
// VERSION is appended to the HTTP user-agent string built in the boothby constructor.
const NAME = 'Boothby bot';
const VERSION = '2.0.0';

// Entry point: constructing the bot logs it in; the task below then runs.
// Arguments to move_page: old title, new title, move sub-pages, delete the left-over redirects.
$old_title = 'CCLP2';
$new_title = 'Chip\'s Challenge Level Pack 2';

$boothby = new boothby();
$boothby->move_page($old_title, $new_title, true, true);

/**
 * Boothby: a command-line maintenance bot that drives a MediaWiki site through
 * its ordinary HTML forms (login, edit, move, delete, upload) using cURL.
 *
 * Constructing an instance logs the bot in; destroying it marks the bot as
 * inactive on its user page and logs it out.
 *
 * NOTE(review): several regular expressions in the published copy of this file
 * had their HTML stripped. Every pattern that had to be rebuilt is marked with
 * a NOTE(review) comment and must be checked against the live page markup.
 */
class boothby {
	// Site details
	public $base_url='http://strategywiki.net/';
	public $bot_username='Boothby (bot)';
	public $bot_password='password';
	public $bot_useragent=NULL; // Filled in by the constructor (it needs the VERSION constant)
	public $watch_articles=true;

	public $current_task=NULL;

	// System details
	public $cookie_file='/var/tmp/boothby_cookies.txt';
	public $log_file='/var/log/boothby.txt';

	// Download limits (delay in microseconds; last_download is a microtime_float() timestamp)
	public $download_delay=500000;
	public $last_download=NULL;

	// Upload limits (delay in microseconds; last_upload is a microtime_float() timestamp)
	public $upload_delay=5000000;
	public $last_upload=NULL;

	// Testing: when true nothing is posted to the wiki and nothing is written to the log file
	public $test_mode=false;

	// Threading limits
	public $thread_mode=false;
	public $children=0;
	public $max_children=10;

	// PID of the process that ran the constructor; NULL until start-up has happened
	private $owner_pid=NULL;

	/**
	 * Constructor: finish the configuration, install signal handlers when
	 * threading is enabled, then log in.
	 */
	public function __construct() {
		$this->bot_useragent='Boothby bot '.VERSION;

		if($this->thread_mode) {
			declare(ticks=1);
			// array($this,...) rather than array(&$this,...): taking a reference to $this is not allowed in current PHP
			pcntl_signal(SIGCHLD,array($this,'sig_handler'));
			// TODO: Sort this out
			//pcntl_signal(SIGINT,array($this,'sig_handler'));
		}

		$this->log_message('Starting up.');
		$this->owner_pid=getmypid();
		$this->login();
	}

	/**
	 * Destructor: mark the bot inactive, log out and log the shutdown.
	 *
	 * Only the process that ran the constructor shuts the session down. A forked
	 * child that calls exit() also runs this destructor, and would otherwise log
	 * the shared session out while the parent is still working.
	 */
	public function __destruct() {
		if(is_null($this->owner_pid) || $this->owner_pid!==getmypid()) return;

		$this->update_task();
		$this->logout();
		$this->log_message('Shutting down.');
	}

	//==================================================================================================
	// Utility functions
	//==================================================================================================

	/**
	 * Log in by posting the login form.
	 *
	 * @return mixed Raw response from upload_url(), or NULL in test mode
	 */
	public function login() {
		$this->log_message('Logging in.');
		if($this->test_mode) return;

		$post_vars=array(
			'wpName'=>$this->bot_username,
			'wpPassword'=>$this->bot_password,
			'wpLoginattempt'=>'Log in',
			'wpRemember'=>true,
			'wpRetype'=>'',
			'wpEmail'=>''
		);

		return $this->upload_url($this->base_url.'w/index.php?title=Special:Userlogin&action=submitlogin&type=login',$post_vars,$this->base_url.'wiki/Special:Userlogin');
	}

	/**
	 * Log out by requesting Special:Userlogout.
	 *
	 * @return mixed Raw response from download_url(), or NULL in test mode
	 */
	public function logout() {
		$this->log_message('Logging out.');
		if($this->test_mode) return;

		return $this->download_url($this->base_url.'wiki/Special:Userlogout');
	}

	/**
	 * Run one cURL request with the bot's cookie jar and user agent.
	 * Transfer errors are logged as non-fatal.
	 *
	 * @param string $url             URL to request
	 * @param bool   $include_headers Whether the response headers are included in the returned text
	 * @param array  $extra_options   Additional CURLOPT_* => value pairs
	 * @return mixed Response text, or FALSE when the transfer failed
	 */
	private function run_request($url,$include_headers,$extra_options=array()) {
		// "+" rather than array_merge(): CURLOPT_* keys are integers and array_merge() would renumber them
		$options=array(
			CURLOPT_COOKIEFILE=>$this->cookie_file,
			CURLOPT_COOKIEJAR=>$this->cookie_file,
			CURLOPT_HEADER=>$include_headers,
			CURLOPT_RETURNTRANSFER=>true,
			CURLOPT_USERAGENT=>$this->bot_useragent
		)+$extra_options;

		$ch=curl_init($url);
		curl_setopt_array($ch,$options);
		$response=curl_exec($ch);

		if(curl_errno($ch)) $this->log_error(curl_error($ch));
		curl_close($ch);

		return $response;
	}

	/**
	 * Wait until at least $delay microseconds have passed since $last_time.
	 *
	 * The timestamps are in seconds and the delay in microseconds, so the elapsed
	 * time is converted before subtracting, and usleep() is never given a negative value.
	 *
	 * @param float|null $last_time Timestamp of the previous request, or NULL if there was none
	 * @param int        $delay     Minimum gap between requests, in microseconds
	 * @return float Timestamp to store as the new "last request" time
	 */
	private function throttle($last_time,$delay) {
		$now=$this->microtime_float();
		if(is_null($last_time)) return $now;

		$remaining=$delay-(($now-$last_time)*1000000);
		if($remaining>0) {
			usleep((int)$remaining);
			$now=$this->microtime_float();
		}
		return $now;
	}

	/**
	 * Download a URL with a GET request (rate-limited by download_delay).
	 *
	 * @param string $url
	 * @return mixed Response body, or FALSE on transfer failure
	 */
	public function download_url($url) {
		$this->download_wait();
		return $this->run_request($url,false);
	}

	/**
	 * Wait until we're allowed to download again.
	 */
	public function download_wait() {
		if($this->download_delay>0) $this->last_download=$this->throttle($this->last_download,$this->download_delay);
	}

	/**
	 * Post form data to a URL (rate-limited by upload_delay). Does nothing in test mode.
	 *
	 * @param string $url
	 * @param array  $post_vars Form fields; passed to cURL as an array
	 * @param string $referrer  Value for the Referer header
	 * @return mixed Response headers and body, FALSE on transfer failure, or NULL in test mode
	 */
	public function upload_url($url,$post_vars,$referrer) {
		if($this->test_mode) return;
		$this->upload_wait();

		return $this->run_request($url,true,array(
			CURLOPT_POST=>true,
			CURLOPT_POSTFIELDS=>$post_vars,
			CURLOPT_REFERER=>$referrer
		));
	}

	/**
	 * Wait until we're allowed to upload again.
	 */
	public function upload_wait() {
		if($this->upload_delay>0) $this->last_upload=$this->throttle($this->last_upload,$this->upload_delay);
	}

	/**
	 * Download the rendered view of a page.
	 */
	public function download_page($article_name) {
		return $this->download_url($this->base_url.'wiki/'.$this->spaces_to_underscores($article_name));
	}

	/**
	 * Download the file behind an image description page.
	 *
	 * NOTE(review): the original link pattern was damaged in the published source;
	 * only its tail (title="<name>"> followed by the file name) survived. The
	 * pattern below is a reconstruction and assumes the description page links
	 * the file as <a href="/PATH" class="internal" title="NAME"> - confirm.
	 *
	 * @param string $file_name File name without the "Image:" prefix
	 * @return mixed File contents, or FALSE when the link was not found or the transfer failed
	 */
	public function download_file($file_name) {
		$file_page=(string)$this->download_page('Image:'.$this->spaces_to_underscores($file_name));
		$title=htmlspecialchars($this->underscores_to_spaces($file_name));

		$matches=array();
		if(!preg_match('|<a href="/([^"]+)" class="internal" title="'.preg_quote($title,'|').'">|',$file_page,$matches)) {
			$this->log_error('Could not find the file link on the description page of Image:'.$file_name.'.');
			return false;
		}
		// base_url already ends in "/", so the captured path excludes its leading slash
		return $this->download_url($this->base_url.$matches[1]);
	}

	/**
	 * Download the edit form of a page.
	 */
	public function download_edit_page($article_name) {
		return $this->download_url($this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=edit');
	}

	/**
	 * Download the move form of a page.
	 */
	public function download_move_page($article_name) {
		return $this->download_url($this->base_url.'wiki/Special:Movepage/'.$this->spaces_to_underscores($article_name));
	}

	/**
	 * Download the delete confirmation form of a page.
	 */
	public function download_delete_page($article_name) {
		return $this->download_url($this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=delete');
	}

	/**
	 * Download a Special:Allpages listing.
	 *
	 * @param string|null $from      Title to start the listing from, or NULL for the beginning
	 * @param int         $namespace Namespace number
	 */
	public function download_allpages_page($from=NULL,$namespace=0) {
		$url=$this->base_url.'w/index.php?title=Special:Allpages&namespace='.intval($namespace);
		if(!is_null($from)) $url.='&from='.$this->spaces_to_underscores($from);
		return $this->download_url($url);
	}

	/**
	 * Download the Special:Whatlinkshere listing for a page.
	 */
	public function download_whatlinkshere_page($article_name) {
		return $this->download_url($this->base_url.'wiki/Special:Whatlinkshere/'.$this->spaces_to_underscores($article_name));
	}

	/**
	 * Collect the targets of all /wiki/ links in a piece of HTML.
	 * Titles are returned exactly as they appear in the href attribute
	 * (underscores, and whatever escaping the wiki applied).
	 *
	 * @param string $html
	 * @return array List of link targets (possibly empty)
	 */
	private function extract_wiki_links($html) {
		$links=array();
		preg_match_all('|href="/wiki/(.+)"|U',(string)$html,$links);
		return isset($links[1])?$links[1]:array();
	}

	/**
	 * Collect the article links from the content area of a special listing page.
	 *
	 * NOTE(review): the area markers are an assumption about the skin's markup
	 * ("<!-- start content -->" up to the print footer). When they are not found
	 * the whole page is scanned instead. Links into the Special: namespace
	 * (navigation) are dropped.
	 *
	 * @param string $page HTML of the listing page
	 * @return array List of link targets (possibly empty)
	 */
	private function extract_listing_links($page) {
		$page=(string)$page;
		$area=array();
		$content=preg_match('|<!-- start content -->(.+)<div class="printfooter"|smU',$page,$area)?$area[1]:$page;

		$titles=array();
		foreach($this->extract_wiki_links($content) as $title) {
			if(strpos($title,'Special:')!==0) $titles[]=$title;
		}
		return $titles;
	}

	/**
	 * Extract member articles from a category page.
	 *
	 * @param string $category_page HTML of the category page
	 * @return array List of link targets; empty when the article list is not found
	 */
	public function extract_category_articles($category_page) {
		$matches=array();
		if(!preg_match('|<a name="articles"(.+)<div class="printfooter"|smU',(string)$category_page,$matches)) return array();
		return $this->extract_wiki_links($matches[1]);
	}

	/**
	 * Extract page links from a Special:Allpages page.
	 *
	 * NOTE(review): the original pattern for this method was lost from the
	 * published source; see extract_listing_links() for the reconstruction.
	 *
	 * @param string $allpages_page HTML of the listing
	 * @return array List of link targets (possibly empty)
	 */
	public function extract_allpages_articles($allpages_page) {
		return $this->extract_listing_links($allpages_page);
	}

	/**
	 * Extract the linking pages from a Special:Whatlinkshere page.
	 *
	 * NOTE(review): only a fragment of the original pattern survived in the
	 * published source; see extract_listing_links() for the reconstruction.
	 *
	 * @param string $whatlinkshere_page HTML of the listing
	 * @return array List of link targets (possibly empty)
	 */
	public function extract_whatlinkshere_articles($whatlinkshere_page) {
		return $this->extract_listing_links($whatlinkshere_page);
	}

	/**
	 * Extract the hidden form values needed to save an edit.
	 *
	 * @param string $edit_page HTML of the edit form
	 * @return array array(wpStarttime, wpEdittime, wpEditToken, wpAutoSummary), or an empty array when not found
	 */
	public function extract_edit_keys($edit_page) {
		$matches=array();
		preg_match('|value="([0-9]+)" name="wpStarttime".+value="([0-9]+)" name="wpEdittime".+value="([a-zA-Z0-9]+)" name="wpEditToken".+name="wpAutoSummary" value="([a-zA-Z0-9]+)"|sm',(string)$edit_page,$matches);
		array_shift($matches); // Drop the whole-match entry, leaving only the four captured values
		return $matches;
	}

	/**
	 * Extract article wikimarkup from an edit page.
	 *
	 * NOTE(review): the end of the original pattern was stripped from the
	 * published source; "</textarea>" is the reconstructed terminator.
	 *
	 * @param string $edit_page HTML of the edit form
	 * @return string Decoded wikitext, or '' when the text box is not found
	 */
	public function extract_edit_content($edit_page) {
		$matches=array();
		if(!preg_match('|cols=\'80\' >(.*)</textarea>|smU',(string)$edit_page,$matches)) return '';
		return htmlspecialchars_decode($matches[1]);
	}

	/**
	 * Find the wpEditToken value in a form page.
	 *
	 * @param string $page HTML of the form
	 * @return string|null Token, or NULL when not found
	 */
	private function extract_edit_token($page) {
		$matches=array();
		if(!preg_match('|name=\'wpEditToken\' value="([a-zA-Z0-9]*)"|',(string)$page,$matches)) return NULL;
		return $matches[1];
	}

	/**
	 * Extract move page variables (the edit token).
	 *
	 * @return string|null Token, or NULL when not found
	 */
	public function extract_move_keys($move_page) {
		return $this->extract_edit_token($move_page);
	}

	/**
	 * Extract delete page variables (the edit token).
	 *
	 * @return string|null Token, or NULL when not found
	 */
	public function extract_delete_keys($delete_page) {
		return $this->extract_edit_token($delete_page);
	}

	/**
	 * Upload a file.
	 *
	 * @param string $file_name     Local path of the file to send
	 * @param string $new_file_name Destination name on the wiki
	 * @param string $summary       Upload description
	 * @return mixed Response from upload_url()
	 */
	public function upload_file($file_name,$new_file_name,$summary) {
		$this->log_message('Uploading file Image:'.$new_file_name.', file summary "'.$summary.'".');

		// The "@path" upload syntax was removed from PHP's cURL binding; CURLFile replaces it where available
		$file_field=class_exists('CURLFile')?new CURLFile($file_name):'@'.$file_name;

		$post_vars=array(
			'wpUploadFile'=>$file_field,
			'wpDestFile'=>$new_file_name,
			'wpUploadDescription'=>$summary,
			'wpIgnorewarning'=>'CHECKED',
			'wpUpload'=>'Upload file'
		);
		// Like a browser, send a checkbox only when it is ticked (presumably the wiki tests for presence - confirm)
		if($this->watch_articles) $post_vars['wpWatchthis']='CHECKED';

		return $this->upload_url($this->base_url.'wiki/Special:Upload',$post_vars,$this->base_url.'wiki/Special:Upload');
	}

	/**
	 * Upload an edited page.
	 *
	 * @param string $article_name
	 * @param array  $edit_keys    Result of extract_edit_keys()
	 * @param string $edit_content New wikitext
	 * @param string $summary      Edit summary
	 * @param bool   $minor_edit   Whether to flag the edit as minor
	 * @return mixed Response from upload_url(), or FALSE when the edit keys are incomplete
	 */
	public function upload_edit_page($article_name,$edit_keys,$edit_content,$summary,$minor_edit) {
		// Without all four keys the edit form was not read properly; saving now could blank the page
		if(!is_array($edit_keys) || count($edit_keys)<4) {
			$this->log_error('Could not read the edit form of '.$article_name.'; page not saved.');
			return false;
		}

		$this->log_message('Uploading page '.$article_name.', edit summary "'.$summary.'"'.(($minor_edit)?' (minor)':'').'.');

		$post_vars=array(
			'wpSection'=>'',
			'wpStarttime'=>$edit_keys[0],
			'wpEdittime'=>$edit_keys[1],
			'wpScrolltop'=>'',
			'wpTextbox1'=>$edit_content,
			'wpSummary'=>$summary,
			'wpSave'=>'Save page',
			'wpEditToken'=>$edit_keys[2],
			'wpAutoSummary'=>$edit_keys[3]
		);
		// Like a browser, send a checkbox only when it is ticked (presumably the wiki tests for presence - confirm)
		if($minor_edit) $post_vars['wpMinoredit']='CHECKED';
		if($this->watch_articles) $post_vars['wpWatchthis']='CHECKED';

		$title=$this->spaces_to_underscores($article_name);
		return $this->upload_url($this->base_url.'w/index.php?title='.$title.'&action=submit',$post_vars,$this->base_url.'w/index.php?title='.$title.'&action=edit');
	}

	/**
	 * Upload the delete confirmation form for a page.
	 *
	 * @return mixed Response from upload_url(), or FALSE when no edit token was supplied
	 */
	public function upload_delete_page($article_name,$edit_token,$summary) {
		if(is_null($edit_token)) {
			$this->log_error('Could not read the delete form of '.$article_name.'; page not deleted.');
			return false;
		}

		$this->log_message('Deleting page '.$article_name.', delete summary "'.$summary.'".');

		$post_vars=array(
			'wpReason'=>$summary,
			'wpConfirmB'=>'Delete page',
			'wpEditToken'=>$edit_token
		);

		$url=$this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=delete';
		return $this->upload_url($url,$post_vars,$url);
	}

	/**
	 * Upload the move form to rename a page.
	 *
	 * @return mixed Response from upload_url(), or FALSE when no edit token was supplied
	 */
	public function upload_move_page($article_name,$new_article_name,$edit_token,$summary) {
		if(is_null($edit_token)) {
			$this->log_error('Could not read the move form of '.$article_name.'; page not moved.');
			return false;
		}

		$this->log_message('Moving page '.$article_name.' to '.$new_article_name.', edit summary "'.$summary.'".');

		$post_vars=array(
			'wpNewTitle'=>$new_article_name,
			'wpOldTitle'=>$article_name,
			'wpReason'=>$summary,
			'wpMove'=>'Move page',
			'wpEditToken'=>$edit_token
		);

		return $this->upload_url($this->base_url.'w/index.php?title=Special:Movepage&action=submit',$post_vars,$this->base_url.'wiki/Special:Movepage/'.$this->spaces_to_underscores($article_name));
	}

	/**
	 * Append one syslog-style line to the log file (skipped in test mode).
	 *
	 * @param string $label Text placed before the message ('Message', 'Error', 'Fatal error')
	 * @param string $text
	 */
	private function write_log($label,$text) {
		if($this->test_mode) return;

		$fh=fopen($this->log_file,'a');
		if($fh===false) return; // PHP has already raised a warning; the console copy is still printed by the caller
		// getmypid() is a core function, so the posix extension is not required for logging
		fwrite($fh,date('M j G:i:s').' localhost boothby.php['.getmypid().']: '.$label.': '.$text."\n");
		fclose($fh);
	}

	/**
	 * Log a message to the log file and the console.
	 */
	public function log_message($message) {
		$this->write_log('Message',$message);
		echo $message."\n";
	}

	/**
	 * Log an error (non-fatal) to the log file and the console.
	 */
	public function log_error($error) {
		$this->write_log('Error',$error);
		echo 'Error: '.$error."\n";
	}

	/**
	 * Log a fatal error and stop the script with a non-zero exit status.
	 */
	public function log_fatal($error) {
		$this->write_log('Fatal error',$error);
		echo 'Fatal error: '.$error."\n";
		exit(1);
	}

	/**
	 * Update the "Current task" line on the bot's user page.
	 * Nothing is done when the task is unchanged.
	 *
	 * @param string|null $task Task description; NULL means 'inactive'
	 */
	public function update_task($task=NULL) {
		if(is_null($task)) $task='inactive';
		if($this->current_task===$task) return;
		$this->current_task=$task;

		$user_page='User:'.$this->bot_username;
		$edit_page=$this->download_edit_page($user_page);
		$content=preg_replace('|\'\'\'Current task\'\'\': .*|','\'\'\'Current task\'\'\': '.$this->escape_replacement($task),$this->extract_edit_content($edit_page));
		$this->upload_edit_page($user_page,$this->extract_edit_keys($edit_page),$content,'Updated task (bot edit)',true);
	}

	/**
	 * Log a message and update the current task on the user page.
	 */
	public function log_message_and_update_task($message) {
		$this->log_message($message);
		$this->update_task($message);
	}

	/**
	 * Current Unix time in seconds, with microsecond precision.
	 *
	 * @return float
	 */
	public function microtime_float() {
		return microtime(true);
	}

	/**
	 * Signal handler used in thread mode.
	 *
	 * @param int $sig_number
	 */
	public function sig_handler($sig_number) {
		switch($sig_number) {
			case SIGINT:
				// Ctrl+C
				$this->log_error('Terminated in-process.');
				exit(0);
			case SIGCHLD:
				// A child process has terminated
				$this->children--;
				break;
			default:
				break;
		}
	}

	/**
	 * Replace underscores with spaces.
	 */
	public function underscores_to_spaces($text) {
		return str_replace('_',' ',$text);
	}

	/**
	 * Replace spaces with underscores.
	 */
	public function spaces_to_underscores($text) {
		return str_replace(' ','_',$text);
	}

	/**
	 * Make arbitrary text safe to use inside a preg_replace() replacement string.
	 */
	private function escape_replacement($text) {
		return addcslashes($text,'\\$');
	}

	/**
	 * Point wiki links at a new target, keeping any leading colon and any
	 * "|label" part: [[Old|label]] becomes [[New|label]].
	 *
	 * NOTE(review): the replacement text and the first capture group were damaged
	 * in the published source; "[[" + optional colon + new target + label + "]]"
	 * is the reconstructed form.
	 *
	 * @param string $wikitext
	 * @param string $old_target Link target to look for (matched literally)
	 * @param string $new_target Link target to insert
	 * @return string
	 */
	public function rewrite_links($wikitext,$old_target,$new_target) {
		$pattern='|\[\[(:?)'.preg_quote($old_target,'|').'(\|[^\]]*)?\]\]|';
		// ${1} rather than $1 so a new target starting with a digit is not read as part of the group number
		return preg_replace($pattern,'[[${1}'.$this->escape_replacement($new_target).'${2}]]',$wikitext);
	}

	/**
	 * Point template references at a new template, keeping any parameters:
	 * {{Old|a=1}} becomes {{New|a=1}}. The old name is matched case-insensitively.
	 *
	 * NOTE(review): the replacement text was lost from the published source (it
	 * appeared as an empty string); inserting the new name is inferred from the
	 * calling method's edit summary.
	 *
	 * @param string $wikitext
	 * @param string $old_name Template name without the "Template:" prefix
	 * @param string $new_name
	 * @return string
	 */
	public function rewrite_template_references($wikitext,$old_name,$new_name) {
		$pattern='|\{\{'.preg_quote($old_name,'|').'(\|[^\}]*)?\}\}|i';
		return preg_replace($pattern,'{{'.$this->escape_replacement($new_name).'${1}}}',$wikitext);
	}

	/**
	 * Start one unit of work.
	 *
	 * With threading disabled this returns TRUE at once. With threading enabled
	 * it waits for a free slot and forks: the child gets TRUE, the parent waits
	 * for a child to finish and gets FALSE. A failed fork is fatal.
	 *
	 * @return bool Whether the calling process should do the work
	 */
	private function enter_worker() {
		if(!$this->thread_mode) return true;

		while($this->children>=$this->max_children) sleep(5);
		$this->children++;

		$pid=pcntl_fork();
		if($pid==-1) $this->log_fatal('Failed to fork process.');
		if($pid) {
			// We're the parent
			pcntl_wait($status);
			return false;
		}
		// We're the child
		return true;
	}

	/**
	 * Finish one unit of work: a forked child ends here.
	 */
	private function leave_worker() {
		if($this->thread_mode) exit(0);
	}

	//==================================================================================================
	// Process functions
	//==================================================================================================

	/**
	 * Move all the pages in a category to a new category.
	 *
	 * NOTE(review): the "Category:" prefix in the rewritten link is part of the
	 * reconstruction of a damaged pattern - confirm.
	 *
	 * @param string $category_name     Old category name, without the "Category:" prefix
	 * @param string $new_category_name New category name, without the prefix
	 */
	public function recategorise_pages($category_name,$new_category_name) {
		// TODO: Support for moving the category page as well
		// TODO: Support for sub-categories and test it with categories other than image categories
		$category_content=$this->download_page('Category:'.$category_name);
		$page_list=$this->extract_category_articles($category_content);

		$this->log_message_and_update_task('Moving '.count($page_list).' pages from Category:'.$category_name.' to Category:'.$new_category_name.'.');

		foreach($page_list as $page_name) {
			if(!$this->enter_worker()) continue;

			$edit_page=$this->download_edit_page($page_name);
			$new_content=$this->rewrite_links($this->extract_edit_content($edit_page),'Category:'.$category_name,'Category:'.$new_category_name);
			$this->upload_edit_page($page_name,$this->extract_edit_keys($edit_page),$new_content,'Moved to Category:'.$new_category_name.' (bot edit)',false);

			$this->leave_worker();
		}
	}

	/**
	 * Change the links in pages which link to the old article to point to the new article.
	 */
	public function relink_pages($article_name,$new_article_name) {
		$whatlinkshere_content=$this->download_whatlinkshere_page($article_name);
		$page_list=$this->extract_whatlinkshere_articles($whatlinkshere_content);

		$this->log_message_and_update_task('Re-linking '.count($page_list).' pages from '.$article_name.' to '.$new_article_name.'.');

		foreach($page_list as $page_name) {
			if(!$this->enter_worker()) continue;

			$edit_page=$this->download_edit_page($page_name);
			$new_content=$this->rewrite_links($this->extract_edit_content($edit_page),$article_name,$new_article_name);
			$this->upload_edit_page($page_name,$this->extract_edit_keys($edit_page),$new_content,'Re-linked to '.$new_article_name.' (bot edit)',false);

			$this->leave_worker();
		}
	}

	/**
	 * Change the references in pages which reference the old template to the new template.
	 *
	 * @param string $template_name     Old template name, without the "Template:" prefix
	 * @param string $new_template_name New template name, without the prefix
	 */
	public function rereference_templates($template_name,$new_template_name) {
		// TODO: Support for moving the template as well
		$whatlinkshere_content=$this->download_whatlinkshere_page('Template:'.$template_name);
		$page_list=$this->extract_whatlinkshere_articles($whatlinkshere_content);

		$this->log_message_and_update_task('Re-referencing '.count($page_list).' pages with Template:'.$template_name.' to Template:'.$new_template_name.'.');

		foreach($page_list as $page_name) {
			if(!$this->enter_worker()) continue;

			$edit_page=$this->download_edit_page($page_name);
			$new_content=$this->rewrite_template_references($this->extract_edit_content($edit_page),$template_name,$new_template_name);
			$this->upload_edit_page($page_name,$this->extract_edit_keys($edit_page),$new_content,'Re-referenced to Template:'.$new_template_name.' (bot edit)',false);

			$this->leave_worker();
		}
	}

	/**
	 * Move a page (and sub-pages), re-link the pages that pointed at it and
	 * optionally delete the redirects left behind.
	 *
	 * NOTE(review): only the first Special:Allpages listing starting at
	 * $page_name is read, so sub-pages beyond that listing are not moved.
	 * Titles are compared as they appear in the listing's links; titles the
	 * wiki escapes in URLs may therefore not match - confirm.
	 *
	 * @param string $page_name        Current title
	 * @param string $new_page_name    New title
	 * @param bool   $move_subpages    Also move pages titled "$page_name/..."
	 * @param bool   $delete_redirects Delete each old title after moving it
	 */
	public function move_page($page_name,$new_page_name,$move_subpages=true,$delete_redirects=false) {
		$allpages_content=$this->download_allpages_page($page_name,0);
		$page_list=$this->extract_allpages_articles($allpages_content);

		$pages_to_move=array();
		foreach($page_list as $listed_name) {
			$listed_name=$this->underscores_to_spaces($listed_name);
			$is_subpage=($move_subpages) && (strpos($listed_name,$page_name.'/')===0);
			if(($listed_name===$page_name) || $is_subpage) $pages_to_move[]=$listed_name;
		}

		$this->log_message_and_update_task('Moving '.$page_name.' '.(($move_subpages)?'and its sub-pages ':'').'('.count($pages_to_move).' pages in total) to '.$new_page_name.'.');

		foreach($pages_to_move as $old_name) {
			if(!$this->enter_worker()) continue;

			// Swap only the leading $page_name; the same text later in a sub-page title must stay untouched
			$new_name=$new_page_name.substr($old_name,strlen($page_name));
			$move_form=$this->download_move_page($old_name);
			$this->upload_move_page($old_name,$new_name,$this->extract_move_keys($move_form),'Moved to '.$new_name.' (bot edit)');
			$this->relink_pages($old_name,$new_name);

			// TODO: Move this out to delete_page function, making sure to set delete_subpages FALSE!
			if($delete_redirects) {
				$this->upload_delete_page($old_name,$this->extract_delete_keys($this->download_delete_page($old_name)),'Moved to '.$new_name.' (bot edit)');
			}

			$this->leave_worker();
		}
	}

	/**
	 * Move a file: download it, upload it under the new name with the old
	 * description text, delete the old file page and re-link its users.
	 *
	 * Nothing is uploaded or deleted when the download fails.
	 *
	 * @param string $file_name     Current file name, without the "Image:" prefix
	 * @param string $new_file_name New file name, without the prefix
	 */
	public function move_file($file_name,$new_file_name) {
		$this->log_message_and_update_task('Moving Image:'.$file_name.' to '.$new_file_name.'.');

		$file_data=$this->download_file($file_name);
		if(!is_string($file_data) || $file_data==='') {
			// Carrying on would upload an empty file and then delete the original
			$this->log_error('Could not download Image:'.$file_name.'; file not moved.');
			return;
		}

		$temp_file_name=tempnam('./','temp_');
		file_put_contents($temp_file_name,$file_data);

		// The old description page text becomes the upload description
		$description=$this->extract_edit_content($this->download_edit_page('Image:'.$file_name));
		$this->upload_file($temp_file_name,$new_file_name,$description);

		unlink($temp_file_name);

		$this->upload_delete_page('Image:'.$file_name,$this->extract_delete_keys($this->download_delete_page('Image:'.$file_name)),'Moved to Image:'.$new_file_name.' (bot edit)');
		$this->relink_pages('Image:'.$file_name,'Image:'.$new_file_name);
	}
}