From StrategyWiki, the free strategy guide and walkthrough wiki
#! /usr/bin/php
<?php
/**
* Boothby
*
* A bot for automating boring and repetitive tasks, and making
* maintenance easier on StrategyWiki (http://strategywiki.net/).
*
* @author Philip Withnall <drbob@tecnocode.co.uk>
* @copyright Philip Withnall 2006
* @package Boothby
* @version 3.0.0
* @license http://tecnocode.co.uk/links/sourcecode-license.html
* @filesource
*/
/*
Terminology:
- Article: any page on the wiki, be it a category, file, template, or perhaps just a "normal" page
- Page: a "normal" page
- Category: a category
- File: a file
*/
/*
TODO:
Core functionality:
- Perhaps move to curl_multi_* instead of threading?
- Perhaps use curl_setopt_array instead of lots of curl_setopts?
- Clean up memory usage
- Make it OS-independent (and make it work on CLI and CGI)
- --help
- GTK interface
- Colour console output
- See if unset() takes an indefinite number of parameters; if so, combine them where necessary
- Add comments to all functions detailing what they do
- Make some classes singletons so they can't be cloned
- Stop it editing pages in the StrategyWiki, MediaWiki, *_talk namespaces unless explicitly told to
- New architecture:
- *Task queues* (from text files, command line, etc.) control everything, and chain together lots of other functions:
- *Article lists* produce lists of pages: perhaps from in a category, perhaps pages which link to another page, perhaps just random ones, etc. (take in mixed, return array)
- *Modifiers* manipulate the lists to (for example) remove pages from unwanted namespaces (take in array, return array)
- *Processes* are then applied to each page in the list: replace this link with that, convert to PNG, add a category, etc. (take in mixed foreach entry in array, return boolean)
Processes:
- Functionality to go through and convert all images to PNG (and change links to them)
- Functionality to go through all images and produce a list of ones with likely bad names (e.g. only one word in the name, etc.)
- Functionality to go through all pages, find the uncategorised main pages and add wikifications to them
*/
/*
Threaded code:
if($this->thread_mode)
{
while($this->children>=$this->max_children) sleep(5);
$this->children++;
$pid=pcntl_fork();
}
if(($this->thread_mode) && ($pid==-1)) $this->log_fatal('Failed to fork process.');
elseif(($this->thread_mode) && ($pid))
{
// We're the parent
pcntl_wait($status);
}
else
{
// We're the child, or threading is disabled
$article_content=$this->download_edit_page($article_name);
$this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),preg_replace('|\[\[(:)?'.str_replace('|','\|',$category_name).'(\|[^\]]*)?\]\]|','[[$1'.$new_category_name.'$2]]',$this->extract_edit_content($article_content)),'Moved to Category:'.$new_category_name.' (bot edit)',false);
if($this->thread_mode) exit(0);
}
*/
// Entry point: constructing boothby_common logs in, then loads, checks and
// runs the task queue (all of that happens inside its constructor).
$boothby=new boothby_common();
class boothby_common
{
// TODO: Make these static
// Naming
public $name='Boothby bot';
public $version='3.0.0';
// Site details
public $base_url='http://strategywiki.org/';
public $bot_username='Boothby (bot)';
// NOTE(review): the credential is hard-coded in source; consider loading it from configuration
protected $bot_password='password';
// Replaced in the constructor with "$name $version"
public $bot_useragent='Boothby bot';
// When true, uploads and edits tick the wiki's "watch this" checkbox
public $watch_articles=true;
// TODO: Make sure this is ordered properly and add values
// NOTE(review): only the first two entries are name=>id pairs; the rest are plain list values
public $all_namespaces=array(''=>0,'Talk'=>1,'User','User talk','StrategyWiki','StrategyWiki talk','Category','Category talk','Image','Image talk','Template','Template talk','MediaWiki','MediaWiki talk','Help','Help talk','Portal','Portal talk','Game index','Game index talk');
public $bad_namespaces=array('Talk','User','User talk','StrategyWiki','StrategyWiki talk','Image talk','MediaWiki','MediaWiki talk','Template talk','Help','Help talk','Category talk');
// TODO: Check if I'm needed
//protected $task_queue=NULL;
// Last task text written to the bot's user page by update_task()
protected $current_task=NULL;
// System details
public $cookie_file='/var/tmp/boothby_cookies.txt';
public $log_file='/var/log/boothby.txt';
// Download limits (the delay feeds a usleep() call, so it is in microseconds)
protected $download_delay=500000;
protected $last_download=NULL;
// Upload limits (the delay feeds a usleep() call, so it is in microseconds)
protected $upload_delay=5000000;
protected $last_upload=NULL;
// Testing: when true, uploads are skipped and nothing is written to the log file
public $test_mode=false;
// Threading limits
public $thread_mode=false;
protected $children=0;
protected $max_children=10;
// File handles (shared by all instances; opened in the constructor)
protected static $stderr=NULL;
protected static $stdout=NULL;
protected static $stdin=NULL;
/**
 * Set the bot up and run the default task queue.
 *
 * Defines the task queue constants, builds the user agent string, installs
 * the child-process signal handler when threading is enabled, opens the
 * standard streams, logs in, then loads, validates and runs the XML task
 * queue from 'test_script2.xml'.
 *
 * Default site, system and limit settings live in the property declarations.
 */
public function __construct()
{
    // Task queue types. The names are passed as strings: enum() hands them
    // to define(), and a bare not-yet-defined constant is an Error on PHP 8.
    $this->enum(
        'TASK_QUEUE_VAR',
        'TASK_QUEUE_FILE',
        'TASK_QUEUE_XML',
        'TASK_QUEUE_CLI'
    );
    // Task queue entry types
    $this->enum(
        'TASK_QUEUE_ENTRY_BLOCK',
        'TASK_QUEUE_ENTRY_INLINE'
    );
    // Task queue entry categories (guarded so a second instance does not
    // trigger "constant already defined" warnings)
    $categories=array(
        'TASK_QUEUE_ENTRY_INPUT'=>'input',
        'TASK_QUEUE_ENTRY_LOG'=>'log',
        'TASK_QUEUE_ENTRY_ARTICLE_LIST'=>'article_list',
        'TASK_QUEUE_ENTRY_MODIFIER'=>'modifier',
        'TASK_QUEUE_ENTRY_CONSTRUCT'=>'construct',
        'TASK_QUEUE_ENTRY_PROCESS'=>'process'
    );
    foreach($categories as $constant=>$value)
    {
        if(!defined($constant)) define($constant,$value);
    }
    $this->bot_useragent=$this->name.' '.$this->version;
    if($this->thread_mode)
    {
        declare(ticks=1);
        pcntl_signal(SIGCHLD,array($this,"sig_handler"));
        // TODO: Sort this out
        //pcntl_signal(SIGINT,array($this,"sig_handler"));
    }
    // File handles: the STD* constants only exist under the CLI SAPI, so
    // check that they are defined before touching them.
    self::$stderr=(defined('STDERR') && is_resource(STDERR))?STDERR:fopen('php://stderr','w');
    self::$stdout=(defined('STDOUT') && is_resource(STDOUT))?STDOUT:fopen('php://stdout','w');
    self::$stdin=(defined('STDIN') && is_resource(STDIN))?STDIN:fopen('php://stdin','r');
    // Let's get going!
    $this->log_message('Starting up.');
    $this->login();
    $task_queue=new boothby_task_queue();
    $task_queue->load_task_queue(TASK_QUEUE_XML,'test_script2.xml');
    $task_queue->check_validity();
    $task_queue->run_tasks();
}
/**
 * Shut down: reset the task shown on the bot's user page, log out and
 * record the shutdown in the log.
 */
public function __destruct()
{
    // Called without an argument, update_task() records 'inactive'
    $this->update_task();
    $this->logout();
    $this->log_message('Shutting down.');
}
/**
 * Fetch a URL with a GET request, honouring the download rate limit.
 *
 * @param string $url Absolute URL to fetch
 * @return mixed Response body as returned by curl_exec() (no headers)
 */
protected function download_url($url)
{
    $this->download_wait();
    $handle=curl_init($url);
    curl_setopt_array($handle,array(
        CURLOPT_COOKIEFILE=>$this->cookie_file,
        CURLOPT_COOKIEJAR=>$this->cookie_file,
        CURLOPT_HEADER=>false,
        CURLOPT_RETURNTRANSFER=>true,
        CURLOPT_USERAGENT=>$this->bot_useragent
    ));
    $body=curl_exec($handle);
    // Transfer errors are logged but not fatal; the caller gets curl_exec()'s result
    if(curl_errno($handle)) $this->log_error(curl_error($handle));
    curl_close($handle);
    return $body;
}
/**
 * Sleep until at least $download_delay microseconds have passed since the
 * previous download, then record the current time as the last download.
 *
 * The first call never sleeps. Timestamps are in seconds and the delay is
 * in microseconds, so the elapsed time is converted before subtracting;
 * a non-positive remainder is never passed to usleep().
 */
private function download_wait()
{
    if($this->download_delay<=0) return;
    if(!is_null($this->last_download))
    {
        $elapsed_usec=($this->microtime_float()-$this->last_download)*1000000;
        $remaining_usec=(int)round($this->download_delay-$elapsed_usec);
        if($remaining_usec>0) usleep($remaining_usec);
    }
    $this->last_download=$this->microtime_float();
}
/**
 * POST form fields to a URL, honouring the upload rate limit.
 *
 * Does nothing (and returns nothing) in test mode.
 *
 * @param string $url       Absolute URL to post to
 * @param mixed  $post_vars Value handed to CURLOPT_POSTFIELDS
 * @param string $referrer  Referer header to send
 * @return mixed Response (headers included) as returned by curl_exec()
 */
protected function upload_url($url,$post_vars,$referrer)
{
    if($this->test_mode) return;
    $this->upload_wait();
    $handle=curl_init($url);
    curl_setopt_array($handle,array(
        CURLOPT_COOKIEFILE=>$this->cookie_file,
        CURLOPT_COOKIEJAR=>$this->cookie_file,
        CURLOPT_POST=>true,
        CURLOPT_HEADER=>true,
        CURLOPT_RETURNTRANSFER=>true,
        CURLOPT_POSTFIELDS=>$post_vars,
        CURLOPT_REFERER=>$referrer,
        CURLOPT_USERAGENT=>$this->bot_useragent
    ));
    $reply=curl_exec($handle);
    // Transfer errors are logged but not fatal; the caller gets curl_exec()'s result
    if(curl_errno($handle)) $this->log_error(curl_error($handle));
    curl_close($handle);
    return $reply;
}
/**
 * Sleep until at least $upload_delay microseconds have passed since the
 * previous upload, then record the current time as the last upload.
 *
 * The first call never sleeps. Timestamps are in seconds and the delay is
 * in microseconds, so the elapsed time is converted before subtracting;
 * a non-positive remainder is never passed to usleep().
 */
private function upload_wait()
{
    if($this->upload_delay<=0) return;
    if(!is_null($this->last_upload))
    {
        $elapsed_usec=($this->microtime_float()-$this->last_upload)*1000000;
        $remaining_usec=(int)round($this->upload_delay-$elapsed_usec);
        if($remaining_usec>0) usleep($remaining_usec);
    }
    $this->last_upload=$this->microtime_float();
}
/**
 * Log an informational message to the log file and to stdout.
 *
 * @param string $message Text to log
 */
protected function log_message($message)
{
    $this->append_log_line('Message',$message);
    fputs(self::$stdout,$message."\n");
}
/**
 * Log a non-fatal error to the log file and to stderr.
 *
 * @param string $error Error text
 */
protected function log_error($error)
{
    $this->append_log_line('Error',$error);
    fputs(self::$stderr,'Error: '.$error."\n");
}
/**
 * Log a fatal error to the log file and to stderr, then terminate.
 *
 * The script exits with status 1 so that callers can detect the failure.
 *
 * @param string $error Error text
 */
protected function log_fatal($error)
{
    $this->append_log_line('Fatal error',$error);
    fputs(self::$stderr,'Fatal error: '.$error."\n");
    exit(1);
}
/**
 * Append one syslog-style line to the log file.
 *
 * Nothing is written in test mode. If the log file cannot be opened the
 * entry is dropped (fopen() has already raised a warning) rather than
 * passing an invalid handle to fwrite()/fclose().
 *
 * @param string $label Severity label placed before the text
 * @param string $text  Text to log
 */
private function append_log_line($label,$text)
{
    if($this->test_mode) return;
    $fh=fopen($this->log_file,'a');
    if($fh===false) return;
    fwrite($fh,date('M j G:i:s').' localhost boothby.php['.posix_getpid().']: '.$label.': '.$text."\n");
    fclose($fh);
}
/**
 * Write the current task onto the bot's user page.
 *
 * The "'''Current task''': ..." line of the page is rewritten. Nothing is
 * done when the task equals the one recorded last.
 *
 * @param string|null $task Task description; NULL means 'inactive'
 */
protected function update_task($task=NULL)
{
    if(is_null($task)) $task='inactive';
    if($this->current_task==$task) return;
    $this->current_task=$task;
    $user_page='User:'.$this->bot_username;
    $edit_page=$this->download_edit_page($user_page);
    $edit_keys=$this->extract_edit_keys($edit_page);
    $new_content=preg_replace('|\'\'\'Current task\'\'\': .*|','\'\'\'Current task\'\'\': '.$task,$this->extract_edit_content($edit_page));
    $this->upload_edit_page($user_page,$edit_keys,$new_content,'Updated task (bot edit)',true);
}
/**
 * Log a message, then publish the same text as the current task on the
 * bot's user page.
 *
 * @param string $message Text to log and publish
 */
protected function log_message_and_update_task($message)
{
    $this->log_message($message);
    $this->update_task($message);
}
/**
 * Current Unix timestamp with microsecond precision.
 *
 * @return float Seconds since the Unix epoch
 */
protected function microtime_float()
{
    // microtime(true) returns the float directly; no need to parse "usec sec"
    return microtime(true);
}
/**
 * Signal handler for threaded mode.
 *
 * SIGINT logs the interruption and exits; SIGCHLD decrements the count of
 * running children. Any other signal is ignored.
 *
 * @param int $sig_number Signal number delivered by pcntl
 */
private function sig_handler($sig_number)
{
    // TODO
    switch($sig_number)
    {
        case SIGINT: // was misspelt SIGNIT, which is not a defined constant
            // Ctrl+C
            $this->log_error('Terminated in-process.');
            exit(0);
            break;
        case SIGCHLD:
            // A child process has terminated
            $this->children--;
            break;
        default:
            break;
    }
}
/**
 * Replace every underscore with a space.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
protected function underscores_to_spaces($text)
{
    $converted=str_replace('_',' ',$text);
    return $converted;
}
/**
 * Replace every space with an underscore.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
protected function spaces_to_underscores($text)
{
    $converted=str_replace(' ','_',$text);
    return $converted;
}
/**
 * Define a run of integer constants, numbered from 1 in argument order.
 *
 * Takes any number of constant names (strings). A name that is already
 * defined is left alone, so calling this again does not raise
 * "constant already defined" warnings; it still consumes its number.
 */
protected function enum()
{
    $value=0;
    foreach(func_get_args() as $constant)
    {
        $value++;
        if(!defined($constant)) define($constant,$value);
    }
}
// Utility functions
/**
 * Log the bot in by posting the Special:Userlogin form.
 *
 * Only the log message is produced in test mode.
 *
 * @return mixed Result of upload_url(), or nothing in test mode
 */
function login()
{
    $this->log_message('Logging in.');
    if($this->test_mode) return;
    $form=array(
        'wpName'=>$this->bot_username,
        'wpPassword'=>$this->bot_password,
        'wpLoginattempt'=>'Log in',
        'wpRemember'=>true,
        'wpRetype'=>'',
        'wpEmail'=>''
    );
    $target=$this->base_url.'w/index.php?title=Special:Userlogin&action=submitlogin&type=login';
    $referrer=$this->base_url.'wiki/Special:Userlogin';
    return $this->upload_url($target,$form,$referrer);
}
/**
 * Log the bot out by requesting Special:Userlogout.
 *
 * Only the log message is produced in test mode.
 *
 * @return mixed Result of download_url(), or nothing in test mode
 */
function logout()
{
    $this->log_message('Logging out.');
    if($this->test_mode) return;
    $logout_url=$this->base_url.'wiki/Special:Userlogout';
    return $this->download_url($logout_url);
}
/**
 * Download the rendered view of an article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @return mixed Result of download_url()
 */
function download_page($article_name)
{
    $url=$this->base_url.'wiki/'.$this->spaces_to_underscores($article_name);
    return $this->download_url($url);
}
/**
 * Download a file by locating its link on the Image: description page.
 *
 * The file name is quoted with preg_quote() before it is placed in the
 * pattern, so characters such as '.', '(' or '+' are matched literally.
 *
 * @param string $file_name File name without the "Image:" prefix
 * @return mixed File contents from download_url(), or false when the
 *               description page contains no matching link
 */
function download_file($file_name)
{
    $file_page=$this->download_page('Image:'.$this->spaces_to_underscores($file_name));
    $title=preg_quote($this->underscores_to_spaces($file_name),'|');
    $label=preg_quote($file_name,'|');
    $matches=array();
    if(!preg_match('|<p><a href="/(.+)" class=\'internal\' title="'.$title.'">'.$label.'</a>|',$file_page,$matches))
    {
        // Without this guard the bot would go on to download the site root
        $this->log_error('Could not find a download link for Image:'.$file_name.'.');
        return false;
    }
    return $this->download_url($this->base_url.$matches[1]);
}
/**
 * Download the edit form of an article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @return mixed Result of download_url()
 */
function download_edit_page($article_name)
{
    $title=$this->spaces_to_underscores($article_name);
    return $this->download_url($this->base_url.'w/index.php?title='.$title.'&action=edit');
}
/**
 * Download the Special:Movepage form for an article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @return mixed Result of download_url()
 */
function download_move_page($article_name)
{
    $title=$this->spaces_to_underscores($article_name);
    return $this->download_url($this->base_url.'wiki/Special:Movepage/'.$title);
}
/**
 * Download the delete confirmation form of an article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @return mixed Result of download_url()
 */
function download_delete_page($article_name)
{
    $title=$this->spaces_to_underscores($article_name);
    return $this->download_url($this->base_url.'w/index.php?title='.$title.'&action=delete');
}
/**
 * Download one Special:Allpages listing and return the article links on it.
 *
 * @param string|null $from      Title to start the listing from, if any
 * @param int         $namespace Namespace number to list
 * @return array Result of extract_allpages_articles()
 */
function download_allpages_page($from=NULL,$namespace=0)
{
    $url=$this->base_url.'w/index.php?title=Special:Allpages&namespace='.intval($namespace);
    if(!is_null($from)) $url.='&from='.$this->spaces_to_underscores($from);
    $listing=$this->download_url($url);
    return $this->extract_allpages_articles($listing);
}
/**
 * Download Special:Whatlinkshere for an article and return the linking
 * articles found on it.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @return array Result of extract_whatlinkshere_articles()
 */
function download_whatlinkshere_page($article_name)
{
    $url=$this->base_url.'wiki/Special:Whatlinkshere/'.$this->spaces_to_underscores($article_name);
    $listing=$this->download_url($url);
    return $this->extract_whatlinkshere_articles($listing);
}
/**
 * Download Special:Deadendpages and return the articles listed on it.
 *
 * @param int $limit Value for the page's limit parameter
 * @return array Result of extract_deadendpages_articles()
 */
function download_deadendpages_page($limit=1000)
{
    $url=$this->base_url.'w/index.php?title=Special:Deadendpages&limit='.intval($limit);
    $listing=$this->download_url($url);
    return $this->extract_deadendpages_articles($listing);
}
/**
 * Upload a local file through Special:Upload.
 *
 * Uses CURLFile when the class exists; the legacy '@path' form is only a
 * fallback for old PHP versions, since newer cURL bindings no longer treat
 * an '@' prefix as a file upload.
 *
 * @param string $file_name     Path of the local file to send
 * @param string $new_file_name Destination file name on the wiki
 * @param string $summary       Upload description
 * @return mixed Result of upload_url(), or false if the local file is unreadable
 */
function upload_file($file_name,$new_file_name,$summary)
{
    $this->log_message('Uploading file Image:'.$new_file_name.'.');
    if(!is_readable($file_name))
    {
        $this->log_error('Cannot read local file '.$file_name.' for upload.');
        return false;
    }
    $file_field=(class_exists('CURLFile'))?new CURLFile($file_name):'@'.$file_name;
    $post_vars=array(
        'wpUploadFile'=>$file_field,
        'wpDestFile'=>$new_file_name,
        'wpUploadDescription'=>$summary,
        'wpWatchthis'=>($this->watch_articles)?'CHECKED':'',
        'wpIgnorewarning'=>'CHECKED',
        'wpUpload'=>'Upload file'
    );
    return $this->upload_url($this->base_url.'wiki/Special:Upload',$post_vars,$this->base_url.'wiki/Special:Upload');
}
/**
 * Submit an edited article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @param array  $edit_keys    Start time, edit time, edit token and auto
 *                             summary, in that order (see extract_edit_keys())
 * @param string $edit_content New wikitext
 * @param string $summary      Edit summary
 * @param bool   $minor_edit   Whether to flag the edit as minor
 * @return mixed Result of upload_url()
 */
function upload_edit_page($article_name,$edit_keys,$edit_content,$summary,$minor_edit)
{
    $this->log_message('Uploading page '.$article_name.'.');
    $page_url=$this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name);
    $minor_flag=($minor_edit)?'CHECKED':'';
    $watch_flag=($this->watch_articles)?'CHECKED':'';
    $form=array(
        'wpSection'=>'',
        'wpStarttime'=>$edit_keys[0],
        'wpEdittime'=>$edit_keys[1],
        'wpScrolltop'=>'',
        'wpTextbox1'=>$edit_content,
        'wpSummary'=>$summary,
        'wpMinoredit'=>$minor_flag,
        'wpWatchthis'=>$watch_flag,
        'wpSave'=>'Save page',
        'wpEditToken'=>$edit_keys[2],
        'wpAutoSummary'=>$edit_keys[3]
    );
    return $this->upload_url($page_url.'&action=submit',$form,$page_url.'&action=edit');
}
/**
 * Submit the delete confirmation form for an article.
 *
 * @param string $article_name Article title (spaces become underscores)
 * @param string $edit_token   Token taken from the delete form
 * @param string $summary      Deletion reason
 * @return mixed Result of upload_url()
 */
function upload_delete_page($article_name,$edit_token,$summary)
{
    $this->log_message('Deleting page '.$article_name.'.');
    // The form posts back to its own URL, which is also used as the referrer
    $delete_url=$this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=delete';
    $form=array(
        'wpReason'=>$summary,
        'wpConfirmB'=>'Delete page',
        'wpEditToken'=>$edit_token
    );
    return $this->upload_url($delete_url,$form,$delete_url);
}
/**
 * Submit the Special:Movepage form to rename an article.
 *
 * @param string $article_name     Current title
 * @param string $new_article_name New title
 * @param string $edit_token       Token taken from the move form
 * @param string $summary          Reason for the move
 * @return mixed Result of upload_url()
 */
function upload_move_page($article_name,$new_article_name,$edit_token,$summary)
{
    $this->log_message('Moving page '.$article_name.' to '.$new_article_name.'.');
    $form=array(
        'wpNewTitle'=>$new_article_name,
        'wpOldTitle'=>$article_name,
        'wpReason'=>$summary,
        'wpMove'=>'Move page',
        'wpEditToken'=>$edit_token
    );
    $target=$this->base_url.'w/index.php?title=Special:Movepage&action=submit';
    $referrer=$this->base_url.'wiki/Special:Movepage/'.$this->spaces_to_underscores($article_name);
    return $this->upload_url($target,$form,$referrer);
}
/**
 * Extract member article names from a category page.
 *
 * Looks at the HTML between the "articles" anchor and the print footer and
 * collects every /wiki/ link target in it.
 *
 * @param string $category_page HTML of the category page
 * @return array Link targets as they appear in the HTML; empty when the
 *               expected section is not present
 */
function extract_category_articles($category_page)
{
    $section=array();
    if(!preg_match('|<a name="articles"(.+)<div class="printfooter"|smU',$category_page,$section)) return array();
    $links=array();
    preg_match_all('|href="/wiki/(.+)"|U',$section[1],$links);
    return $links[1];
}
/**
 * Extract article names from a Special:Allpages listing.
 *
 * Looks at the HTML between the first <hr /> and the print footer and
 * collects every /wiki/ link target in it.
 *
 * @param string $allpages_page HTML of the listing
 * @return array Link targets as they appear in the HTML; empty when the
 *               expected section is not present
 */
function extract_allpages_articles($allpages_page)
{
    $section=array();
    if(!preg_match('|<hr />(.+)<div class="printfooter"|smU',$allpages_page,$section)) return array();
    $links=array();
    preg_match_all('|href="/wiki/([^"]+)"|',$section[1],$links);
    return $links[1];
}
/**
 * Extract linking article names from a Special:Whatlinkshere page.
 *
 * Looks at the first list that follows the "500</a>)." paging links and
 * collects every /wiki/ link target in it.
 *
 * @param string $whatlinkshere_page HTML of the page
 * @return array Link targets as they appear in the HTML; empty when the
 *               expected list is not present
 */
function extract_whatlinkshere_articles($whatlinkshere_page)
{
    $section=array();
    if(!preg_match('|500</a>\)\.<ul>(.+)</ul>|smU',$whatlinkshere_page,$section)) return array();
    $links=array();
    preg_match_all('|href="/wiki/(.+)"|U',$section[1],$links);
    return $links[1];
}
/**
 * Extract article names from a Special:Deadendpages listing.
 *
 * Looks inside the <ol class='special'> list and collects every /wiki/
 * link target in it.
 *
 * @param string $deadendpages_page HTML of the listing
 * @return array Link targets as they appear in the HTML; empty when the
 *               expected list is not present
 */
function extract_deadendpages_articles($deadendpages_page)
{
    $section=array();
    if(!preg_match('|<ol start=\'([0-9]+)\' class=\'special\'>(.+)</ol>|smU',$deadendpages_page,$section)) return array();
    $links=array();
    // Group 1 is the list's start number; the list body is group 2
    preg_match_all('|href="/wiki/(.+)"|U',$section[2],$links);
    return $links[1];
}
/**
 * Pull the hidden form values out of an edit page.
 *
 * @param string $edit_page HTML of the edit form
 * @return array wpStarttime, wpEdittime, wpEditToken and wpAutoSummary, in
 *               that order; empty when the form fields are not found
 */
function extract_edit_keys($edit_page)
{
    $pattern='|value="([0-9]+)" name="wpStarttime".+value="([0-9]+)" name="wpEdittime".+value="([a-zA-Z0-9]+)" name="wpEditToken".+name="wpAutoSummary" value="([a-zA-Z0-9]+)"|sm';
    $found=array();
    preg_match($pattern,$edit_page,$found);
    // Drop the full-match entry and keep only the captured groups
    return array_slice($found,1);
}
/**
 * Extract the article wikitext from an edit page.
 *
 * @param string $edit_page HTML of the edit form
 * @return string Decoded textarea contents; an empty string when the
 *                textarea is not found
 */
function extract_edit_content($edit_page)
{
    $matches=array();
    if(!preg_match('|cols=\'80\' >(.*)</textarea>|smU',$edit_page,$matches)) return '';
    return htmlspecialchars_decode($matches[1]);
}
/**
 * Extract the edit token from a move form.
 *
 * @param string $move_page HTML of the Special:Movepage form
 * @return string|null The token, or NULL when the field is not found
 */
function extract_move_keys($move_page)
{
    $matches=array();
    preg_match('|name=\'wpEditToken\' value="([a-zA-Z0-9]*)"|',$move_page,$matches);
    return (isset($matches[1]))?$matches[1]:NULL;
}
/**
 * Extract the edit token from a delete confirmation form.
 *
 * @param string $delete_page HTML of the delete form
 * @return string|null The token, or NULL when the field is not found
 */
function extract_delete_keys($delete_page)
{
    $matches=array();
    preg_match('|name=\'wpEditToken\' value="([a-zA-Z0-9]*)"|',$delete_page,$matches);
    return (isset($matches[1]))?$matches[1]:NULL;
}
}
class boothby_task_queue extends boothby_common
{
// Index into $queue of the entry that run_tasks() will run next
protected $queue_position=0;
// Task entries, run in index order
protected $queue=array();
// Values pushed by push_stack() and taken back by pop_stack()
protected $stack=array();
// Named variables (written by set_variable() and by the XML parser)
protected $var_stack=array();
// NOTE(review): only touched by set_variable() in the visible code; purpose not evident from here
protected $unset_var_stack=array();
// Per-entry array('in'=>...,'out'=>...) settings, indexed like $queue
protected $queue_vars=array();
protected $parent_queue=NULL;
// Scratch state that exists only while an XML task file is being parsed
private $xml_parsing=NULL;
/**
 * Empty constructor. It does not call the parent constructor, so creating
 * a task queue does not repeat the login and queue start-up done there.
 */
public function __construct()
{
// Constructor
}
/**
 * Report how many queued tasks were left unrun when the queue is destroyed.
 *
 * The entries live in $queue; the previous code counted a non-existent
 * $task_queue property, so the message could never be produced.
 */
public function __destruct()
{
    $remaining=count($this->queue)-$this->queue_position;
    if($remaining>0) $this->log_message('Shutting down with '.$remaining.' item(s) remaining in the task queue.');
}
/**
 * Load the task queue from one of the supported sources.
 *
 * - TASK_QUEUE_VAR:  $param is array($queue,$queue_vars[,$var_stack]).
 * - TASK_QUEUE_FILE: $param is a text file with one "category,name[,params...]"
 *                    entry per line; blank lines are skipped.
 * - TASK_QUEUE_XML:  $param is an XML file handled by _xml_start_element()
 *                    and _xml_end_element().
 * - TASK_QUEUE_CLI:  not supported yet.
 *
 * Any invalid input ends the script through log_fatal().
 *
 * @param int   $list_type One of the TASK_QUEUE_* type constants
 * @param mixed $param     Array or file name, depending on $list_type
 */
public function load_task_queue($list_type,$param=NULL)
{
    switch($list_type)
    {
        case TASK_QUEUE_VAR:
            // PHP array-based task queue: just takes an array to put straight into the queue
            if((!is_array($param)) || (!isset($param[0])) || (!is_array($param[0])) || (!isset($param[1])) || (!is_array($param[1]))) $this->log_fatal('Bad array given for task queue.');
            $this->queue=$param[0];
            $this->queue_vars=$param[1];
            // The variable stack is optional; fall back to an empty one
            if((isset($param[2])) && (is_array($param[2]))) $this->var_stack=$param[2];
            else $this->var_stack=array();
            break;
        case TASK_QUEUE_FILE:
            // Text-file-based task queue: supports simple linear list of actions to perform
            if((is_null($param)) || (!file_exists($param)) || (!is_readable($param))) $this->log_fatal('Bad filename given for task queue.');
            $lines=explode("\n",file_get_contents($param));
            foreach($lines as $line_no=>$item)
            {
                $item=trim($item);
                if($item=='') continue;
                $details=explode(',',$item);
                if(count($details)<2) $this->log_fatal('Too few parameters on line '.($line_no+1).'.');
                $category=array_shift($details);
                $name=array_shift($details);
                // Zero parameters => empty string, one => that value, several => the whole list
                if(count($details)>1) $params=$details;
                elseif(count($details)==1) $params=$details[0];
                else $params='';
                $this->queue[]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,$category,$name,$params,$this);
            }
            break;
        case TASK_QUEUE_XML:
            // XML-based task queue: supports everything, including blocks
            if((is_null($param)) || (!file_exists($param)) || (!is_readable($param))) $this->log_fatal('Bad filename given for task queue.');
            $this->xml_parsing=array('started'=>false,'current_task_args'=>array(),'current_container'=>&$this->queue,'containers'=>array(),'current_var_container'=>&$this->queue_vars,'var_containers'=>array(),'current_var_stack_container'=>&$this->var_stack,'var_stack_containers'=>array());
            $xml_parser=xml_parser_create();
            xml_set_element_handler($xml_parser,array($this,'_xml_start_element'),array($this,'_xml_end_element'));
            $fp=fopen($param,'r');
            if($fp===false) $this->log_fatal('Bad filename given for task queue.');
            // Loop on feof() rather than on the chunk itself, so the parser is
            // always told about the final chunk (even when it is empty).
            while(!feof($fp))
            {
                $xml_data=fread($fp,4096);
                if($xml_data===false) break;
                if(!xml_parse($xml_parser,$xml_data,feof($fp))) $this->log_fatal('XML parsing error in task file. ('.xml_error_string(xml_get_error_code($xml_parser)).' on line '.xml_get_current_line_number($xml_parser).')');
            }
            fclose($fp);
            xml_parser_free($xml_parser);
            // Drop the parsing state (and its references) but keep the property declared
            $this->xml_parsing=NULL;
            break;
        case TASK_QUEUE_CLI:
            /*
            TODO: Ask whether a "file" or "xml" script is to be written. Let them write one (line-by-line, with
            perhaps some checking for mistakes), save it as a file (with the filename specified by the user),
            and then run it.
            */
            $this->log_fatal('Task queue CLI interface isn\'t supported yet!');
            break;
        default:
            // Unknown task queue type
            $this->log_fatal('Unknown task queue type.');
            break;
    }
}
/**
 * Check that the queued tasks appear in an acceptable order.
 *
 * Walks the queue comparing each entry's category with the previous one.
 * Ordering violations end the script through log_fatal(); two adjacent log
 * entries only produce an informational message.
 */
public function check_validity()
{
// Check task queue validity
$last_task=NULL;
// Becomes true once any article list entry has been seen
$article_list=false;
for($i=0;$i<count($this->queue);$i++)
{
// $last_task keeps its previous value when there is no entry at $i-1
if(isset($this->queue[$i-1])) $last_task=$this->queue[$i-1];
$last_task_cat=(!is_null($last_task))?$last_task->get_category():NULL;
// NOTE(review): $current_task is not initialised before the loop; it is only assigned here
if(isset($this->queue[$i])) $current_task=$this->queue[$i];
$current_task_cat=(!is_null($current_task))?$current_task->get_category():NULL;
switch($current_task_cat)
{
case TASK_QUEUE_ENTRY_CONSTRUCT:
// Needs a preceding entry and an article list somewhere earlier in the queue
if((is_null($last_task_cat)) || (!$article_list)) $this->log_fatal('Construct tasks can only come after log, article list, or modifier tasks.');
break;
case TASK_QUEUE_ENTRY_PROCESS:
// Needs a preceding entry and an article list somewhere earlier in the queue
if((is_null($last_task_cat)) || (!$article_list)) $this->log_fatal('Process tasks can only come after log, article list, or modifier tasks.');
break;
case TASK_QUEUE_ENTRY_LOG:
if($last_task_cat==TASK_QUEUE_ENTRY_LOG) $this->log_message('You have two log tasks next to each other. Why not combine them?');
break;
case TASK_QUEUE_ENTRY_ARTICLE_LIST:
$article_list=true;
//if(($last_task_cat!=TASK_QUEUE_ENTRY_LOG) && (!is_null($last_task_cat))) $this->log_fatal('Article list tasks can only come after log tasks, or be first.');
break;
case TASK_QUEUE_ENTRY_MODIFIER:
// A modifier may not directly follow a process or construct entry
if(($last_task_cat==TASK_QUEUE_ENTRY_PROCESS) || ($last_task_cat==TASK_QUEUE_ENTRY_CONSTRUCT)) $this->log_fatal('Modifier tasks can only come after log, article list, or other modifier tasks.');
break;
}
}
}
/**
 * Run queued tasks from the current queue position onwards.
 *
 * Stops as soon as get_current_task() returns something that is not a
 * boothby_task_queue_entry (it returns false past the end of the queue).
 */
public function run_tasks()
{
// Run the tasks
$stack=NULL;
$i=0;
$task=$this->get_current_task();
$task_vars=$this->get_current_task_vars();
while(true)
{
if(!$task instanceof boothby_task_queue_entry) break;
// Entries without in/out settings take no input and push their output on the stack
if((is_null($task_vars)) || (!is_array($task_vars))) $task_vars=array('in'=>NULL,'out'=>'stack');
if($task_vars['in']=='stack')
{
// Nothing is popped for the first task run by this call
if($i>0) $stack=$this->pop_stack();
$task_vars['in']=&$stack;
}
$output=$task->run_task($task_vars['in']);
if($task_vars['out']=='stack') $this->push_stack($output);
// NOTE(review): presumably 'out' is a reference created by the XML parser, so this
// assignment reaches the named variable; verify for queues loaded another way
else $task_vars['out']=$output;
$i++;
$this->queue_position++;
$task=$this->get_current_task();
$task_vars=$this->get_current_task_vars();
}
}
/**
 * Entry at the current queue position.
 *
 * @return mixed The queue entry, or false when the position has no entry
 */
public function get_current_task()
{
    $position=$this->queue_position;
    return (array_key_exists($position,$this->queue))?$this->queue[$position]:false;
}
/**
 * In/out settings for the entry at the current queue position.
 *
 * @return mixed The settings, or false when none are stored for the position
 */
public function get_current_task_vars()
{
    $position=$this->queue_position;
    return (array_key_exists($position,$this->queue_vars))?$this->queue_vars[$position]:false;
}
/**
 * Number of entries in the queue.
 *
 * @return int
 */
public function count_tasks()
{
    $total=count($this->queue);
    return $total;
}
/**
 * Move the queue position, provided the new position is below the number
 * of queued tasks; otherwise the position is left unchanged.
 *
 * @param int $new_position Position to jump to
 */
public function reset_position($new_position=0)
{
    if($this->count_tasks()>$new_position) $this->queue_position=$new_position;
}
/**
 * Take the most recently pushed value off the task stack.
 *
 * A NULL result is treated as an empty stack and ends the script through
 * log_fatal().
 *
 * @return mixed The popped value
 */
public function pop_stack()
{
    $top=array_pop($this->stack);
    if(is_null($top))
    {
        $this->log_fatal('The task stack underflew. You have too many requesting tasks, and not enough contributing tasks.');
    }
    return $top;
}
/**
 * Put a value on top of the task stack.
 *
 * @param mixed $param Value to push
 */
public function push_stack($param)
{
    array_push($this->stack,$param);
}
/**
 * Set (creating it if necessary) a variable on the variable stack and drop
 * its name from the list of unset variables.
 *
 * @param string $name  Variable name
 * @param mixed  $value Value to store
 */
public function set_variable($name,$value)
{
    $this->var_stack[$name]=$value;
    // array_search() returns false when the name is not listed; feeding that
    // to array_splice() would remove element 0, so only splice on a real hit.
    $position=array_search($name,$this->unset_var_stack);
    if($position!==false) array_splice($this->unset_var_stack,$position,1);
}
/**
 * All variables currently on the variable stack.
 *
 * @return array Map of variable name to value
 */
public function get_variables()
{
    $variables=$this->var_stack;
    return $variables;
}
/**
 * Record the queue that contains this one.
 *
 * @param mixed $parent_queue The enclosing queue
 */
public function set_parent_queue($parent_queue)
{
    $this->parent_queue = $parent_queue;
}
/**
 * The queue recorded by set_parent_queue().
 *
 * @return mixed The enclosing queue, or NULL when none has been set
 */
public function get_parent_queue()
{
    $parent=$this->parent_queue;
    return $parent;
}
// XML parsing functions for XML task queues
/**
 * Start-tag handler for XML task queues.
 *
 * Collects the element's PARAM/PARAM1/PARAM2... attributes, binds values
 * beginning with '$' to named variables by reference, records the
 * element's in/out settings and appends a task entry to the container
 * being filled. A CONSTRUCT element instead opens a fresh container that
 * is attached to its entry by _xml_end_element().
 *
 * Element and attribute names are compared in upper case.
 *
 * @param resource $parser       XML parser
 * @param string   $element_name Element name
 * @param array    $attributes   Element attributes
 */
private function _xml_start_element($parser,$element_name,$attributes)
{
    if($element_name=='BOOTHBY-SCRIPT')
    {
        if($this->xml_parsing['started']) $this->log_fatal('You can\'t have more than one boothby-script element in an XML task queue.');
        else $this->xml_parsing['started']=true;
        return;
    }
    if(!$this->xml_parsing['started']) $this->log_fatal('The root element in an XML task queue must be a boothby-script element.');
    if(!array_key_exists('NAME',$attributes)) $this->log_fatal('All elements have to have a name attribute.');
    // Parameter collection
    $_params=array();
    if(array_key_exists('PARAM',$attributes))
    {
        $_params[]=$attributes['PARAM'];
        $i=1;
        while(array_key_exists('PARAM'.$i,$attributes))
        {
            $_params[]=$attributes['PARAM'.$i];
            $i++;
        }
    }
    // Variable replacement with references. substr() is used instead of the
    // removed $str{0} syntax and is also safe on empty attribute values.
    $params=array();
    foreach($_params as $param)
    {
        if(substr($param,0,1)=='$')
        {
            // HACKHACK!
            $var_name=substr($param,1);
            if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
            $params[]=&$this->xml_parsing['current_var_stack_container'][$var_name];
        }
        else $params[]=$param;
    }
    unset($_params,$var_name); // Waste not...
    $attributes['PARAMS']=$params;
    // In and out variable replacements
    if(!array_key_exists('IN',$attributes)) $attributes['IN']=NULL;
    elseif(substr($attributes['IN'],0,1)=='$')
    {
        $var_name=substr($attributes['IN'],1);
        if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
        $attributes['IN']=&$this->xml_parsing['current_var_stack_container'][$var_name];
    }
    // An explicit out="stack" is accepted, as the error message below promises
    if((!array_key_exists('OUT',$attributes)) || ($attributes['OUT']==='stack')) $attributes['OUT']='stack';
    elseif(substr($attributes['OUT'],0,1)!='$') $this->log_fatal('Out attributes must either be "stack", or a variable name with a preceding dollar sign.');
    else
    {
        $var_name=substr($attributes['OUT'],1);
        if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
        $attributes['OUT']=&$this->xml_parsing['current_var_stack_container'][$var_name];
    }
    $this->xml_parsing['current_var_container'][]=array(
        'in'=>&$attributes['IN'],
        'out'=>&$attributes['OUT']
    );
    // Remembered so that _xml_end_element() can see this element's attributes
    $this->xml_parsing['current_task_args'][]=$attributes;
    switch($element_name)
    {
        case 'INPUT':
            // Input element
            if(!array_key_exists('PROMPT',$attributes)) $this->log_fatal('Input elements have to have a prompt attribute.');
            $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_INPUT,$attributes['NAME'],$attributes['PROMPT'],$this);
            break;
        case 'LOG':
            // Log element
            if(!array_key_exists('MESSAGE',$attributes)) $this->log_fatal('Log elements have to have a message attribute.');
            $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_LOG,$attributes['NAME'],$attributes['MESSAGE'],$this);
            break;
        case 'ARTICLE-LIST':
            // Article list element
            $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_ARTICLE_LIST,$attributes['NAME'],$params,$this);
            break;
        case 'MODIFIER':
            // Modifier element
            $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_MODIFIER,$attributes['NAME'],$params,$this);
            break;
        case 'CONSTRUCT':
            // Construct element: save the containers being filled and start empty
            // ones for the child elements
            $this->xml_parsing['containers'][]=$this->xml_parsing['current_container'];
            $this->xml_parsing['current_container']=array();
            $this->xml_parsing['var_containers'][]=$this->xml_parsing['current_var_container'];
            $this->xml_parsing['current_var_container']=array();
            $this->xml_parsing['var_stack_containers'][]=$this->xml_parsing['current_var_stack_container'];
            $this->xml_parsing['current_var_stack_container']=array();
            break;
        case 'PROCESS':
            // Process element
            $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_PROCESS,$attributes['NAME'],$params,$this);
            break;
        default:
            // Unknown task queue entry type
            $this->log_fatal('Unknown XML task queue element.');
            break;
    }
}
/**
 * End-tag handler for XML task queues.
 *
 * Only CONSTRUCT elements need work here: the entries collected since the
 * start tag are attached to a new block entry, and the containers saved by
 * the start handler are restored. Other known elements need nothing;
 * unknown ones end the script through log_fatal().
 *
 * @param resource $parser       XML parser
 * @param string   $element_name Element name
 */
private function _xml_end_element($parser,$element_name)
{
    if($element_name=='BOOTHBY-SCRIPT')
    {
        if(!$this->xml_parsing['started']) $this->log_fatal('You can\'t have more than one boothby-script element in an XML task queue.');
        else $this->xml_parsing['started']=false;
        return;
    }
    if(!$this->xml_parsing['started']) $this->log_fatal('The root element in an XML task queue must be a boothby-script element.');
    // Attributes stored for this element by the start handler
    $attributes=array_pop($this->xml_parsing['current_task_args']);
    if($element_name=='CONSTRUCT')
    {
        // Take what the child elements produced, then restore the saved containers
        $child_queue=$this->xml_parsing['current_container'];
        $this->xml_parsing['current_container']=array_pop($this->xml_parsing['containers']);
        $child_queue_vars=$this->xml_parsing['current_var_container'];
        $this->xml_parsing['current_var_container']=array_pop($this->xml_parsing['var_containers']);
        $child_var_stack=$this->xml_parsing['current_var_stack_container'];
        $this->xml_parsing['current_var_stack_container']=array_pop($this->xml_parsing['var_stack_containers']);
        $block_entry=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_BLOCK,TASK_QUEUE_ENTRY_CONSTRUCT,$attributes['NAME'],$attributes['PARAMS'],$this);
        $block_entry->attach_task_queue($child_queue,$child_queue_vars,$child_var_stack);
        unset($child_queue); // Waste not...
        unset($child_queue_vars); // Waste not...
        $this->xml_parsing['current_container'][]=$block_entry;
        unset($block_entry); // Waste not...
    }
    elseif(!in_array($element_name,array('INPUT','LOG','ARTICLE-LIST','MODIFIER','PROCESS')))
    {
        // Unknown task queue entry type
        $this->log_fatal('Unknown XML task queue element.');
    }
}
}
class boothby_task_queue_entry extends boothby_common
{
// Set by run_task(): true when the last run returned a truthy value
protected $done=false;
// TASK_QUEUE_ENTRY_BLOCK or TASK_QUEUE_ENTRY_INLINE
protected $type;
// One of the TASK_QUEUE_ENTRY_* category strings ('input', 'log', ...)
protected $category;
// Combined with the category in run_task() to pick the method to call
protected $function_name;
protected $function_params;
// Nested queue of a block entry, created by attach_task_queue()
protected $task_queue=NULL;
protected $parent_queue;
/**
 * Create a task queue entry.
 *
 * The type and category are checked against the known constants; an
 * unknown value ends the script through log_fatal().
 *
 * @param mixed  $type         TASK_QUEUE_ENTRY_BLOCK or TASK_QUEUE_ENTRY_INLINE
 * @param string $category     One of the TASK_QUEUE_ENTRY_* category strings
 * @param string $name         Function name within the category
 * @param mixed  $params       Parameter string or array of parameters
 * @param mixed  $parent_queue Queue this entry belongs to
 */
public function __construct($type,$category,$name,$params,$parent_queue)
{
    $type=trim($type);
    $category=trim($category);
    $name=trim($name);
    if(!is_array($params)) $params=trim($params);
    // TODO: Clean up these lookups
    $matched_type=NULL;
    foreach(array(TASK_QUEUE_ENTRY_BLOCK,TASK_QUEUE_ENTRY_INLINE) as $known_type)
    {
        // Loose comparison on purpose: $type is a string after trim()
        if($type==$known_type)
        {
            $matched_type=$known_type;
            break;
        }
    }
    // Unknown task queue entry type
    if(is_null($matched_type)) $this->log_fatal('Unknown task queue entry type.');
    $this->type=$matched_type;
    $known_categories=array(
        TASK_QUEUE_ENTRY_INPUT,
        TASK_QUEUE_ENTRY_LOG,
        TASK_QUEUE_ENTRY_ARTICLE_LIST,
        TASK_QUEUE_ENTRY_MODIFIER,
        TASK_QUEUE_ENTRY_CONSTRUCT,
        TASK_QUEUE_ENTRY_PROCESS
    );
    $matched_category=NULL;
    foreach($known_categories as $known_category)
    {
        if($category==$known_category)
        {
            $matched_category=$known_category;
            break;
        }
    }
    // Unknown task queue entry category
    if(is_null($matched_category)) $this->log_fatal('Unknown task queue entry category.');
    $this->category=$matched_category;
    $this->function_name=$name;
    $this->function_params=$params;
    $this->parent_queue=$parent_queue;
}
/**
 * Empty destructor. It does not call the parent destructor, so destroying
 * an entry does not run the task update and logout done there.
 */
public function __destruct()
{
// Destructor
}
/**
 * Run this entry and return its result.
 *
 * Input, article list, modifier, process and construct entries call the
 * method "<category>_<name>" on a new "boothby_<category>" object. Log
 * entries call the matching logging method on this object.
 *
 * @param mixed $stack Input for the task (for process entries an array is
 *                     handled one element at a time)
 * @return mixed The handler's return value; true for log and process entries
 */
public function run_task($stack)
{
    $class_name='boothby_'.$this->category;
    $function_name=$this->category.'_'.$this->function_name;
    $category=$this->category;
    if(($category==TASK_QUEUE_ENTRY_INPUT) || ($category==TASK_QUEUE_ENTRY_ARTICLE_LIST))
    {
        // Input and article list tasks only receive their parameters
        $handler=new $class_name();
        $return=$handler->$function_name($this->function_params);
        unset($handler); // Waste not...
    }
    elseif($category==TASK_QUEUE_ENTRY_LOG)
    {
        // Log task
        // TODO: Support for interpolating variable names with values
        $log_methods=array(
            'message'=>'log_message',
            'update_task'=>'update_task',
            'message_and_update_task'=>'log_message_and_update_task',
            'error'=>'log_error',
            'fatal'=>'log_fatal'
        );
        if(array_key_exists($this->function_name,$log_methods))
        {
            $log_method=$log_methods[$this->function_name];
            $this->$log_method($this->function_params);
        }
        else $this->log_fatal('Unknown log function.');
        $return=true;
    }
    elseif($category==TASK_QUEUE_ENTRY_MODIFIER)
    {
        // Modifier task
        $handler=new $class_name();
        $return=$handler->$function_name($stack,$this->function_params);
        unset($handler); // Waste not...
    }
    elseif($category==TASK_QUEUE_ENTRY_PROCESS)
    {
        // Process task: applied to each article in turn when given a list
        $handler=new $class_name();
        if(is_array($stack))
        {
            foreach($stack as $article_name) $handler->$function_name($article_name,$this->function_params);
        }
        else $handler->$function_name($stack,$this->function_params);
        unset($handler); // Waste not...
        $return=true;
    }
    elseif($category==TASK_QUEUE_ENTRY_CONSTRUCT)
    {
        // Construct task: receives the nested queue attached to this entry
        $handler=new $class_name();
        $return=$handler->$function_name($stack,$this->task_queue);
        unset($handler); // Waste not...
    }
    else
    {
        // Unknown task queue entry category
        $this->log_fatal('Unknown task queue entry category.');
    }
    $this->done=($return)?true:false;
    return $return;
}
public function get_category()
{
return $this->category;
}
public function attach_task_queue($task_queue,$queue_vars,$var_stack)
{
if($this->type!=TASK_QUEUE_ENTRY_BLOCK) $this->log_fatal('Task queues cannot be attached to non-block tasks.');
$this->task_queue=new boothby_task_queue();
$this->task_queue->load_task_queue(TASK_QUEUE_VAR,array($task_queue,$queue_vars,$var_stack));
$this->task_queue->set_parent_queue($this->parent_queue);
}
}
/**
 * Input tasks: functions which ask the user for a value.
 */
class boothby_input extends boothby_common
{
    public function __construct()
    {
        // Nothing to set up
    }

    public function __destruct()
    {
        // Nothing to tear down
    }

    /**
     * Log the prompt, then read one line from the shared stdin handle.
     *
     * @param mixed $prompt Text handed to log_message() before reading
     * @return string The line which was read, with surrounding whitespace trimmed
     */
    public function input_normal($prompt)
    {
        $this->log_message($prompt);
        $answer=fgets(self::$stdin);
        return trim($answer);
    }
}
/**
 * Article list tasks: functions which fetch a list of articles from the wiki.
 *
 * Every function accepts its parameter either as a plain value or wrapped in
 * (possibly nested) arrays, in which case element 0 is used.
 */
class boothby_article_list extends boothby_common
{
    public function __construct()
    {
        // Constructor
    }

    public function __destruct()
    {
        // Destructor
    }

    /**
     * Peel array wrappers off a parameter by repeatedly taking element 0.
     *
     * @param mixed $value Plain value or (nested) array
     * @return mixed The innermost element 0, or NULL if an array without element 0 is met
     */
    private function unwrap_param($value)
    {
        while(is_array($value)) $value=isset($value[0])?$value[0]:NULL;
        return $value;
    }

    /**
     * Download the page of the given category.
     *
     * @param mixed $category_name Category name without the "Category:" prefix
     * @return mixed Whatever download_page() returns
     */
    public function article_list_category_articles($category_name)
    {
        $category_name=$this->unwrap_param($category_name);
        return $this->download_page('Category:'.$category_name);
    }

    /**
     * Download the "what links here" page of an article.
     *
     * @param mixed $article_name Name of the article
     * @return mixed Whatever download_whatlinkshere_page() returns
     */
    public function article_list_whatlinkshere_articles($article_name)
    {
        $article_name=$this->unwrap_param($article_name);
        return $this->download_whatlinkshere_page($article_name);
    }

    /**
     * Download the dead-end pages list.
     *
     * @param mixed $limit Limit handed to download_deadendpages_page()
     * @return mixed Whatever download_deadendpages_page() returns
     */
    public function article_list_deadendpages_articles($limit)
    {
        $limit=$this->unwrap_param($limit);
        return $this->download_deadendpages_page($limit);
    }

    /**
     * Fetch a list of articles from allpages, starting with the specified article name (and in that namespace).
     *
     * The text before the first colon of the article name is looked up as a
     * key of $this->all_namespaces; names without a colon, or with an unknown
     * prefix, use namespace 0.
     *
     * @param mixed $article_name Article name, optionally prefixed with "Namespace:"
     * @return mixed Whatever download_allpages_page() returns
     */
    public function article_list_allpages_articles($article_name)
    {
        $article_name=$this->unwrap_param($article_name);
        $namespace=0;
        // strpos() takes the haystack first: search the article name for the colon
        $colon_pos=strpos($article_name,':');
        if($colon_pos!==false)
        {
            $prefix=substr($article_name,0,$colon_pos);
            // Exact key lookup, so that an unknown prefix can never loosely match a numeric key
            if(array_key_exists($prefix,$this->all_namespaces)) $namespace=$this->all_namespaces[$prefix];
        }
        return $this->download_allpages_page($article_name,$namespace);
    }
}
/**
 * Modifier tasks: functions which take a list of article names and return an altered list.
 *
 * Every function accepts a single article name in place of a list.
 */
class boothby_modifier extends boothby_common
{
    public function __construct()
    {
        // Constructor
    }

    public function __destruct()
    {
        // Destructor
    }

    /**
     * Remove unwanted namespaces from the article list (e.g. talk namespaces, project namespace, user namespace, etc.)
     *
     * The text before the first colon of each name (the whole name when there
     * is no colon), with underscores converted to spaces, is checked against
     * $this->bad_namespaces.
     *
     * @param mixed $article_list Article name or array of article names
     * @return array The names which survived, unchanged
     */
    public function modifier_remove_bad_namespaces($article_list)
    {
        if(!is_array($article_list)) $article_list=array($article_list);
        $_article_list=array();
        foreach($article_list as $article_name)
        {
            $name_parts=explode(':',$this->underscores_to_spaces($article_name));
            if(in_array($name_parts[0],$this->bad_namespaces)) continue;
            $_article_list[]=$article_name;
        }
        return $_article_list;
    }

    /**
     * Limit a list of articles to those underneath a particular page (and that page itself)
     *
     * An article is kept when, after converting its underscores to spaces, it
     * is exactly $article_name or starts with $article_name followed by "/".
     *
     * @param mixed $article_list Article name or array of article names
     * @param mixed $article_name Page to limit to (element 0 is used if an array is given)
     * @return array The kept names, in their underscores-to-spaces form
     */
    public function modifier_limit_to_article($article_list,$article_name)
    {
        if(!is_array($article_list)) $article_list=array($article_list);
        if(is_array($article_name)) $article_name=$article_name[0];
        $article_name=(string)$article_name;
        $subpage_prefix=$article_name.'/';
        $_article_list=array();
        foreach($article_list as $_article_name)
        {
            $_article_name=$this->underscores_to_spaces($_article_name);
            // Strict string comparison, so numeric-looking titles such as "1942" and "1942.0" stay distinct
            $is_page_itself=((string)$_article_name===$article_name);
            $is_subpage=(strpos($_article_name,$subpage_prefix)===0);
            if($is_page_itself || $is_subpage) $_article_list[]=$_article_name;
        }
        return $_article_list;
    }

    /**
     * Replace the main article name of a given article with a new one
     *
     * The old main article name is taken to span as many "/"-separated
     * segments as the new one does; those leading segments are dropped and the
     * rest of the path is appended to the new name, so with a new name of
     * "Baz", "Foo/Bar" becomes "Baz/Bar" and "Foo" becomes "Baz".
     * NOTE(review): the same-depth assumption mirrors modifier_limit_to_article() -
     * confirm it against the task queues which use this modifier.
     *
     * @param mixed $article_list Article name or array of article names
     * @param mixed $new_main_article_name New main article name (element 0 is used if an array is given)
     * @return array The renamed articles, in the order given
     */
    public function modifier_substitute_main_article_name($article_list,$new_main_article_name)
    {
        if(!is_array($article_list)) $article_list=array($article_list);
        if(is_array($new_main_article_name)) $new_main_article_name=$new_main_article_name[0];
        $main_segment_count=substr_count($new_main_article_name,'/')+1;
        $_article_list=array();
        foreach($article_list as $_article_name)
        {
            $sub_segments=array_slice(explode('/',$_article_name),$main_segment_count);
            // Articles no deeper than the main article have nothing to carry over
            if(count($sub_segments)==0) $_article_list[]=$new_main_article_name;
            else $_article_list[]=$new_main_article_name.'/'.implode('/',$sub_segments);
        }
        return $_article_list;
    }
}
/**
 * Construct tasks: control structures which drive a nested task queue.
 */
class boothby_construct extends boothby_common
{
    public function __construct()
    {
        // Nothing to set up
    }

    public function __destruct()
    {
        // Nothing to tear down
    }

    /**
     * Run the nested task queue once for every entry of the article list.
     *
     * The parent queue's variables are copied into the nested queue before the
     * loop, and the nested queue's variables are copied back afterwards. On
     * each pass the current article is pushed onto the nested queue's stack
     * and stored in its 'for_each_key' variable.
     *
     * @param mixed $article_list Articles to iterate over
     * @param mixed $task_queue Nested queue to run for each article
     * @return mixed The article list, exactly as it was passed in
     */
    public function construct_for_each($article_list,$task_queue)
    {
        $outer_queue=$task_queue->get_parent_queue();

        // Hand the outer variables down to the nested queue
        foreach($outer_queue->get_variables() as $name=>$value)
        {
            $task_queue->set_variable($name,$value);
        }

        foreach($article_list as $current_article)
        {
            $task_queue->push_stack($current_article);
            $task_queue->set_variable('for_each_key',$current_article);
            $task_queue->run_tasks();
            // Rewind so that the next article starts from the first task again
            $task_queue->reset_position();
        }

        // Publish whatever the nested queue ended up with back to the outer queue
        foreach($task_queue->get_variables() as $name=>$value)
        {
            $outer_queue->set_variable($name,$value);
        }

        return $article_list;
    }
}
/**
 * Process tasks: functions which are applied to one article at a time.
 *
 * Names supplied by the task queue are spliced into regular expressions and
 * replacement strings, so they are escaped first to keep them literal.
 */
class boothby_process extends boothby_common
{
    public function __construct()
    {
        // Constructor
    }

    public function __destruct()
    {
        // Destructor
    }

    /**
     * Make text safe for use inside a preg_replace() replacement string.
     *
     * Backslashes and dollar signs are escaped so that they are inserted
     * literally instead of being read as references to captured groups.
     *
     * @param string $text Text to insert literally
     * @return string The escaped text
     */
    private function quote_replacement_text($text)
    {
        return str_replace(array('\\','$'),array('\\\\','\\$'),$text);
    }

    /**
     * Move the page to a new category
     *
     * Links of the form [[Category:Old]], optionally with a leading colon
     * and/or a "|sort key", are rewritten to point at the new category.
     * Both names may be given with or without a leading "Category:".
     * NOTE(review): the old category name is taken from a two-element
     * (old, new) parameter array, by analogy with process_relink_article() -
     * confirm against the task queues which call this.
     *
     * @param string $article_name Article to edit
     * @param mixed $new_category_name array(old category, new category); with only a new category name nothing can be matched, so an error is logged and the article is left alone
     * @return string The article name
     */
    public function process_recategorise_article($article_name,$new_category_name)
    {
        $old_category_name=NULL;
        if(is_array($new_category_name) && count($new_category_name)>=2)
        {
            list($old_category_name,$new_category_name)=array_values($new_category_name);
        }
        while(is_array($new_category_name)) $new_category_name=$new_category_name[0];
        while(is_array($old_category_name)) $old_category_name=$old_category_name[0];

        // Tolerate names which already carry the namespace prefix
        $old_category_name=preg_replace('|^Category:|i','',(string)$old_category_name);
        $new_category_name=preg_replace('|^Category:|i','',(string)$new_category_name);

        if($old_category_name==='')
        {
            $this->log_error('Cannot recategorise '.$article_name.': no old category name was given.');
            return $article_name;
        }

        $article_content=$this->download_edit_page($article_name);
        $edit_keys=$this->extract_edit_keys($article_content);
        $pattern='|\[\[(:)?Category:'.preg_quote($old_category_name,'|').'(\|[^\]]*)?\]\]|';
        // ${n} keeps a name which starts with a digit from being read as part of the group number
        $replacement='[[${1}Category:'.$this->quote_replacement_text($new_category_name).'${2}]]';
        $new_content=preg_replace($pattern,$replacement,$this->extract_edit_content($article_content));
        $this->upload_edit_page($article_name,$edit_keys,$new_content,'Moved to Category:'.$new_category_name.' (bot edit)',false);
        return $article_name;
    }

    /**
     * Change the links in the article which link to the old article to point to the new article
     *
     * A leading colon, a leading or trailing slash and any "|label" on the link are kept.
     *
     * @param string $article_name Article to edit
     * @param array $params array(old article name, new article name)
     * @return string The article name
     */
    public function process_relink_article($article_name,$params)
    {
        list($old_article_name,$new_article_name)=$params;
        $article_content=$this->download_edit_page($article_name);
        $edit_keys=$this->extract_edit_keys($article_content);
        $pattern='|\[\[(:)?(/)?'.preg_quote($old_article_name,'|').'(/)?(\|[^\]]*)?\]\]|';
        // ${n} keeps titles such as "1942" from being read as part of the group number
        $replacement='[[${1}${2}'.$this->quote_replacement_text($new_article_name).'${3}${4}]]';
        $new_content=preg_replace($pattern,$replacement,$this->extract_edit_content($article_content));
        $this->upload_edit_page($article_name,$edit_keys,$new_content,'Re-linked to '.$new_article_name.' (bot edit)',false);
        return $article_name;
    }

    /**
     * Change the references in the article which reference the old template to the new template
     *
     * The template name is matched case-insensitively; a leading colon and any parameters are kept.
     *
     * @param string $article_name Article to edit
     * @param array $params array(old template name, new template name)
     * @return string The article name
     */
    public function process_rereference_template($article_name,$params)
    {
        list($old_template_name,$new_template_name)=$params;
        $article_content=$this->download_edit_page($article_name);
        $edit_keys=$this->extract_edit_keys($article_content);
        $pattern='|\{\{(:)?'.preg_quote($old_template_name,'|').'(\|[^\}]*)?\}\}|i';
        $replacement='{{${1}'.$this->quote_replacement_text($new_template_name).'${2}}}';
        $new_content=preg_replace($pattern,$replacement,$this->extract_edit_content($article_content));
        $this->upload_edit_page($article_name,$edit_keys,$new_content,'Re-referenced to Template:'.$new_template_name.' (bot edit)',false);
        return $article_name;
    }

    /**
     * Add an all game nav template to the article
     *
     * The game name is the part of the article name before the first slash,
     * or the whole article name when it has no slash.
     *
     * @param string $article_name Article to edit
     * @param mixed $params Unused
     * @return string The article name
     */
    public function process_add_all_game_nav($article_name,$params)
    {
        // HACKHACK!
        $slash_pos=strpos($article_name,'/');
        $game_name=($slash_pos===false)?$article_name:substr($article_name,0,$slash_pos);
        $article_content=$this->download_edit_page($article_name);
        $edit_keys=$this->extract_edit_keys($article_content);
        $new_content="{{All Game Nav|game=".$this->underscores_to_spaces($game_name)."}}\n".$this->extract_edit_content($article_content);
        $this->upload_edit_page($article_name,$edit_keys,$new_content,'Added all game nav (bot edit)',false);
        return $article_name;
    }

    /**
     * Move an article to the new article
     *
     * @param string $article_name Article to move
     * @param mixed $new_article_name New name (element 0 is used if an array is given)
     * @return mixed The new article name
     */
    public function process_move_article($article_name,$new_article_name)
    {
        while(is_array($new_article_name)) $new_article_name=$new_article_name[0];
        $move_keys=$this->extract_move_keys($this->download_move_page($article_name));
        $this->upload_move_page($article_name,$new_article_name,$move_keys,'Moved to '.$new_article_name.' (bot edit)');
        return $new_article_name;
    }

    /**
     * Move a file
     *
     * The file is downloaded to a temporary file in the current directory and
     * uploaded again under the new name, together with the content extracted
     * from the edit page of "Image:<old name>". The temporary file is removed afterwards.
     *
     * @param string $file_name File to move
     * @param mixed $new_file_name New name (element 0 is used if an array is given)
     * @return mixed The new file name, or false if the temporary file could not be created or opened
     */
    public function process_move_file($file_name,$new_file_name)
    {
        while(is_array($new_file_name)) $new_file_name=$new_file_name[0];
        $temp_file_name=tempnam('./','temp_');
        if($temp_file_name===false)
        {
            $this->log_error('Could not create a temporary file for '.$file_name.'.');
            return false;
        }
        $temp_file=fopen($temp_file_name,'w');
        if($temp_file===false)
        {
            unlink($temp_file_name);
            $this->log_error('Could not open a temporary file for '.$file_name.'.');
            return false;
        }
        fwrite($temp_file,$this->download_file($file_name));
        fclose($temp_file);
        $this->upload_file($temp_file_name,$new_file_name,$this->extract_edit_content($this->download_edit_page('Image:'.$file_name))); //'Moved from Image:'.$file_name.' (bot edit)'
        unlink($temp_file_name);
        return $new_file_name;
    }

    /**
     * Delete an article
     *
     * @param string $article_name Article to delete
     * @return bool Always true
     */
    public function process_delete_article($article_name)
    {
        $delete_keys=$this->extract_delete_keys($this->download_delete_page($article_name));
        $this->upload_delete_page($article_name,$delete_keys,'Deleted (bot edit)');
        return true;
    }
}
?>