Jump to navigation
Jump to search
#! /usr/bin/php
<?php
/**
 * Boothby
 *
 * A bot for automating boring and repetitive tasks, and making
 * maintenance easier on StrategyWiki (http://strategywiki.net/).
 *
 * @author Philip Withnall <drbob@tecnocode.co.uk>
 * @copyright Philip Withnall 2006
 * @package Boothby
 * @version 3.0.0
 * @license http://tecnocode.co.uk/links/sourcecode-license.html
 * @filesource
 */

/* Terminology:
 - Article: any page on the wiki, be it a category, file, template, or perhaps just a "normal" page
 - Page: a "normal" page
 - Category: a category
 - File: a file
*/

/* TODO:
Core functionality:
 - Perhaps move to curl_multi_* instead of threading?
 - Perhaps use curl_setopt_array instead of lots of curl_setopts?
 - Clean up memory usage
 - Make it OS-independent (and make it work on CLI and CGI)
 - --help
 - GTK interface
 - Colour console output
 - See if unset() takes an indefinite number of parameters; if so, combine them where necessary
 - Add comments to all functions detailing what they do
 - Make some classes singletons so they can't be cloned
 - Stop it editing pages in the StrategyWiki, MediaWiki, *_talk namespaces unless explicitly told to
 - New architecture:
   - *Task queues* (from text files, command line, etc.) control everything, and chain together lots of other functions:
     - *Article lists* produce lists of pages: perhaps from in a category, perhaps pages which link to another page, perhaps just random ones, etc. (take in mixed, return array)
     - *Modifiers* manipulate the lists to (for example) remove pages from unwanted namespaces (take in array, return array)
     - *Processes* are then applied to each page in the list: replace this link with that, convert to PNG, add a category, etc. (take in mixed foreach entry in array, return boolean)
Processes:
 - Functionality to go through and convert all images to PNG (and change links to them)
 - Functionality to go through all images and produce a list of ones with likely bad names (e.g. only one word in the name, etc.)
 - Functionality to go through all pages, find the uncategorised main pages and add wikifications to them
*/

/* Threaded code:
if($this->thread_mode)
{
    while($this->children>=$this->max_children) sleep(5);
    $this->children++;
    $pid=pcntl_fork();
}
if(($this->thread_mode) && ($pid==-1)) $this->log_fatal('Failed to fork process.');
elseif(($this->thread_mode) && ($pid))
{
    // We're the parent
    pcntl_wait($status);
}
else
{
    // We're the child, or threading is disabled
    $article_content=$this->download_edit_page($article_name);
    $this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),preg_replace('|\[\[(:)?'.str_replace('|','\|',$category_name).'(\|[^\]]*)?\]\]|','[[$1'.$new_category_name.'$2]]',$this->extract_edit_content($article_content)),'Moved to Category:'.$new_category_name.' (bot edit)',false);
    if($this->thread_mode) exit(0);
}
*/

$boothby=new boothby_common();

/**
 * Shared base class: configuration, HTTP transfer with rate limiting, logging,
 * and the wiki-specific download/upload/extract helpers used by every task class.
 *
 * Constructing this class directly starts the bot: it logs in, loads the XML
 * task queue and runs it. Subclasses override the constructor with an empty one
 * so that creating them does not repeat that start-up.
 */
class boothby_common
{
    // TODO: Make these static
    // Naming
    public $name='Boothby bot';
    public $version='3.0.0';

    // Site details
    public $base_url='http://strategywiki.org/';
    public $bot_username='Boothby (bot)';
    // NOTE(review): credential is hard-coded in source; consider loading it from outside the script.
    protected $bot_password='password';
    public $bot_useragent='Boothby bot';
    public $watch_articles=true;
    // TODO: Make sure this is ordered properly and add values
    // NOTE(review): only '' and 'Talk' are keys here; every other name is a *value* with an
    // automatic integer key, so name => id lookups fail for them. Fixing this needs the
    // wiki's real namespace IDs, which are not known from this file.
    public $all_namespaces=array(''=>0,'Talk'=>1,'User','User talk','StrategyWiki','StrategyWiki talk','Category','Category talk','Image','Image talk','Template','Template talk','MediaWiki','MediaWiki talk','Help','Help talk','Portal','Portal talk','Game index','Game index talk');
    public $bad_namespaces=array('Talk','User','User talk','StrategyWiki','StrategyWiki talk','Image talk','MediaWiki','MediaWiki talk','Template talk','Help','Help talk','Category talk');
    // TODO: Check if I'm needed
    //protected $task_queue=NULL;
    protected $current_task=NULL;

    // System details
    public $cookie_file='/var/tmp/boothby_cookies.txt';
    public $log_file='/var/log/boothby.txt';

    // Download limits (delay in microseconds, last transfer time in seconds)
    protected $download_delay=500000;
    protected $last_download=NULL;

    // Upload limits (delay in microseconds, last transfer time in seconds)
    protected $upload_delay=5000000;
    protected $last_upload=NULL;

    // Testing
    public $test_mode=false;

    // Threading limits
    public $thread_mode=false;
    protected $children=0;
    protected $max_children=10;

    // File handles
    protected static $stderr=NULL;
    protected static $stdout=NULL;
    protected static $stdin=NULL;

    /**
     * Define the task queue constants, open the standard streams, log in and
     * run the task queue stored in test_script2.xml.
     */
    public function __construct()
    {
        // Task queue types. The names are passed as strings: bare undefined
        // constants are an Error on PHP 8.
        $this->enum(
            'TASK_QUEUE_VAR',
            'TASK_QUEUE_FILE',
            'TASK_QUEUE_XML',
            'TASK_QUEUE_CLI'
        );

        // Task queue entry types
        $this->enum(
            'TASK_QUEUE_ENTRY_BLOCK',
            'TASK_QUEUE_ENTRY_INLINE'
        );

        // Task queue entry categories
        $categories=array(
            'TASK_QUEUE_ENTRY_INPUT'=>'input',
            'TASK_QUEUE_ENTRY_LOG'=>'log',
            'TASK_QUEUE_ENTRY_ARTICLE_LIST'=>'article_list',
            'TASK_QUEUE_ENTRY_MODIFIER'=>'modifier',
            'TASK_QUEUE_ENTRY_CONSTRUCT'=>'construct',
            'TASK_QUEUE_ENTRY_PROCESS'=>'process'
        );
        foreach($categories as $constant=>$value)
        {
            if(!defined($constant)) define($constant,$value);
        }

        $this->bot_useragent=$this->name.' '.$this->version;

        if($this->thread_mode)
        {
            declare(ticks=1);
            pcntl_signal(SIGCHLD,array($this,'sig_handler'));
            // TODO: Sort this out
            //pcntl_signal(SIGINT,array($this,'sig_handler'));
        }

        // File handles: the STD* constants only exist under the CLI SAPI,
        // so check for them before use and open the streams otherwise.
        self::$stderr=(defined('STDERR') && is_resource(STDERR))?STDERR:fopen('php://stderr','w');
        self::$stdout=(defined('STDOUT') && is_resource(STDOUT))?STDOUT:fopen('php://stdout','w');
        self::$stdin=(defined('STDIN') && is_resource(STDIN))?STDIN:fopen('php://stdin','r');

        // Let's get going!
        $this->log_message('Starting up.');
        $this->login();
        $task_queue=new boothby_task_queue();
        $task_queue->load_task_queue(TASK_QUEUE_XML,'test_script2.xml');
        $task_queue->check_validity();
        $task_queue->run_tasks();
    }

    /**
     * Mark the bot as inactive on its user page, log out and log the shutdown.
     */
    public function __destruct()
    {
        $this->update_task();
        $this->logout();
        $this->log_message('Shutting down.');
    }

    /**
     * Fetch a URL with a GET request, honouring the download delay.
     *
     * @param string $url
     * @return mixed The response body, or whatever curl_exec() returns on failure
     */
    protected function download_url($url)
    {
        $this->download_wait();
        $ch=curl_init($url);
        curl_setopt($ch,CURLOPT_COOKIEFILE,$this->cookie_file);
        curl_setopt($ch,CURLOPT_COOKIEJAR,$this->cookie_file);
        curl_setopt($ch,CURLOPT_HEADER,false);
        curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
        curl_setopt($ch,CURLOPT_USERAGENT,$this->bot_useragent);
        $response=curl_exec($ch);
        if(curl_errno($ch)) $this->log_error(curl_error($ch));
        curl_close($ch);
        return $response;
    }

    /**
     * Sleep until $delay microseconds have passed since $last_time.
     *
     * @param float|null $last_time Time of the previous transfer in seconds, or NULL if there was none
     * @param int $delay Minimum gap between transfers, in microseconds
     * @return float The current time in seconds, to be stored as the new "last transfer" time
     */
    private function rate_limit_wait($last_time,$delay)
    {
        if(!is_null($last_time))
        {
            // microtime_float() is in seconds while the delay is in microseconds:
            // convert before subtracting, and never hand usleep() a negative value.
            $elapsed=(int)round(($this->microtime_float()-$last_time)*1000000);
            if($elapsed<$delay) usleep($delay-$elapsed);
        }
        return $this->microtime_float();
    }

    /**
     * Wait until we're allowed to download again.
     */
    private function download_wait()
    {
        if($this->download_delay>0) $this->last_download=$this->rate_limit_wait($this->last_download,$this->download_delay);
    }

    /**
     * Send a POST request, honouring the upload delay. Does nothing in test mode.
     *
     * @param string $url
     * @param array $post_vars Form fields to post
     * @param string $referrer Value for the Referer header
     * @return mixed The response including headers; NULL in test mode
     */
    protected function upload_url($url,$post_vars,$referrer)
    {
        if($this->test_mode) return;
        $this->upload_wait();
        $ch=curl_init($url);
        curl_setopt($ch,CURLOPT_COOKIEFILE,$this->cookie_file);
        curl_setopt($ch,CURLOPT_COOKIEJAR,$this->cookie_file);
        curl_setopt($ch,CURLOPT_POST,true);
        curl_setopt($ch,CURLOPT_HEADER,true);
        curl_setopt($ch,CURLOPT_RETURNTRANSFER,true);
        curl_setopt($ch,CURLOPT_POSTFIELDS,$post_vars);
        curl_setopt($ch,CURLOPT_REFERER,$referrer);
        curl_setopt($ch,CURLOPT_USERAGENT,$this->bot_useragent);
        $response=curl_exec($ch);
        if(curl_errno($ch)) $this->log_error(curl_error($ch));
        curl_close($ch);
        return $response;
    }

    /**
     * Wait until we're allowed to upload again.
     */
    private function upload_wait()
    {
        if($this->upload_delay>0) $this->last_upload=$this->rate_limit_wait($this->last_upload,$this->upload_delay);
    }

    /**
     * Append one line to the log file. Skipped in test mode and when the file cannot be opened.
     *
     * @param string $label Severity label written before the text ("Message", "Error", "Fatal error")
     * @param string $text
     */
    private function append_to_log_file($label,$text)
    {
        if($this->test_mode) return;
        $fh=fopen($this->log_file,'a');
        if($fh===false) return; // fopen() has already raised a warning
        $pid=function_exists('posix_getpid')?posix_getpid():getmypid();
        fwrite($fh,date('M j G:i:s').' localhost boothby.php['.$pid.']: '.$label.': '.$text."\n");
        fclose($fh);
    }

    /**
     * Log a message to the log file and stdout.
     */
    protected function log_message($message)
    {
        $this->append_to_log_file('Message',$message);
        fputs(self::$stdout,$message."\n");
    }

    /**
     * Log a non-fatal error to the log file and stderr.
     */
    protected function log_error($error)
    {
        $this->append_to_log_file('Error',$error);
        fputs(self::$stderr,'Error: '.$error."\n");
    }

    /**
     * Log a fatal error to the log file and stderr, then terminate the script.
     */
    protected function log_fatal($error)
    {
        $this->append_to_log_file('Fatal error',$error);
        fputs(self::$stderr,'Fatal error: '.$error."\n");
        die();
    }

    /**
     * Update the "Current task" line on the bot's user page.
     *
     * @param string|null $task New task text; NULL means 'inactive'. Nothing is done if it is unchanged.
     */
    protected function update_task($task=NULL)
    {
        if(is_null($task)) $task='inactive';
        if($this->current_task==$task) return;
        $this->current_task=$task;

        $user_page='User:'.$this->bot_username;
        $edit_page=$this->download_edit_page($user_page);
        $new_content=preg_replace('|\'\'\'Current task\'\'\': .*|','\'\'\'Current task\'\'\': '.$task,$this->extract_edit_content($edit_page));
        $this->upload_edit_page($user_page,$this->extract_edit_keys($edit_page),$new_content,'Updated task (bot edit)',true);
    }

    /**
     * Log a message and update the current task on the user page.
     */
    protected function log_message_and_update_task($message)
    {
        $this->log_message($message);
        $this->update_task($message);
    }

    /**
     * @return float Current Unix time in seconds, with microsecond precision
     */
    protected function microtime_float()
    {
        return microtime(true);
    }

    /**
     * Signal handler registered through pcntl_signal() when thread mode is on.
     * NOTE(review): this is private but used as a callback — confirm it is
     * reachable from wherever the signal gets dispatched.
     *
     * @param int $sig_number
     */
    private function sig_handler($sig_number)
    {
        // TODO
        switch($sig_number)
        {
            case SIGINT:
                // Ctrl+C
                $this->log_error('Terminated in-process.');
                exit(0);
                break;
            case SIGCHLD:
                // A child process has terminated
                $this->children--;
                break;
            default:
                break;
        }
    }

    protected function underscores_to_spaces($text)
    {
        return str_replace('_',' ',$text);
    }

    protected function spaces_to_underscores($text)
    {
        return str_replace(' ','_',$text);
    }

    /**
     * Define each given constant name as 1, 2, 3, ... in argument order.
     * Names that are already defined are left alone.
     */
    protected function enum()
    {
        $i=0;
        foreach(func_get_args() as $constant)
        {
            ++$i;
            if(!defined($constant)) define($constant,$i);
        }
    }

    // Utility functions

    /**
     * Log in with the bot account. Only logs the attempt in test mode.
     */
    function login()
    {
        $this->log_message('Logging in.');
        if($this->test_mode) return;
        $post_vars=array(
            'wpName'=>$this->bot_username,
            'wpPassword'=>$this->bot_password,
            'wpLoginattempt'=>'Log in',
            'wpRemember'=>true,
            'wpRetype'=>'',
            'wpEmail'=>''
        );
        return $this->upload_url($this->base_url.'w/index.php?title=Special:Userlogin&action=submitlogin&type=login',$post_vars,$this->base_url.'wiki/Special:Userlogin');
    }

    /**
     * Log out. Only logs the attempt in test mode.
     */
    function logout()
    {
        $this->log_message('Logging out.');
        if($this->test_mode) return;
        return $this->download_url($this->base_url.'wiki/Special:Userlogout');
    }

    /**
     * Download the rendered page of an article.
     */
    function download_page($article_name)
    {
        return $this->download_url($this->base_url.'wiki/'.$this->spaces_to_underscores($article_name));
    }

    /**
     * Download a file by locating its link on the file's Image: page.
     *
     * @param string $file_name
     * @return mixed The file contents, or false if the link could not be found
     */
    function download_file($file_name)
    {
        $file_page=$this->download_page('Image:'.$this->spaces_to_underscores($file_name));
        // Quote the name so regex metacharacters in it are matched literally.
        $pattern='|<p><a href="/(.+)" class=\'internal\' title="'.preg_quote($this->underscores_to_spaces($file_name),'|').'">'.preg_quote($file_name,'|').'</a>|';
        $matches=array();
        if(!preg_match($pattern,(string)$file_page,$matches))
        {
            $this->log_error('Could not find the file link for Image:'.$file_name.'.');
            return false;
        }
        return $this->download_url($this->base_url.$matches[1]);
    }

    /**
     * Download the edit form of an article.
     */
    function download_edit_page($article_name)
    {
        return $this->download_url($this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=edit');
    }

    /**
     * Download the move form of an article.
     */
    function download_move_page($article_name)
    {
        return $this->download_url($this->base_url.'wiki/Special:Movepage/'.$this->spaces_to_underscores($article_name));
    }

    /**
     * Download the delete form of an article.
     */
    function download_delete_page($article_name)
    {
        return $this->download_url($this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=delete');
    }

    /**
     * Download Special:Allpages and return the article names linked from it.
     *
     * @param string|null $from Article name to start from, if any
     * @param int $namespace Namespace number
     * @return array
     */
    function download_allpages_page($from=NULL,$namespace=0)
    {
        $url=$this->base_url.'w/index.php?title=Special:Allpages&namespace='.intval($namespace);
        if(!is_null($from)) $url.='&from='.$this->spaces_to_underscores($from);
        return $this->extract_allpages_articles($this->download_url($url));
    }

    /**
     * Download Special:Whatlinkshere for an article and return the linking article names.
     */
    function download_whatlinkshere_page($article_name)
    {
        return $this->extract_whatlinkshere_articles($this->download_url($this->base_url.'wiki/Special:Whatlinkshere/'.$this->spaces_to_underscores($article_name)));
    }

    /**
     * Download Special:Deadendpages and return the listed article names.
     */
    function download_deadendpages_page($limit=1000)
    {
        return $this->extract_deadendpages_articles($this->download_url($this->base_url.'w/index.php?title=Special:Deadendpages&limit='.intval($limit)));
    }

    /**
     * Upload a local file through Special:Upload.
     *
     * @param string $file_name Path of the local file
     * @param string $new_file_name Destination name on the wiki
     * @param string $summary Upload description
     */
    function upload_file($file_name,$new_file_name,$summary)
    {
        $this->log_message('Uploading file Image:'.$new_file_name.'.');
        // The "@path" upload syntax was removed in PHP 7; use CURLFile where it exists.
        $file_field=class_exists('CURLFile')?new CURLFile($file_name):'@'.$file_name;
        $post_vars=array(
            'wpUploadFile'=>$file_field,
            'wpDestFile'=>$new_file_name,
            'wpUploadDescription'=>$summary,
            'wpWatchthis'=>($this->watch_articles)?'CHECKED':'',
            'wpIgnorewarning'=>'CHECKED',
            'wpUpload'=>'Upload file'
        );
        return $this->upload_url($this->base_url.'wiki/Special:Upload',$post_vars,$this->base_url.'wiki/Special:Upload');
    }

    /**
     * Submit new content for an article.
     *
     * @param string $article_name
     * @param array $edit_keys Output of extract_edit_keys(): start time, edit time, edit token, auto summary
     * @param string $edit_content New wikimarkup
     * @param string $summary Edit summary
     * @param bool $minor_edit Whether to flag the edit as minor
     */
    function upload_edit_page($article_name,$edit_keys,$edit_content,$summary,$minor_edit)
    {
        $this->log_message('Uploading page '.$article_name.'.');
        $post_vars=array(
            'wpSection'=>'',
            'wpStarttime'=>$edit_keys[0],
            'wpEdittime'=>$edit_keys[1],
            'wpScrolltop'=>'',
            'wpTextbox1'=>$edit_content,
            'wpSummary'=>$summary,
            'wpMinoredit'=>($minor_edit)?'CHECKED':'',
            'wpWatchthis'=>($this->watch_articles)?'CHECKED':'',
            'wpSave'=>'Save page',
            'wpEditToken'=>$edit_keys[2],
            'wpAutoSummary'=>$edit_keys[3]
        );
        $title=$this->spaces_to_underscores($article_name);
        return $this->upload_url($this->base_url.'w/index.php?title='.$title.'&action=submit',$post_vars,$this->base_url.'w/index.php?title='.$title.'&action=edit');
    }

    /**
     * Submit the delete form for an article.
     */
    function upload_delete_page($article_name,$edit_token,$summary)
    {
        $this->log_message('Deleting page '.$article_name.'.');
        $post_vars=array(
            'wpReason'=>$summary,
            'wpConfirmB'=>'Delete page',
            'wpEditToken'=>$edit_token
        );
        $url=$this->base_url.'w/index.php?title='.$this->spaces_to_underscores($article_name).'&action=delete';
        return $this->upload_url($url,$post_vars,$url);
    }

    /**
     * Submit the move form to rename an article.
     */
    function upload_move_page($article_name,$new_article_name,$edit_token,$summary)
    {
        $this->log_message('Moving page '.$article_name.' to '.$new_article_name.'.');
        $post_vars=array(
            'wpNewTitle'=>$new_article_name,
            'wpOldTitle'=>$article_name,
            'wpReason'=>$summary,
            'wpMove'=>'Move page',
            'wpEditToken'=>$edit_token
        );
        return $this->upload_url($this->base_url.'w/index.php?title=Special:Movepage&action=submit',$post_vars,$this->base_url.'wiki/Special:Movepage/'.$this->spaces_to_underscores($article_name));
    }

    /**
     * Cut one section out of a page and collect the first capture group of every link match in it.
     *
     * @param mixed $page Page HTML (anything else is cast to string)
     * @param string $section_pattern Regex locating the section
     * @param int $section_group Capture group of $section_pattern holding the section
     * @param string $link_pattern Regex whose first group is the article name
     * @return array Article names; empty if the section was not found
     */
    private function extract_section_links($page,$section_pattern,$section_group,$link_pattern)
    {
        $section=array();
        if(!preg_match($section_pattern,(string)$page,$section) || !isset($section[$section_group])) return array();
        $links=array();
        preg_match_all($link_pattern,$section[$section_group],$links);
        return $links[1];
    }

    /**
     * Extract member articles from a category page.
     */
    function extract_category_articles($category_page)
    {
        return $this->extract_section_links($category_page,'|<a name="articles"(.+)<div class="printfooter"|smU',1,'|href="/wiki/(.+)"|U');
    }

    /**
     * Extract page links from an allpages page.
     */
    function extract_allpages_articles($allpages_page)
    {
        return $this->extract_section_links($allpages_page,'|<hr />(.+)<div class="printfooter"|smU',1,'|href="/wiki/([^"]+)"|');
    }

    /**
     * Extract page links from a whatlinkshere page.
     */
    function extract_whatlinkshere_articles($whatlinkshere_page)
    {
        return $this->extract_section_links($whatlinkshere_page,'|500</a>\)\.<ul>(.+)</ul>|smU',1,'|href="/wiki/(.+)"|U');
    }

    /**
     * Extract page links from a deadendpages page.
     */
    function extract_deadendpages_articles($deadendpages_page)
    {
        return $this->extract_section_links($deadendpages_page,'|<ol start=\'([0-9]+)\' class=\'special\'>(.+)</ol>|smU',2,'|href="/wiki/(.+)"|U');
    }

    /**
     * Extract the hidden form values of an edit page.
     *
     * @return array Start time, edit time, edit token, auto summary (empty if the page did not match)
     */
    function extract_edit_keys($edit_page)
    {
        $matches=array();
        preg_match('|value="([0-9]+)" name="wpStarttime".+value="([0-9]+)" name="wpEdittime".+value="([a-zA-Z0-9]+)" name="wpEditToken".+name="wpAutoSummary" value="([a-zA-Z0-9]+)"|sm',(string)$edit_page,$matches);
        array_shift($matches);
        return $matches;
    }

    /**
     * Extract the article wikimarkup from an edit page.
     *
     * @return string The decoded textarea content, or '' if no textarea was found
     */
    function extract_edit_content($edit_page)
    {
        $matches=array();
        if(!preg_match('|cols=\'80\' >(.*)</textarea>|smU',(string)$edit_page,$matches)) return '';
        return htmlspecialchars_decode($matches[1]);
    }

    /**
     * Extract the edit token from a move page.
     *
     * @return string|null The token, or NULL if none was found
     */
    function extract_move_keys($move_page)
    {
        $matches=array();
        preg_match('|name=\'wpEditToken\' value="([a-zA-Z0-9]*)"|',(string)$move_page,$matches);
        return isset($matches[1])?$matches[1]:NULL;
    }

    /**
     * Extract the edit token from a delete page.
     *
     * @return string|null The token, or NULL if none was found
     */
    function extract_delete_keys($delete_page)
    {
        $matches=array();
        preg_match('|name=\'wpEditToken\' value="([a-zA-Z0-9]*)"|',(string)$delete_page,$matches);
        return isset($matches[1])?$matches[1]:NULL;
    }
}

/**
 * An ordered list of task entries plus the value stack and named variables
 * that carry data between them.
 */
class boothby_task_queue extends boothby_common
{
    protected $queue_position=0;
    protected $queue=array();
    protected $stack=array();
    protected $var_stack=array();
    protected $unset_var_stack=array();
    protected $queue_vars=array();
    protected $parent_queue=NULL;
    private $xml_parsing=NULL;

    public function __construct()
    {
        // Deliberately empty: the parent constructor starts the whole bot.
    }

    /**
     * Report how many tasks were left unrun.
     */
    public function __destruct()
    {
        $remaining=count($this->queue)-$this->queue_position;
        if($remaining>0) $this->log_message('Shutting down with '.$remaining.' item(s) remaining in the task queue.');
    }

    /**
     * Load the task queue from one of the supported sources.
     *
     * @param int $list_type One of the TASK_QUEUE_* type constants
     * @param mixed $param TASK_QUEUE_VAR: array(queue, queue vars, variable stack);
     *                     TASK_QUEUE_FILE / TASK_QUEUE_XML: file name
     */
    public function load_task_queue($list_type,$param=NULL)
    {
        switch($list_type)
        {
            case TASK_QUEUE_VAR:
                // PHP array-based task queue: just takes an array to put straight into the queue
                if((is_null($param)) || (!is_array($param)) || (!isset($param[0])) || (!is_array($param[0])) || (!isset($param[1])) || (!is_array($param[1]))) $this->log_fatal('Bad array given for task queue.');
                $this->queue=$param[0];
                $this->queue_vars=$param[1];
                $this->var_stack=isset($param[2])?$param[2]:array();
                break;
            case TASK_QUEUE_FILE:
                // Text-file-based task queue: supports simple linear list of actions to perform
                if((is_null($param)) || (!file_exists($param)) || (!is_readable($param))) $this->log_fatal('Bad filename given for task queue.');
                $lines=explode("\n",file_get_contents($param));
                foreach($lines as $line_no=>$item)
                {
                    $item=trim($item);
                    if($item=='') continue;
                    $details=explode(',',$item);
                    if(count($details)<2) $this->log_fatal('Too few parameters on line '.($line_no+1).'.');
                    $category=array_shift($details);
                    $name=array_shift($details);
                    // Whatever is left are the parameters: none, one, or a list.
                    if(count($details)>1) $params=$details;
                    else $params=isset($details[0])?$details[0]:'';
                    $this->queue[]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,$category,$name,$params,$this);
                }
                break;
            case TASK_QUEUE_XML:
                // XML-based task queue: supports everything, including blocks
                if((is_null($param)) || (!file_exists($param)) || (!is_readable($param))) $this->log_fatal('Bad filename given for task queue.');
                $this->xml_parsing=array('started'=>false,'current_task_args'=>array(),'current_container'=>&$this->queue,'containers'=>array(),'current_var_container'=>&$this->queue_vars,'var_containers'=>array(),'current_var_stack_container'=>&$this->var_stack,'var_stack_containers'=>array());
                $xml_parser=xml_parser_create();
                xml_set_element_handler($xml_parser,array($this,'_xml_start_element'),array($this,'_xml_end_element'));
                $fp=fopen($param,'r');
                if($fp===false) $this->log_fatal('Bad filename given for task queue.');
                // Loop on feof() so the parser always receives a final chunk,
                // even when the file size is an exact multiple of the chunk size.
                while(!feof($fp))
                {
                    $xml_data=fread($fp,4096);
                    if($xml_data===false) break;
                    if(!xml_parse($xml_parser,$xml_data,feof($fp))) $this->log_fatal('XML parsing error in task file. ('.xml_error_string(xml_get_error_code($xml_parser)).' on line '.xml_get_current_line_number($xml_parser).')');
                }
                fclose($fp);
                xml_parser_free($xml_parser);
                unset($xml_data,$xml_parser); // Waste not...
                $this->xml_parsing=NULL;
                break;
            case TASK_QUEUE_CLI:
                /* TODO: Ask whether a "file" or "xml" script is to be written. Let them write one (line-by-line, with perhaps some checking for mistakes),
                save it as a file (with the filename specified by the user), and then run it. */
                $this->log_fatal('Task queue CLI interface isn\'t supported yet!');
                break;
            default:
                // Unknown task queue type
                $this->log_fatal('Unknown task queue type.');
                break;
        }
    }

    /**
     * Check the ordering of the queued tasks; terminates through log_fatal() on a violation.
     */
    public function check_validity()
    {
        $last_task=NULL;
        $current_task=NULL;
        $article_list=false;
        for($i=0;$i<count($this->queue);$i++)
        {
            if(isset($this->queue[$i-1])) $last_task=$this->queue[$i-1];
            $last_task_cat=(!is_null($last_task))?$last_task->get_category():NULL;
            if(isset($this->queue[$i])) $current_task=$this->queue[$i];
            $current_task_cat=(!is_null($current_task))?$current_task->get_category():NULL;

            switch($current_task_cat)
            {
                case TASK_QUEUE_ENTRY_CONSTRUCT:
                    if((is_null($last_task_cat)) || (!$article_list)) $this->log_fatal('Construct tasks can only come after log, article list, or modifier tasks.');
                    break;
                case TASK_QUEUE_ENTRY_PROCESS:
                    if((is_null($last_task_cat)) || (!$article_list)) $this->log_fatal('Process tasks can only come after log, article list, or modifier tasks.');
                    break;
                case TASK_QUEUE_ENTRY_LOG:
                    if($last_task_cat==TASK_QUEUE_ENTRY_LOG) $this->log_message('You have two log tasks next to each other. Why not combine them?');
                    break;
                case TASK_QUEUE_ENTRY_ARTICLE_LIST:
                    $article_list=true;
                    //if(($last_task_cat!=TASK_QUEUE_ENTRY_LOG) && (!is_null($last_task_cat))) $this->log_fatal('Article list tasks can only come after log tasks, or be first.');
                    break;
                case TASK_QUEUE_ENTRY_MODIFIER:
                    if(($last_task_cat==TASK_QUEUE_ENTRY_PROCESS) || ($last_task_cat==TASK_QUEUE_ENTRY_CONSTRUCT)) $this->log_fatal('Modifier tasks can only come after log, article list, or other modifier tasks.');
                    break;
            }
        }
    }

    /**
     * Run the tasks from the current queue position until the queue is exhausted.
     *
     * Each task's input is either the top of the stack ('in' is the string 'stack')
     * or the value stored under 'in'; its output is pushed on the stack ('out' is
     * the string 'stack') or written to 'out'.
     */
    public function run_tasks()
    {
        $stack=NULL;
        $i=0;
        $task=$this->get_current_task();
        $task_vars=$this->get_current_task_vars();
        while($task instanceof boothby_task_queue_entry)
        {
            if((is_null($task_vars)) || (!is_array($task_vars))) $task_vars=array('in'=>NULL,'out'=>'stack');

            // Strict comparison: a variable currently holding true (the result of a
            // log or process task) must not be mistaken for the 'stack' keyword.
            if($task_vars['in']==='stack')
            {
                if($i>0) $stack=$this->pop_stack();
                $task_vars['in']=&$stack;
            }

            $output=$task->run_task($task_vars['in']);
            if($task_vars['out']==='stack') $this->push_stack($output);
            else $task_vars['out']=$output;

            $i++;
            $this->queue_position++;
            $task=$this->get_current_task();
            $task_vars=$this->get_current_task_vars();
        }
    }

    /**
     * @return boothby_task_queue_entry|false The task at the current position, or false past the end
     */
    public function get_current_task()
    {
        if(!array_key_exists($this->queue_position,$this->queue)) return false;
        return $this->queue[$this->queue_position];
    }

    /**
     * @return array|false The in/out bindings of the current task, or false if there are none
     */
    public function get_current_task_vars()
    {
        if(!array_key_exists($this->queue_position,$this->queue_vars)) return false;
        return $this->queue_vars[$this->queue_position];
    }

    public function count_tasks()
    {
        return count($this->queue);
    }

    /**
     * Move the queue position; ignored if the new position is past the last task.
     */
    public function reset_position($new_position=0)
    {
        if($this->count_tasks()<=$new_position) return;
        $this->queue_position=$new_position;
    }

    /**
     * Pop the top value off the task stack; a NULL result is treated as underflow and is fatal.
     */
    public function pop_stack()
    {
        $return=array_pop($this->stack);
        if(is_null($return)) $this->log_fatal('The task stack underflew. You have too many requesting tasks, and not enough contributing tasks.');
        return $return;
    }

    public function push_stack($param)
    {
        $this->stack[]=$param;
    }

    /**
     * Set (and create if necessary) a variable on the $var_stack.
     */
    public function set_variable($name,$value)
    {
        $this->var_stack[$name]=$value;
        // Only splice when the name is actually listed: a false result from
        // array_search() would otherwise remove the first entry.
        $position=array_search($name,$this->unset_var_stack);
        if($position!==false) array_splice($this->unset_var_stack,$position,1);
    }

    public function get_variables()
    {
        return $this->var_stack;
    }

    public function set_parent_queue($parent_queue)
    {
        $this->parent_queue=$parent_queue;
    }

    public function get_parent_queue()
    {
        return $this->parent_queue;
    }

    // XML parsing functions for XML task queues

    /**
     * Start-element handler for XML task queues. Element and attribute names
     * arrive upper-cased.
     */
    private function _xml_start_element($parser,$element_name,$attributes)
    {
        if($element_name=='BOOTHBY-SCRIPT')
        {
            if($this->xml_parsing['started']) $this->log_fatal('You can\'t have more than one boothby-script element in an XML task queue.');
            else $this->xml_parsing['started']=true;
            return;
        }

        if(!$this->xml_parsing['started']) $this->log_fatal('The root element in an XML task queue must be a boothby-script element.');
        if(!array_key_exists('NAME',$attributes)) $this->log_fatal('All elements have to have a name attribute.');

        // Parameter collection: PARAM, then PARAM1, PARAM2, ...
        $_params=array();
        if(array_key_exists('PARAM',$attributes))
        {
            $_params[]=$attributes['PARAM'];
            $i=1;
            while(array_key_exists('PARAM'.$i,$attributes))
            {
                $_params[]=$attributes['PARAM'.$i];
                $i++;
            }
        }

        // Variable replacement with references
        $params=array();
        foreach($_params as $param)
        {
            // substr() rather than $param{0}: the brace syntax is gone in PHP 8,
            // and substr() copes with an empty string.
            if(substr($param,0,1)==='$')
            {
                // HACKHACK!
                $var_name=substr($param,1);
                if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
                $params[]=&$this->xml_parsing['current_var_stack_container'][$var_name];
            }
            else $params[]=$param;
        }
        unset($_params,$var_name); // Waste not...
        $attributes['PARAMS']=$params;

        // In and out variable replacements
        if(!array_key_exists('IN',$attributes)) $attributes['IN']=NULL;
        elseif(substr($attributes['IN'],0,1)==='$')
        {
            $var_name=substr($attributes['IN'],1);
            if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
            $attributes['IN']=&$this->xml_parsing['current_var_stack_container'][$var_name];
        }

        // An explicit out="stack" is accepted, as the error message below promises.
        if((!array_key_exists('OUT',$attributes)) || ($attributes['OUT']==='stack')) $attributes['OUT']='stack';
        elseif(substr($attributes['OUT'],0,1)!=='$') $this->log_fatal('Out attributes must either be "stack", or a variable name with a preceding dollar sign.');
        else
        {
            $var_name=substr($attributes['OUT'],1);
            if(!array_key_exists($var_name,$this->xml_parsing['current_var_stack_container'])) $this->xml_parsing['current_var_stack_container'][$var_name]='';
            $attributes['OUT']=&$this->xml_parsing['current_var_stack_container'][$var_name];
        }

        $this->xml_parsing['current_var_container'][]=array(
            'in'=>&$attributes['IN'],
            'out'=>&$attributes['OUT']
        );
        $this->xml_parsing['current_task_args'][]=$attributes;

        switch($element_name)
        {
            case 'INPUT':
                // Input element
                if(!array_key_exists('PROMPT',$attributes)) $this->log_fatal('Input elements have to have a prompt attribute.');
                $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_INPUT,$attributes['NAME'],$attributes['PROMPT'],$this);
                break;
            case 'LOG':
                // Log element
                if(!array_key_exists('MESSAGE',$attributes)) $this->log_fatal('Log elements have to have a message attribute.');
                $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_LOG,$attributes['NAME'],$attributes['MESSAGE'],$this);
                break;
            case 'ARTICLE-LIST':
                // Article list element
                $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_ARTICLE_LIST,$attributes['NAME'],$params,$this);
                break;
            case 'MODIFIER':
                // Modifier element
                $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_MODIFIER,$attributes['NAME'],$params,$this);
                break;
            case 'CONSTRUCT':
                // Construct element: save the current containers and start empty
                // ones for the nested tasks; they are restored by the end handler.
                $this->xml_parsing['containers'][]=$this->xml_parsing['current_container'];
                $this->xml_parsing['current_container']=array();
                $this->xml_parsing['var_containers'][]=$this->xml_parsing['current_var_container'];
                $this->xml_parsing['current_var_container']=array();
                $this->xml_parsing['var_stack_containers'][]=$this->xml_parsing['current_var_stack_container'];
                $this->xml_parsing['current_var_stack_container']=array();
                break;
            case 'PROCESS':
                // Process element
                $this->xml_parsing['current_container'][]=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_INLINE,TASK_QUEUE_ENTRY_PROCESS,$attributes['NAME'],$params,$this);
                break;
            default:
                // Unknown task queue entry type
                $this->log_fatal('Unknown XML task queue element.');
                break;
        }
    }

    /**
     * End-element handler for XML task queues. Only construct elements need
     * work here: their nested tasks are wrapped into one block entry.
     */
    private function _xml_end_element($parser,$element_name)
    {
        if($element_name=='BOOTHBY-SCRIPT')
        {
            if(!$this->xml_parsing['started']) $this->log_fatal('You can\'t have more than one boothby-script element in an XML task queue.');
            else $this->xml_parsing['started']=false;
            return;
        }

        if(!$this->xml_parsing['started']) $this->log_fatal('The root element in an XML task queue must be a boothby-script element.');

        $attributes=array_pop($this->xml_parsing['current_task_args']);

        switch($element_name)
        {
            case 'INPUT': // Input element
            case 'LOG': // Log element
            case 'ARTICLE-LIST': // Article list element
            case 'MODIFIER': // Modifier element
            case 'PROCESS': // Process element
                break;
            case 'CONSTRUCT':
                // Construct element
                $task_queue=$this->xml_parsing['current_container'];
                $this->xml_parsing['current_container']=array_pop($this->xml_parsing['containers']);
                $queue_vars=$this->xml_parsing['current_var_container'];
                $this->xml_parsing['current_var_container']=array_pop($this->xml_parsing['var_containers']);
                $var_stack=$this->xml_parsing['current_var_stack_container'];
                $this->xml_parsing['current_var_stack_container']=array_pop($this->xml_parsing['var_stack_containers']);

                $task_entry=new boothby_task_queue_entry(TASK_QUEUE_ENTRY_BLOCK,TASK_QUEUE_ENTRY_CONSTRUCT,$attributes['NAME'],$attributes['PARAMS'],$this);
                $task_entry->attach_task_queue($task_queue,$queue_vars,$var_stack);
                unset($task_queue); // Waste not...
                unset($queue_vars); // Waste not...
                $this->xml_parsing['current_container'][]=$task_entry;
                unset($task_entry); // Waste not...
                break;
            default:
                // Unknown task queue entry type
                $this->log_fatal('Unknown XML task queue element.');
                break;
        }
    }
}

/**
 * One task in a queue: a category (input, log, article list, modifier,
 * construct or process), a function name within that category, and its parameters.
 */
class boothby_task_queue_entry extends boothby_common
{
    protected $done=false;
    protected $type;
    protected $category;
    protected $function_name;
    protected $function_params;
    protected $task_queue=NULL;
    protected $parent_queue;

    /**
     * @param int $type TASK_QUEUE_ENTRY_BLOCK or TASK_QUEUE_ENTRY_INLINE
     * @param string $category One of the TASK_QUEUE_ENTRY_* category constants
     * @param string $name Function name within the category
     * @param mixed $params A single parameter or an array of parameters
     * @param boothby_task_queue $parent_queue The queue this entry belongs to
     */
    public function __construct($type,$category,$name,$params,$parent_queue)
    {
        $type=trim($type);
        $category=trim($category);
        $name=trim($name);
        if(!is_array($params)) $params=trim($params);

        // $type is a string after trim(); store the integer constant it equals.
        switch($type)
        {
            case TASK_QUEUE_ENTRY_BLOCK:
                $this->type=TASK_QUEUE_ENTRY_BLOCK;
                break;
            case TASK_QUEUE_ENTRY_INLINE:
                $this->type=TASK_QUEUE_ENTRY_INLINE;
                break;
            default:
                // Unknown task queue entry type
                $this->log_fatal('Unknown task queue entry type.');
                break;
        }

        $known_categories=array(
            TASK_QUEUE_ENTRY_INPUT,
            TASK_QUEUE_ENTRY_LOG,
            TASK_QUEUE_ENTRY_ARTICLE_LIST,
            TASK_QUEUE_ENTRY_MODIFIER,
            TASK_QUEUE_ENTRY_CONSTRUCT,
            TASK_QUEUE_ENTRY_PROCESS
        );
        if(in_array($category,$known_categories,true)) $this->category=$category;
        else $this->log_fatal('Unknown task queue entry category.');

        $this->function_name=$name;
        $this->function_params=$params;
        $this->parent_queue=$parent_queue;
    }

    public function __destruct()
    {
        // Deliberately empty: the parent destructor logs the bot out.
    }

    /**
     * Run the task and return its result.
     *
     * The work is dispatched to the method "<category>_<function name>" of the
     * class "boothby_<category>"; log tasks are handled by this class itself.
     *
     * @param mixed $stack Input value for the task (ignored by input, article list and log tasks)
     * @return mixed The task's return value; true for log and process tasks
     */
    public function run_task($stack)
    {
        $class_name='boothby_'.$this->category;
        $function_name=$this->category.'_'.$this->function_name;
        $return=false;

        switch($this->category)
        {
            case TASK_QUEUE_ENTRY_INPUT: // Input task
            case TASK_QUEUE_ENTRY_ARTICLE_LIST: // Article list task
                $class=new $class_name();
                $return=$class->$function_name($this->function_params);
                unset($class); // Waste not...
                break;
            case TASK_QUEUE_ENTRY_LOG:
                // Log task
                // TODO: Support for interpolating variable names with values
                switch($this->function_name)
                {
                    case 'message':
                        $this->log_message($this->function_params);
                        break;
                    case 'update_task':
                        $this->update_task($this->function_params);
                        break;
                    case 'message_and_update_task':
                        $this->log_message_and_update_task($this->function_params);
                        break;
                    case 'error':
                        $this->log_error($this->function_params);
                        break;
                    case 'fatal':
                        $this->log_fatal($this->function_params);
                        break;
                    default:
                        $this->log_fatal('Unknown log function.');
                        break;
                }
                $return=true;
                break;
            case TASK_QUEUE_ENTRY_MODIFIER:
                // Modifier task
                $class=new $class_name();
                $return=$class->$function_name($stack,$this->function_params);
                unset($class); // Waste not...
                break;
            case TASK_QUEUE_ENTRY_PROCESS:
                // Process task: applied to every article when given a list
                $class=new $class_name();
                if(is_array($stack))
                {
                    foreach($stack as $article_name) $class->$function_name($article_name,$this->function_params);
                }
                else $class->$function_name($stack,$this->function_params);
                unset($class); // Waste not...
                $return=true;
                break;
            case TASK_QUEUE_ENTRY_CONSTRUCT:
                // Construct task
                $class=new $class_name();
                $return=$class->$function_name($stack,$this->task_queue);
                unset($class); // Waste not...
                break;
            default:
                // Unknown task queue entry category
                $this->log_fatal('Unknown task queue entry category.');
                break;
        }

        $this->done=($return)?true:false;
        return $return;
    }

    public function get_category()
    {
        return $this->category;
    }

    /**
     * Attach a nested task queue to a block entry.
     *
     * @param array $task_queue Nested task entries
     * @param array $queue_vars In/out bindings of the nested tasks
     * @param array $var_stack Variables of the nested queue
     */
    public function attach_task_queue($task_queue,$queue_vars,$var_stack)
    {
        if($this->type!=TASK_QUEUE_ENTRY_BLOCK) $this->log_fatal('Task queues cannot be attached to non-block tasks.');
        $this->task_queue=new boothby_task_queue();
        $this->task_queue->load_task_queue(TASK_QUEUE_VAR,array($task_queue,$queue_vars,$var_stack));
        $this->task_queue->set_parent_queue($this->parent_queue);
    }
}

/**
 * Input tasks: read values from the user.
 */
class boothby_input extends boothby_common
{
    public function __construct()
    {
        // Deliberately empty: the parent constructor starts the whole bot.
    }

    public function __destruct()
    {
        // Deliberately empty: the parent destructor logs the bot out.
    }

    /**
     * Show a prompt and return one trimmed line read from stdin.
     */
    public function input_normal($prompt)
    {
        $this->log_message($prompt);
        return trim(fgets(self::$stdin));
    }
}

/**
 * Article list tasks: each method returns a list of article names.
 * Parameters may arrive wrapped in (nested) arrays; the first element is used.
 */
class boothby_article_list extends boothby_common
{
    public function __construct()
    {
        // Deliberately empty: the parent constructor starts the whole bot.
    }

    public function __destruct()
    {
        // Deliberately empty: the parent destructor logs the bot out.
    }

    /**
     * List the member articles of a category.
     */
    public function article_list_category_articles($category_name)
    {
        while(is_array($category_name)) $category_name=$category_name[0];
        // Return the extracted names, like the other article list tasks,
        // rather than the raw HTML of the category page.
        return $this->extract_category_articles($this->download_page('Category:'.$category_name));
    }

    /**
     * List the articles that link to the given article.
     */
    public function article_list_whatlinkshere_articles($article_name)
    {
        while(is_array($article_name)) $article_name=$article_name[0];
        return $this->download_whatlinkshere_page($article_name);
    }

    /**
     * List up to $limit dead-end pages.
     */
    public function article_list_deadendpages_articles($limit)
    {
        while(is_array($limit)) $limit=$limit[0];
        return $this->download_deadendpages_page($limit);
    }

    /**
     * Fetch a list of articles from allpages, starting with the specified article name (and in that namespace).
     */
    public function article_list_allpages_articles($article_name)
    {
        while(is_array($article_name)) $article_name=$article_name[0];
        // strpos() takes the haystack first; without a colon the article is in the main namespace.
        $colon_pos=strpos($article_name,':');
        $namespace=($colon_pos===false)?'':substr($article_name,0,$colon_pos);
        if(array_key_exists($namespace,$this->all_namespaces)) $namespace=$this->all_namespaces[$namespace];
        else $namespace=0;
        return $this->download_allpages_page($article_name,$namespace);
    }
}

/**
 * Modifier tasks: each method takes an article list and returns a changed list.
 */
class boothby_modifier extends boothby_common
{
    public function __construct()
    {
        // Deliberately empty: the parent constructor starts the whole bot.
    }

    public function __destruct()
    {
        // Deliberately empty: the parent destructor logs the bot out.
    }

    /**
     * Remove unwanted namespaces from the article list (e.g. talk namespaces, project namespace, user namespace, etc.)
     */
    public function modifier_remove_bad_namespaces($article_list)
    {
        if(!is_array($article_list)) $article_list=array($article_list);
        $_article_list=array();
        foreach($article_list as $article_name)
        {
            // The text before the first colon is compared against the bad namespace names.
            $_article_name=explode(':',$this->underscores_to_spaces($article_name));
            if(!in_array($_article_name[0],$this->bad_namespaces)) $_article_list[]=$article_name;
        }
        return $_article_list;
    }

    /**
     * Limit a list of articles to those underneath a particular page (and that page itself)
     */
    public function modifier_limit_to_article($article_list,$article_name)
    {
        if(!is_array($article_list)) $article_list=array($article_list);
        if(is_array($article_name)) $article_name=$article_name[0];
        $_article_list=array();
        foreach($article_list as $_article_name)
        {
            $_article_name=$this->underscores_to_spaces($_article_name);
            $i=0;
            $f=0;
            // Advance $i past one slash of the candidate for every slash found in $article_name.
            while($f=strpos($article_name,'/',$f))
            {
                $i=strpos($_article_name,'/',$i);
                $i++;
                $f++;
            }
            if(($_article_name==$article_name) || (substr($_article_name,0,strpos($_article_name,'/',$i))==$article_name)) $_article_list[]=$_article_name;
        }
        return $_article_list;
    }

    public function modifier_substitute_main_article_name($article_list,$new_main_article_name)
    {
        // Replace the main article name of a given article with a new one
        if(!is_array($article_list)) $article_list=array($article_list);
        if(is_array($new_main_article_name)) $new_main_article_name=$new_main_article_name[0];
        $_article_list=array();
        foreach($article_list as $_article_name)
        {
            // TODO: Sort this code out
            $i=0;
            $f=0;
            while($f=strpos($new_main_article_name,'/',$f))
            {
                $i=strpos($_article_name,'/',$i);
                $i++;
                $f++;
            }
$_article_list[]=($i!==false)?$new_main_article_name.substr($_article_name,$i-1):$new_main_article_name; } return $_article_list; } } class boothby_construct extends boothby_common { public function __construct() { // Constructor } public function __destruct() { // Destructor } public function construct_for_each($article_list,$task_queue) { // Simple for-each loop for the article list $parent_queue=$task_queue->get_parent_queue(); $variables=$parent_queue->get_variables(); foreach($variables as $variable_name=>$variable_value) $task_queue->set_variable($variable_name,$variable_value); foreach($article_list as $article_name) { $task_queue->push_stack($article_name); $task_queue->set_variable('for_each_key',$article_name); $task_queue->run_tasks(); $task_queue->reset_position(); } $variables=$task_queue->get_variables(); foreach($variables as $variable_name=>$variable_value) $parent_queue->set_variable($variable_name,$variable_value); return $article_list; } } class boothby_process extends boothby_common { public function __construct() { // Constructor } public function __destruct() { // Destructor } public function process_recategorise_article($article_name,$new_category_name) { // Move the page to a new category while(is_array($new_category_name)) $new_category_name=$new_category_name[0]; $article_content=$this->download_edit_page($article_name); $this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),preg_replace('|\[\[(:)?'.str_replace('|','\|',$category_name).'(\|[^\]]*)?\]\]|','[[$1'.$new_category_name.'$2]]',$this->extract_edit_content($article_content)),'Moved to Category:'.$new_category_name.' (bot edit)',false); return $article_name; } public function process_relink_article($article_name,$params) { // Change the links in the article which link to the old article to point to the new article list($old_article_name,$new_article_name)=$params; unset($params); // Waste not... 
$article_content=$this->download_edit_page($article_name); $this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),preg_replace('|\[\[(:)?(/)?'.str_replace('|','\|',$old_article_name).'(/)?(\|[^\]]*)?\]\]|','[[$1$2'.$new_article_name.'$3$4]]',$this->extract_edit_content($article_content)),'Re-linked to '.$new_article_name.' (bot edit)',false); return $article_name; } public function process_rereference_template($article_name,$params) { // Change the references in the article which reference the old template to the new template list($old_template_name,$new_template_name)=$params; unset($params); // Waste not... $article_content=$this->download_edit_page($article_name); $this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),preg_replace('|\{\{(:)?'.str_replace('|','\|',$old_template_name).'(\|[^\}]*)?\}\}|i','{{$1'.$new_template_name.'$2}}',$this->extract_edit_content($article_content)),'Re-referenced to Template:'.$new_template_name.' (bot edit)',false); return $article_name; } public function process_add_all_game_nav($article_name,$params) { // Add an all game nav template to the article // HACKHACK! $article_content=$this->download_edit_page($article_name); $this->upload_edit_page($article_name,$this->extract_edit_keys($article_content),"{{All Game Nav|game=".$this->underscores_to_spaces(substr($article_name,0,strpos($article_name,'/')))."}}\n".$this->extract_edit_content($article_content),'Added all game nav (bot edit)',false); return $article_name; } public function process_move_article($article_name,$new_article_name) { // Move an article to the new article while(is_array($new_article_name)) $new_article_name=$new_article_name[0]; $this->upload_move_page($article_name,$new_article_name,$this->extract_move_keys($this->download_move_page($article_name)),'Moved to '.$new_article_name.' 
(bot edit)'); return $new_article_name; } public function process_move_file($file_name,$new_file_name) { // Move a file while(is_array($new_file_name)) $new_file_name=$new_file_name[0]; $temp_file_name=tempnam('./','temp_'); $temp_file=fopen($temp_file_name,'w'); fwrite($temp_file,$this->download_file($file_name)); fclose($temp_file); $this->upload_file($temp_file_name,$new_file_name,$this->extract_edit_content($this->download_edit_page('Image:'.$file_name))); //'Moved from Image:'.$file_name.' (bot edit)' unlink($temp_file_name); return $new_file_name; } public function process_delete_article($article_name) { // Delete an article $this->upload_delete_page($article_name,$this->extract_delete_keys($this->download_delete_page($article_name)),'Deleted (bot edit)'); return true; } } ?>