427 lines
17 KiB
PHP
Executable file
427 lines
17 KiB
PHP
Executable file
#!/usr/bin/php
|
||
<?php
|
||
|
||
/*
|
||
This program is free software: you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation, either version 3 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
*/
|
||
|
||
// Newline shorthand appended to every message this script prints.
define('N',"\n");
|
||
// File name of this script (used in the help text and in the lock-file message).
define('SNAME',basename(__FILE__));
|
||
// SNAME with its last «.extension» stripped; used as the prefix of the lock-file name.
define('FNAME',preg_replace('/\.[^.]*$/','',SNAME));
|
||
// Name of the child script, as shown in the help text.
define('CHILD','getinstinfo.php');
|
||
// Path, relative to this script's directory, of the include directory the helper files are required from.
define('LIBDP','/../../site/mustard/include');
|
||
|
||
require(__DIR__.LIBDP.'/ght.php');
|
||
require(__DIR__.LIBDP.'/parsetime.php');
|
||
|
||
use function mysqli_real_escape_string as myesc;
|
||
|
||
// Ticks must be enabled for pcntl signal handlers to be dispatched between statements.
declare(ticks=1);

if (function_exists('pcntl_signal')) {
	/**
	 * Handles a termination-style signal: prints a newline, then leaves
	 * through mexit() with exit code 0 so that mexit()'s cleanup runs.
	 *
	 * @param int $signal number of the received signal
	 */
	function signalHandler($signal) {
		echo(N);
		mexit('received signal «'.$signal.'», shutting down.'.N,0);
	}

	// SIGTERM: termination ('kill' was called); SIGHUP: terminal log-out;
	// SIGINT: interrupted (Ctrl-C was pressed).
	foreach ([SIGTERM, SIGHUP, SIGINT] as $signo)
		pcntl_signal($signo,'signalHandler');
}
|
||
|
||
// Default values of every option; the command line parser overrides them in place.
$opts=[
	// number of child processes kept running in parallel
	'poolsize'    => 20,
	// extra SQL appended to the query selecting the instances to check
	'moreclauses' => '',
	// optional file listing additional instances (null: none given)
	'peersfp'     => null,
	// true: never resume an interrupted session
	'dontrestore' => false,
	// true: run even if the lock file exists
	'ignorelock'  => false,
	// per-connection timeout, in seconds
	'timeout'     => 10,
	// if an instance has not been responding for more than this value of seconds, declare it dead
	'deadline'    => 60*24*3600,
	// if an instance has been new for a period longer than this amount, it's no longer new
	'oldline'     => 30*24*3600,
	// number of toots to check with the automatic language detection function
	'ldtoots'     => 40,
	'fetchusers'  => false,
	'setnew'      => true,
	'dryrun'      => false,
	'jsonfp'      => __DIR__.'/instances.json',
	'jsonwrite'   => false,
];
|
||
|
||
// Help text echoed by «-h»/«--help». It is assembled once, before the command
// line is parsed, so every «DEFAULT:» line shows the built-in default taken
// from $opts. ght() comes from the required ght.php; it presumably renders a
// number of seconds as a human readable duration using the given unit labels
// (TODO confirm against ght.php).
$help='SYNOPSIS
|
||
|
||
'.SNAME.' [options]
|
||
|
||
DESCRIPTION
|
||
|
||
This script coordinates the parallel execution of a definable number of
|
||
'.CHILD.' processes “against” all the alive instances which are already
|
||
present in mastostart’s database, plus optionally those listed in a
|
||
specifiable file (typically the output file from a peerscrawl.php run).
|
||
|
||
OPTIONS
|
||
|
||
-p, --peersfp <file>
|
||
Defines the path to a file containing a list of instances to consider in
|
||
addition to those which are already present in the database. Note that this
|
||
option is ignored if the script will recover a previous unfinished session.
|
||
-P, --poolsize <number>
|
||
The number of slots in the processes pool, that is the number of '.CHILD.'
|
||
processes the script will run in parallel. Note that this option is ignored
|
||
if the script will recover a previous unfinished session.
|
||
DEFAULT: '.$opts['poolsize'].'
|
||
-D, --deadline <time specification>
|
||
If an instance has not been responding for more than this time, declare
|
||
it dead. See section «TIME SPECIFICATION» below to see how to specify time.
|
||
This option gets passed to each '.CHILD.' process as is, and has no effect
|
||
on '.SNAME.' itself.
|
||
DEFAULT: '.ght($opts['deadline'],[' day§ days',' hour§ hours',' minute§ minutes',' second§ seconds']).'
|
||
-o, --oldline <time specification>
|
||
If an instance has been marked as new for more than this time, mark it as
|
||
not new. See section «TIME SPECIFICATION» below to see how to specify time.
|
||
This option gets passed to each '.CHILD.' process as is, and has no effect
|
||
on '.SNAME.' itself.
|
||
DEFAULT: '.ght($opts['oldline'],[' day§ days',' hour§ hours',' minute§ minutes',' second§ seconds']).'
|
||
-l, --ldtoots <number>
|
||
This option defines the number of toots that '.CHILD.' processes will try
|
||
to fetch from the local public timelines to try and guess the most used
|
||
languages of each instance. This option gets passed to each '.CHILD.'
|
||
process as is, and has no effect on '.SNAME.' itself. Its minimum value is
|
||
10, its maximum is 40.
|
||
DEFAULT: '.$opts['ldtoots'].'
|
||
-f, --fetchusers
|
||
If this option is set, the '.CHILD.' processes will try to fetch users’
|
||
profiles infos from each considered instance’s user directory and store
|
||
them in the database. This option gets passed to each '.CHILD.' process as
|
||
is, and has no effect on '.SNAME.' itself.
|
||
-t, --timeout <seconds>
|
||
Sets the timeout in seconds for every connection attempt. This option gets
|
||
passed to each '.CHILD.' process as is, and has no effect on '.SNAME.'
|
||
itself.
|
||
DEFAULT: '.$opts['timeout'].'
|
||
-N, --dontsetnew
|
||
If this option is set, '.CHILD.' processes won’t mark new instances as
|
||
new. This can be useful for a first run. This option gets passed to each
|
||
'.CHILD.' process as is, and has no effect on '.SNAME.' itself.
|
||
-I, --ignorelock
|
||
Normally, if its lockfile exists, the script will exit with an error.
|
||
If this option is set, instead, the lockfile existence will be ignored.
|
||
Please check that the script is actually not running before using it.
|
||
-R, --dontrestore
|
||
If this option is set and «instances.job» and «status.job» files from
|
||
a previous unfinished session are present in the «run» subdirectory inside
|
||
the directory where the script resides, the script will ignore them and
|
||
start a new session; otherwise the script will restore the previous,
|
||
unfinished session.
|
||
-d, --dryrun
|
||
If this option is set, '.CHILD.' processes won’t write anything in the
|
||
database. It is meant for testing purposes. This option gets passed to each
|
||
'.CHILD.' process as is, and has no effect on '.SNAME.' itself.
|
||
-m, --moreclauses <more SQL clauses>
|
||
If this option is set, whatever one writes as argument to the option will
|
||
be added to the main query for instances’ records, which is «SELECT URI FROM
|
||
Instances WHERE Dead=0».
|
||
-h, --help
|
||
When this option is specified, the script will show this help text and exit.
|
||
|
||
TIME SPECIFICATION
|
||
|
||
An example is better than ~5147 words :-)
|
||
To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
|
||
7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
|
||
also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
|
||
|
||
LICENSE
|
||
|
||
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
|
||
This is free software, and you are welcome to redistribute it under certain
|
||
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
|
||
|
||
/*
 * Command line parsing.
 *
 * Walks $argv left to right and overrides the defaults stored in $opts.
 * Options taking a value consume the following word as well. Any invalid or
 * unknown word terminates the script through mexit() with exit code 1;
 * «-h»/«--help» prints $help and exits with code 0.
 */
for ($i=1; $i<$argc; $i++) {
	$opt=$argv[$i];
	// Word following the option, or null when the option is the last word.
	$arg=($i+1<$argc) ? $argv[$i+1] : null;

	switch ($opt) {
		case '-p':
		case '--peersfp':
			if ($arg===null || !is_file($arg) || !is_readable($arg))
				mexit('option «'.$opt.'» requires an existing and readable file as an argument (use «-h» to read help).'.N,1);
			$opts['peersfp']=$arg;
			$i++;
			break;

		case '-P':
		case '--poolsize':
			// The pattern is anchored: an unanchored «\d+» would accept
			// words such as «abc5» and let them reach the integer conversion.
			if ($arg===null || preg_match('/^\d+$/',$arg)!==1 || (int)$arg<1)
				mexit('option «'.$opt.'» requires an integer number greater than 0 as an argument (use «-h» to read help).'.N,1);
			$opts['poolsize']=(int)$arg;
			$i++;
			break;

		case '-t':
		case '--timeout':
			if ($arg===null || preg_match('/^[0-9]+$/',$arg)!==1)
				mexit('option «'.$opt.'» requires a numeric argument (use «-h» to read help).'.N,1);
			$opts['timeout']=(int)$arg;
			$i++;
			break;

		case '-D':
		case '--deadline':
		case '-o':
		case '--oldline':
			// parsetime() (from parsetime.php) returns false when the
			// specification is not valid.
			$secs=($arg===null) ? false : parsetime($arg);
			if ($secs===false)
				mexit('option «'.$opt.'» requires a time specification as an argument (use «-h» to read help).'.N,1);
			$opts[($opt=='-D' || $opt=='--deadline') ? 'deadline' : 'oldline']=$secs;
			$i++;
			break;

		case '-l':
		case '--ldtoots':
			if ($arg===null || preg_match('/^\d+$/',$arg)!==1 || (int)$arg>40 || (int)$arg<10)
				mexit('option «'.$opt.'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
			$opts['ldtoots']=(int)$arg;
			$i++;
			break;

		case '-f':
		case '--fetchusers':
			$opts['fetchusers']=true;
			break;

		case '-N':
		case '--dontsetnew':
			$opts['setnew']=false;
			break;

		case '-d':
		case '--dryrun':
			$opts['dryrun']=true;
			break;

		case '-R':
		case '--dontrestore':
			$opts['dontrestore']=true;
			break;

		case '-I':
		case '--ignorelock':
			$opts['ignorelock']=true;
			break;

		case '-m':
		case '--moreclauses':
			if ($arg===null)
				mexit('option «'.$opt.'» requires some SQL clause as argument (use «-h» to read help).'.N,1);
			$opts['moreclauses']=$arg;
			$i++;
			break;

		case '-h':
		case '--help':
			echo($help);
			exit(0);

		default:
			mexit('don’t know how to interpret «'.$opt.'» (you can read the help text using «-h» or «--help»).'.N,1);
	}
}
|
||
|
||
// Working directory holding the lock file and the session («*.job») files.
// It is created on first use; when it already exists it must be a directory
// this process can both read and write.
$rundirpath=__DIR__.'/run';
if (!file_exists($rundirpath)) {
	if (@mkdir($rundirpath)===false)
		mexit('could not create directory «'.$rundirpath.'».'.N,1);
} elseif (!is_dir($rundirpath)) {
	mexit('«'.$rundirpath.'» is not a directory.'.N,1);
} elseif (!(is_readable($rundirpath) && is_writeable($rundirpath))) {
	mexit('«'.$rundirpath.'» is not readable and writeable.'.N,1);
}
|
||
|
||
/*
 * Lock file handling.
 *
 * The lock is taken with fopen() mode «x», which creates the file only if it
 * does not exist yet: checking and creating happen in one step, so two
 * instances started at the same moment cannot both get the lock.
 *
 * $lockfp is assigned only once the lock is ours, because mexit() (also
 * reached from the signal handler) deletes the file $lockfp points to: while
 * the lock may still belong to another running instance it must not be
 * visible to mexit(). For the same reason failures here use eecho()+exit()
 * instead of mexit().
 */
$lockcandidate=$rundirpath.'/'.FNAME.'lock';
$lockfh=@fopen($lockcandidate,'x');
if ($lockfh!==false) {
	fclose($lockfh);
} elseif (!file_exists($lockcandidate)) {
	// Creation failed for a reason other than the file already existing.
	eecho(3,'could not touch file «'.$lockcandidate.'».'.N);
	exit(1);
} elseif (!$opts['ignorelock']) {
	eecho(3,'lock file «'.$lockcandidate.'» exists (if you are sure '.SNAME.' is not already running you can use option «-I» to force execution).'.N);
	exit(1);
} elseif (@touch($lockcandidate)===false) {
	// «-I» was given: take over the existing lock file.
	eecho(3,'could not touch file «'.$lockcandidate.'».'.N);
	exit(1);
}
$lockfp=$lockcandidate;
unset($lockcandidate,$lockfh);
|
||
|
||
/*
 * Configuration and database connection.
 *
 * Reads the connection parameters from the ini file and opens the MySQL link
 * used to load the list of instances. Depending on the mysqli report mode a
 * failure is signalled either by an exception or by a «false» return value;
 * both are handled, so a failed connection can never be used later on.
 */
$inifp=__DIR__.'/../../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp);
if ($iniarr===false) mexit('could not open config file «'.$inifp.'»'.N,1);

try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1); }
if ($link===false) {
	// mexit() closes $link whenever it is set: drop the failed handle first.
	unset($link);
	mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1);
}

try { $charsetok=mysqli_set_charset($link,'utf8mb4'); }
catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).'.'.N,1); }
if ($charsetok===false) mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).'.'.N,1);
unset($charsetok);
|
||
|
||
// Base command line shared by every child process; the host name is appended
// (shell-escaped) when a child is started. The script path is quoted so that
// an installation directory containing spaces or shell metacharacters cannot
// break the command, and it is built from CHILD so that it always matches the
// name shown in the help text. The numeric options come from validated
// integers, so they need no quoting.
$cmd=escapeshellarg(__DIR__.'/'.CHILD).' -t '.$opts['timeout'].' -D '.$opts['deadline'].'s -o '.$opts['oldline'].'s -l '.$opts['ldtoots'];
if (!$opts['setnew']) $cmd.=' -N';
if ($opts['dryrun']) $cmd.=' -d';
if ($opts['fetchusers']) $cmd.=' -f';
eecho(1,'base command: «'.$cmd.'».'.N);
|
||
|
||
/*
 * Session set-up.
 *
 * Either resumes the session described by «instances.job» (the shuffled list
 * of hosts, one per line) and «status.job» (first line: pool size, index of
 * the last host started, elapsed seconds, finished count, tab separated;
 * following lines: indexes of the hosts that were being processed), or builds
 * a new list of hosts from the database plus the optional peers file.
 *
 * On exit from this block the following are set for the main loop:
 * $insts, $cinsts, $procs, $instk, $toff, $done.
 */
$instsjfp=$rundirpath.'/instances.job';
$statusjfp=$rundirpath.'/status.job';

$tini=microtime(true);

// Starts one child on $host with its output discarded, and returns the
// process resource. The redirection is spelled «>/dev/null 2>&1» because
// proc_open() runs the command through /bin/sh: the «&>» shortcut is a bash
// extension that a POSIX shell reads as «run in background, then redirect»,
// which makes every child look finished straight away.
$spawn=function ($host) use ($cmd) {
	$proc=proc_open($cmd.' '.escapeshellarg($host).' >/dev/null 2>&1',[],$childpipes);
	if ($proc===false)
		mexit('could not start a child process for host «'.$host.'».'.N,1);
	return $proc;
};

$procs=[];

if (!$opts['dontrestore'] && file_exists($statusjfp) && file_exists($instsjfp)) {
	eecho(0,'looks like previous session was interrupted, trying to recover it...'.N);
	$insts=@file($instsjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
	if ($insts===false) mexit('could not open file «'.$instsjfp.'» for reading.'.N,1);
	$cinsts=count($insts);
	eecho(1,'loaded '.$cinsts.' hostnames from previous session file.'.N);

	$buf=@file($statusjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
	if ($buf===false) mexit('could not open file «'.$statusjfp.'» for reading.'.N,1);
	if (count($buf)<2) mexit('file «'.$statusjfp.'»: wrong format (1).'.N,1);

	$head=explode("\t",$buf[0]);
	// The elapsed time is a float written by PHP: accept any non negative
	// numeric spelling of it (including a single digit such as «5»).
	if (count($head)!=4 ||
		preg_match('/^\d+$/',$head[0])!==1 ||
		preg_match('/^\d+$/',$head[1])!==1 ||
		!is_numeric($head[2]) || $head[2]<0 ||
		preg_match('/^\d+$/',$head[3])!==1)
		mexit('file «'.$statusjfp.'»: wrong format (2).'.N,1);
	$opts['poolsize']=(int)$head[0];
	$instk=(int)$head[1];
	$toff=(float)$head[2];
	$done=(int)$head[3];

	// Restart every host that was being processed when the session stopped.
	for ($i=1; $i<count($buf); $i++) {
		if (preg_match('/^\d+$/',$buf[$i])!==1) mexit('file «'.$statusjfp.'»: wrong format (3).'.N,1);
		$slotinstk=(int)$buf[$i];
		// The two job files must agree: refuse indexes past the hosts list.
		if (!isset($insts[$slotinstk])) mexit('file «'.$statusjfp.'»: wrong format (4).'.N,1);
		$host=$insts[$slotinstk];
		eecho(1,'bootstrapping processes pool, adding host «'.$host.'».'.N);
		$procs[]=['proc'=>$spawn($host), 'instk'=>$slotinstk, 'host'=>$host, 'begts'=>microtime(true)];
	}
	eecho(1,'recovered previous session.'.N);
} else {
	$insts=[];
	// Hosts already queued, as array keys: membership tests stay O(1) however
	// long the list grows.
	$queued=[];

	eecho(0,'loading known, alive instances from the database...'.N);
	$query='SELECT URI FROM Instances WHERE Dead=0';
	// Keep the user supplied clauses apart from the last token of the query.
	if ($opts['moreclauses']!=='') $query.=' '.$opts['moreclauses'];
	$res=myq($link,$query,__LINE__);
	while ($row=mysqli_fetch_assoc($res)) {
		if (!isset($queued[$row['URI']])) {
			$queued[$row['URI']]=true;
			$insts[]=$row['URI'];
		}
	}
	eecho(1,'loaded '.count($insts).' known, alive instances from the database.'.N);

	if (!is_null($opts['peersfp'])) {
		eecho(0,'loading dead instances from the database...'.N);
		$res=myq($link,'SELECT URI FROM Instances WHERE Dead=1',__LINE__);
		$deadinsts=[];
		while ($row=mysqli_fetch_assoc($res))
			$deadinsts[$row['URI']]=true;
		eecho(1,'loaded '.count($deadinsts).' dead instances from the database.'.N);

		eecho(0,'loading instances from «'.$opts['peersfp'].'»...'.N);
		$peers=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
		if ($peers===false) mexit('could not open «'.$opts['peersfp'].'» for reading.'.N,1);
		$added=0;
		foreach ($peers as $pdom) {
			// Drop stray whitespace (e.g. the «\r» of a CRLF file).
			$pdom=trim($pdom);
			if ($pdom==='' || isset($queued[$pdom])) continue;
			if (isset($deadinsts[$pdom])) {
				eecho(1,'ignoring instance «'.$pdom.'» from peers file because it is dead.'.N);
				continue;
			}
			// willtrunc() is provided by an included file.
			if (willtrunc($pdom,'Instances','URI')) {
				eecho(2,'ignoring instance «'.$pdom.'» from peers file because its hostname is too long for column «URI» of table «Instances».'.N);
				continue;
			}
			$added++;
			$queued[$pdom]=true;
			$insts[]=$pdom;
		}
		eecho(1,'loaded '.$added.' instances from «'.$opts['peersfp'].'».'.N);
	}

	unset($deadinsts,$queued);
	shuffle($insts);
	$cinsts=count($insts);
	eecho(1,$cinsts.' instances to be checked.'.N);

	// Save the shuffled list: an interrupted session is resumed from it.
	$instsf=@fopen($instsjfp,'w');
	if ($instsf===false) mexit('could not open «'.$instsjfp.'» for writing.'.N,1);
	foreach ($insts as $host) fwrite($instsf,$host.N);
	fclose($instsf);

	$toff=0;
	$done=0;

	// Fill the pool with the first hosts of the list.
	for ($instk=0; $instk<$opts['poolsize'] && $instk<$cinsts; $instk++) {
		$host=$insts[$instk];
		eecho(1,'bootstrapping processes pool, adding host «'.$host.'».'.N);
		$procs[]=['proc'=>$spawn($host), 'instk'=>$instk, 'host'=>$host, 'begts'=>microtime(true)];
	}
	// From here on $instk is the index of the last host that was started.
	$instk--;
}
|
||
|
||
// The database is only needed to build the list of hosts: the children open
// their own connections.
mysqli_close($link);
unset($link);

/*
 * Main loop.
 *
 * Once per second every pool slot is inspected: a slot whose child has
 * finished gets the next host of the list, or is retired (set to null) when
 * the list is exhausted. After each round the session status is saved so the
 * run can be resumed. The loop ends when no slot holds a child any more.
 */
$rundone=false;

do {
	$now=microtime(true);
	// Seconds elapsed so far, including those of the resumed session if any.
	$eta=$now-$tini+$toff;
	eecho(1,'[[[ CHECKING PROCESSES POOL ]]]'.N);
	$somerun=false;
	foreach ($procs as $key=>$proc) {
		if (is_null($proc)) continue;

		$pstat=proc_get_status($proc['proc']);
		if ($pstat['running']) {
			eecho(1,'proc slot '.$key.': been running on «'.$proc['host'].'» for '.ght($now-$proc['begts']).'.'.N);
			$somerun=true;
			continue;
		}

		$done++;
		$out='proc slot '.$key.': finished running on «'.$proc['host'].'» (exit code: '.$pstat['exitcode'].')';
		if ($instk<$cinsts-1) {
			$instk++;
			$host=$insts[$instk];
			// POSIX redirection: /bin/sh may not understand bash's «&>» and
			// would run the child in the background instead of silencing it.
			$newproc=proc_open($cmd.' '.escapeshellarg($host).' >/dev/null 2>&1',[],$childpipes);
			if ($newproc===false)
				mexit('could not start a child process for host «'.$host.'».'.N,2);
			$procs[$key]=['proc'=>$newproc, 'instk'=>$instk, 'host'=>$host, 'begts'=>$now];
			// The slot is busy again: without this flag a round in which all
			// the slots finished together would end the run with hosts still
			// queued (always the case with a pool of one slot).
			$somerun=true;
			$out.=', started a new process on «'.$host.'».'.N;
		} else {
			$out.='; no more hosts to check.'.N;
			$procs[$key]=null;
		}
		eecho(1,$out);
	}

	// With no instances at all there is nothing to divide by.
	$percent=($cinsts>0) ? round(100/$cinsts*$done) : 100;
	$out=$done.'/'.$cinsts.' ('.$percent.'%); elapsed time: '.ght($eta);
	if ($done>0) $out.='; estimated time remaining: '.ght($cinsts*$eta/$done-$eta);
	eecho(1,$out.'.'.N);

	if ($somerun) {
		writestatus($statusjfp,$opts,$instk,$eta,$done,$procs);
		sleep(1);
	} else {
		$rundone=true;
	}
} while (!$rundone);

// Session completed: remove what would otherwise trigger a recovery. The
// status file does not exist when no status was ever saved.
foreach ([$instsjfp,$statusjfp,$lockfp] as $leftover)
	if (is_file($leftover)) unlink($leftover);
eecho(1,'Done :-)'.N);
exit(0);
|
||
|
||
|
||
// functions
|
||
|
||
/**
 * Saves the state of the running session, so that it can be resumed.
 *
 * The first line holds pool size, index of the last started host, elapsed
 * seconds and number of finished hosts, separated by tabs; each following
 * line holds the hosts-list index of a host currently being processed.
 *
 * The data is written to a temporary file which then replaces the status
 * file with rename(): an interruption (for instance the signal handler
 * firing half way) leaves the previous, complete status file in place
 * instead of a truncated one. The elapsed time is written in plain fixed
 * notation, never in exponent form nor as a bare single digit.
 *
 * Terminates the script through mexit() (exit code 2) if the file cannot be
 * written. Parameters are taken by reference but are not modified.
 *
 * @param string $statusjfp path of the status file
 * @param array  $opts      options; only 'poolsize' is read
 * @param int    $instk     index of the last host that was started
 * @param float  $eta       seconds elapsed since the session began
 * @param int    $done      number of hosts already processed
 * @param array  $procs     pool slots; null entries are retired slots
 */
function writestatus(&$statusjfp,&$opts,&$instk,&$eta,&$done,&$procs) {
	$data=$opts['poolsize']."\t".$instk."\t".sprintf('%.6F',$eta)."\t".$done.N;
	foreach ($procs as $proc)
		if (!is_null($proc))
			$data.=$proc['instk'].N;

	$tmpfp=$statusjfp.'.tmp';
	if (@file_put_contents($tmpfp,$data)===false || !@rename($tmpfp,$statusjfp)) {
		if (is_file($tmpfp)) @unlink($tmpfp);
		mexit('could not open «'.$statusjfp.'» for writing.'.N,2);
	}
}
|
||
|
||
/**
 * Prints a message prefixed by a timestamp (with sub-second digits) and by
 * the name of its severity level.
 *
 * Levels 0 (Debug) and 1 (Info) go to standard output, levels 2 (Warning)
 * and 3 (Error) go to standard error. Nothing is appended to the message:
 * the caller supplies the trailing newline.
 *
 * @param int    $lev severity, 0 to 3
 * @param string $msg text to print
 */
function eecho($lev,$msg) {
	static $labels=[0=>'Debug', 1=>'Info', 2=>'Warning', 3=>'Error'];

	// microtime() yields «0.fraction seconds»: keep the digits after «0.».
	list($fraction,$seconds)=explode(' ',microtime(false));
	$stamp=date('Y-m-d H:i:s',$seconds).'.'.substr($fraction,2);
	$line=$stamp.' '.$labels[$lev].': '.$msg;

	if ($lev<2) {
		echo($line);
		return;
	}
	fwrite(STDERR,$line);
}
|
||
|
||
/**
 * Releases what the script holds, prints a final message and terminates.
 *
 * Closes the global database link and the global $jsonf handle when they are
 * set, and removes the lock file when $lockfp is set and is a regular file.
 * The message is reported as an Error when $code is not 0, as Info otherwise.
 *
 * @param string $msg  final message (the caller supplies the newline)
 * @param int    $code exit status of the script
 */
function mexit($msg,$code) {
	global $link, $jsonf, $lockfp;

	if (isset($link)) {
		mysqli_close($link);
	}
	if (isset($jsonf)) {
		fclose($jsonf);
	}
	if (isset($lockfp) && is_file($lockfp)) {
		unlink($lockfp);
	}

	eecho(($code!=0) ? 3 : 1,$msg);
	exit($code);
}
|
||
|
||
/**
 * Runs a query and returns its result, terminating the script on failure.
 *
 * Depending on the mysqli report mode a failed query either throws or makes
 * mysqli_query() return false: both cases end the script through mexit()
 * with exit code 3, so callers never receive false.
 *
 * @param mysqli $link  open database link (taken by reference, not modified)
 * @param string $query SQL text to execute
 * @param int    $line  source line of the call, quoted in the error message
 * @return mysqli_result|bool whatever mysqli_query() returned on success
 */
function myq(&$link,$query,$line) {
	try {
		$res=mysqli_query($link,$query);
	}
	catch (Exception $error) {
		mexit('query «'.$query.'» (line '.$line.') failed: '.$error->getMessage().N,3);
	}
	if ($res===false)
		mexit('query «'.$query.'» (line '.$line.') failed: '.mysqli_error($link).N,3);
	return($res);
}
|
||
|
||
?>
|