Added “lecho” for “message levels”, removed “gecho”, removed “verbose” option; removed “loop” option (do loop from a shell script if needed)

This commit is contained in:
pezcurrel 2022-12-25 11:32:08 +01:00
parent 1d0c6b799a
commit 10e2e1b58a

View file

@ -38,11 +38,15 @@ $opts=[
'timeout'=>8,
'curltimeout'=>15,
'loop'=>false,
'verbose'=>false,
'excludedead'=>false,
'ignorelock'=>false
'ignorelock'=>false,
'minmsgimplev'=>1
];
// Display names of the message "importance levels"; the array index is the
// numeric level passed to lecho() (0 = Debug ... 3 = Error).
$msgimplevs=['Debug', 'Info', 'Warning', 'Error'];
// Lower-cased variants of the same names, in the same order: these are the
// values matched against the argument of «-m»/«--minmsgimplev», and their
// index is what gets stored in $opts['minmsgimplev'].
foreach ($msgimplevs as $val) $imsgimplevs[]=lcfirst($val);
// Extra pseudo-level placed after the real ones (index 4): since lecho() only
// writes messages whose level is >= the configured minimum, selecting it
// silences every message.
$imsgimplevs[]='none';
$ghtsa=[[' day',' days'],[' hour',' hours'],[' minute',' minutes'],[' second',' seconds']];
$help='SYNOPSIS
@ -86,10 +90,6 @@ OPTIONS
execution will be taken into account.
-E, --excludedead
Exclude instances marked as “Dead” in the database.
-l, --loop
Normally the script will exit after completing a crawl; if this option
is set, it will restart crawling until it receives a SIGTERM, SIGHUP
or SIGINT.
-t, --timeout <time>
Defines the timeout in seconds for every connection attempt. See section
«TIME SPECIFICATION» below to see how to specify time.
@ -98,8 +98,14 @@ OPTIONS
Defines the timeout in seconds for every download. See section «TIME
SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['curltimeout'],$ghtsa).'
-v, --verbose
Be more verbose.
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written to the text
user interface. There are 4 “importance levels”, in this order of
importance: «debug», «info», «warning», «error». Setting this option to any
of these values will write to the text user interface all the messages with
the specified or a greater level; setting it to the special value «none»
will completely disable message writing.
DEFAULT: '.$imsgimplevs[$opts['minmsgimplev']].'
-h, --help
Show this help text and exit.
@ -117,65 +123,64 @@ TIME SPECIFICATION
for ($i=1; $i<$argc; $i++) {
if ($argv[$i]=='-s' || $argv[$i]=='--startinst') {
if ($i+1>=$argc)
mexit('Error: option «'.$argv[$i].'» has to be followed by a domain name (use «-h» for more info).'.N,1);
mexit(3,'option «'.$argv[$i].'» has to be followed by a domain name (use «-h» for more info).'.N,1,false);
$i++;
$opts['startinst']=$argv[$i];
} elseif ($argv[$i]=='-d' || $argv[$i]=='--deadline') {
if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
mexit('Error: option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1);
mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
$i++;
$opts['deadline']=$time;
} elseif ($argv[$i]=='-p' || $argv[$i]=='--peersfp') {
if ($i+1>=$argc)
mexit('Error: option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
mexit(3,'option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1,false);
$i++;
$opts['peersfp']=$argv[$i];
} elseif ($argv[$i]=='-a' || $argv[$i]=='--apeersfp') {
if ($i+1>=$argc)
mexit('Error: option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
mexit(3,'option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1,false);
$i++;
$opts['apeersfp']=$argv[$i];
} elseif ($argv[$i]=='-c' || $argv[$i]=='--cpeersfp') {
if ($i+1>=$argc)
mexit('Error: option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
mexit(3,'option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1,false);
$i++;
$opts['cpeersfp']=$argv[$i];
} elseif ($argv[$i]=='-I' || $argv[$i]=='--ignorelock') {
$opts['ignorelock']=true;
} elseif ($argv[$i]=='-e' || $argv[$i]=='--excludefp') {
if ($i+1>=$argc)
mexit('Error: option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
mexit(3,'option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1,false);
$i++;
$opts['excludefp']=$argv[$i];
} elseif ($argv[$i]=='-t' || $argv[$i]=='--timeout') {
if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
mexit('Error: option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1);
mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
$i++;
$opts['timeout']=$time;
} elseif ($argv[$i]=='-T' || $argv[$i]=='--curltimeout') {
if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
mexit('Error: option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1);
mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
$i++;
$opts['curltimeout']=$time;
} elseif ($argv[$i]=='-E' || $argv[$i]=='--excludedead') {
$opts['excludedead']=true;
} elseif ($argv[$i]=='-l' || $argv[$i]=='--loop') {
$opts['loop']=true;
} elseif ($argv[$i]=='-v' || $argv[$i]=='--verbose') {
$opts['verbose']=true;
} elseif ($argv[$i]=='-m' || $argv[$i]=='--minmsgimplev') {
if ($i+1>=$argc || !in_array(strtolower($argv[$i+1]),$imsgimplevs))
mexit(3,'option «'.$argv[$i].'» requires a valid “message importance level” value as an argument (use «-h» to read help).'.N,1,false);
$i++;
$opts['minmsgimplev']=array_search(strtolower($argv[$i]),$imsgimplevs);
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
mexit($help,0);
echo($help);
exit(0);
} else {
mexit('Error: dont know how to interpret «'.$argv[$i].'» (use «-h» to read the help text).'.N,1);
mexit(3,'dont know how to interpret «'.$argv[$i].'» (use «-h» to read the help text).'.N,1,false);
}
}
$lockfp=__DIR__.'/'.BNAME.'.lock';
if (is_file($lockfp) && !$opts['ignorelock']) {
gecho('Error: lockfile exists: it seems the program is already running; if youre sure its not, you can use «-I» to force execution.'.N,false,true);
exit(1);
}
if (@touch($lockfp)===false) mexit('Error: could not create lockfile «'.$lockfp.'».'.N,false,true);
if (is_file($lockfp) && !$opts['ignorelock']) mexit(3,'lockfile exists: it seems the program is already running; if youre sure its not, you can use «-I» to force execution.'.N,1,false);
if (@touch($lockfp)===false) mexit(3,'could not create lockfile «'.$lockfp.'».'.N,1,false);
//declare(ticks=1);
pcntl_async_signals(true);
@ -183,16 +188,16 @@ pcntl_signal(SIGTERM,'sighandler');// Termination ('kill' was called)
pcntl_signal(SIGHUP,'sighandler');// Terminal log-out
pcntl_signal(SIGINT,'sighandler');// Interrupted (Ctrl-C is pressed)
$iniarr=@parse_ini_file($opts['inifp'])
or mexit('Error: couldnt open «'.$opts['inifp'].'».'.N,1);
$iniarr=@parse_ini_file($opts['inifp']);
if ($iniarr===false) mexit(3,'couldnt open «'.$opts['inifp'].'».'.N,1,true);
try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
catch (Exception $error) { mexit('Error: couldnt connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
catch (Exception $error) { mexit(3,'couldnt connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
// for php versions < 8
if ($link===false) mexit('Error: couldnt connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
if ($link===false) mexit(3,'couldnt connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
try { $res=mysqli_set_charset($link,'utf8mb4'); }
catch (Exception $error) { mexit('Error: couldnt set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).'.'.N,1,true); }
catch (Exception $error) { mexit(3,'couldnt set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).'.'.N,1,true); }
// for php versions < 8
if ($res===false) mexit('Error: couldnt set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true);
if ($res===false) mexit(3,'couldnt set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true);
$deadinsts=[];
if ($opts['excludedead']) {
@ -205,7 +210,7 @@ if ($opts['excludedead']) {
if (!in_array($row['Hostname'],$deadinsts))
$deadinsts[]=$row['URI'];
unset($res,$row);
gecho('Loaded list of dead instances ('.count($deadinsts).').'.N,true,false);
lecho(1,'loaded list of dead instances ('.count($deadinsts).').'.N);
}
$insts=[];
@ -214,39 +219,31 @@ $ainsts=[];
$exarr=[];
$notifs=[];
$cloop=0;
$peersf=@fopen($opts['peersfp'],'w');
if ($peersf===false) mexit(3,'couldnt open «'.$opts['peersfp'].'» in write mode.'.N,1,true);
$apeersf=@fopen($opts['apeersfp'],'w');
if ($apeersf===false) mexit(3,'couldnt open «'.$opts['apeersfp'].'» in write mode.'.N,1,true);
$cpeersf=@fopen($opts['cpeersfp'],'w');
if ($cpeersf===false) mexit(3,'couldnt open «'.$opts['cpeersfp'].'» in write mode.'.N,1,true);
do {
$peersf=@fopen($opts['peersfp'],'w');
if ($peersf===false) mexit('Error: couldnt open «'.$opts['peersfp'].'» in write mode.'.N,1);
$apeersf=@fopen($opts['apeersfp'],'w');
if ($apeersf===false) mexit('Error: couldnt open «'.$opts['apeersfp'].'» in write mode.'.N,1);
$cpeersf=@fopen($opts['cpeersfp'],'w');
if ($cpeersf===false) mexit('Error: couldnt open «'.$opts['cpeersfp'].'» in write mode.'.N,1);
$cloop++;
$maxround=1;
$newc=0;
$tini=time();
// go
crawl([$opts['startinst']],1);
gecho('Done crawling! :-)'.N,true,false);
$now=time();
gecho('Crawl started on '.date('Y-m-d H:i:s',$tini).' and ended on '.date('Y-m-d H:i:s',$now).'.'.N,true,false);
gecho(count($ainsts).' URIs checked in '.ght($now-$tini).', '.$maxround.' rounds; '.count($insts).' responded; found '.$newc.' new instances; max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N,true,false);
gecho('Loop(s): '.$cloop.N,true,false);
sleep(1);
fclose($peersf);
fclose($cpeersf);
fclose($apeersf);
sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
sortcheckandsave($cinsts,'list of checked instances',$opts['cpeersfp']);
sortcheckandsave($ainsts,'list of all instances',$opts['apeersfp']);
$insts=[];
$cinsts=[];
$ainsts=[];
} while ($opts['loop']);
// Initialise the counters that crawl() reads and updates through its global
// declarations, and remember when the crawl started.
$maxround=1;
$newc=0;
$tini=time();
// Start crawling from the configured start instance as round 1; crawl() calls
// itself for every following round until the next round list is empty.
crawl([$opts['startinst']],1);
lecho(1,'done crawling! :-)'.N);
$now=time();
// Summary: start/end timestamps, then totals (URIs checked, elapsed time,
// rounds, responding instances, new instances, peak memory).
lecho(1,'crawl started on '.date('Y-m-d H:i:s',$tini).' and ended on '.date('Y-m-d H:i:s',$now).'.'.N);
lecho(1,count($ainsts).' URIs checked in '.ght($now-$tini).', '.$maxround.' rounds; '.count($insts).' responded; found '.$newc.' new instances; max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N);
// Close the list files that were written incrementally during the crawl
// before sortcheckandsave() reopens the very same paths in write mode.
fclose($peersf);
fclose($cpeersf);
fclose($apeersf);
// Rewrite each list file from the corresponding in-memory array
// (de-duplicated and sorted by sortcheckandsave()).
sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
sortcheckandsave($cinsts,'list of checked instances',$opts['cpeersfp']);
sortcheckandsave($ainsts,'list of all instances',$opts['apeersfp']);
// Normal termination: release the database connection and remove the
// lockfile so that a later run is not refused.
mysqli_close($link);
unlink($lockfp);
lecho(1,'done :-)'.N);
exit(0);
@ -254,7 +251,7 @@ exit(0);
function crawl($list,$id) {
global $insts, $deadinsts, $cinsts, $ainsts, $tini, $opts, $peersf, $cpeersf, $apeersf, $maxround, $newc, $link;
gecho('###### START OF ROUND '.$id.' ######'.N,true,false);
lecho(1,'###### START OF ROUND '.$id.' ######'.N);
$nlist=[];
$c=count($list);
$i=0;
@ -267,30 +264,30 @@ function crawl($list,$id) {
$i++;
$now=time();
$rtela=$now-$rtini;
gecho('Working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list; '.$newc.' new instances found.'.N,true,false);
lecho(1,'working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list; '.$newc.' new instances found.'.N);
waituntilonline();
updexarr();
gecho('Trying to load «'.$inst.s peers...'.N,true,false);
lecho(0,'trying to load «'.$inst.s peers...'.N);
$peers=gurl('https://'.$inst.'/api/v1/instance/peers',$opts['timeout'],$opts['curltimeout']);
$cinsts[]=$inst;// don't need to check if in_array
fwrite($cpeersf,$inst.N);
$responded=0;
if ($peers['cont']===false) {
gecho('Error loading «'.$inst.s peers: '.$peers['emsg'].'.'.N,true,true);
lecho(2,'could not load «'.$inst.s peers: '.$peers['emsg'].'.'.N);
} else {
$peers=@json_decode($peers['cont'],true);
if (!is_array($peers)) {
gecho('Error loading «'.$inst.s peers: got not good JSON.'.N,true,true);
lecho(2,'loading «'.$inst.s peers, got not good JSON.'.N);
} else {
gecho('Successfully loaded «'.$inst.s peers :-)'.N,true,false);
lecho(1,'successfully loaded «'.$inst.s peers :-)'.N);
$responded=1;
if (!in_array($inst,$insts)) {
gecho('Instance «'.$inst.'» responded :-)'.N,true,false);
lecho(1,'instance «'.$inst.'» responded :-)'.N);
$insts[]=$inst;
fwrite($peersf,$inst.N);
$res=myq($link,'SELECT ID FROM Instances WHERE URI=\''.myesc($link,$inst).'\'');
if (mysqli_num_rows($res)==0) {
gecho('Instance «'.$inst.'» is new :-)'.N,true,false);
lecho(1,'instance «'.$inst.'» is new :-)'.N);
myq($link,'INSERT INTO Instances SET URI=\''.myesc($link,$inst).'\', InsertTS='.time());
}
}
@ -308,9 +305,9 @@ function crawl($list,$id) {
if (in_array($peer,$nlist)) $whynot[]='it has already been added to next round list';
if ($opts['excludedead'] && in_array($peer,$deadinsts)) $whynot[]='its dead';
if (count($whynot)>0) {
if ($opts['verbose']) gecho(' Not adding peer «'.$peer.'» to next round list because '.implode(', ',$whynot).'.'.N,true,true);
lecho(0,'not adding peer «'.$peer.'» to next round list because '.implode(', ',$whynot).'.'.N);
} else {
if ($opts['verbose']) gecho(' Adding peer «'.$peer.'» to next round list :-)'.N,true,false);
lecho(1,'adding peer «'.$peer.'» to next round list :-)'.N);
$nlist[]=$peer;
}
}
@ -320,7 +317,7 @@ function crawl($list,$id) {
$res=myq($link,'SELECT * FROM Peers WHERE Hostname=\''.myesc($link,$inst).'\'');
$nrows=mysqli_num_rows($res);
if ($nrows>0) {
if ($nrows>1) gecho('«'.$inst.'» has '.$nrows.' records in Peers table! :-('.N,true,true);
if ($nrows>1) lecho(2,'«'.$inst.'» has '.$nrows.' records in Peers table! :-('.N);
$row=mysqli_fetch_assoc($res);
$instid=$row['ID'];
$dead=0;
@ -334,11 +331,11 @@ function crawl($list,$id) {
$row=mysqli_fetch_assoc($res);
if ($now-$row['Time']>$opts['deadline']) {
$dead=1;
gecho('«'.$instid.'» just died!',true,true);
lecho(2,'instance «'.$inst.'» just died!'.N);
$deadinsts[]=$inst;
}
} else {
gecho('«'.$inst.'» exists in Peers table but theres no data about it in PeersChecks!'.N,true,true);
lecho(2,'instance «'.$inst.'» exists in Peers table but theres no data about it in PeersChecks!'.N);
}
}
$query='UPDATE Peers SET Hostname=\''.myesc($link,$inst).'\', IsDead='.$dead.' WHERE ID='.$instid;
@ -354,39 +351,39 @@ function crawl($list,$id) {
crawl($nlist,$id+1);
$maxround=$id+1;
} else {
gecho('Next round list is empty.'.N,true,false);
lecho(1,'next round list is empty.'.N);
}
gecho('###### END OF ROUND '.$id.' ######'.N,true,false);
lecho(1,'###### END OF ROUND '.$id.' ######'.N);
}
function mexit($msg,$code) {
function mexit($lev,$msg,$code,$remlock) {
global $link, $peersf, $cpeersf, $apeersf, $lockfp;
if (isset($link) && $link!==false) mysqli_close($link);
if (isset($peersf) && $peersf!==false) @fclose($peersf);
if (isset($cpeersf) && $cpeersf!==false) @fclose($cpeersf);
if (isset($apeersf) && $apeersf!==false) @fclose($apeersf);
if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
if ($code==0)
echo($msg);
else
fwrite(STDERR,$msg);
if ($remlock && isset($lockfp) && is_file($lockfp)) unlink($lockfp);
lecho($lev,$msg);
exit($code);
}
function gecho($msg,$prtime,$iserr) {
if ($prtime)
$msg=microdate().' '.$msg;
if ($iserr)
fwrite(STDERR,$msg);
else
echo($msg);
/**
 * Write a timestamped, level-tagged message to the text user interface.
 *
 * The message is dropped when its level is lower than $opts['minmsgimplev'].
 * Otherwise it is prefixed with microdate() and the level name taken from
 * $msgimplevs, then written to standard output (levels below 2) or to STDERR
 * (levels 2 and above).
 *
 * @param int    $lev  Importance level; used as an index into $msgimplevs.
 * @param string $msg  Message text, written as given (no newline is added here).
 */
function lecho($lev,$msg) {
    global $opts, $msgimplevs;
    // Decide first, format later: suppressed messages (e.g. the per-peer debug
    // lines) must not pay for a microdate() call and a string concatenation.
    if ($lev<$opts['minmsgimplev']) return;
    $line=microdate().' '.$msgimplevs[$lev].': '.$msg;
    if ($lev<2)
        echo($line);
    else
        fwrite(STDERR,$line);
}
function myq(&$link,$query) {
try { $res=mysqli_query($link,$query); }
catch (Exception $error) { mexit('Error: query «'.$query.'» failed: '.$error->getMessage().' ('.$error->getCode().').'.N,2); }
catch (Exception $error) { mexit(3,'query «'.$query.'» failed: '.$error->getMessage().' ('.$error->getCode().').'.N,2,true); }
// for php versions < 8, which seem to not catch mysql exceptions
if ($res===false) mexit('Error: query «'.$query.'» failed: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,2);
if ($res===false) mexit(3,'query «'.$query.'» failed: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,2,true);
return($res);
}
@ -400,8 +397,8 @@ function sortcheckandsave(&$arr,$arrdesc,&$fp) {
$buc=count($arr);
$arr=array_unique($arr);
$auc=count($arr);
if ($buc!=$auc) gecho('Warning: '.$arrdesc.' contained duplicates, better check code ;-)'.N,true,true);
gecho('Saving ordered '.$arrdesc.' into «'.$fp.'».'.N,true,false);
if ($buc!=$auc) lecho(2,$arrdesc.' contained duplicates, better check the code ;-)'.N);
lecho(1,'saving ordered '.$arrdesc.' into «'.$fp.'».'.N);
sort($arr);
$f=@fopen($fp,'w');
if ($f!==false) {
@ -409,13 +406,13 @@ function sortcheckandsave(&$arr,$arrdesc,&$fp) {
fwrite($f,$val.N);
fclose($f);
} else {
gecho('Error: couldnt open «'.$fp.'» for writing.'.N,true,true);
lecho(2,'couldnt open «'.$fp.'» for writing.'.N);
}
}
function sighandler($signal) {
echo(N);
mexit('Interrupted (signal: '.$signal.').'.N,0);
mexit(1,'interrupted (signal: '.$signal.').'.N,0,true);
}
function isempty($val) {
@ -430,11 +427,11 @@ function waituntilonline() {
$gotoff=false;
while (false===($f=@fsockopen($url,80,$errno,$errstr,1))) {
$gotoff=true;
gecho('Warning: it seems we are offline, waiting 10 seconds before retrying...'.N,true,true);
lecho(2,'it seems we are offline, waiting for 10 seconds before retrying...'.N);
sleep(10);
}
fclose($f);
if ($gotoff) gecho('It seems we are back online! :-)'.N,true,false);
if ($gotoff) lecho(1,'it seems we are back online! :-)'.N);
}
function updexarr() {
@ -451,11 +448,11 @@ function updexarr() {
if (@preg_match($line,'foo')!==false)
$exarr[]=$line;
else
gecho('WARNING: «'.$opts['excludefp'].'», line '.$i.': «'.$line.'» is not a valid regular expression.'.N,true,true);
lecho(2,'exclude file «'.$opts['excludefp'].'» contains an invalid regular expression on line '.$i.': «'.$line.'».'.N);
}
}
} else {
gecho('WARNING: I could not open «'.$opts['excludefp'].'» for reading.'.N,true,true);
lecho(2,'could not open exclude file «'.$opts['excludefp'].'» for reading.'.N);
}
}
}