Removed the “restore” option: it could be made to work, but it’s not very useful and would be a big hassle to implement; added counters for loops and for newly found instances; made sighandler use mexit

This commit is contained in:
pezcurrel 2022-12-25 09:24:23 +01:00
parent d6b77b0e29
commit c0802de828

View file

@ -33,7 +33,6 @@ $opts=[
'peersfp'=>__DIR__.'/peers',
'apeersfp'=>__DIR__.'/peers.all',
'cpeersfp'=>__DIR__.'/peers.checked',
'restore'=>false,
'excludefp'=>null,
'timeout'=>8,
'curltimeout'=>15,
@ -73,11 +72,6 @@ OPTIONS
Normally, if its lockfile exists, the program exits with an error before
doing anything. With this option the lockfile is ignored. Please verify
that the program is not already running before using it.
-r, --restore
>>> Currently not working, causes script to just exit with an error message.
If peers files «peers», «peers.all», «peers.checked» exist on programs
start they will be loaded, thus allowing to restore an interrupted previous
crawling session. This option is mutually exclusive with the «loop» option.
-e, --excludefp <file>
Defines a file containing exclusion rules: one regular expression per
line (empty lines are ignored). Any instance matching any defined regex
@ -88,7 +82,7 @@ OPTIONS
-l, --loop
Normally the script will exit after completing a crawl; if this option
is set, it will restart crawling until it receives a SIGTERM, SIGHUP
or SIGINT. This option is mutually exclusive with the «restore» option.
or SIGINT.
-t, --timeout <seconds>
Defines the timeout in seconds for every connection attempt.
DEFAULT: «'.$opts['timeout'].'»
@ -125,8 +119,6 @@ for ($i=1; $i<$argc; $i++) {
mexit('Error: option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
$i++;
$opts['cpeersfp']=$argv[$i];
} elseif ($argv[$i]=='-r' || $argv[$i]=='--restore') {
$opts['restore']=true;
} elseif ($argv[$i]=='-I' || $argv[$i]=='--ignorelock') {
$opts['ignorelock']=true;
} elseif ($argv[$i]=='-e' || $argv[$i]=='--excludefp') {
@ -157,10 +149,6 @@ for ($i=1; $i<$argc; $i++) {
}
}
if ($opts['restore']) mexit('Error: “restore” options code has to be finished, it currently doesnt work; exiting.'.N,1);
if ($opts['loop'] && $opts['restore']) mexit('Error: “loop” and “restore” options are mutually exclusive (use «-h» to read the help text).'.N,1);
$lockfp=__DIR__.'/'.BNAME.'.lock';
if (is_file($lockfp) && !$opts['ignorelock']) {
gecho('Error: lockfile exists: it seems the program is already running; if youre sure its not, you can use «-I» to force execution.'.N,false,true);
@ -205,37 +193,27 @@ $ainsts=[];
$exarr=[];
$notifs=[];
$mode=['mode'=>'w','desc'=>'write'];
if ($opts['restore']) {
$insts=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
if ($insts===false) mexit('Error: couldnt open «'.$opts['peersfp'].'» for reading.'.N,1);
$cinsts=@file($opts['cpeersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
if ($cinsts===false) mexit('Error: couldnt open «'.$opts['cpeersfp'].'» for reading.'.N,1);
$ainsts=@file($opts['apeersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
if ($ainsts===false) mexit('Error: couldnt open «'.$opts['apeersfp'].'» for reading.'.N,1);
$mode=['mode'=>'a','desc'=>'append'];
gecho('Succesfully restored previous session :-)'.N,true,false);
}
$cloop=0;
do {
$peersf=@fopen($opts['peersfp'],$mode['mode']);
if ($peersf===false) mexit('Error: couldnt open «'.$opts['peersfp'].'» in '.$mode['desc'].' mode.'.N,1);
$apeersf=@fopen($opts['apeersfp'],$mode['mode']);
if ($apeersf===false) mexit('Error: couldnt open «'.$opts['apeersfp'].'» in '.$mode['desc'].' mode.'.N,1);
$cpeersf=@fopen($opts['cpeersfp'],$mode['mode']);
if ($cpeersf===false) mexit('Error: couldnt open «'.$opts['cpeersfp'].'» in '.$mode['desc'].' mode.'.N,1);
$peersf=@fopen($opts['peersfp'],'w');
if ($peersf===false) mexit('Error: couldnt open «'.$opts['peersfp'].'» in write mode.'.N,1);
$apeersf=@fopen($opts['apeersfp'],'w');
if ($apeersf===false) mexit('Error: couldnt open «'.$opts['apeersfp'].'» in write mode.'.N,1);
$cpeersf=@fopen($opts['cpeersfp'],'w');
if ($cpeersf===false) mexit('Error: couldnt open «'.$opts['cpeersfp'].'» in write mode.'.N,1);
$cloop++;
$maxround=1;
$newc=0;
$tini=time();
// go
if ($opts['restore'])
crawl($insts,1);
else
crawl([$opts['startinst']],1);
crawl([$opts['startinst']],1);
gecho('Done crawling! :-)'.N,true,false);
$now=time();
gecho('Crawl started on '.date('Y-m-d H:i:s',$tini).' and ended on '.date('Y-m-d H:i:s',$now).'.'.N,true,false);
gecho(count($ainsts).' URIs checked in '.ght($now-$tini).' ('.$maxround.' rounds); '.count($insts).' responded. Max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N,true,false);
gecho(count($ainsts).' URIs checked in '.ght($now-$tini).', '.$maxround.' rounds; '.count($insts).' responded; found '.$newc.' new instances; max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N,true,false);
gecho('Loop(s): '.$cloop.N,true,false);
sleep(1);
fclose($peersf);
fclose($cpeersf);
fclose($apeersf);
@ -245,7 +223,6 @@ do {
$insts=[];
$cinsts=[];
$ainsts=[];
$mode=['mode'=>'w','desc'=>'write'];
} while ($opts['loop']);
mysqli_close($link);
unlink($lockfp);
@ -255,7 +232,7 @@ exit(0);
// functions
function crawl($list,$id) {
global $insts, $deadinsts, $cinsts, $ainsts, $tini, $opts, $peersf, $cpeersf, $apeersf, $maxround, $link;
global $insts, $deadinsts, $cinsts, $ainsts, $tini, $opts, $peersf, $cpeersf, $apeersf, $maxround, $newc, $link;
gecho('###### START OF ROUND '.$id.' ######'.N,true,false);
$nlist=[];
$c=count($list);
@ -269,7 +246,7 @@ function crawl($list,$id) {
$i++;
$now=time();
$rtela=$now-$rtini;
gecho('Working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list.'.N,true,false);
gecho('Working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list; '.$newc.' new instances found.'.N,true,false);
waituntilonline();
updexarr();
gecho('Trying to load «'.$inst.s peers...'.N,true,false);
@ -416,13 +393,8 @@ function sortcheckandsave(&$arr,$arrdesc,&$fp) {
}
// Signal handler: receives the signal number in $signal and terminates the script.
// NOTE(review): this listing comes from a diff view without +/- markers, so the
// old and new bodies appear back to back. Per the commit message ("made
// sighandler use mexit") the manual cleanup ending in exit(0) is presumably the
// removed code and the trailing echo/mexit pair its replacement — confirm
// against the actual file.
function sighandler($signal) {
global $peersf, $cpeersf, $apeersf, $lockfp;
// Close each peers file handle that is set and is not false.
if (isset($peersf) && $peersf!==false) @fclose($peersf);
if (isset($cpeersf) && $cpeersf!==false) @fclose($cpeersf);
if (isset($apeersf) && $apeersf!==false) @fclose($apeersf);
// Remove the lockfile if it is set and exists on disk.
if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
echo(N.'Interrupted (signal: '.$signal.').'.N);
exit(0);
// As literally written here, nothing below runs because exit(0) above ends the script.
// mexit() is defined outside this view; presumably it performs the cleanup
// and exits with the given status (0) — TODO confirm.
echo(N);
mexit('Interrupted (signal: '.$signal.').'.N,0);
}
function isempty($val) {