Removed the “restore” option: it could work, but it’s not very useful and finishing it would be a big hassle; added counters for loops and newly found instances; made sighandler use mexit
This commit is contained in:
parent
d6b77b0e29
commit
c0802de828
1 changed file with 18 additions and 46 deletions
|
@ -33,7 +33,6 @@ $opts=[
|
|||
'peersfp'=>__DIR__.'/peers',
|
||||
'apeersfp'=>__DIR__.'/peers.all',
|
||||
'cpeersfp'=>__DIR__.'/peers.checked',
|
||||
'restore'=>false,
|
||||
'excludefp'=>null,
|
||||
'timeout'=>8,
|
||||
'curltimeout'=>15,
|
||||
|
@ -73,11 +72,6 @@ OPTIONS
|
|||
Normally, if its lockfile exists, the program exits with an error before
|
||||
doing anything. With this option the lockfile is ignored. Please verify
|
||||
that the program is not already running before using it.
|
||||
-r, --restore
|
||||
>>> Currently not working, causes script to just exit with an error message.
|
||||
If peers files «peers», «peers.all», «peers.checked» exist on program’s
|
||||
start they will be loaded, thus allowing to restore an interrupted previous
|
||||
crawling session. This option is mutually exclusive with the «loop» option.
|
||||
-e, --excludefp <file>
|
||||
Defines a file containing exclusion rules: one regular expression per
|
||||
line (empty lines are ignored). Any instance matching any defined regex
|
||||
|
@ -88,7 +82,7 @@ OPTIONS
|
|||
-l, --loop
|
||||
Normally the script will exit after completing a crawl; if this option
|
||||
is set, it will restart crawling until it receives a SIGTERM, SIGHUP
|
||||
or SIGINT. This option is mutually exclusive with the «restore» option.
|
||||
or SIGINT.
|
||||
-t, --timeout <seconds>
|
||||
Defines the timeout in seconds for every connection attempt.
|
||||
DEFAULT: «'.$opts['timeout'].'»
|
||||
|
@ -125,8 +119,6 @@ for ($i=1; $i<$argc; $i++) {
|
|||
mexit('Error: option «'.$argv[$i].'» has to be followed by a file’s path (use «-h» for more info).'.N,1);
|
||||
$i++;
|
||||
$opts['cpeersfp']=$argv[$i];
|
||||
} elseif ($argv[$i]=='-r' || $argv[$i]=='--restore') {
|
||||
$opts['restore']=true;
|
||||
} elseif ($argv[$i]=='-I' || $argv[$i]=='--ignorelock') {
|
||||
$opts['ignorelock']=true;
|
||||
} elseif ($argv[$i]=='-e' || $argv[$i]=='--excludefp') {
|
||||
|
@ -157,10 +149,6 @@ for ($i=1; $i<$argc; $i++) {
|
|||
}
|
||||
}
|
||||
|
||||
if ($opts['restore']) mexit('Error: “restore” option’s code has to be finished, it currently doesn’t work; exiting.'.N,1);
|
||||
|
||||
if ($opts['loop'] && $opts['restore']) mexit('Error: “loop” and “restore” options are mutually exclusive (use «-h» to read the help text).'.N,1);
|
||||
|
||||
$lockfp=__DIR__.'/'.BNAME.'.lock';
|
||||
if (is_file($lockfp) && !$opts['ignorelock']) {
|
||||
gecho('Error: lockfile exists: it seems the program is already running; if you’re sure it’s not, you can use «-I» to force execution.'.N,false,true);
|
||||
|
@ -205,37 +193,27 @@ $ainsts=[];
|
|||
$exarr=[];
|
||||
$notifs=[];
|
||||
|
||||
$mode=['mode'=>'w','desc'=>'write'];
|
||||
|
||||
if ($opts['restore']) {
|
||||
$insts=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
|
||||
if ($insts===false) mexit('Error: couldn’t open «'.$opts['peersfp'].'» for reading.'.N,1);
|
||||
$cinsts=@file($opts['cpeersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
|
||||
if ($cinsts===false) mexit('Error: couldn’t open «'.$opts['cpeersfp'].'» for reading.'.N,1);
|
||||
$ainsts=@file($opts['apeersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
|
||||
if ($ainsts===false) mexit('Error: couldn’t open «'.$opts['apeersfp'].'» for reading.'.N,1);
|
||||
$mode=['mode'=>'a','desc'=>'append'];
|
||||
gecho('Succesfully restored previous session :-)'.N,true,false);
|
||||
}
|
||||
$cloop=0;
|
||||
|
||||
do {
|
||||
$peersf=@fopen($opts['peersfp'],$mode['mode']);
|
||||
if ($peersf===false) mexit('Error: couldn’t open «'.$opts['peersfp'].'» in '.$mode['desc'].' mode.'.N,1);
|
||||
$apeersf=@fopen($opts['apeersfp'],$mode['mode']);
|
||||
if ($apeersf===false) mexit('Error: couldn’t open «'.$opts['apeersfp'].'» in '.$mode['desc'].' mode.'.N,1);
|
||||
$cpeersf=@fopen($opts['cpeersfp'],$mode['mode']);
|
||||
if ($cpeersf===false) mexit('Error: couldn’t open «'.$opts['cpeersfp'].'» in '.$mode['desc'].' mode.'.N,1);
|
||||
$peersf=@fopen($opts['peersfp'],'w');
|
||||
if ($peersf===false) mexit('Error: couldn’t open «'.$opts['peersfp'].'» in write mode.'.N,1);
|
||||
$apeersf=@fopen($opts['apeersfp'],'w');
|
||||
if ($apeersf===false) mexit('Error: couldn’t open «'.$opts['apeersfp'].'» in write mode.'.N,1);
|
||||
$cpeersf=@fopen($opts['cpeersfp'],'w');
|
||||
if ($cpeersf===false) mexit('Error: couldn’t open «'.$opts['cpeersfp'].'» in write mode.'.N,1);
|
||||
$cloop++;
|
||||
$maxround=1;
|
||||
$newc=0;
|
||||
$tini=time();
|
||||
// go
|
||||
if ($opts['restore'])
|
||||
crawl($insts,1);
|
||||
else
|
||||
crawl([$opts['startinst']],1);
|
||||
crawl([$opts['startinst']],1);
|
||||
gecho('Done crawling! :-)'.N,true,false);
|
||||
$now=time();
|
||||
gecho('Crawl started on '.date('Y-m-d H:i:s',$tini).' and ended on '.date('Y-m-d H:i:s',$now).'.'.N,true,false);
|
||||
gecho(count($ainsts).' URIs checked in '.ght($now-$tini).' ('.$maxround.' rounds); '.count($insts).' responded. Max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N,true,false);
|
||||
gecho(count($ainsts).' URIs checked in '.ght($now-$tini).', '.$maxround.' rounds; '.count($insts).' responded; found '.$newc.' new instances; max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N,true,false);
|
||||
gecho('Loop(s): '.$cloop.N,true,false);
|
||||
sleep(1);
|
||||
fclose($peersf);
|
||||
fclose($cpeersf);
|
||||
fclose($apeersf);
|
||||
|
@ -245,7 +223,6 @@ do {
|
|||
$insts=[];
|
||||
$cinsts=[];
|
||||
$ainsts=[];
|
||||
$mode=['mode'=>'w','desc'=>'write'];
|
||||
} while ($opts['loop']);
|
||||
mysqli_close($link);
|
||||
unlink($lockfp);
|
||||
|
@ -255,7 +232,7 @@ exit(0);
|
|||
// functions
|
||||
|
||||
function crawl($list,$id) {
|
||||
global $insts, $deadinsts, $cinsts, $ainsts, $tini, $opts, $peersf, $cpeersf, $apeersf, $maxround, $link;
|
||||
global $insts, $deadinsts, $cinsts, $ainsts, $tini, $opts, $peersf, $cpeersf, $apeersf, $maxround, $newc, $link;
|
||||
gecho('###### START OF ROUND '.$id.' ######'.N,true,false);
|
||||
$nlist=[];
|
||||
$c=count($list);
|
||||
|
@ -269,7 +246,7 @@ function crawl($list,$id) {
|
|||
$i++;
|
||||
$now=time();
|
||||
$rtela=$now-$rtini;
|
||||
gecho('Working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list.'.N,true,false);
|
||||
gecho('Working on «'.$inst.'»: round '.$id.', '.$i.'/'.$c.'; TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' discovered instances; '.count($nlist).' instances in next round list; '.$newc.' new instances found.'.N,true,false);
|
||||
waituntilonline();
|
||||
updexarr();
|
||||
gecho('Trying to load «'.$inst.'»’s peers...'.N,true,false);
|
||||
|
@ -416,13 +393,8 @@ function sortcheckandsave(&$arr,$arrdesc,&$fp) {
|
|||
}
|
||||
|
||||
function sighandler($signal) {
|
||||
global $peersf, $cpeersf, $apeersf, $lockfp;
|
||||
if (isset($peersf) && $peersf!==false) @fclose($peersf);
|
||||
if (isset($cpeersf) && $cpeersf!==false) @fclose($cpeersf);
|
||||
if (isset($apeersf) && $apeersf!==false) @fclose($apeersf);
|
||||
if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
|
||||
echo(N.'Interrupted (signal: '.$signal.').'.N);
|
||||
exit(0);
|
||||
echo(N);
|
||||
mexit('Interrupted (signal: '.$signal.').'.N,0);
|
||||
}
|
||||
|
||||
function isempty($val) {
|
||||
|
|
Loading…
Reference in a new issue