492 lines
20 KiB
PHP
Executable file
492 lines
20 KiB
PHP
Executable file
#!/usr/bin/php
|
||
<?php
|
||
|
||
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
||
|
||
// N: newline shorthand appended to every message in this script.
define('N',"\n");
// SNAME: this script's file name, as shown in the help text.
define('SNAME',basename(__FILE__));
// BNAME: SNAME with its last ".ext" suffix stripped; used for the lockfile name and banner.
define('BNAME',preg_replace('/\.[^.]*$/','',SNAME));
|
||
|
||
require __DIR__.'/../lib/gurl.php';
|
||
require __DIR__.'/../lib/ghs.php';
|
||
require __DIR__.'/../lib/ght.php';
|
||
require __DIR__.'/../lib/grace.php';
|
||
require __DIR__.'/../lib/parsetime.php';
|
||
|
||
use function mysqli_real_escape_string as myesc;
|
||
|
||
// Default settings; most of them can be overridden from the command line.
$opts=[];
// Path of the ini file holding the database connection settings.
$opts['inifp']=__DIR__.'/../conf/mustard.ini';
// First instance to crawl.
$opts['startinst']='mastodon.social';
// NOTE(review): $gracetime is not defined in this file; presumably set by one of the required libs.
$opts['gracetime']=$gracetime;
// Output files: responding instances and all checked instances.
$opts['peersfp']=__DIR__.'/peers.responding';
$opts['ckpeersfp']=__DIR__.'/peers.checked';
// Optional file of exclusion regexes (null: no exclusions).
$opts['excludefp']=null;
// Timeouts handed to gurl() for every request.
$opts['conntimeout']=4;
$opts['functimeout']=7;
$opts['loop']=false;
// Skip instances considered dead.
$opts['excludedead']=true;
// Run even if the lockfile exists.
$opts['ignorelock']=false;
// Index into $msglevs of the least important message level to print.
$opts['minmsgimplev']=1;

// Message levels, ordered from least to most important; 'None' disables output.
$msglevs=['Debug', 'Info', 'Warning', 'Error', 'None'];
|
||
|
||
// Help text shown by «-h»: assembled line by line and joined with newlines.
// NOTE(review): the lines carry no leading indentation here; confirm against the intended layout.
$help=implode(N,[
	'SYNOPSIS',
	'',
	SNAME.' [options]',
	'',
	'DESCRIPTION',
	'',
	'This program tries to build a fairly complete list of fediverse instances',
	'exposing the [instance]/api/v1/instance/peers endpoint.',
	'',
	'OPTIONS',
	'',
	'-s, --startinst <domain>',
	'Defines the first instance to crawl.',
	'DEFAULT: «'.$opts['startinst'].'»',
	'-e, --excludefp <file>',
	'Defines a file containing exclusion rules: one regular expression per',
	'line (empty lines are ignored). Any instance matching any defined regex',
	'will be ignored by the program. Changes made to this file during program',
	'execution will be taken into account.',
	'-i, --includedead',
	'Include “dead” instances (see next two options descriptions to know which',
	'instances are considered “dead”).',
	'-g, --gracetime <time>',
	'If an instance has not been responding for longer than this time, consider',
	'it dead and avoid checking it. See section «TIME SPECIFICATION» below to see',
	'how to specify time.',
	'DEFAULT: '.ght($opts['gracetime'],null,0),
	'-G, --graceline',
	'Return the “graceline” (0:0:0 of today minus gracetime: see option above) in',
	'unix time and local time, then exit.',
	'-p, --peersfp <file>',
	'Defines the file into which the ordered list of responding instances',
	'will be saved.',
	'DEFAULT: «'.$opts['peersfp'].'»',
	'-c, --ckpeersfp <file>',
	'Defines the file into which the ordered list of all checked instances will',
	'be saved.',
	'DEFAULT: «'.$opts['ckpeersfp'].'»',
	'-I, --ignorelock',
	'Normally, if its lockfile exists, the program exits with an error before',
	'doing anything. With this option the lockfile is ignored. Please verify',
	'that the program is not already running before using it.',
	'-t, --conntimeout <time>',
	'Defines the timeout in seconds for every connection attempt. See section',
	'«TIME SPECIFICATION» below to see how to specify time.',
	'DEFAULT: '.ght($opts['conntimeout'],null,0),
	'-T, --functimeout <time>',
	'Defines the timeout in seconds for every download. See section «TIME',
	'SPECIFICATION» below to see how to specify time.',
	'DEFAULT: '.ght($opts['functimeout'],null,0),
	'-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>',
	'Defines the minimum “importance level” of messages to be written to the text',
	'user interface. There are 4 “importance levels”, in this order of',
	'importance: «debug», «info», «warning», «error». Setting this option to any',
	'of these values will write to the text user interface all the messages with',
	'the specified or a greater level; setting it to the special value «none»',
	'will completely disable message writing.',
	'DEFAULT: '.lcfirst($msglevs[$opts['minmsgimplev']]),
	'-h, --help',
	'Show this help text and exit.',
	'',
	'TIME SPECIFICATION',
	'',
	'An example is better than ~5148 words :-)',
	'To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,',
	'7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can',
	'also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».',
	'',
	'This program comes with ABSOLUTELY NO WARRANTY; for details see the source.',
	'This is free software, and you are welcome to redistribute it under certain',
	'conditions; see <http://www.gnu.org/licenses/> for details.',
]).N;
|
||
|
||
// Parse the command line into $opts. Any invalid option ends the program
// through mexit() with exit code 1 (the lockfile is not touched at this stage).
//
// Two fixes over the previous behaviour:
//  - «-g» now also recomputes $graceline, so the chosen gracetime is actually
//    used by the dead-instance queries and by «-G». The formula follows the
//    help text («0:0:0 of today minus gracetime»).
//    NOTE(review): confirm it matches how the required lib computes the default $graceline.
//  - «-G» is honoured after all options have been read, so its output no
//    longer depends on whether «-g» comes before or after it.
$showgraceline=false;
for ($i=1; $i<$argc; $i++) {
	$arg=$argv[$i];
	// true when another argument follows the current one
	$hasnext=($i+1<$argc);
	if ($arg==='-s' || $arg==='--startinst') {
		if (!$hasnext)
			mexit(3,'option «'.$arg.'» has to be followed by a domain name (use «-h» for more info).'.N,1,false);
		$i++;
		$opts['startinst']=$argv[$i];
	} elseif ($arg==='-g' || $arg==='--gracetime') {
		if (!$hasnext || ($time=parsetime($argv[$i+1]))===false)
			mexit(3,'option «'.$arg.'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
		$i++;
		$opts['gracetime']=$time;
		// keep the graceline in sync with the user-supplied gracetime
		$graceline=mktime(0,0,0)-$time;
	} elseif ($arg==='-p' || $arg==='--peersfp') {
		if (!$hasnext)
			mexit(3,'option «'.$arg.'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
		$i++;
		$opts['peersfp']=$argv[$i];
	} elseif ($arg==='-c' || $arg==='--ckpeersfp') {
		if (!$hasnext)
			mexit(3,'option «'.$arg.'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
		$i++;
		$opts['ckpeersfp']=$argv[$i];
	} elseif ($arg==='-I' || $arg==='--ignorelock') {
		$opts['ignorelock']=true;
	} elseif ($arg==='-e' || $arg==='--excludefp') {
		if (!$hasnext)
			mexit(3,'option «'.$arg.'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
		$i++;
		$opts['excludefp']=$argv[$i];
	} elseif ($arg==='-t' || $arg==='--conntimeout') {
		if (!$hasnext || ($time=parsetime($argv[$i+1]))===false)
			mexit(3,'option «'.$arg.'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
		$i++;
		$opts['conntimeout']=$time;
	} elseif ($arg==='-T' || $arg==='--functimeout') {
		if (!$hasnext || ($time=parsetime($argv[$i+1]))===false)
			mexit(3,'option «'.$arg.'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
		$i++;
		$opts['functimeout']=$time;
	} elseif ($arg==='-i' || $arg==='--includedead') {
		$opts['excludedead']=false;
	} elseif ($arg==='-m' || $arg==='--minmsgimplev') {
		if (!$hasnext || !in_array(ucfirst(strtolower($argv[$i+1])),$msglevs,true))
			mexit(3,'option «'.$arg.'» requires a valid “message importance level” value as an argument (use «-h» to read help).'.N,1,false);
		$i++;
		// the level is stored as its index in $msglevs
		$opts['minmsgimplev']=array_search(ucfirst(strtolower($argv[$i])),$msglevs,true);
	} elseif ($arg==='-G' || $arg==='--graceline') {
		// printed after the loop, once every «-g» has been taken into account
		$showgraceline=true;
	} elseif ($arg==='-h' || $arg==='--help') {
		echo $help;
		exit(0);
	} else {
		mexit(3,'don’t know how to interpret «'.$arg.'» (use «-h» to read the help text).'.N,1,false);
	}
}
if ($showgraceline) {
	echo 'Graceline: '.$graceline.' ('.date('Y-m-d H:i:s',$graceline).').'.N;
	exit(0);
}
|
||
|
||
// The lockfile lives next to this script and is named after it.
$lockfp=__DIR__.'/'.BNAME.'.lock';
// Refuse to start when the lockfile is present, unless «-I» was given.
if (!$opts['ignorelock'] && is_file($lockfp))
	mexit(3,'lockfile exists: it seems the program is already running; if you’re sure it’s not, you can use «-I» to force execution.'.N,1,false);
// Create (or refresh) the lockfile.
if (@touch($lockfp)===false)
	mexit(3,'could not create lockfile «'.$lockfp.'».'.N,1,false);

//declare(ticks=1);
pcntl_async_signals(true);
// SIGTERM: termination ('kill' was called); SIGHUP: terminal log-out;
// SIGINT: interrupted (Ctrl-C is pressed). All three go to sighandler().
foreach ([SIGTERM, SIGHUP, SIGINT] as $signo)
	pcntl_signal($signo,'sighandler');
unset($signo);
|
||
|
||
lecho(1,'###### Starting '.BNAME.' ######'.N);

// Read the database settings from the ini file.
$iniarr=@parse_ini_file($opts['inifp']);
if ($iniarr===false)
	mexit(3,'couldn’t open «'.$opts['inifp'].'».'.N,1,true);

// Connect to MySQL. A failure may surface either as an exception or as a
// false return value (the latter for php versions < 8): both cases are handled.
try {
	$link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']);
} catch (Exception $error) {
	mexit(3,'couldn’t connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
}
// for php versions < 8
if ($link===false)
	mexit(3,'couldn’t connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);

// Use utf8mb4 on the connection; same double error handling as above.
// Fixed: the exception-path message was missing the closing «)» after the error number.
try {
	$res=mysqli_set_charset($link,'utf8mb4');
} catch (Exception $error) {
	mexit(3,'couldn’t set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true);
}
// for php versions < 8
if ($res===false)
	mexit(3,'couldn’t set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true);
|
||
|
||
// Build the list of "dead" instances: those in the “Instances” and “Peers”
// tables whose LastOkCheckTS is NULL or older than $graceline.
// De-duplication uses a keyed lookup ($seendead) instead of in_array() on the
// growing list: that avoids a quadratic scan and the loose comparison that
// could merge distinct numeric-looking hostnames.
$deadinsts=[];
if ($opts['excludedead']) {
	lecho(1,'loading dead instances from “Instances” and “Peers” table (gracetime: '.ght($opts['gracetime'],null,0).'; graceline: '.date('Y-m-d H:i:s',$graceline).').'.N);
	$seendead=[];

	$res=myq($link,'SELECT URI FROM Instances WHERE LastOkCheckTS IS NULL OR LastOkCheckTS < '.$graceline);
	lecho(1,'got '.mysqli_num_rows($res).' dead instances from “Instances” table.'.N);
	while ($row=mysqli_fetch_assoc($res)) {
		$host=$row['URI'];
		// rows without a hostname cannot match any peer: skip them
		if ($host!==null && !isset($seendead[$host])) {
			$seendead[$host]=true;
			$deadinsts[]=$host;
		}
	}

	// $res=myq($link,'SELECT Hostname FROM Peers WHERE LastOkCheckTS IS NULL OR (LastCheckTS-LastOkCheckTS < '.$gracetime.' AND (LastCheckTS-LastOkCheckTS) / (ChecksAtLast-ChecksAtLastOk) <= '.$graceminfreq.')');
	$res=myq($link,'SELECT Hostname FROM Peers WHERE LastOkCheckTS IS NULL OR LastOkCheckTS < '.$graceline);
	lecho(1,'got '.mysqli_num_rows($res).' dead instances from “Peers” table.'.N);
	while ($row=mysqli_fetch_assoc($res)) {
		$host=$row['Hostname'];
		if ($host!==null && !isset($seendead[$host])) {
			$seendead[$host]=true;
			$deadinsts[]=$host;
		}
	}

	unset($res,$row,$host,$seendead);
	lecho(1,'loaded list of dead instances ('.count($deadinsts).').'.N);
}
|
||
//mexit(1,'bau!'.N,0,true);
|
||
|
||
// Crawl state, shared with the functions below through globals.
$insts=[];   // instances that responded
$ckinsts=[]; // every instance that has been worked on
$exarr=[];   // exclusion regexes currently in force (filled by updexarr())

$maxround=1; // highest round number reached
$totnewc=0;  // instances newly inserted into the “Instances” table
$tini=time();

// go: round 1 only contains the start instance; crawl() recurses for the next rounds
$list=[$opts['startinst']];
crawl($list,1);
lecho(1,'done crawling! :-)'.N);

// final summary
$now=time();
$summary='crawl started on '.date('Y-m-d H:i:s',$tini)
	.' and ended on '.date('Y-m-d H:i:s',$now)
	.'; took '.ght($now-$tini,null,0).' in '.$maxround.' rounds'
	.'; '.count($insts).' instances responded'
	.'; '.count($ckinsts).' instances were considered'
	.'; '.$totnewc.' new instances were found'
	.'; max. memory usage: '.ghs(memory_get_peak_usage(true)).'.'.N;
lecho(1,$summary);

// write both lists, then release the database connection and the lock
sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
sortcheckandsave($ckinsts,'list of checked instances',$opts['ckpeersfp']);
mysqli_close($link);
unlink($lockfp);
lecho(1,'###### DONE :-) ######'.N);
exit(0);
|
||
|
||
|
||
// functions
|
||
|
||
/**
 * Runs one crawl round over $list and recurses into the next round.
 *
 * For every instance in $list: refreshes the exclusion rules, waits until the
 * network is reachable, requests the instance's «/api/v1/instance/peers» and,
 * if that fails, its «/api/v1/instance» endpoint. Responding instances are
 * appended to the global $insts and inserted into the “Instances” table when
 * missing; every instance is recorded in the “Peers” table and appended to the
 * global $ckinsts. Peers that pass all the filters make up the next round's
 * list; when that list is not empty the function calls itself with $id+1.
 *
 * Hostname comparisons are strict, so that numeric-looking strings such as
 * «1e3» and «1000» are never treated as the same instance.
 *
 * @param array $list hostnames to check in this round (taken by reference)
 * @param int   $id   round number, starting from 1
 */
function crawl(&$list,$id) {
	global $insts, $ckinsts, $deadinsts, $tini, $opts, $maxround, $totnewc, $link;
	// wouldn't make sense to filter $list here: filtering already happens before adding an instance to next round list
	$newc=0;
	lecho(1,'###### START OF ROUND '.$id.' ######'.N);
	$clist=count($list);
	lecho(1,'will check '.$clist.' instance(s).'.N);
	$nlist=[];
	$i=0;
	$rtini=time();
	foreach ($list as $inst) {
		$responded=false;
		$i++;
		lecho(1,'round '.$id.': working on instance «'.$inst.'» ('.$i.'/'.$clist.').'.N);
		// the exclusion file may have changed while running: reload it for every instance
		updexarr($id);
		waituntilonline($id);
		lecho(1,'round '.$id.': trying to load instance «'.$inst.'»’s peers...'.N);
		$peers=gurl('https://'.$inst.'/api/v1/instance/peers',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
		if ($peers['cont']===false) {
			lecho(2,'round '.$id.': could not load instance «'.$inst.'»’s peers: '.$peers['emsg'].'.'.N);
		} else {
			$peers=@json_decode($peers['cont'],true);
			if (!is_array($peers)) {
				lecho(2,'round '.$id.': expecting instance «'.$inst.'»’s peers, got bad JSON instead.'.N);
			} else {
				$responded=true;
				$cpeers=count($peers);
				lecho(1,'round '.$id.': successfully loaded instance «'.$inst.'»’s peers ('.$cpeers.') :-)'.N);
				$pi=1;
				foreach ($peers as $key=>$peer) {
					// a well-formed answer is a plain list of strings: stop at the first anomaly
					if ($key!==$pi-1) {
						lecho(2,'round '.$id.': instance «'.$inst.'»’s peers: entity '.$pi.'/'.$cpeers.'’s key is not sequential: not checking further.'.N);
						break;
					} elseif (!is_string($peer)) {
						lecho(2,'round '.$id.': instance «'.$inst.'»’s peers: entity '.$pi.'/'.$cpeers.' is not a string: not checking further.'.N);
						break;
					} else {
						// collect every reason not to queue this peer, for the debug message
						$whynot=[];
						if (in_array($peer,$ckinsts,true)) $whynot[]='it has already been checked';
						if (!validhostname($peer)) $whynot[]='its hostname is not valid';
						if (ckexarr($peer)) $whynot[]='its hostname matches an exclusion regexp';
						if (in_array($peer,$list,true)) $whynot[]='it is already present in current list';
						if (in_array($peer,$nlist,true)) $whynot[]='it is already present in next round list';
						if ($opts['excludedead'] && in_array($peer,$deadinsts,true)) $whynot[]='it’s dead';
						if (count($whynot)>0) {
							lecho(0,'round '.$id.': instance «'.$inst.'»: not adding peer «'.$peer.'» ('.$pi.'/'.$cpeers.') to next round list: '.implode(', ',$whynot).'.'.N);
						} else {
							lecho(1,'round '.$id.': instance «'.$inst.'»: adding peer «'.$peer.'» ('.$pi.'/'.$cpeers.') to next round list :-)'.N);
							$nlist[]=$peer;
						}
					}
					$pi++;
				}
			}
		}
		if (!$responded) {
			// fall back to the “instance” endpoint just to know whether the instance is alive
			lecho(1,'round '.$id.': instance «'.$inst.'» didn’t respond at its “peers” endpoint; trying to load its info from “instance” endpoint...'.N);
			$instinfo=gurl('https://'.$inst.'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
			if ($instinfo['cont']===false) {
				lecho(2,'round '.$id.': could not load instance «'.$inst.'»’s info: '.$instinfo['emsg'].'.'.N);
			} else {
				$instinfo=@json_decode($instinfo['cont'],true);
				if (is_array($instinfo))
					$responded=true;
				else
					lecho(2,'round '.$id.': expecting instance «'.$inst.'»’s info, got bad JSON instead.'.N);
			}
		}
		$now=time();
		if ($responded) {
			lecho(1,'round '.$id.': instance «'.$inst.'» responded :-)'.N);
			$insts[]=$inst;
			$res=myq($link,'SELECT ID FROM Instances WHERE URI=\''.myesc($link,$inst).'\'');
			$cres=mysqli_num_rows($res);
			if ($cres<1) {
				lecho(1,'round '.$id.': instance «'.$inst.'» is new to “Instances” table, adding it :-)'.N);
				myq($link,'INSERT INTO Instances SET URI=\''.myesc($link,$inst).'\', InsertTS='.$now);
				$totnewc++;
				$newc++;
			} elseif ($cres>1) {
				lecho(2,'round '.$id.': instance «'.$inst.'» has '.$cres.' records in “Instances” table! :-('.N);
			} else {
				lecho(1,'round '.$id.': instance «'.$inst.'» is already present in “Instances” table.'.N);
			}
		} else {
			lecho(1,'round '.$id.': instance «'.$inst.'» didn’t respond :-('.N);
		}
		// every checked instance gets a “Peers” record; LastOkCheckTS is only set when it responded
		$res=myq($link,'SELECT * FROM Peers WHERE Hostname=\''.myesc($link,$inst).'\'');
		$cres=mysqli_num_rows($res);
		if ($cres<1) {
			lecho(1,'round '.$id.': instance «'.$inst.'» is new to “Peers” table, adding it :-)'.N);
			$query='INSERT INTO Peers SET Hostname=\''.myesc($link,$inst).'\', FirstCheckTS='.$now;
			if ($responded) $query.=', LastOkCheckTS='.$now;
			myq($link,$query);
		} else {
			if ($cres>1) lecho(2,'round '.$id.': instance «'.$inst.'» has '.$cres.' records in “Peers” table! :-('.N);
			if ($responded) {
				lecho(1,'round '.$id.': instance «'.$inst.'» is already present in “Peers” table, but it responded: updating its record’s “LastOkCheckTS” value...'.N);
				// only the first matching record is updated
				$row=mysqli_fetch_assoc($res);
				myq($link,'UPDATE Peers SET LastOkCheckTS='.$now.' WHERE ID='.$row['ID']);
			}
		}
		$ckinsts[]=$inst;
		$now=time();
		$rtela=$now-$rtini;
		// remaining time is estimated from the average time spent per instance so far
		lecho(1,'round '.$id.': finished working on instance «'.$inst.'» ('.$i.'/'.$clist.'); RoundElapsedTime: '.ght($rtela,null,0).'; RoundEstimatedTimeRemaining: '.ght($rtela/$i*$clist-$rtela,null,0).'; RoundNewInsts: '.$newc.'; NextRoundInsts: '.count($nlist).'; TotElapsedTime: '.ght($now-$tini,null,0).'; TotConsideredInsts: '.count($ckinsts).'; TotRespondingInsts: '.count($insts).'; TotNewInsts: '.$totnewc.'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).').'.N);
	}
	unset($list);
	$now=time();
	$rtela=$now-$rtini;
	$cnlist=count($nlist);
	lecho(1,'END OF ROUND STATS: RoundCheckedInsts: '.$clist.'; RoundElapsedTime: '.ght($rtela,null,0).'; RoundNewInsts: '.$newc.'; NextRoundInsts: '.$cnlist.'; TotElapsedTime: '.ght($now-$tini,null,0).'; TotConsideredInsts: '.count($ckinsts).'; TotRespondingInsts: '.count($insts).'; TotNewInsts: '.$totnewc.'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).').'.N);
	if ($cnlist<1) lecho(1,'next round list is empty.'.N);
	lecho(1,'###### END OF ROUND '.$id.' ######'.N);
	if ($cnlist>0) {
		crawl($nlist,$id+1);
		if ($id+1>$maxround) $maxround=$id+1;
	}
}
|
||
|
||
/**
 * Saves whatever has been collected so far, optionally removes the lockfile,
 * writes a final message and terminates the program.
 *
 * @param int    $lev     message level (index into $msglevs)
 * @param string $msg     message to write through lecho()
 * @param int    $code    process exit code
 * @param bool   $remlock whether to delete the lockfile, if it exists
 */
function mexit($lev,$msg,$code,$remlock) {
	global $link, $insts, $ckinsts, $lockfp, $opts;
	// the lists only exist once crawling has been set up
	if (is_array($insts ?? null)) {
		sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
	}
	if (is_array($ckinsts ?? null)) {
		sortcheckandsave($ckinsts,'list of checked instances',$opts['ckpeersfp']);
	}
	if ($remlock) {
		if (isset($lockfp) && is_file($lockfp)) {
			unlink($lockfp);
		}
	}
	lecho($lev,$msg);
	exit($code);
}
|
||
|
||
/**
 * Writes a timestamped message prefixed with its level name, provided its
 * level is at least $opts['minmsgimplev'].
 * Levels below 2 go to standard output, the others to standard error.
 *
 * @param int    $lev message level (index into $msglevs)
 * @param string $msg message text; the caller supplies the trailing newline
 */
function lecho($lev,$msg) {
	global $opts, $msglevs;
	$line=microdate().' '.$msglevs[$lev].': '.$msg;
	if ($lev<$opts['minmsgimplev']) {
		return;
	}
	if ($lev>=2) {
		fwrite(STDERR,$line);
	} else {
		echo $line;
	}
}
|
||
|
||
/**
 * Runs a query and returns its result; on failure the program is terminated
 * through mexit() with exit code 2.
 *
 * @param mixed  $link  mysqli connection (taken by reference)
 * @param string $query SQL statement to execute
 * @return mixed whatever mysqli_query() returned
 */
function myq(&$link,$query) {
	try {
		$res=mysqli_query($link,$query);
	} catch (Exception $error) {
		$why=$error->getMessage().' ('.$error->getCode().').';
		mexit(3,'query «'.$query.'» failed: '.$why.N,2,true);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	if ($res===false) {
		$why=mysqli_error($link).' ('.mysqli_errno($link).').';
		mexit(3,'query «'.$query.'» failed: '.$why.N,2,true);
	}
	return $res;
}
|
||
|
||
/**
 * Formats a microtime(false)-style string («0.uuuuuu00 seconds») as
 * «Y-m-d H:i:s.uuuuuu».
 *
 * @param string|null $time value in microtime(false) format; null means now
 * @return string formatted local date and time with microseconds
 */
function microdate($time=null) {
	if ($time===null) {
		$time=microtime(false);
	}
	list($fraction,$seconds)=explode(' ',$time);
	// drop the leading «0.» and the last two digits of the fractional part
	$micros=substr($fraction,2,-2);
	return date('Y-m-d H:i:s',$seconds).'.'.$micros;
}
|
||
|
||
/**
 * De-duplicates and sorts $arr in place, then writes it to $fp, one value per line.
 * A warning is logged when duplicates were found or the file can't be opened.
 *
 * @param array  $arr     list to save (modified: made unique and sorted)
 * @param string $arrdesc description of the list, used in messages
 * @param string $fp      path of the destination file
 */
function sortcheckandsave(&$arr,$arrdesc,&$fp) {
	$before=count($arr);
	$arr=array_unique($arr);
	if (count($arr)!=$before) {
		lecho(2,$arrdesc.' contained duplicates, better check the code ;-)'.N);
	}
	lecho(1,'saving ordered '.$arrdesc.' into «'.$fp.'».'.N);
	sort($arr);
	$out=@fopen($fp,'w');
	if ($out===false) {
		lecho(2,'couldn’t open «'.$fp.'» for writing.'.N);
		return;
	}
	foreach ($arr as $entry) {
		fwrite($out,$entry.N);
	}
	fclose($out);
}
|
||
|
||
/**
 * Signal handler installed for SIGTERM, SIGHUP and SIGINT: prints a newline,
 * then leaves through mexit() with exit code 0, removing the lockfile.
 *
 * @param int $signal number of the received signal
 */
function sighandler($signal) {
	print N;
	$notice='interrupted (signal: '.$signal.').'.N;
	mexit(1,$notice,0,true);
}
|
||
|
||
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $val string to test
 * @return bool true when $val matches «^\s*$»
 */
function isempty($val) {
	return preg_match('/^\s*$/',$val)===1;
}
|
||
|
||
/**
 * Blocks until a TCP connection to www.google.com:80 succeeds, retrying every
 * 10 seconds and logging a warning at each failed attempt.
 *
 * @param int $roundid current round number, only used in messages
 */
function waituntilonline($roundid) {
	$probehost='www.google.com';
	$wasoffline=false;
	for (;;) {
		// 1 second connection timeout
		$sock=@fsockopen($probehost,80,$errno,$errstr,1);
		if ($sock!==false) {
			break;
		}
		$wasoffline=true;
		lecho(2,'round '.$roundid.': it seems we are offline, waiting 10 seconds before retrying...'.N);
		sleep(10);
	}
	fclose($sock);
	if ($wasoffline) {
		lecho(1,'round '.$roundid.': it seems we are back online! :-)'.N);
	}
}
|
||
|
||
/**
 * Reloads the exclusion regexes from $opts['excludefp'] into the global $exarr.
 *
 * Does nothing when no exclusion file is configured. When the file can't be
 * opened a warning is logged and the previous rules are kept. Blank lines are
 * skipped; lines that are not valid regular expressions are reported with
 * their line number and ignored.
 *
 * @param int $roundid current round number, only used in messages
 */
function updexarr($roundid) {
	global $exarr, $opts;
	if (is_null($opts['excludefp'])) return;
	$f=@fopen($opts['excludefp'],'r');
	if ($f===false) {
		lecho(2,'round '.$roundid.': could not open exclude file «'.$opts['excludefp'].'» for reading.'.N);
		return;
	}
	$i=0;
	$exarr=[];
	// fgets() returns false at end of file: stop there instead of trimming a non-string
	while (($raw=fgets($f))!==false) {
		$i++;
		$line=trim($raw);
		if (!isempty($line)) {
			// a pattern is accepted when preg_match() can run it without failing
			if (@preg_match($line,'foo')!==false)
				$exarr[]=$line;
			else
				lecho(2,'round '.$roundid.': exclude file «'.$opts['excludefp'].'» contains an invalid regular expression on line '.$i.': «'.$line.'».'.N);
		}
	}
	// release the handle explicitly: this function runs once per crawled instance
	fclose($f);
}
|
||
|
||
/**
 * Tells whether a hostname matches at least one of the exclusion regexes
 * held in the global $exarr.
 *
 * @param string $inst hostname to test
 * @return bool true on the first matching regex, false when none matches
 */
function ckexarr($inst) {
	global $exarr;
	$excluded=false;
	foreach ($exarr as $pattern) {
		if (preg_match($pattern,$inst)===1) {
			$excluded=true;
			break;
		}
	}
	return $excluded;
}
|
||
|
||
/**
 * Tells whether a string contains at least one non-ASCII byte.
 *
 * The previous implementation counted characters with «/./u», which does not
 * match newlines, so a plain ASCII string containing "\n" was wrongly
 * reported as multibyte.
 *
 * @param string $s string to test
 * @return bool true when any byte of $s is outside the 0x00-0x7F range
 */
function ismultibyte($s) {
	return(preg_match('/[^\x00-\x7F]/',$s)===1);
}

/**
 * Validates a hostname.
 *
 * Non-ASCII hostnames are first converted with idn_to_ascii(); the result
 * must be at most 253 bytes long and made of dot-separated labels of 1 to 63
 * characters, each containing only letters, digits and hyphens and neither
 * starting nor ending with a hyphen.
 *
 * Labels are anchored with \A and \z: the previous «$» anchor also matched
 * before a trailing newline, so «example.com\n» could be accepted.
 *
 * @param string $hostname hostname to validate
 * @return bool true when the hostname is valid
 */
function validhostname($hostname) {
	//$hostname=preg_replace('#/.*#','',$hostname);
	//$hostname=preg_replace('#:[0-9]+$#','',$hostname);
	if (ismultibyte($hostname)) {
		$hostname=idn_to_ascii($hostname,IDNA_DEFAULT,INTL_IDNA_VARIANT_UTS46);
		// conversion failed: the name can't be a valid hostname
		if ($hostname===false) return(false);
	}
	if (strlen($hostname)>253) return(false);
	foreach (explode('.',$hostname) as $label) {
		$len=strlen($label);
		if ($len<1 || $len>63) return(false);
		// letters, digits and inner hyphens only; no leading or trailing hyphen
		if (preg_match('#\A[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\z#',$label)!==1) return(false);
	}
	return(true);
}
|
||
//$url='www.team.starschlep.com/'; if (validhostname($url)) echo('OK: '.$url.N); else echo('KO: '.$url.N); die();
|
||
|
||
?>
|