#!/usr/bin/php
<?php
/*
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2023-12-29 10:04:05 +01:00
// Line terminator: appended to every log message and written after every
// hostname saved by sortcheckandsave(). It must be a bare newline, otherwise
// saved hostnames get padded with spaces.
const N = "\n";

// Script file name (with extension); shown in the help text.
define('SNAME', basename(__FILE__));
// Script name without its last extension; used for the lockfile name and the
// start-up banner.
define('BNAME', preg_replace('/\.[^.]*$/', '', SNAME));
2020-10-13 08:21:26 +02:00
2023-12-26 11:17:54 +01:00
require __DIR__ . '/../lib/gurl.php' ;
require __DIR__ . '/../lib/ghs.php' ;
require __DIR__ . '/../lib/ght.php' ;
require __DIR__ . '/../lib/gracetime.php' ;
require __DIR__ . '/../lib/parsetime.php' ;
2020-10-13 08:21:26 +02:00
2022-12-22 11:32:18 +01:00
use function mysqli_real_escape_string as myesc ;
2022-12-21 22:07:05 +01:00
// Run-time options; the command-line parser below overrides these defaults.
$opts = [
    // ini file read with parse_ini_file() to get the database settings
    'inifp' => __DIR__ . '/../conf/mustard.ini',
    // first instance to crawl
    'startinst' => 'mastodon.social',
    // seconds an instance may have been unresponsive before it is considered dead
    // NOTE(review): $gracetime is not defined in this file; presumably set by lib/gracetime.php - confirm
    'gracetime' => $gracetime,
    // output file for the sorted list of responding instances
    'peersfp' => __DIR__ . '/peers.responding',
    // output file for the sorted list of all checked instances
    'ckpeersfp' => __DIR__ . '/peers.checked',
    // optional file with one exclusion regex per line (null: no exclusions)
    'excludefp' => null,
    // timeouts handed to gurl() for every request
    'conntimeout' => 3,
    'functimeout' => 5,
    // NOTE(review): not read anywhere in this file
    'loop' => false,
    // skip instances listed as dead in the database
    'excludedead' => true,
    // run even if the lockfile already exists
    'ignorelock' => false,
    // index into $msglevs: messages below this level are not printed
    'minmsgimplev' => 1
];

// Message importance levels, indexed by the numeric level passed to lecho().
$msglevs = ['Debug', 'Info', 'Warning', 'Error', 'None'];
2022-12-25 11:32:08 +01:00
2022-12-22 14:04:29 +01:00
// Help text printed by «-h»; defaults are interpolated from $opts so the text
// always matches the values actually in use.
$help = 'SYNOPSIS
  ' . SNAME . ' [options]

DESCRIPTION
  This program tries to build a fairly complete list of fediverse instances
  exposing the [instance]/api/v1/instance/peers endpoint.

OPTIONS
  -s, --startinst <domain>
    Defines the first instance to crawl.
    DEFAULT: «' . $opts['startinst'] . '»
  -e, --excludefp <file>
    Defines a file containing exclusion rules: one regular expression per
    line (empty lines are ignored). Any instance matching any defined regex
    will be ignored by the program. Changes made to this file during program
    execution will be taken into account.
  -i, --includedead
    Include “dead” instances (see next two options descriptions to know which
    instances are considered “dead”).
  -g, --gracetime <time>
    If an instance has not been responding for longer than this time, consider
    it dead and avoid checking it. See section «TIME SPECIFICATION» below to see
    how to specify time.
    DEFAULT: ' . ght($opts['gracetime'], null, 0) . '
  -G, --graceline
    Return the “graceline” (now - gracetime: see option above) in unix time and
    local time, then exit.
  -p, --peersfp <file>
    Defines the file into which the ordered list of responding instances
    will be saved.
    DEFAULT: «' . $opts['peersfp'] . '»
  -c, --ckpeersfp <file>
    Defines the file into which the ordered list of all checked instances will
    be saved.
    DEFAULT: «' . $opts['ckpeersfp'] . '»
  -I, --ignorelock
    Normally, if its lockfile exists, the program exits with an error before
    doing anything. With this option the lockfile is ignored. Please verify
    that the program is not already running before using it.
  -t, --conntimeout <time>
    Defines the timeout in seconds for every connection attempt. See section
    «TIME SPECIFICATION» below to see how to specify time.
    DEFAULT: ' . ght($opts['conntimeout'], null, 0) . '
  -T, --functimeout <time>
    Defines the timeout in seconds for every download. See section «TIME
    SPECIFICATION» below to see how to specify time.
    DEFAULT: ' . ght($opts['functimeout'], null, 0) . '
  -m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
    Defines the minimum “importance level” of messages to be written to the text
    user interface. There are 4 “importance levels”, in this order of
    importance: «debug», «info», «warning», «error». Setting this option to any
    of these values will write to the text user interface all the messages with
    the specified or a greater level; setting it to the special value «none»
    will completely disable message writing.
    DEFAULT: ' . lcfirst($msglevs[$opts['minmsgimplev']]) . '
  -h, --help
    Show this help text and exit.

TIME SPECIFICATION
  An example is better than ~5148 words :-)
  To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
  7 minutes and 12 seconds you can use «1y, 6M, 2w, 3d, 5h, 7m, 12s»; but you can
  also use «12s, 7m, 5h, 3d, 2w, 6M, 1y», or even «18M, 1w, 1w, 2d, 1d, 3h, 2h, 7m, 12s».

This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
conditions; see <http://www.gnu.org/licenses/> for details.' . N;
2020-10-13 08:21:26 +02:00
// Command-line parsing. Options that take a value consume the next argument;
// any malformed or unknown option aborts through mexit() with exit code 1
// (the lockfile does not exist yet, hence $remlock=false everywhere).

// Options whose argument is a time specification parsed by parsetime().
$timeopts = [
    '-g' => 'gracetime',   '--gracetime' => 'gracetime',
    '-t' => 'conntimeout', '--conntimeout' => 'conntimeout',
    '-T' => 'functimeout', '--functimeout' => 'functimeout'
];
// Options whose argument is a file path, stored verbatim.
$pathopts = [
    '-p' => 'peersfp',   '--peersfp' => 'peersfp',
    '-c' => 'ckpeersfp', '--ckpeersfp' => 'ckpeersfp',
    '-e' => 'excludefp', '--excludefp' => 'excludefp'
];

for ($i = 1; $i < $argc; $i++) {
    $arg = $argv[$i];
    // true when there is a following argument that can serve as this option's value
    $hasval = ($i + 1 < $argc);
    if ($arg == '-s' || $arg == '--startinst') {
        if (!$hasval)
            mexit(3, 'option «' . $arg . '» has to be followed by a domain name (use «-h» for more info).' . N, 1, false);
        $i++;
        $opts['startinst'] = $argv[$i];
    } elseif (isset($timeopts[$arg])) {
        if (!$hasval || ($time = parsetime($argv[$i + 1])) === false)
            mexit(3, 'option «' . $arg . '» requires a valid time specification as an argument (use «-h» to read help).' . N, 1, false);
        $i++;
        $opts[$timeopts[$arg]] = $time;
    } elseif (isset($pathopts[$arg])) {
        if (!$hasval)
            mexit(3, 'option «' . $arg . '» has to be followed by a file’ s path (use «-h» for more info).' . N, 1, false);
        $i++;
        $opts[$pathopts[$arg]] = $argv[$i];
    } elseif ($arg == '-I' || $arg == '--ignorelock') {
        $opts['ignorelock'] = true;
    } elseif ($arg == '-i' || $arg == '--includedead') {
        $opts['excludedead'] = false;
    } elseif ($arg == '-m' || $arg == '--minmsgimplev') {
        // level names are matched case-insensitively against $msglevs
        $lev = $hasval ? array_search(ucfirst(strtolower($argv[$i + 1])), $msglevs) : false;
        if ($lev === false)
            mexit(3, 'option «' . $arg . '» requires a valid “message importance level” value as an argument (use «-h» to read help).' . N, 1, false);
        $i++;
        $opts['minmsgimplev'] = $lev;
    } elseif ($arg == '-G' || $arg == '--graceline') {
        // uses the gracetime in effect at this point of the command line
        $graceline = time() - $opts['gracetime'];
        echo 'Graceline: ' . $graceline . ' (' . date('Y-m-d H:i:s', $graceline) . ').' . N;
        exit(0);
    } elseif ($arg == '-h' || $arg == '--help') {
        echo $help;
        exit(0);
    } else {
        mexit(3, 'don’ t know how to interpret «' . $arg . '» (use «-h» to read the help text).' . N, 1, false);
    }
}
2022-12-22 14:04:29 +01:00
// Single-instance guard: refuse to start when the lockfile exists, unless
// «-I» was given. The lockfile is not removed on these two failures because
// it either belongs to another run or could not be created at all.
$lockfp = __DIR__ . '/' . BNAME . '.lock';
if (is_file($lockfp) && !$opts['ignorelock'])
    mexit(3, 'lockfile exists: it seems the program is already running; if you’ re sure it’ s not, you can use «-I» to force execution.' . N, 1, false);
if (@touch($lockfp) === false)
    mexit(3, 'could not create lockfile «' . $lockfp . '».' . N, 1, false);

//declare(ticks=1);
// Deliver signals asynchronously so sighandler() can save the lists and
// remove the lockfile when the run is interrupted.
pcntl_async_signals(true);
pcntl_signal(SIGTERM, 'sighandler'); // Termination ('kill' was called)
pcntl_signal(SIGHUP, 'sighandler'); // Terminal log-out
pcntl_signal(SIGINT, 'sighandler'); // Interrupted (Ctrl-C is pressed)
2022-12-21 22:07:05 +01:00
2023-12-29 10:04:05 +01:00
lecho(1, '###### Starting ' . BNAME . ' ######' . N);

// Load the database settings; from here on every fatal error removes the lockfile.
$iniarr = @parse_ini_file($opts['inifp']);
if ($iniarr === false)
    mexit(3, 'couldn’ t open «' . $opts['inifp'] . '».' . N, 1, true);

// Connect to MySQL. PHP >= 8 throws on failure, older versions return false:
// both paths are handled.
try {
    $link = @mysqli_connect($iniarr['db_host'], $iniarr['db_admin_name'], $iniarr['db_admin_password'], $iniarr['db_name'], $iniarr['db_port'], $iniarr['db_socket']);
} catch (Exception $error) {
    mexit(3, 'couldn’ t connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);
}
// for php versions < 8
if ($link === false)
    mexit(3, 'couldn’ t connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);

try {
    $res = mysqli_set_charset($link, 'utf8mb4');
} catch (Exception $error) {
    // the closing parenthesis after the error number was missing here
    mexit(3, 'couldn’ t set «utf8mb4» charset for MySQL: ' . mysqli_error($link) . ' (' . mysqli_errno($link) . ').' . N, 1, true);
}
// for php versions < 8
if ($res === false)
    mexit(3, 'couldn’ t set «utf8mb4» charset for MySQL: ' . mysqli_error($link) . ' (' . mysqli_errno($link) . ').' . N, 1, true);
2022-12-22 11:32:18 +01:00
2022-12-21 22:07:05 +01:00
// Build the list of “dead” instances: hosts whose last successful check is
// older than the graceline (or that never answered), taken from both the
// “Instances” and the “Peers” tables, without duplicates.
$deadinsts = [];
if ($opts['excludedead']) {
    $graceline = time() - $opts['gracetime'];
    lecho(1, 'loading dead instances from “Instances” and “Peers” table (gracetime: ' . ght($opts['gracetime'], null, 0) . '; graceline: ' . date('Y-m-d H:i:s', $graceline) . ').' . N);
    // $seen is a hash set used for de-duplication: a linear in_array() scan
    // per row would make loading quadratic in the number of rows
    $seen = [];
    foreach (['Instances' => 'URI', 'Peers' => 'Hostname'] as $table => $col) {
        $res = myq($link, 'SELECT ' . $col . ' FROM ' . $table . ' WHERE LastOkCheckTS<' . $graceline . ' OR LastOkCheckTS IS NULL');
        lecho(1, 'got ' . mysqli_num_rows($res) . ' dead instances from “' . $table . '” table.' . N);
        while ($row = mysqli_fetch_assoc($res)) {
            $name = $row[$col];
            $key = (string) $name;
            if (!isset($seen[$key])) {
                $seen[$key] = true;
                $deadinsts[] = $name;
            }
        }
    }
    unset($res, $row, $seen, $name, $key, $table, $col);
    lecho(1, 'loaded list of dead instances (' . count($deadinsts) . ').' . N);
}
2023-01-06 17:03:41 +01:00
//mexit(0,'bau!'.N,0,true);
2022-12-21 22:07:05 +01:00
// Global crawl state, shared with crawl() and mexit() through «global».
$insts = [];    // instances that responded
$ckinsts = [];  // every instance that was checked, responding or not
$exarr = [];    // exclusion regexes, reloaded by updexarr()
$maxround = 1;  // deepest round reached
$totnewc = 0;   // instances newly inserted into the “Instances” table
$tini = time(); // crawl start time
$list = [$opts['startinst']];
// go
crawl($list, 1);
lecho(1, 'done crawling! :-)' . N);
$now = time();
lecho(1, 'crawl started on ' . date('Y-m-d H:i:s', $tini) . ' and ended on ' . date('Y-m-d H:i:s', $now) . '; took ' . ght($now - $tini, null, 0) . ' in ' . $maxround . ' rounds; ' . count($insts) . ' instances responded; ' . count($ckinsts) . ' instances were considered; ' . $totnewc . ' new instances were found; max. memory usage: ' . ghs(memory_get_peak_usage(true)) . '.' . N);
sortcheckandsave($insts, 'list of responding instances', $opts['peersfp']);
sortcheckandsave($ckinsts, 'list of checked instances', $opts['ckpeersfp']);
mysqli_close($link);
unlink($lockfp);
lecho(1, 'done :-)' . N);
exit(0);
// functions
2022-12-29 09:28:18 +01:00
/**
 * Crawls one round of instances, then recurses into the next round.
 *
 * For every instance in $list the “peers” endpoint is fetched; every peer that
 * passes the filters (not yet checked, valid hostname, not excluded, not
 * already queued, not dead) is queued for the next round. An instance whose
 * “peers” endpoint fails gets a second chance on its “instance” endpoint.
 * Responding instances are recorded in the “Instances” table; every checked
 * instance is recorded (or refreshed) in the “Peers” table.
 *
 * Membership tests use hash sets built once per round instead of in_array()
 * scans, which were quadratic in the number of known instances.
 *
 * @param array $list hostnames to check in this round; passed by reference
 *                    and emptied once the round is over, so that its memory is
 *                    released before descending into the next round
 * @param int   $id   round number, starting at 1
 */
function crawl(&$list, $id) {
    global $insts, $ckinsts, $deadinsts, $tini, $opts, $maxround, $totnewc, $link;
    // wouldn't make sense to filter $list here: filtering already happens before adding an instance to next round list
    $rp = 'round ' . $id . ': ';
    $newc = 0;
    lecho(1, '###### START OF ROUND ' . $id . ' ######' . N);
    $clist = count($list);
    lecho(1, 'will check ' . $clist . ' instance(s).' . N);
    $nlist = [];
    // hash sets mirroring the lists consulted for every single peer
    $listset = array_flip($list);
    $ckset = array_flip($ckinsts);
    $deadset = $opts['excludedead'] ? array_flip(array_filter($deadinsts, 'is_string')) : [];
    $nset = [];
    $i = 0;
    $rtini = time();
    foreach ($list as $inst) {
        $responded = false;
        $i++;
        $who = 'instance «' . $inst . '»';
        lecho(1, $rp . 'working on ' . $who . ' (' . $i . '/' . $clist . ').' . N);
        // reload exclusion rules and make sure we are online before every request
        updexarr($id);
        waituntilonline($id);
        lecho(1, $rp . 'trying to load ' . $who . '’ s peers...' . N);
        $peers = gurl('https://' . $inst . '/api/v1/instance/peers', $opts['conntimeout'], $opts['functimeout'], ['Accept: application/json']);
        if ($peers['cont'] === false) {
            lecho(2, $rp . 'could not load ' . $who . '’ s peers: ' . $peers['emsg'] . '.' . N);
        } else {
            $peers = @json_decode($peers['cont'], true);
            if (!is_array($peers)) {
                lecho(2, $rp . 'expecting ' . $who . '’ s peers, got bad JSON instead.' . N);
            } else {
                $responded = true;
                $cpeers = count($peers);
                lecho(1, $rp . 'successfully loaded ' . $who . '’ s peers (' . $cpeers . ') :-)' . N);
                $pi = 1;
                foreach ($peers as $key => $peer) {
                    // a well-formed answer is a JSON list of strings; strict comparison
                    // so that a non-numeric string key never passes for index 0
                    if ($key !== $pi - 1) {
                        lecho(2, $rp . $who . '’ s peers: entity ' . $pi . '/' . $cpeers . '’ s key is not sequential: not checking further.' . N);
                        break;
                    }
                    if (!is_string($peer)) {
                        lecho(2, $rp . $who . '’ s peers: entity ' . $pi . '/' . $cpeers . ' is not a string: not checking further.' . N);
                        break;
                    }
                    // collect every reason why this peer must not be queued
                    $whynot = [];
                    if (isset($ckset[$peer])) $whynot[] = 'it has already been checked';
                    if (!validhostname($peer)) $whynot[] = 'its hostname is not valid';
                    if (ckexarr($peer)) $whynot[] = 'its hostname matches an exclusion regexp';
                    if (isset($listset[$peer])) $whynot[] = 'it is already present in current list';
                    if (isset($nset[$peer])) $whynot[] = 'it is already present in next round list';
                    if ($opts['excludedead'] && isset($deadset[$peer])) $whynot[] = 'it’ s dead';
                    if (count($whynot) > 0) {
                        lecho(0, $rp . $who . ': not adding peer «' . $peer . '» (' . $pi . '/' . $cpeers . ') to next round list: ' . implode(', ', $whynot) . '.' . N);
                    } else {
                        lecho(1, $rp . $who . ': adding peer «' . $peer . '» (' . $pi . '/' . $cpeers . ') to next round list :-)' . N);
                        $nlist[] = $peer;
                        $nset[$peer] = true;
                    }
                    $pi++;
                }
            }
        }
        if (!$responded) {
            lecho(1, $rp . $who . ' didn’ t respond at its “peers” endpoint; trying to load its info from “instance” endpoint...' . N);
            $instinfo = gurl('https://' . $inst . '/api/v1/instance', $opts['conntimeout'], $opts['functimeout'], ['Accept: application/json']);
            if ($instinfo['cont'] === false) {
                lecho(2, $rp . 'could not load ' . $who . '’ s info: ' . $instinfo['emsg'] . '.' . N);
            } else {
                $instinfo = @json_decode($instinfo['cont'], true);
                if (is_array($instinfo))
                    $responded = true;
                else
                    lecho(2, $rp . 'expecting ' . $who . '’ s info, got bad JSON instead.' . N);
            }
        }
        $now = time();
        if ($responded) {
            lecho(1, $rp . $who . ' responded :-)' . N);
            $insts[] = $inst;
            $res = myq($link, 'SELECT ID FROM Instances WHERE URI=\'' . myesc($link, $inst) . '\'');
            $cres = mysqli_num_rows($res);
            if ($cres < 1) {
                lecho(1, $rp . $who . ' is new to “Instances” table, adding it :-)' . N);
                myq($link, 'INSERT INTO Instances SET URI=\'' . myesc($link, $inst) . '\', InsertTS=' . $now);
                $totnewc++;
                $newc++;
            } elseif ($cres > 1) {
                lecho(2, $rp . $who . ' has ' . $cres . ' records in “Instances” table! :-(' . N);
            } else {
                lecho(1, $rp . $who . ' is already present in “Instances” table.' . N);
            }
        } else {
            lecho(1, $rp . $who . ' didn’ t respond :-(' . N);
        }
        // every checked instance gets a “Peers” record; LastOkCheckTS is only set or refreshed when it responded
        $res = myq($link, 'SELECT * FROM Peers WHERE Hostname=\'' . myesc($link, $inst) . '\'');
        $cres = mysqli_num_rows($res);
        if ($cres < 1) {
            lecho(1, $rp . $who . ' is new to “Peers” table, adding it :-)' . N);
            $query = 'INSERT INTO Peers SET Hostname=\'' . myesc($link, $inst) . '\', FirstCheckTS=' . $now;
            if ($responded) $query .= ', LastOkCheckTS=' . $now;
            myq($link, $query);
        } else {
            if ($cres > 1) lecho(2, $rp . $who . ' has ' . $cres . ' records in “Peers” table! :-(' . N);
            if ($responded) {
                lecho(1, $rp . $who . ' is already present in “Peers” table, but it responded: updating its record’ s “LastOkCheckTS” value...' . N);
                $row = mysqli_fetch_assoc($res);
                myq($link, 'UPDATE Peers SET LastOkCheckTS=' . $now . ' WHERE ID=' . $row['ID']);
            }
        }
        $ckinsts[] = $inst;
        $ckset[$inst] = true;
        $now = time();
        $rtela = $now - $rtini;
        lecho(1, $rp . 'finished working on ' . $who . ' (' . $i . '/' . $clist . '); RoundElapsedTime: ' . ght($rtela, null, 0) . '; RoundEstimatedTimeRemaining: ' . ght($rtela / $i * $clist - $rtela, null, 0) . '; RoundNewInsts: ' . $newc . '; NextRoundInsts: ' . count($nlist) . '; TotElapsedTime: ' . ght($now - $tini, null, 0) . '; TotConsideredInsts: ' . count($ckinsts) . '; TotRespondingInsts: ' . count($insts) . '; TotNewInsts: ' . $totnewc . '; using ' . ghs(memory_get_usage(true)) . ' mem. (peak: ' . ghs(memory_get_peak_usage(true)) . ').' . N);
    }
    // Release this round's data before recursing. unset($list) would only drop
    // the local reference and leave the caller's array alive, so the list is
    // emptied through the reference instead.
    $list = [];
    unset($listset, $ckset, $deadset, $nset);
    $now = time();
    $rtela = $now - $rtini;
    $cnlist = count($nlist);
    lecho(1, 'END OF ROUND STATS: RoundCheckedInsts: ' . $clist . '; RoundElapsedTime: ' . ght($rtela, null, 0) . '; RoundNewInsts: ' . $newc . '; NextRoundInsts: ' . $cnlist . '; TotElapsedTime: ' . ght($now - $tini, null, 0) . '; TotConsideredInsts: ' . count($ckinsts) . '; TotRespondingInsts: ' . count($insts) . '; TotNewInsts: ' . $totnewc . '; using ' . ghs(memory_get_usage(true)) . ' mem. (peak: ' . ghs(memory_get_peak_usage(true)) . ').' . N);
    if ($cnlist < 1) lecho(1, 'next round list is empty.' . N);
    lecho(1, '###### END OF ROUND ' . $id . ' ######' . N);
    if ($cnlist > 0) {
        crawl($nlist, $id + 1);
        if ($id + 1 > $maxround) $maxround = $id + 1;
    }
}
2022-12-25 11:32:08 +01:00
/**
 * Logs a final message and terminates the program.
 *
 * Before exiting, the lists collected so far are saved (when they exist), so
 * that an aborted crawl still leaves usable output files.
 *
 * @param int    $lev     importance level of $msg (index into $msglevs)
 * @param string $msg     message to log, terminator included
 * @param int    $code    process exit code
 * @param bool   $remlock whether to remove the lockfile (false while the
 *                        lockfile is not ours or does not exist yet)
 */
function mexit($lev, $msg, $code, $remlock) {
    global $insts, $ckinsts, $lockfp, $opts;
    if (isset($insts) && is_array($insts))
        sortcheckandsave($insts, 'list of responding instances', $opts['peersfp']);
    if (isset($ckinsts) && is_array($ckinsts))
        sortcheckandsave($ckinsts, 'list of checked instances', $opts['ckpeersfp']);
    if ($remlock && isset($lockfp) && is_file($lockfp))
        unlink($lockfp);
    lecho($lev, $msg);
    exit($code);
}
2022-12-25 11:32:08 +01:00
/**
 * Writes a timestamped, level-tagged message to the text user interface.
 *
 * Messages below the configured minimum level are dropped before any
 * formatting work is done. «Debug» and «Info» go to standard output; higher
 * levels go to standard error.
 *
 * @param int    $lev importance level (index into $msglevs)
 * @param string $msg message text, terminator included
 */
function lecho($lev, $msg) {
    global $opts, $msglevs;
    if ($lev < $opts['minmsgimplev'])
        return;
    $line = microdate() . ' ' . $msglevs[$lev] . ': ' . $msg;
    if ($lev < 2)
        echo $line;
    else
        fwrite(STDERR, $line);
}
2022-12-18 18:42:11 +01:00
/**
 * Runs a query and terminates the program (exit code 2) if it fails.
 *
 * @param mysqli $link  open database connection
 * @param string $query SQL statement to execute
 * @return mixed whatever mysqli_query() returned on success
 */
function myq(&$link, $query) {
    try {
        $res = mysqli_query($link, $query);
    } catch (Exception $error) {
        mexit(3, 'query «' . $query . '» failed: ' . $error->getMessage() . ' (' . $error->getCode() . ').' . N, 2, true);
    }
    // for php versions < 8, which seem to not catch mysql exceptions
    if ($res === false)
        mexit(3, 'query «' . $query . '» failed: ' . mysqli_error($link) . ' (' . mysqli_errno($link) . ').' . N, 2, true);
    return $res;
}
2022-07-13 12:45:57 +02:00
/**
 * Formats a microtime() string as «Y-m-d H:i:s.uuuuuu» in the default timezone.
 *
 * @param string|null $time a «0.xxxxxxxx seconds» string as returned by
 *                          microtime(false); null means “now”
 * @return string formatted date with six fractional digits
 */
function microdate($time = null) {
    if (is_null($time))
        $time = microtime(false);
    $parts = explode(' ', $time);
    // $parts[0] is «0.xxxxxxxx»: skip the leading «0.» and drop the last two digits
    return date('Y-m-d H:i:s', (int) $parts[1]) . '.' . substr($parts[0], 2, -2);
}
2020-10-13 08:21:26 +02:00
/**
 * De-duplicates and sorts a list in place, then writes it to a file, one
 * entry per line.
 *
 * Failures are logged as warnings and never abort the program: this function
 * is also called while the program is already exiting.
 *
 * @param array  $arr     list to save; modified in place (unique, sorted)
 * @param string $arrdesc human-readable description used in log messages
 * @param string $fp      path of the file to write
 */
function sortcheckandsave(&$arr, $arrdesc, &$fp) {
    $before = count($arr);
    $arr = array_unique($arr);
    if ($before != count($arr))
        lecho(2, $arrdesc . ' contained duplicates, better check the code ;-)' . N);
    lecho(1, 'saving ordered ' . $arrdesc . ' into «' . $fp . '».' . N);
    sort($arr);
    $f = @fopen($fp, 'w');
    if ($f === false) {
        lecho(2, 'couldn’ t open «' . $fp . '» for writing.' . N);
        return;
    }
    // stop at the first failed write (e.g. disk full) and report it instead of silently truncating the file
    $failed = false;
    foreach ($arr as $val) {
        if (fwrite($f, $val . N) === false) {
            $failed = true;
            break;
        }
    }
    fclose($f);
    if ($failed)
        lecho(2, 'could not write to «' . $fp . '».' . N);
}
2022-12-22 15:05:55 +01:00
/**
 * Signal handler: saves what has been collected so far, removes the lockfile
 * and exits with code 0 (all of it done by mexit()).
 *
 * @param int $signal number of the received signal
 */
function sighandler($signal) {
    // start on a fresh line in case the terminal echoed something like «^C»
    echo N;
    mexit(1, 'interrupted (signal: ' . $signal . ').' . N, 0, true);
}
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $val string to test
 * @return bool true when $val contains nothing but whitespace
 */
function isempty($val) {
    return preg_match('/^\s*$/', $val) === 1;
}
2023-12-29 13:02:40 +01:00
/**
 * Blocks until a TCP connection to a well-known host succeeds.
 *
 * Connectivity is probed by opening port 80 on www.google.com with a
 * one-second timeout; on failure the probe is retried every 10 seconds.
 *
 * @param int $roundid current round number, only used in log messages
 */
function waituntilonline($roundid) {
    $host = 'www.google.com';
    $gotoff = false;
    while (($f = @fsockopen($host, 80, $errno, $errstr, 1)) === false) {
        $gotoff = true;
        lecho(2, 'round ' . $roundid . ': it seems we are offline, waiting 10 seconds before retrying...' . N);
        sleep(10);
    }
    fclose($f);
    // only announce being back online if we were offline at some point
    if ($gotoff)
        lecho(1, 'round ' . $roundid . ': it seems we are back online! :-)' . N);
}
2023-12-29 13:02:40 +01:00
/**
 * Reloads the exclusion regexes from the exclude file into the global $exarr.
 *
 * Does nothing when no exclude file is configured. When the file cannot be
 * opened the previously loaded rules are kept. Empty lines are ignored and
 * invalid regexes are reported and skipped.
 *
 * @param int $roundid current round number, only used in log messages
 */
function updexarr($roundid) {
    global $exarr, $opts;
    if (is_null($opts['excludefp']))
        return;
    $f = @fopen($opts['excludefp'], 'r');
    if ($f === false) {
        lecho(2, 'round ' . $roundid . ': could not open exclude file «' . $opts['excludefp'] . '» for reading.' . N);
        return;
    }
    $exarr = [];
    $i = 0;
    while (!feof($f)) {
        $raw = fgets($f);
        // false means end of file or a read error: stop instead of spinning on an unreadable stream
        if ($raw === false)
            break;
        $i++;
        $line = trim($raw);
        if (isempty($line))
            continue;
        // a pattern is valid when PCRE can compile it; the subject is irrelevant
        if (@preg_match($line, 'foo') !== false)
            $exarr[] = $line;
        else
            lecho(2, 'round ' . $roundid . ': exclude file «' . $opts['excludefp'] . '» contains an invalid regular expression on line ' . $i . ': «' . $line . '».' . N);
    }
    // this function runs once per crawled instance: release the handle explicitly
    fclose($f);
}
/**
 * Tells whether a hostname matches any of the exclusion regexes currently
 * held in the global $exarr.
 *
 * @param string $inst hostname to test
 * @return bool true when at least one regex matches
 */
function ckexarr($inst) {
    global $exarr;
    $excluded = false;
    foreach ($exarr as $pattern) {
        if (preg_match($pattern, $inst) === 1) {
            $excluded = true;
            break;
        }
    }
    return $excluded;
}
2022-11-11 21:57:30 +01:00
/**
 * Tells whether a string is longer in bytes than the number of characters
 * matched by «.» in UTF-8 mode.
 *
 * @param string $s string to test
 * @return bool true when the byte length exceeds the matched character count
 */
function ismultibyte($s) {
    // only the replacement count matters; the replaced string is discarded
    preg_replace('/./u', '.', $s, -1, $chars);
    return strlen($s) > $chars;
}
2022-12-18 18:42:11 +01:00
/**
 * Tells whether a string is a syntactically valid hostname.
 *
 * Hostnames flagged by ismultibyte() are first converted to their ASCII form
 * with idn_to_ascii(). The (converted) name must be at most 253 bytes long
 * and every dot-separated label must be 1 to 63 bytes long, made only of
 * ASCII letters, digits and hyphens, and must not start or end with a hyphen.
 *
 * Patterns are anchored with «\z»: «$» would also match before a trailing
 * newline and let a name such as "example.com\n" through.
 *
 * @param string $hostname hostname to validate
 * @return bool true when the hostname is valid
 */
function validhostname($hostname) {
    if (ismultibyte($hostname)) {
        $hostname = idn_to_ascii($hostname, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
        // conversion failure means the name cannot be a valid hostname
        if ($hostname === false)
            return false;
    }
    if (strlen($hostname) > 253)
        return false;
    foreach (explode('.', $hostname) as $label) {
        $len = strlen($label);
        if ($len < 1 || $len > 63)
            return false;
        if ($label[0] === '-' || $label[$len - 1] === '-')
            return false;
        if (preg_match('#^[a-zA-Z0-9-]+\z#', $label) !== 1)
            return false;
    }
    return true;
}
//$url='www.team.starschlep.com/'; if (validhostname($url)) echo('OK: '.$url.N); else echo('KO: '.$url.N); die();
?>