diff --git a/web/clitools/crawler.php b/web/clitools/crawler.php
index eb358af..e90b6ed 100755
--- a/web/clitools/crawler.php
+++ b/web/clitools/crawler.php
@@ -16,1272 +16,386 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-const N="\n";
+define('N',"\n");
+define('SNAME',basename(__FILE__));
+define('FNAME',preg_replace('/\.[^.]*$/','',SNAME));
+define('CHILD','getinstinfo.php');
+define('LIBDP','/../site/mustard/include');
-require(__DIR__.'/../site/mustard/include/gurl.php');
+require(__DIR__.LIBDP.'/ght.php');
-require(__DIR__.'/lib/vendor/autoload.php');
-use LanguageDetection\Language;
-
-(strtoupper(substr(PHP_OS,0,3))==='WIN') ? $iswin=true : $iswin=false;
-
-function eecho($lev,$msg) {
- $time=microtime(false);
- $time=explode(' ',$time);
- $time=date('Y-m-d H:i:s',$time[1]).'.'.substr($time[0],2);
- $levs=['Debug', 'Info', 'Warning', 'Error'];
- $msg=$time.' '.$levs[$lev].': '.$msg;
- if ($lev<2)
- echo($msg);
- else
- fwrite(STDERR,$msg);
-}
-
-function mexit($msg,$code) {
- global $link, $jsonf, $lockfp;
- if (isset($link)) mysqli_close($link);
- if (isset($jsonf)) fclose($jsonf);
- if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
- if ($code!=0)
- eecho(3,$msg);
- else
- eecho(1,$msg);
- exit($code);
-}
+use function mysqli_real_escape_string as myesc;
declare(ticks=1);
if (function_exists('pcntl_signal')) {
function signalHandler($signal) {
echo(N);
- mexit('received signal «'.$signal.'», shutting down.'.N,0);
+ mexit('received signal «'.$signal.'», shutting down.'.N,0,true);
}
pcntl_signal(SIGTERM,'signalHandler');// Termination ('kill' was called)
pcntl_signal(SIGHUP,'signalHandler');// Terminal log-out
pcntl_signal(SIGINT,'signalHandler');// Interrupted (Ctrl-C is pressed)
}
+$msglevs=['debug', 'info', 'warning', 'error', 'none'];
+
$opts=[
- 'timeout'=>10,
- 'deadline'=>60*24*60*60,// if an instance has not been responding for more than this value of seconds (currently 60 days), declare it dead
- 'oldline'=>30*24*60*60,// if an instance has been new for a period longer than this amount (currently 30 days), it's no longer new
- 'ldtoots'=>40,// number of toots to check with the automatic language detection function
- 'setnew'=>true,
- 'dryrun'=>false,
- 'jsonfp'=>__DIR__.'/instances.json',
- 'jsonwrite'=>false,
+ 'poolsize'=>20,
+ 'moreclauses'=>'',
'peersfp'=>null,
'dontrestore'=>false,
'ignorelock'=>false,
- 'fetchusers'=>false,
- 'moreclauses'=>''
+ 'logminmsglev'=>1,
+ 'tuiminmsglev'=>1
];
-$help='crawler.php
- DESCRIPTION
- This script updates mastostart’s database with the data it manages to
- retrieve from instances already present in the database plus (optionally)
- those listed in a specifiable file (typically the output file from a
- peerscrawl.php run).
- SYNOPSIS
- crawler.php [options]
- OPTIONS
- -p, --peersfp
- Sets a file containing a list of instances to consider in addition to those
- which are already present in the database.
- Note that this option is ignored if the script will recover a previous
- unfinished session.
- -f, --fetchusers
- *Currently experimental*: if this option is set, the script will try and
- fetch users’ profiles infos from each considered instance’s user directory
- and store them in the database.
- -t, --timeout
- Sets the timeout in seconds for every connection attempt.
- DEFAULT: «'.$opts['timeout'].'»
- -N, --dontsetnew
- If this option is set, the script won’t mark new instances as new. This can
- be useful for a first run.
- -I, --ignorelock
- Normally, if its lockfile exists, the script will exit with an error.
- If this option is set, the lockfile existence will be ignored.
- Warning: check that the script is actually not running yet before using
- this option.
- -R, --dontrestore
- If this option is set and «instances.job» and «currinst.job» files from
- a previous unfinished session are present, the script will ignore them
- and start a new session.
- -d, --dryrun
- If this option is set, the script won’t write anything in the database.
- -j, --jsonwrite
- If this option is set, the script will write an «instances.json» file
- containing all the data it could retrieve from every considered instance.
- -m, --moreclauses
- If this option is set, whatever one writes as argument to the option will
- be added to the main query for instances’ records, which is
- «SELECT URI FROM Instances WHERE Dead=0», so one can limit the crawl more.
- -h, --help
- If this option is set, the script will show this help text and exit.
+$ghtsa=[[' day',' days'],[' hour',' hours'],[' minute',' minutes'],[' second',' seconds']];
- This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
- This is free software, and you are welcome to redistribute it under
- certain conditions; see for details.'.N;
+$help='SYNOPSIS
+
+ '.SNAME.' [options]
+
+DESCRIPTION
+
+ This script coordinates the parallel execution of a definable number of
+ '.CHILD.' processes “against” all the alive instances which are already
+ present in mastostart’s database, plus optionally those listed in a
+ specifiable file (typically the output file from a peerscrawl.php run).
+
+OPTIONS
+ -
+ Everything after a single dash will be passed to '.CHILD.' processes as is.
+ -p, --peersfp
+ Defines the path to a file containing a list of instances to consider in
+ addition to those which are already present in the database. Note that this
+ option is ignored if the script will restore a previous unfinished session.
+ -P, --poolsize
+ The number of slots in the processes pool, that is the number of '.CHILD.'
+ processes the script will run in parallel. Note that this option is ignored
+ if the script will restore a previous unfinished session.
+ DEFAULT: '.$opts['poolsize'].'
+ -I, --ignorelock
+ Normally, if its lockfile exists, the script will exit with an error.
+ If this option is set, instead, the lockfile existence will be ignored.
+ Please check that the script is actually not running before using it.
+ -R, --dontrestore
+ If this option is set and «instances.job» and «status.job» files from
+ a previous unfinished session are present in the «run» subdirectory inside
+ the directory where the script resides, the script will ignore them and
+ start a new session; otherwise the script will restore the previous,
+ unfinished session.
+ -m, --moreclauses
+ If this option is set, whatever one writes as argument to the option will
+ be added to the main query for instances’ records, which is «SELECT URI FROM
+ Instances WHERE Dead=0».
+ -L, --logminmsglev <«debug»|«info»|«warning»|«error»|«none»>
+ Defines the minimum “importance level” of messages to be written into the
+ log file «run/[instance hostname].log». There are 4 “importance levels”, in
+ this order of importance: «debug», «info», «warning», «error».
+ Setting this option to any of these values will write into the logfile all
+ the messages with the specified or a greater level; setting it to the
+ special value «none» will completely disable logging to file.
+ DEFAULT: '.$msglevs[$opts['logminmsglev']].'
+ -T, --tuiminmsglev <«debug»|«info»|«warning»|«error»|«none»>
+ Defines the minimum “importance level” of messages to be written to the
+ terminal. See the option above to understand how this works.
+ DEFAULT: '.$msglevs[$opts['tuiminmsglev']].'
+ -h, --help
+ When this option is specified, the script will show this help text and exit.
+
+LICENSE
+
+ This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
+ This is free software, and you are welcome to redistribute it under certain
+ conditions; see for details.'.N;
+
+$childopts='';
+// Command-line parsing. A lone «-» ends this script's own options: every
+// argument following it is collected into $childopts for the child processes.
for ($i=1; $i<$argc; $i++) {
- if (substr($argv[$i],0,1)=='-') {
- switch($argv[$i]) {
- case '-p':
- case '--peersfp':
- if ($i+1>=$argc || !file_exists($argv[$i+1]) || !is_file($argv[$i+1]) || !is_readable($argv[$i+1]))
- mexit('option «'.$argv[$i].'» requires an existing and readable file as an argument (use «-h» to read help).'.N,1);
+ if ($argv[$i]=='-') {
+ if ($i<$argc-1) {
$i++;
- $opts['peersfp']=$argv[$i];
- break;
- case '-f':
- case '--fetchusers':
- $opts['fetchusers']=true;
- break;
- case '-t':
- case '--timeout':
- if ($i+1>=$argc || preg_match('/^[0-9]+$/',$argv[$i+1])!==1)
- mexit('option «'.$argv[$i].'» requires a numeric argument (use «-h» to read help).'.N,1);
- $i++;
- $opts['timeout']=$argv[$i]+0;
- break;
- case '-N':
- case '--dontsetnew':
- $opts['setnew']=false;
- break;
- case '-R':
- case '--dontrestore':
- $opts['dontrestore']=true;
- break;
- case '-I':
- case '--ignorelock':
- $opts['ignorelock']=true;
- break;
- case '-d':
- case '--dryrun':
- $opts['dryrun']=true;
- break;
- case '-j':
- case '--jsonwrite':
- $opts['jsonwrite']=true;
- break;
- case '-m':
- case '--moreclauses':
- if ($i+1>=$argc)
- mexit('option «'.$argv[$i].'» requires some SQL clause as argument (use «-h» to read help).'.N,1);
- $i++;
- $opts['moreclauses']=$argv[$i];
- break;
- case '-h':
- case '--help':
- echo($help);
- exit(0);
- break;
- default:
- mexit('option «'.$argv[$i].'» is unknown (use «-h» to read help).'.N,1);
- break;
+ while ($i<$argc) {
+ // $childopts ends up inside a shell command line: quote every forwarded
+ // argument so that spaces and shell metacharacters reach the child unchanged
+ $childopts.=' '.escapeshellarg($argv[$i]);
+ $i++;
+ }
+ } else {
+ eecho(2,'you have specified «-» as last argument...'.N);
}
+ } elseif ($argv[$i]=='-p' || $argv[$i]=='--peersfp') {
+ if ($i+1>=$argc || !file_exists($argv[$i+1]) || !is_file($argv[$i+1]) || !is_readable($argv[$i+1]))
+ mexit('option «'.$argv[$i].'» requires an existing and readable file as an argument (use «-h» to read help).'.N,1,false);
+ $i++;
+ $opts['peersfp']=$argv[$i];
+ } elseif ($argv[$i]=='-P' || $argv[$i]=='--poolsize') {
+ // the pattern must be anchored, otherwise values such as «abc1» or «1x» pass the check
+ if ($i+1>=$argc || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0<1)
+ mexit('option «'.$argv[$i].'» requires an integer number greater than 0 as an argument (use «-h» to read help).'.N,1,false);
+ $i++;
+ $opts['poolsize']=$argv[$i]+0;
+ } elseif ($argv[$i]=='-R' || $argv[$i]=='--dontrestore') {
+ $opts['dontrestore']=true;
+ } elseif ($argv[$i]=='-I' || $argv[$i]=='--ignorelock') {
+ $opts['ignorelock']=true;
+ } elseif ($argv[$i]=='-m' || $argv[$i]=='--moreclauses') {
+ if ($i+1>=$argc)
+ mexit('option «'.$argv[$i].'» requires some SQL clause as argument (use «-h» to read help).'.N,1,false);
+ $i++;
+ $opts['moreclauses']=$argv[$i];
+ } elseif ($argv[$i]=='-L' || $argv[$i]=='--logminmsglev') {
+ // mexit() takes three arguments; no lock file has been created yet, so there is nothing to remove
+ if ($i+1>=$argc || !in_array(strtolower($argv[$i+1]),$msglevs))
+ mexit('option «'.$argv[$i].'» requires a “log level” value as an argument (use «-h» to read help).'.N,1,false);
+ $i++;
+ $opts['logminmsglev']=array_search(strtolower($argv[$i]),$msglevs);
+ } elseif ($argv[$i]=='-T' || $argv[$i]=='--tuiminmsglev') {
+ if ($i+1>=$argc || !in_array(strtolower($argv[$i+1]),$msglevs))
+ mexit('option «'.$argv[$i].'» requires a “log level” value as an argument (use «-h» to read help).'.N,1,false);
+ $i++;
+ $opts['tuiminmsglev']=array_search(strtolower($argv[$i]),$msglevs);
+ } elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
+ echo($help);
+ exit(0);
+ } else {
+ mexit('don’t know how to interpret «'.$argv[$i].'» (you can read the help text using «-h» or «--help»).'.N,1,false);
}
}
-use function mysqli_real_escape_string as myesc;
+// Level names are matched in lowercase while parsing the options; from here on
+// they are only used as message prefixes, so capitalise them.
+foreach ($msglevs as $key=>$val) $msglevs[$key]=ucfirst($val);
+
+// The «run» directory holds the lock file, the job files and the log file, so
+// it has to exist (and be usable) before the lock file can be created in it.
+$rundirpath=__DIR__.'/run';
+if (!file_exists($rundirpath)) {
+	if (@mkdir($rundirpath)===false)
+		mexit('could not create directory «'.$rundirpath.'».'.N,1,false);
+} elseif (!is_dir($rundirpath)) {
+	mexit('«'.$rundirpath.'» is not a directory.'.N,1,false);
+} elseif (!is_readable($rundirpath) || !is_writeable($rundirpath)) {
+	mexit('«'.$rundirpath.'» is not readable and writeable.'.N,1,false);
+}
+
+// Refuse to start when a lock file exists, unless told to ignore it.
+$lockfp=$rundirpath.'/'.FNAME.'.lock';
+if (file_exists($lockfp) && !$opts['ignorelock']) {
+	eecho(3,'lock file «'.$lockfp.'» exists (if you are sure '.SNAME.' is not already running you can use option «-I» to force execution).'.N);
+	exit(1);
+}
+if (@touch($lockfp)===false) {
+	eecho(3,'could not touch file «'.$lockfp.'».'.N);
+	exit(1);
+}
+
+$instsjfp=$rundirpath.'/instances.job';
+$statusjfp=$rundirpath.'/status.job';
+
+// A previous session is restored only when both job files are present and
+// restoring has not been disabled on the command line.
+$restore=(!$opts['dontrestore'] && file_exists($instsjfp) && file_exists($statusjfp));
+
+// Append to the existing log when restoring, start a fresh one otherwise.
+$logfp=$rundirpath.'/'.FNAME.'.log';
+$mode=($restore) ? 'a' : 'w';
+$logf=fopen($logfp,$mode);
+if ($logf===false) mexit('could not open log file «'.$logfp.'» for writing.'.N,1,true);
+
+if ($restore)
+	eecho(1,'--- restarting ---'.N);
+else
+	eecho(1,'--- starting ---'.N);
+
+// Command line shared by all the child processes; the host name is appended
+// when each process is spawned.
+$cmd=__DIR__.'/'.CHILD.$childopts;
+eecho(1,'base command: «'.$cmd.'».'.N);
+
+if ($restore) {
+	// Resume an interrupted session: «instances.job» holds the host list, one
+	// per line; «status.job» holds a tab-separated header line (pool size,
+	// index of the last started host, elapsed seconds, finished count)
+	// followed by one line per process that was running, each containing the
+	// index of its host in the host list.
+	eecho(0,'looks like previous session was interrupted, trying to restore it...'.N);
+	$insts=@file($instsjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
+	if ($insts===false) mexit('could not open file «'.$instsjfp.'» for reading.'.N,1,true);
+	$cinsts=count($insts);
+	eecho(1,'loaded '.$cinsts.' hostnames from previous session file.'.N);
+	$buf=@file($statusjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
+	if ($buf===false) mexit('could not open file «'.$statusjfp.'» for reading.'.N,1,true);
+	if (count($buf)<2) mexit('file «'.$statusjfp.'»: wrong format (1).'.N,1,true);
+	$buf[0]=explode("\t",$buf[0]);
+	if (count($buf[0])!=4 ||
+		preg_match('/^\d+$/',$buf[0][0])!==1 ||
+		preg_match('/^\d+$/',$buf[0][1])!==1 ||
+		preg_match('/^\d+(\.\d+)?$/',$buf[0][2])!==1 ||
+		preg_match('/^\d+$/',$buf[0][3])!==1)
+		mexit('file «'.$statusjfp.'»: wrong format (2).'.N,1,true);
+	$opts['poolsize']=$buf[0][0]+0;
+	$instk=$buf[0][1]+0;
+	$toff=$buf[0][2]+0;
+	$done=$buf[0][3]+0;
+	// Start again every process that was running when the session stopped.
+	$procs=[];
+	for ($i=1; $i<count($buf); $i++) {
+		// each line must be an index pointing into the restored host list
+		if (preg_match('/^\d+$/',$buf[$i])!==1 || !isset($insts[$buf[$i]+0]))
+			mexit('file «'.$statusjfp.'»: wrong format (3).'.N,1,true);
+		$host=$insts[$buf[$i]+0];
+		// «>/dev/null 2>&1» instead of «&>/dev/null»: the latter is a bash
+		// extension that a POSIX sh reads as «run in background», which would
+		// make every process look finished immediately
+		$procs[]=['proc'=>proc_open($cmd.' '.escapeshellarg($host).' >/dev/null 2>&1',[],$pipes[]), 'instk'=>$buf[$i]+0, 'host'=>$host, 'begts'=>microtime(true)];
+	}
+	eecho(1,'restored previous session.'.N);
+} else {
+
+	// New session: build the host list from the database (plus, optionally,
+	// a peers file), save it and fill the processes pool.
+	$inifp=__DIR__.'/../conf/mustard.ini';
+	$iniarr=@parse_ini_file($inifp);
+	if ($iniarr===false) mexit('could not open config file «'.$inifp.'»'.N,1,true);
+
+	try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
+	catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
+	// for php versions < 8
+	if ($link===false) mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
+	try { $res=mysqli_set_charset($link,'utf8mb4'); }
+	catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).'.'.N,1,true); }
+	// for php versions < 8
+	if ($res===false) mexit('could not set MySQL charset: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,1,true);
+
+	// $known mirrors $insts as a set, so membership tests do not scan the list
+	$insts=[];
+	$known=[];
+	eecho(0,'loading known, alive instances from the database...'.N);
+	// the blank keeps user supplied clauses from being glued to «Dead=0»
+	$res=myq($link,'SELECT URI FROM Instances WHERE Dead=0 '.$opts['moreclauses'],__LINE__);
+	while ($row=mysqli_fetch_assoc($res)) {
+		if (!isset($known[$row['URI']])) {
+			$known[$row['URI']]=true;
+			$insts[]=$row['URI'];
+		}
+	}
+	eecho(1,'loaded '.count($insts).' known, alive instances from the database.'.N);
+
+	// Dead instances are needed only to filter the peers file, and they have
+	// to be read while the database connection is still open.
+	$deadinsts=[];
+	if (!is_null($opts['peersfp'])) {
+		eecho(0,'loading dead instances from the database...'.N);
+		$res=myq($link,'SELECT URI FROM Instances WHERE Dead=1',__LINE__);
+		while ($row=mysqli_fetch_assoc($res))
+			$deadinsts[$row['URI']]=true;
+		eecho(1,'loaded '.count($deadinsts).' dead instances from the database.'.N);
+	}
+
+	mysqli_close($link);
+	unset($link);
+
+	if (!is_null($opts['peersfp'])) {
+		eecho(0,'loading instances from «'.$opts['peersfp'].'»...'.N);
+		$peers=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
+		if ($peers===false) mexit('could not open «'.$opts['peersfp'].'» for reading.'.N,1,true);
+		$i=0;
+		foreach ($peers as $pdom) {
+			if (isset($known[$pdom])) continue;
+			if (isset($deadinsts[$pdom])) {
+				eecho(1,'ignoring instance «'.$pdom.'» from peers file because it’s dead.'.N);
+				continue;
+			}
+			$i++;
+			$known[$pdom]=true;
+			$insts[]=$pdom;
+		}
+		eecho(1,'loaded '.$i.' more instances from «'.$opts['peersfp'].'».'.N);
+	}
+
+	unset($deadinsts,$known);
+	shuffle($insts);
+	$cinsts=count($insts);
+	eecho(1,$cinsts.' instances to be checked.'.N);
+
+	// Save the (shuffled) host list, so that the session can be restored.
+	$instsf=@fopen($instsjfp,'w');
+	if ($instsf===false) mexit('could not open «'.$instsjfp.'» for writing.'.N,1,true);
+	foreach ($insts as $host) fwrite($instsf,$host.N);
+	fclose($instsf);
+
+	$toff=0;
+	$done=0;
+	$procs=[];
+
+	// Fill the pool with the first hosts of the list.
+	for ($instk=0; $instk<$opts['poolsize'] && $instk<$cinsts; $instk++) {
+		$host=$insts[$instk];
+		eecho(1,'bootstrapping processes pool, adding host «'.$host.'».'.N);
+		// «>/dev/null 2>&1» instead of «&>/dev/null»: the latter is a bash
+		// extension that a POSIX sh reads as «run in background»
+		$procs[]=['proc'=>proc_open($cmd.' '.escapeshellarg($host).' >/dev/null 2>&1',[],$pipes[]), 'instk'=>$instk, 'host'=>$host, 'begts'=>microtime(true)];
+	}
+	// the loop leaves $instk one past the last started host
+	$instk--;
+
+}
+
+// Main loop: once per second check every slot of the pool, replace finished
+// processes with new ones until the host list is exhausted, then wait for
+// the last ones to finish.
+$tini=microtime(true);
+$rundone=false;
+do {
+	$now=microtime(true);
+	// time elapsed in this run plus the time carried over from a restored session
+	$eta=$now-$tini+$toff;
+	eecho(0,'[[[ CHECKING PROCESSES POOL ]]]'.N);
+	$somerun=false;
+	foreach ($procs as $key=>$proc) {
+		if (is_null($proc)) continue;
+		// proc_open() returns false on failure: treat that slot as already
+		// finished instead of handing false to proc_get_status()
+		if (is_resource($proc['proc']))
+			$pstat=proc_get_status($proc['proc']);
+		else
+			$pstat=['running'=>false, 'exitcode'=>-1];
+		if ($pstat['running']) {
+			eecho(0,'proc slot '.$key.': been running on «'.$proc['host'].'» for '.ght($now-$proc['begts']).'.'.N);
+			$somerun=true;
+			continue;
+		}
+		$done++;
+		$out='proc slot '.$key.': finished running on «'.$proc['host'].'» (exit code: '.$pstat['exitcode'].')';
+		if ($instk<$cinsts-1) {
+			$instk++;
+			$host=$insts[$instk];
+			// «>/dev/null 2>&1» instead of «&>/dev/null»: the latter is a bash
+			// extension that a POSIX sh reads as «run in background»
+			$procs[$key]=['proc'=>proc_open($cmd.' '.escapeshellarg($host).' >/dev/null 2>&1',[],$pipes[$key]), 'instk'=>$instk, 'host'=>$host, 'begts'=>$now];
+			$out.='; started a new process on «'.$host.'».'.N;
+		} else {
+			$out.='; no more hosts to check.'.N;
+			$procs[$key]=null;
+		}
+		eecho(1,$out);
+	}
+	// with an empty host list there is nothing to divide by
+	$perc=($cinsts>0) ? round(100/$cinsts*$done) : 100;
+	$out=$done.'/'.$cinsts.' ('.$perc.'%); elapsed time: '.ght($eta);
+	if ($done>0) $out.='; estimated time remaining: '.ght($cinsts*$eta/$done-$eta);
+	eecho(1,$out.'.'.N);
+	if ($somerun) {
+		writestatus($statusjfp,$opts,$instk,$eta,$done,$procs);
+		sleep(1);
+	} else {
+		$rundone=true;
+	}
+} while (!$rundone);
+
+// Session completed: remove the job files and the lock file.
+unlink($instsjfp);
+// the status file is written only while some process is running, so it may not exist
+if (file_exists($statusjfp)) unlink($statusjfp);
+unlink($lockfp);
+eecho(1,'done :-)'.N);
+// NOTE(review): the log file is deleted on success, so it survives only for
+// sessions that did not complete; confirm this is intended
+fclose($logf);
+unlink($logfp);
+exit(0);
+
+
+// functions
+
+/**
+ * Saves the state of the session into the status file.
+ *
+ * The first line holds, separated by tabs, the pool size, the index of the
+ * last started host, the elapsed seconds and the number of finished hosts;
+ * each following line holds the host index of a process still in the pool.
+ * The data is written to a temporary file that is then renamed, so that an
+ * interruption never leaves a truncated status file behind.
+ * Calls mexit() with code 2 if the file cannot be written.
+ *
+ * @param string $statusjfp Path of the status file
+ * @param array $opts Options array («poolsize» is read)
+ * @param int $instk Index of the last started host
+ * @param float $eta Elapsed seconds
+ * @param int $done Number of finished hosts
+ * @param array $procs Processes pool; null entries are skipped
+ */
+function writestatus(&$statusjfp,&$opts,&$instk,&$eta,&$done,&$procs) {
+	$tmpfp=$statusjfp.'.tmp';
+	$f=@fopen($tmpfp,'w');
+	if ($f===false) mexit('could not open «'.$tmpfp.'» for writing.'.N,2,true);
+	// fixed-point notation: a very small float would otherwise be written in
+	// scientific notation («9.0E-6»), which is not accepted when restoring
+	fwrite($f,$opts['poolsize']."\t".$instk."\t".sprintf('%.6F',$eta)."\t".$done.N);
+	foreach ($procs as $proc)
+		if (!is_null($proc))
+			fwrite($f,$proc['instk'].N);
+	fclose($f);
+	if (@rename($tmpfp,$statusjfp)===false) mexit('could not rename «'.$tmpfp.'» to «'.$statusjfp.'».'.N,2,true);
+}
+
+/**
+ * Emits a timestamped message to the terminal and/or to the log file.
+ *
+ * The message is prefixed with the current date and time (including the
+ * fractional part of the second) and with the name of its level. It goes to
+ * the terminal when its level reaches «tuiminmsglev» (standard output for
+ * levels below 2, standard error otherwise) and to the log file when its
+ * level reaches «logminmsglev» and the log file is open.
+ *
+ * @param int $lev Message level, used as index into $msglevs
+ * @param string $msg Message text
+ */
+function eecho($lev,$msg) {
+	global $logf, $opts, $msglevs;
+	// microtime(false) returns «fraction seconds», e.g. «0.12345600 1700000000»
+	$mt=explode(' ',microtime(false));
+	$stamp=date('Y-m-d H:i:s',$mt[1]).'.'.substr($mt[0],2);
+	$line=$stamp.' '.$msglevs[$lev].': '.$msg;
+	if ($lev>=$opts['tuiminmsglev']) {
+		if ($lev>=2)
+			fwrite(STDERR,$line);
+		else
+			echo($line);
+	}
+	if ($lev<$opts['logminmsglev']) return;
+	if (isset($logf) && $logf!==false) fwrite($logf,$line);
+}
function myq(&$link,$query,$line) {
try {
$res=mysqli_query($link,$query);
}
catch (Exception $error) {
- mexit('query «'.$query.'» (line '.$line.') failed: '.$error->getMessage().N,3);
+ mexit('query «'.$query.'» (line '.$line.') failed: '.$error->getMessage().N,3,true);
}
- // for older php versions, which seem to not catch mysql exceptions
- if ($res===false) mexit('query «'.$query.'» (line '.$line.') failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,3);
+ // for older php versions < 8, which seem to not catch mysql exceptions
+ if ($res===false) mexit('query «'.$query.'» (line '.$line.') failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,3,true);
return($res);
}
-$lockfp=__DIR__.'/crawler.lock';
-if (file_exists($lockfp) && !$opts['ignorelock']) {
- eecho(3,'lock file «'.$lockfp.'» exists (if you are sure crawler.php is not already running you can use option «-I» to force execution).'.N);
- exit(1);
-}
-touch($lockfp);
-
-$inifp=__DIR__.'/../conf/mustard.ini';
-$iniarr=@parse_ini_file($inifp)
- or mexit('could not open config file «'.$inifp.'»'.N,1);
-$link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket'])
- or mexit('could not connect to MySQL server: '.mysqli_connect_error().N,1);
-mysqli_set_charset($link,'utf8mb4')
- or mexit('could not set «utf8mb4» charset fro MySQL: '.mysqli_error($link).N,1);
-
-require(__DIR__.'/../site/mustard/include/tables.php');
-$tables=tables($link);
-//print_r($tables);
-
-$recover=false;
-$instsjfp=__DIR__.'/instances.job';
-$currinstjfp=__DIR__.'/currinst.job';
-
-if (!$opts['dontrestore'] && file_exists($currinstjfp) && file_exists($instsjfp)) {
- eecho(0,'looks like previous session was interrupted, trying to recover it...'.N);
- $buf=@file($instsjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
- or mexit('could not open file «'.$instsjfp.'» for reading.'.N,1);
- $insts=array();
- foreach ($buf as $line)
- $insts[]=$line;
- $buf=@file($currinstjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
- or mexit('could not open file «'.$currinstjfp.'» for reading.'.N,1);
- $buf=explode("\t",$buf[0]);
- $currinst=array('dom'=>$buf[0], 'i'=>$buf[1], 'qok'=>$buf[2], 'qgood'=>$buf[3]);
- $recover=true;
- eecho(1,'recovered previous session.'.N);
-}
-
-function truncs($str,$tab,$col,$ctx) {
- global $tables, $iswin;
- if (is_null($str)) return(null);
- if ($iswin)
- $tab=strtolower($tab);
- $size=$tables[$tab][$col];
- $len=mb_strlen($str,'UTF-8');
- if ($len>$size) {
- $str=mb_substr($str,0,$size-1,'UTF-8').'…';
- notify($ctx.': had to truncate string to '.$size.' chars to be able to insert it into «'.$col.'» column in «'.$tab.'» table.',3);
- }
- return($str);
-}
-
-function truncn($num,$tab,$col,$ctx) {
- global $tables, $iswin;
- if ($iswin)
- $tab=strtolower($tab);
- if (is_numeric($num)) {
- if ($num>$tables[$tab][$col]['max']) {
- notify($ctx.': had to ceil «'.$num.'» to «'.$tables[$tab][$col]['max'].'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',3);
- $num=$tables[$tab][$col]['max'];
- } elseif ($num<$tables[$tab][$col]['min']) {
- notify($ctx.': had to floor «'.$num.'» to «'.$tables[$tab][$col]['min'].'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'»).',3);
- $num=$tables[$tab][$col]['min'];
- }
- } else {
- notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
- $num=0;
- }
- return($num);
-}
-
-/*$contextopts=array(
- 'http'=>array(
- 'timeout'=>$opts['timeout']
- ),
- 'socket'=>array(
- 'tcp_nodelay'=>true
- )
-);
-$context=stream_context_create($contextopts);*/
-
-function pgdatetomy($pgdate) {
- //2018-04-07T15:05:26.801Z
- if (preg_match('/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)(\.\d+)?Z?$/',$pgdate,$buf)===1) {
- $mtime=gmmktime($buf[4],$buf[5],$buf[6],$buf[2],$buf[3],$buf[1]);
- if (array_key_exists(7,$buf))
- $mtime=$mtime+floatval('0'.$buf[7]);
- return($mtime);
- } else {
- notify('Function «pgdatetomy»: «'.$pgdate.'» has not a recognized date format; returning current date.',3);
- return(time());
- }
-}
-
-if (!$recover) {
-
- $insts=array();
-
- $res=myq($link,'SELECT URI FROM Instances WHERE Dead=0'.$opts['moreclauses'],__LINE__);
- while($row=mysqli_fetch_assoc($res))
- if (!in_array($row['URI'],$insts))
- $insts[]=$row['URI'];
- eecho(1,'loaded known, alive instances from the database into the list of instances to be checked.'.N);
-
- $res=myq($link,'SELECT URI FROM Instances WHERE Dead=1',__LINE__);
- $deadinsts=array();
- while($row=mysqli_fetch_assoc($res))
- $deadinsts[]=$row['URI'];
- eecho(1,'loaded dead instances into the corresponding list.'.N);
-
- if (!is_null($opts['peersfp'])) {
- eecho(0,'loading other instances to be checked from «'.$opts['peersfp'].'».'.N);
- $peers=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
- if ($peers===false)
- mexit('could not open «'.$opts['peersfp'].'» for reading.'.N,1);
- foreach ($peers as $pdom) {
- if (!in_array($pdom,$insts))
- if (!in_array($pdom,$deadinsts))
- if (!willtrunc($pdom,'Instances','URI'))
- $insts[]=$pdom;
- else
- eecho(2,'ignoring instance «'.$pdom.'» because its hostname is too long for column «URI» of table «Instances».'.N);
- else
- eecho(1,'ignoring instance «'.$pdom.'» because it is dead.'.N);
- }
- }
-
- unset($deadinsts);
- sort($insts);
-// shuffle($insts);
- eecho(1,count($insts).' instances to be checked.'.N);
-
- $instsf=@fopen($instsjfp,'w')
- or mexit('could not open «'.$instsjfp.'» for writing.'.N,1);
- foreach ($insts as $host)
- fwrite($instsf,$host.N);
- fclose($instsf);
-}
-
-function willtrunc($str,$tab,$col) {
- global $tables, $iswin;
- if ($iswin)
- $tab=strtolower($tab);
- if (mb_strlen($str,'UTF-8')>$tables[$tab][$col])
- return(true);
+function mexit($msg,$code,$remlock) {
+ global $link, $logf, $lockfp;
+ if (isset($link) && $link!==false) mysqli_close($link);
+ if ($remlock && isset($lockfp) && is_file($lockfp)) unlink($lockfp);
+ if ($code!=0)
+ eecho(3,$msg);
else
- return(false);
-}
-
-function b2i($bool,$pre) {
- if (is_bool($bool)) {
- if ($bool)
- return(1);
- else
- return(0);
- } else {
- notify($pre.'«'.$bool.'» is not a boolean value, returning «0».',3);
- return(0);
- }
-}
-
-//is array, array key exists and value is not null
-function akeavinn($key,&$arr) {
- if (is_array($arr) && array_key_exists($key,$arr) && !is_null($arr[$key]))
- return(true);
- else
- return(false);
-}
-
-function nempty($str) {
- if (preg_match('/^\s*$/',$str)===1)
- return(null);
- else
- return($str);
-}
-
-function subarimp($glue,$key,&$arr) {
- $str='';
- $i=1;
- $carr=count($arr);
- foreach ($arr as $inarr) {
- $str.=$inarr[$key];
- if ($i<$carr)
- $str.=$glue;
- $i++;
- }
- return($str);
-}
-
-function notify($msg,$sev) {
- // notify "Severity" should be called "Importance"; anyway, it is to be thought of as "$lev" param of function "eecho": 0=debug, 1=info, 2=warning, 3=error
- global $link, $tables, $iswin, $opts;
- eecho($sev,'*notification*: '.strip_tags($msg).N);
- $tab='Notifications';
- if ($iswin) $tab='notifications';
- if (!$opts['dryrun']) myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.myesc($link,mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8')).'\', '.$sev.', \''.microtime(true).'\', 0, 0)',__LINE__);
-}
-
-/** */
-/**
- * Executes a call to Mastodon API.
- *
- * @param string $host Host to be called (e.g.: "mastodon.bida.im")
- * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
- * @return mixed An array representing the JSON object as returned by json_decode, or NULL if the call fails
- */
-function get_api($host, $path) {
- global $opts;
- $buf = @gurl('https://'.$host.$path,$opts['timeout']);
- if ($buf['cont']!==false) {
- ckratelimit($buf['headers']);
- $data = json_decode($buf['cont'], true);
- return $data;
- } else {
- return NULL;
- }
-}
-
-/**
- * Returns a list of known recognized languages, with the related probability, fot the toot that got passed to it
- *
- * @param mixed $toot The toot to be checked, as returned by the API
- * @return array Associative array with language and related probability
- */
-function get_toot_languages($toot) {
- if (is_array($toot) && array_key_exists('language',$toot))
- $l = $toot['language'];
- else
- $l = NULL;
- if($l !== NULL) {
- // the language is explicitly set in the toot, so use that
- $langs[$l] = 1;
- } elseif (array_key_exists('content',$toot)) {
- // the language is not explicitly set in the toot, so try and recognize it
- $text = strip_tags($toot['content']);
- $ld = new Language;
- $langs = $ld->detect($text)->bestResults()->close();
- }
- // group derived languages into two-charactes language code (e.g.: "zh-CN" into "zh")
- $grouped_langs = array();
- foreach($langs as $key => $value) {
- $l = explode("-", $key)[0];
- if(array_key_exists($l, $grouped_langs)) {
- $grouped_langs[$l] = max($grouped_langs[$l], $value);
- } else {
- $grouped_langs[$l] = $value;
- }
- }
- return $grouped_langs;
-}
-
-/**
- * Given the probability of a language for every toot, calculate the average
- *
- * @param array $detected_langs Array of mappings between language and probability
- * @return array Mapping between language and probability
- */
-function summary($detected_langs) {
- $res = Array();
- foreach($detected_langs as $langs) {
- foreach($langs as $l => $weight) {
- if(!array_key_exists($l, $res)) {
- $res[$l] = 0;
- }
- $res[$l] += $weight;
- }
- }
- foreach($res as $l => $sumweight) {
- $res[$l] = $sumweight / count($detected_langs);
- }
- return $res;
-}
-
-/**
- * Helper function for usort: compares two arrays using the first element
- *
- * @param array $entry1 First array to be compared
- * @param array $entry2 Second array to be compared
- * @return number -1, 0 o 1 depening on $entry1[0] being less than, equal to or greater than $entry2[0]
- */
-function sort_weights($entry1, $entry2) {
- $w1 = $entry1[0];
- $w2 = $entry2[0];
- if ($w1 < $w2)
- $ret=1;
- elseif ($w1 == $w2)
- $ret=0;
- else
- $ret=-1;
- return $ret;
-}
-
-/**
- * Given a language mapping, return a list of probable languages
- *
- * @param array $summary Map between language and probabilty
- * @return string[] List of probable languages
- */
-function get_languages($summary) {
- $lst = [];
- foreach($summary as $code => $weight) {
- $lst[] = [$weight, $code];
- }
- usort($lst, 'sort_weights');
- $languages = [];
- $lastweight = 0;
- foreach($lst as $entry) {
- $l = $entry[1];
- $weight = $entry[0];
- if($weight < $lastweight * 2 / 3) {
- break;
- }
- $languages[] = $l;
- $lastweight = $weight;
- }
- return $languages;
-}
-
-/**
- * Returns a list of probable languages for the given instance
- *
- * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
- * @return string[] List of probable languages
- */
-function get_instance_langs($host) {
- global $opts;
- $data = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
- if($data == NULL) {
- return [];
- }
- $detected_langs = array_map('get_toot_languages', $data);
- $summary = summary($detected_langs);
- $languages = get_languages($summary);
- return $languages;
-}
-
-require(__DIR__.'/../site/mustard/include/mb_ucfirst.php');
-
-function langs($instid, $uri, $auto) {
- global $info, $instrow, $link, $opts;
- $retlangs=array();
- $languages=array();
- // even if $auto is true, set it to false (don't do autodection of languages based on last toots) if api/v1/instance returned a language different from the default "en": assume instead it is right, because it has been explicitly set
- if (isset($info['languages'][0]) && $info['languages'][0]!='en')
- $auto=false;
- if ($auto) {
- $languages = get_instance_langs($uri);
- } elseif (akeavinn('languages',$info)) {
- $languages = $info['languages'];
- }
- if (count($languages)==0) {
- return($retlangs);
- } else {
- while (count($languages)>5)
- array_pop($languages);
- foreach ($languages as $key=>$val)
- $languages[$key]=str_replace('-','_',$val);
- if ($auto)
- eecho(1,'detected languages: '.implode(', ',$languages).N);
- else
- eecho(1,'declared languages: '.implode(', ',$languages).N);
- $pos=0;
- foreach($languages as $lang) {
- $res=myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$lang).'\'',__LINE__);
- if (mysqli_num_rows($res)<1) {
- $code=myesc($link,truncs($lang,'Languages','Code','«'.$instrow['URI'].'»'));
- $NameOrig=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,$lang)),'Languages','NameOrig','«'.$instrow['URI'].'»'));
- $NamePt_BR=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'pt_BR')),'Languages','NamePT_BR','«'.$instrow['URI'].'»'));
- $NameDe=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'de')),'Languages','NameDE','«'.$instrow['URI'].'»'));
- $NameUk=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'uk')),'Languages','NameUK','«'.$instrow['URI'].'»'));
- $NameCa=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'ca')),'Languages','NameCA','«'.$instrow['URI'].'»'));
- $NameEn=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'en')),'Languages','NameEN','«'.$instrow['URI'].'»'));
- $NameEs=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'es')),'Languages','NameES','«'.$instrow['URI'].'»'));
- $NameFr=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'fr')),'Languages','NameFR','«'.$instrow['URI'].'»'));
- $NameGl=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'gl')),'Languages','NameGL','«'.$instrow['URI'].'»'));
- $NameIt=myesc($link,truncs(mb_ucfirst(locale_get_display_name($lang,'it')),'Languages','NameIT','«'.$instrow['URI'].'»'));
- $q = 'INSERT INTO Languages (ID, Code, NameOrig, NamePT_BR, NameDE, NameUK, NameCA, NameEN, NameES, NameFR, NameGL, NameIT) VALUES (NULL, \''.$code.'\', \''.$NameOrig.'\', \''.$NamePt_BR.'\', \''.$NameDe.'\', \''.$NameUk.'\', \''.$NameCa.'\', \''.$NameEn.'\', \''.$NameEs.'\', \''.$NameFr.'\', \''.$NameGl.'\', \''.$NameIt.'\')';
- if (!$opts['dryrun']) {
- myq($link,$q,__LINE__);
- $langid=mysqli_insert_id($link);
- } else {
- $langid=0;
- }
- } else {
- $row=mysqli_fetch_assoc($res);
- $langid=$row['ID'];
- }
- $pos++;
- $retlangs[]=array('InstID'=>$instid,'LangID'=>$langid,'Pos'=>$pos,'Code'=>$lang);
- }
- }
- return($retlangs);
-}
-
-function varbdump($var) {
- ob_start();
- var_dump($var);
- $content=ob_get_contents();
- ob_end_clean();
- return($content);
-}
-
-function mdasortbykey(&$arr,$key,$rev=false) {
- $karr=array();
- foreach ($arr as $akey=>$subarr)
- $karr[round($subarr[$key]*10000000000000,0)]=array($akey,$subarr);
- if (!$rev)
- ksort($karr);
- else
- krsort($karr);
- $arr=array();
- foreach ($karr as $akey=>$subarr)
- $arr[$subarr[0]]=$subarr[1];
-}
-
-require(__DIR__.'/../site/mustard/include/ghs.php');
-
-require(__DIR__.'/../site/mustard/include/ght.php');
-
-/*
- * Nodeinfo ('https://'.$host.'/nodeinfo/2.0.json') was added in v3.0.0
- * Trends ('https://'.$host.'/api/v1/trends') was added in v3.0.0
- * Activity ('https://'.$host.'/api/v1/instance/activity') was added in v2.1.2
-*/
-
-if ($opts['jsonwrite']) {
- if ($recover)
- $mode=array('a','append');
- else
- $mode=array('w','write');
- $jsonf=@fopen($opts['jsonfp'],$mode[0])
- or mexit('could not open file «'.$opts['jsonfp'].'» in '.$mode[1].' mode.',1);
- if ($mode[0]=='w')
- fwrite($jsonf,'{'.N);
-}
-
-$tini=time();
-$cinsts=count($insts);
-$i=0;
-$qok=0;
-$qgood=0;
-if ($recover) {
- $i=$currinst['i'];
- $qok=$currinst['qok'];
- $qgood=$currinst['qgood'];
-}
-$beg=$i;
-while ($i<$cinsts) {
- $now=time();
- $host=$insts[$i];
- @file_put_contents($currinstjfp,$host."\t".$i."\t".$qok."\t".$qgood.N)
- or mexit('could not open «'.$currinstjfp.'» for writing.',1);
- $i++;
- $ismast=null;
- $instans=true;
- $info=null;
- $tela=$now-$tini;
- eecho(1,'working on «'.$host.'»; '.$i.'/'.$cinsts.'; '.$qok.' ok; '.$qgood.' good; '.round(100/$cinsts*$i).'%; elapsed time: '.ght($tela,null,0).'; estimated remaining time: '.ght($tela/$i*($cinsts-$beg)-$tela,null,0).'; mem.: '.ghs(memory_get_usage(true)).'; mem. peak: '.ghs(memory_get_peak_usage(true)).N);
- if (willtrunc($host,'Instances','URI')) {
- eecho(2,'«'.$host.'»: ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N);
- } else {
- eecho(0,'«'.$host.'»: trying to fetch instance info from API...'.N);
- $buf=@gurl('https://'.$host.'/api/v1/instance',$opts['timeout']);
- if ($buf['cont']!==false) {
- ckratelimit($buf['headers']);
- $info=@json_decode($buf['cont'],true);
- if (is_array($info)) {
- eecho(1,'«'.$host.'»: got instance info from API :-)'.N);
- eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on https...'.N);
- $buf=@gurl('https://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
- if ($buf['cont']===false) {
- eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on http...'.N);
- $buf=@gurl('http://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
- }
- if ($buf['cont']!==false) {
- $buf=@json_decode($buf['cont'],true);
- if (is_array($buf) && array_key_exists('links',$buf) && is_array($buf['links']) && count($buf['links'])>0) {
- $nirefs=[];
- foreach ($buf['links'] as $key=>$niref)
- if (isset($niref['rel']) && isset($niref['href']))
- $nirefs[$niref['rel']]=$niref['href'];
- else
- eecho(2,'«'.$host.'»: nodeinfo specs link '.$key.' has unexpected format.'.N);
- krsort($nirefs);
- $niref=array_shift($nirefs);
- eecho(0,'«'.$host.'»: got nodeinfo specs; trying to fetch nodeinfo...'.N);
- $buf=@gurl($niref,$opts['timeout']);
- if ($buf['cont']!==false) {
- $buf=@json_decode($buf['cont'],true);
- if (is_array($buf) && isset($buf['software']['name']) && isset($buf['software']['version'])) {
- $info['x-nodeinfo']=$buf;
- if (preg_match('/^mastodon|fedibird|ecko|hometown/',$info['x-nodeinfo']['software']['name'])===1)
- $ismast=true;
- $res=myq($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$info['x-nodeinfo']['software']['name']).'\'',__LINE__);
- if (mysqli_num_rows($res)<1) {
- if (!$opts['dryrun']) myq($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($info['x-nodeinfo']['software']['name'],'Platforms','Name','«'.$host.'»')).'\')',__LINE__)
- or mexit(__LINE__.': '.mysqli_error($link).N,3);
- notify('New software found: «'.$host.'» runs on «'.$info['x-nodeinfo']['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
- }
- } else {
- eecho(2,'«'.$host.'»: nodeinfo was not good json or json had unexpected format.'.N);
- }
- }
- } else {
- eecho(2,'«'.$host.'»: nodeinfo specs where not good json or json had unexpected format.'.N);
- }
- } else {
- eecho(2,'«'.$host.'»: could not retrieve nodeinfo specs.'.N);
- }
- if (array_key_exists('version',$info)) {
- eecho(1,'«'.$host.'» software version is «'.$info['version'].'».'.N);
- if ($info['version']>='2.1.2') {
- eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
- $buf=@gurl('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
- if ($buf['cont']!==false) {
- ckratelimit($buf['headers']);
- eecho(1,'«'.$host.'»: got instance activity info from API :-)'.N);
- $info['x-activity']=json_decode($buf['cont'],true);
- } else {
- eecho(2,'«'.$host.'»: could not fetch instance activity from API: '.$buf['emsg'].N);
- }
- }
- if ($info['version']>='3.0.0') {
- eecho(0,'«'.$host.'»: trying to fetch instance trends info from API...'.N);
- $buf=@gurl('https://'.$host.'/api/v1/trends',$opts['timeout']);
- if ($buf['cont']!==false) {
- ckratelimit($buf['headers']);
- eecho(1,'«'.$host.'»: got instance trends info from API :-)'.N);
- $info['x-trends']=json_decode($buf['cont'],true);
- } else {
- eecho(2,'«'.$host.'»: could not fetch instance trends from API: '.$buf['emsg'].N);
- }
- }
- }
- } else {
- $instans=false;
- eecho(2,'«'.$host.'»: fetched data were not good JSON.'.N);
- }
- } else {
- $instans=false;
- eecho(2,'«'.$host.'»: could not fetch instance info from API: '.$buf['emsg'].N);
- }
- if (!isset($info['uri']) || preg_match('#^\s*$#',$info['uri'])===1)
- $instans=false;
- if (is_array($info) && count($info)>0) {
- //echo('json dump of all fetched info:'.N.json_encode($info,JSON_PRETTY_PRINT).N);
- if ($opts['jsonwrite'])
- fwrite($jsonf,'"'.$host.'": '.json_encode($info,JSON_PRETTY_PRINT).','.N);
- }
- if (!$instans) {
- // this is the limbo of non-responding instances
- $res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$host).'\'',__LINE__);
- $nrows=mysqli_num_rows($res);
- if ($nrows==1) {
- eecho(1,'«'.$host.'»: didn’t respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New=0 and Instances.Dead=1.'.N);
- $row=mysqli_fetch_assoc($res);
- $instid=$row['ID'];
- if (!$opts['dryrun']) myq($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$instid,__LINE__);
- if ($row['New']==1 && !is_null($row['FirstSeen']) && $now-$row['FirstSeen']>$opts['oldline']) {
- notify('Instance «'.$row['URI'].'» is no longer new.',2);
- if (!$opts['dryrun']) myq($link,'UPDATE Instances SET New=0 WHERE ID='.$instid,__LINE__);
- }
-
- // we check the last time instance responded, if ever
- $rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=1 ORDER BY Time DESC LIMIT 1',__LINE__);
- // if instance never responded we consider the time of first check
- if (mysqli_num_rows($rres)==0)
- $rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=0 ORDER BY Time ASC LIMIT 1',__LINE__);
- if (mysqli_num_rows($rres)>0) {
- $rrow=mysqli_fetch_assoc($rres);
- if ($now-$rrow['Time']>$opts['deadline']) {
- if (!$opts['dryrun']) myq($link,'UPDATE Instances SET Dead=1 WHERE ID='.$instid,__LINE__);
- notify('Instance «'.$row['URI'].'» is dead!',2);
- }
- } else {
- eecho(2,'«'.$host.'»: exists in the database but there’s no data about it in InstChecks!'.N);
- }
- } elseif ($nrows==0) {
- eecho(1,'«'.$host.'»: doesn’t respond and is not in the database, adding it.'.N);
- // "FirstSeen=NULL" because it's not seen until it responds for the first time
- if (!$opts['dryrun']) {
- myq($link,'INSERT INTO Instances SET FirstSeen=NULL, New=1, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0, InsertTS='.$now,__LINE__);
- $instid=mysqli_insert_id($link);
- myq($link,'INSERT INTO InstChecks SET InstID='.$instid.', Time='.$now.', Status=0',__LINE__);
- } else {
- $instid=0;
- }
- } else {
- notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
- }
- if (!$opts['dryrun']) myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 0)',__LINE__);
- } else {
-
- // instance responded
-
- if (is_null($ismast)) {
- if (array_key_exists('pleroma',$info)) {
- $ismast=false;
- } elseif (preg_match('#(pleroma|pixelfed)#i',$info['version'])==1) {
- $ismast=false;
- } elseif (preg_match('#^[0-9]+\.[0-9]+\.[0-9]+#',$info['version'])!==1) {
- $ismast=false;
- }
- }
-
- $qok++;
- if (!is_null($ismast))
- ($ismast) ? $ismast=1 : $ismast=0;
- $instrow=array('ID'=>null, 'FirstSeen'=>null, 'IsMastodon'=>$ismast, 'Dead'=>0, 'New'=>0, 'Good'=>0, 'Chosen'=>0, 'Priority'=>null, 'Visible'=>0, 'Noxious'=>0, 'NoxReason'=>null, 'NoxLastModTS'=>null, 'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null, 'OurDescEN'=> null, 'LocalityID'=>null, 'OurLangsLock'=>0, 'Email'=>null, 'Software'=>null, 'Version'=>null, 'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null, 'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null, 'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null, 'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null, 'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null, 'LastCheckOk'=>1, 'GuestID'=>null, 'LastGuestEdit'=>null);
- $instrow['URI']=$host;
- if (akeavinn('title',$info))
- $instrow['Title']=nempty(truncs($info['title'],'Instances','Title','«'.$instrow['URI'].'»'));
- if (akeavinn('short_description',$info))
- $instrow['ShortDesc']=nempty(truncs($info['short_description'],'Instances','ShortDesc','«'.$instrow['URI'].'»'));
- if (akeavinn('description',$info))
- $instrow['LongDesc']=nempty(truncs($info['description'],'Instances','LongDesc','«'.$instrow['URI'].'»'));
- if (akeavinn('email',$info))
- $instrow['Email']=nempty(truncs($info['email'],'Instances','Email','«'.$instrow['URI'].'»'));
- if (akeavinn('version',$info))
- $instrow['Version']=nempty(truncs($info['version'],'Instances','Version','«'.$instrow['URI'].'»'));
- if (akeavinn('stats',$info)) {
- if (akeavinn('user_count',$info['stats']))
- $instrow['UserCount']=truncn($info['stats']['user_count'],'Instances','UserCount','«'.$instrow['URI'].'»');
- if (akeavinn('status_count',$info['stats']))
- $instrow['StatusCount']=truncn($info['stats']['status_count'],'Instances','StatusCount','«'.$instrow['URI'].'»');
- if (akeavinn('domain_count',$info['stats']))
- $instrow['DomainCount']=truncn($info['stats']['domain_count'],'Instances','DomainCount','«'.$instrow['URI'].'»');
- }
- if (akeavinn('thumbnail',$info))
- $instrow['Thumb']=nempty(truncs($info['thumbnail'],'Instances','Thumb','«'.$instrow['URI'].'»'));
- if (akeavinn('max_toot_chars',$info))
- $instrow['MaxTootChars']=truncn($info['max_toot_chars'],'Instances','MaxTootChars','«'.$instrow['URI'].'»');
- if (akeavinn('registrations',$info))
- $instrow['RegOpen']=b2i($info['registrations'],'Istanza «'.$instrow['URI'].'»: ');
- if (akeavinn('approval_required',$info))
- $instrow['RegReqApproval']=b2i($info['approval_required'],'Istanza «'.$instrow['URI'].'»: ');
- if (akeavinn('contact_account',$info)) {
- if (akeavinn('acct',$info['contact_account']))
- $instrow['AdmAccount']=nempty(truncs($info['contact_account']['acct'],'Instances','AdmAccount','«'.$instrow['URI'].'»'));
- if (akeavinn('display_name',$info['contact_account']))
- $instrow['AdmDisplayName']=nempty(truncs($info['contact_account']['display_name'],'Instances','AdmDisplayName','«'.$instrow['URI'].'»'));
- if (akeavinn('created_at',$info['contact_account']))
- $instrow['AdmCreatedAt']=pgdatetomy($info['contact_account']['created_at']);
- if (akeavinn('note',$info['contact_account']))
- $instrow['AdmNote']=nempty(truncs($info['contact_account']['note'],'Instances','AdmNote','«'.$instrow['URI'].'»'));
- if (akeavinn('url',$info['contact_account']))
- $instrow['AdmURL']=nempty(truncs($info['contact_account']['url'],'Instances','AdmURL','«'.$instrow['URI'].'»'));
- if (akeavinn('avatar',$info['contact_account']))
- $instrow['AdmAvatar']=nempty(truncs($info['contact_account']['avatar'],'Instances','AdmAvatar','«'.$instrow['URI'].'»'));
- if (akeavinn('header',$info['contact_account']))
- $instrow['AdmHeader']=nempty(truncs($info['contact_account']['header'],'Instances','AdmHeader','«'.$instrow['URI'].'»'));
- }
- if (akeavinn('x-nodeinfo',$info)) {
- if (akeavinn('software',$info['x-nodeinfo']) && akeavinn('name',$info['x-nodeinfo']['software']))
- $instrow['Software']=nempty(truncs($info['x-nodeinfo']['software']['name'],'Instances','Software','«'.$instrow['URI'].'»'));
- if (akeavinn('usage',$info['x-nodeinfo']) && akeavinn('users',$info['x-nodeinfo']['usage'])) {
- if (akeavinn('activeMonth',$info['x-nodeinfo']['usage']['users']))
- $instrow['ActiveUsersMonth']=truncn($info['x-nodeinfo']['usage']['users']['activeMonth'],'Instances','ActiveUsersMonth','«'.$instrow['URI'].'»');
- if (akeavinn('activeHalfyear',$info['x-nodeinfo']['usage']['users']))
- $instrow['ActiveUsersHalfYear']=truncn($info['x-nodeinfo']['usage']['users']['activeHalfyear'],'Instances','ActiveUsersHalfYear','«'.$instrow['URI'].'»');
- }
- }
-
- $whynot=array();
- if (is_null($instrow['RegOpen'])) {
- $whynot[]='we don’t know if it allows registrations';
- } elseif ($instrow['RegOpen']==0) {
- $whynot[]='it doesn’t allow registrations';
- }
- if (is_null($instrow['UserCount'])) {
- $whynot[]='we don’t know its total users number';
- } elseif ($instrow['UserCount']<10 || $instrow['UserCount']>30000) {
- $whynot[]='total users number is not greater than 10 and less than 30000';
- }
- if (is_null($instrow['DomainCount'])) {
- $whynot[]='we don’t know the number of other instances it knows';
- } elseif ($instrow['DomainCount']<500) {
- $whynot[]='the number of other instances it knows is less than 500';
- }
- if (!is_null($instrow['ActiveUsersMonth'])) {
- if ($instrow['ActiveUsersMonth']<10)
- $whynot[]='the number of active users for the last month is less than 10';
- } elseif (!is_null($instrow['StatusCount']) && $instrow['UserCount']>0 && $instrow['StatusCount']/$instrow['UserCount']<10) {
- $whynot[]='the average number of toots for user is less than 10';
- } else {
- $whynot[]='it was impossible to detect the number of active users for the last month or the average number of toots for user';
- }
- if (count($whynot)==0) {
- $instrow['Good']=1;
- eecho(1,'«'.$host.'»: this is a suitable instance! :-)'.N);
- $qgood++;
- } else {
- eecho(1,'«'.$host.'»: this is not a suitable instance: '.implode('; ',$whynot).' :-('.N);
- }
-
- $res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'',__LINE__);
-
- $nrows=mysqli_num_rows($res);
- if ($nrows==1) {
- eecho(1,'«'.$instrow['URI'].'»: is already present in the database, updating it...'.N);
- $oldinstrow=mysqli_fetch_assoc($res);
- $instid=$oldinstrow['ID'];
- $instrow['ID']=$oldinstrow['ID'];
- // if the instance already present in the db has FirstSeen=NULL, this means this is the first time it responds, so...
- if (is_null($oldinstrow['FirstSeen'])) {
- $instrow['FirstSeen']=$now;
- $instrow['New']=1;
- } else {
- $instrow['FirstSeen']=$oldinstrow['FirstSeen'];
- if ($oldinstrow['New']==1) {
- $instrow['New']=1;
- if ($now-$oldinstrow['FirstSeen']>$opts['oldline']) {
- $instrow['New']=0;
- notify('Instance «'.$instrow['URI'].'» is no longer new.',2);
- }
- }
- }
-
- if ($instrow['Good']==1 && $oldinstrow['Good']==0) {
- notify('Instance «'.$instrow['URI'].'» wasn’t suitable, but it is now!',1);
- } elseif ($instrow['Good']==0 && $oldinstrow['Good']==1) {
- notify('Instance «'.$instrow['URI'].'» was suitable, but it’s no longer for these reasons: '.implode('; ',$whynot),1);
- }
- $instrow['Chosen']=$oldinstrow['Chosen'];
- $instrow['Priority']=$oldinstrow['Priority'];
- $instrow['Visible']=$oldinstrow['Visible'];
- $instrow['Noxious']=$oldinstrow['Noxious'];
- $instrow['NoxReason']=$oldinstrow['NoxReason'];
- $instrow['NoxLastModTS']=$oldinstrow['NoxLastModTS'];
- if ($instrow['ShortDesc']!=$oldinstrow['ShortDesc'])
- notify('«Short description» of instance «'.$instrow['URI'].'» has changed.',1);
- if ($instrow['LongDesc']!=$oldinstrow['LongDesc'])
- notify('«Long description» of instance «'.$instrow['URI'].'» has changed.',1);
- $instrow['OurDesc']=$oldinstrow['OurDesc'];
- $instrow['OurDescEN']=$oldinstrow['OurDescEN'];
- $instrow['LocalityID']=$oldinstrow['LocalityID'];
- $instrow['OurLangsLock']=$oldinstrow['OurLangsLock'];
- $instrow['GuestID']=$oldinstrow['GuestID'];
- $instrow['LastGuestEdit']=$oldinstrow['LastGuestEdit'];
- $query='UPDATE Instances SET ';
- foreach ($instrow as $field=>$value) {
- if (!is_null($value))
- $query.=$field.'=\''.myesc($link,$value).'\', ';
- else
- $query.=$field.'=NULL, ';
- }
- $query=substr($query,0,-2).' WHERE Instances.ID='.$instrow['ID'];
- eecho(1,'«'.$host.'»: update query: «'.$query.'».'.N);
- if (!$opts['dryrun']) myq($link,$query,__LINE__);
-
- $res=myq($link,'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID='.$instrow['ID'].' ORDER BY Pos ASC',__LINE__);
- $oldinstlangs=array();
- while ($row=mysqli_fetch_assoc($res))
- $oldinstlangs[]=$row;
- $instlangs=langs($instrow['ID'], $instrow['URI'], false);
- if ($instlangs!=$oldinstlangs) {
- notify('The list of languages declared by instance «'.$instrow['URI'].'» has changed from «'.subarimp(', ','Code',$oldinstlangs).'» to «'.subarimp(', ','Code',$instlangs).'».',1);
- if (!$opts['dryrun']) {
- myq($link,'DELETE FROM InstLangs WHERE InstID='.$instrow['ID'],__LINE__);
- foreach ($instlangs as $row)
- myq($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
- }
- }
-
- if ($instrow['OurLangsLock']==0) {
- $instourlangs=langs($instrow['ID'], $instrow['URI'], true);
- // if instourlangs is empty and instlangs is not, set instourlangs as instlangs
- if (count($instourlangs)==0 && count($instlangs)>0)
- $instourlangs=$instlangs;
- if (count($instourlangs)>0) {
- if (!$opts['dryrun']) {
- myq($link,'DELETE FROM InstOurLangs WHERE InstID='.$instrow['ID'],__LINE__);
- foreach ($instourlangs as $row)
- myq($link,'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
- }
- }
- }
-
- } elseif ($nrows==0) {
- eecho(1,'«'.$host.'» is not present in the database, adding it...'.N);
- $instrow['FirstSeen']=$now;
- if ($opts['setnew'])
- $instrow['New']=1;
- $fields=array();
- $values='';
- foreach ($instrow as $field=>$value) {
- $fields[]=$field;
- if (!is_null($value))
- $values.='\''.myesc($link,$value).'\', ';
- else
- $values.='NULL, ';
- }
- $values=substr($values,0,-2);
- $query='INSERT INTO Instances ('.implode(', ',$fields).', InsertTS) VALUES ('.$values.', '.$now.')';
- eecho(1,'«'.$host.'»: insert query: «'.$query.'»'.N);
- if (!$opts['dryrun']) {
- myq($link,$query,__LINE__);
- $instid=mysqli_insert_id($link);
- } else {
- $instid=0;
- }
- if ($opts['setnew'] && !$opts['dryrun'])
- notify('New instance found: «'.$instrow['URI'].'».',1);
-
- $instlangs=langs($instid, $instrow['URI'], false);
- if (!$opts['dryrun']) {
- foreach ($instlangs as $row)
- myq($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
- }
-
- $instourlangs=langs($instid, $instrow['URI'], true);
- // if instourlangs is empty and instlangs is not, set instourlangs as instlangs
- if (count($instourlangs)==0 && count($instlangs)>0)
- $instourlangs=$instlangs;
- if (!$opts['dryrun']) {
- foreach ($instourlangs as $row)
- myq($link,'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
- }
-
- if ($instrow['Good']==1)
- notify('New instance «'.$instrow['URI'].'» is suitable!',1);
-
- } else {
- notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
- }
-
- if (array_key_exists('x-activity',$info) && is_array($info['x-activity'])) {
- if (!$opts['dryrun']) {
- myq($link,'DELETE FROM InstActivity WHERE InstID='.$instid,__LINE__);
- $pos=0;
- foreach ($info['x-activity'] as $buf) {
- if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
- $pos++;
- $query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
- myq($link,$query,__LINE__);
- }
- }
- }
- }
-
- if (array_key_exists('x-trends',$info) && is_array($info['x-trends'])) {
- $trends=array();
- foreach ($info['x-trends'] as $buf) {
- if (akeavinn('name',$buf) && akeavinn('url',$buf) && akeavinn('history',$buf) && is_array($buf['history'])) {
- $trend=0;
- foreach ($buf['history'] as $row) {
- if ($row['uses']>0)
- $trend+=($row['accounts']/$row['uses']);
- }
- $trends[]=array(
- 'InstID'=>$instid,
- 'LastDay'=>$buf['history'][0]['day'],
- 'Name'=>$buf['name'],
- 'URL'=>$buf['url'],
- 'Pos'=>null,
- 'trend'=>$trend
- );
- }
- }
- mdasortbykey($trends,'trend',true);
- //print_r($trends);
- if (!$opts['dryrun']) myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
- $pos=0;
- foreach ($trends as $trend) {
- $pos++;
- $query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.$trend['LastDay'].'\', \''.myesc($link,truncs($trend['Name'],'InstTrends','Name','«'.$instrow['URI'].'»')).'\', \''.myesc($link,truncs($trend['URL'],'InstTrends','URL','«'.$instrow['URI'].'»')).'\', '.$pos.')';
- if (!$opts['dryrun']) myq($link,$query,__LINE__);
- }
- }
- if (!$opts['dryrun']) myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 1)',__LINE__);
-
- if ($opts['fetchusers'] && $ismast && array_key_exists('version',$info) && $info['version']>='4.0.0') {
- eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
- $exusers=[];// array of this instance's users already existing in the db
- $res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
- while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
- $users=[];// array of users in this instance's directory
- $chunk=0;
- $limit=80;
- $end=false;
- while (!$end) {
- $offset=$chunk*$limit;
- $buf=@gurl('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
- if ($buf['cont']!==false) {
- ckratelimit($buf['headers']);
- eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
- $buf=@json_decode($buf['cont'],true);
- if (is_array($buf)) {
- //print_r($buf);
- if (count($buf)<$limit) $end=true;
- /*if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
- eecho(2,'«'.$host.'»: account entities reported by directory api endpoint don’t have a “noindex” attribute; skipping directory fetching.'.N);
- break;
- } else {
- eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
- }*/
- //foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
- foreach ($buf as $user) {
- if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
- eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
- // disabled because it takes too long on instances with many users
- /*if (!isset($user['noindex'])) {
- $user['noindex']=true;
- eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
- $page=gurl($user['url'],$opts['timeout']);
- // here ckratelimit is not needed because it's a normal web page, not json from mastodon api
- if ($page['cont']!==false) {
- //
- if (preg_match('/='3.3.0') {
- eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
- $tags=@gurl('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
- if ($tags['cont']!==false) {
- ckratelimit($tags['headers']);
- $tags=@json_decode($tags['cont'],true);
- if (is_array($tags) && count($tags)>0) {
- eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
- foreach($tags as $tag) $user['tags'][]=$tag['name'];
- }
- } else {
- eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
- }
- }
- $user['tags']=implode(';',$user['tags']);
- if ($user['tags']=='') $user['tags']=null;*/
- $user['tags']=null;
- if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
- if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
- $users[$user['id']]=$user;
- } else {
- eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
- //print_r($user);
- }
- }
- } else {
- eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
- $end=true;
- }
- $chunk++;
- } else {
- eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
- $end=true;
- }
- }
- foreach ($users as $locid=>$user) {
- $query='SET InstID='.$instid.', host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
- $uid=0;
- if (!array_key_exists($user['id'],$exusers)) {
- if (!$user['noindex']) {
- eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
- $query='INSERT INTO Users '.$query;
- if (!$opts['dryrun']) {
- myq($link,$query,__LINE__);
- $uid=mysqli_insert_id($link);
- }
- } else {
- eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
- }
- } else {
- $uid=$exusers[$locid]['ID'];
- if (!$user['noindex']) {
- eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
- $query='UPDATE Users '.$query.' WHERE ID='.$uid;
- } else {
- eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
- $query='DELETE FROM Users WHERE ID='.$uid;
- }
- if (!$opts['dryrun']) {
- myq($link,$query,__LINE__);
- myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
- }
- }
- if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
- eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
- foreach ($user['fields'] as $field) {
- (is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
- $field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
- $field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
- if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at'],__LINE__);
- }
- }
- }
- foreach ($exusers as $locid=>$exuser) {
- if (!array_key_exists($locid,$users)) {
- eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
- if (!$opts['dryrun']) {
- myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
- myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
- }
- }
- }
- }
-
-
-
- }
- }
-}
-
-mysqli_close($link);
-unset($link);
-
-if ($opts['jsonwrite']) {
- fwrite($jsonf,'"The end?": true'.N.'}'.N);
- fclose($jsonf);
-}
-
-unlink($instsjfp);
-unlink($currinstjfp);
-unlink($lockfp);
-
-eecho(1,'Done (in '.ght(time()-$tini,null,0).') :-)'.N);
-
-exit(0);
-
-// "multi array_key_exists"
-function make($keys,&$arr) {
- foreach ($keys as $key)
- if (!array_key_exists($key,$arr))
- return(false);
- return(true);
-}
-
-function myv(&$link,$var) {
- if (is_null($var)) {
- return('NULL');
- } elseif (is_bool($var)) {
- if ($var)
- return('1');
- else
- return('0');
- } elseif (trim($var)=='') {
- return('NULL');
- } else {
- return('\''.mysqli_real_escape_string($link,$var).'\'');
- }
-}
-
-function datetomy($date) {
- $date=explode('-',$date);
- return(mktime(0,0,0,$date[1],$date[2],$date[0]));
-}
-
-function ckratelimit($httpresphead) {
- $headers=explode("\r\n",$httpresphead);
- $buff=[];
- array_shift($headers);
- foreach ($headers as $header)
- if (preg_match('/^([^:]+):(.*)$/Uu',$header,$matches)===1)
- $buff[strtolower($matches[1])]=trim($matches[2]);
- $headers=$buff;
- if (array_key_exists('x-ratelimit-reset',$headers)) {
- if (array_key_exists('date',$headers)) {
- //Wed, 30 Mar 2022 21:27:22 GMT
- $srvnow=strtotime($headers['date']);
- //2022-03-31T04:05:00.058705Z
- $srvrlr=strtotime($headers['x-ratelimit-reset']);
- $stosl=$srvrlr-$srvnow+1;
- //echo('ckratelimit: x-ratelimit-remaining: '.$headers['x-ratelimit-remaining'].'; $srvnow: '.gmdate('c',$srvnow).'; $srvrlr: '.gmdate('c',$srvrlr).'; current time to sleep: '.$stosl.'.'.N);
- if ($headers['x-ratelimit-remaining']<3) {
- eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
- sleep($stosl);
- }
- } else {
- eecho(2,'ckratelimit: $httpresphead did not contain a «date» header!'.N);
- }
- } else {
- eecho(2,'ckratelimit: $httpresphead did not contain an «x-ratelimit-reset» header!'.N);
- }
+ eecho(1,$msg);
+ if (isset($logf) && $logf!==false) fclose($logf);
+ exit($code);
}
?>