1286 lines
53 KiB
PHP
Executable file
1286 lines
53 KiB
PHP
Executable file
#!/usr/bin/php
|
||
<?php
|
||
|
||
/*
|
||
This program is free software: you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation, either version 3 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
*/
|
||
|
||
const N="\n";
|
||
|
||
require(__DIR__.'/../site/mustard/include/getfc.php');
|
||
|
||
require(__DIR__.'/lib/vendor/autoload.php');
|
||
use LanguageDetection\Language;
|
||
|
||
// True when running on Windows; helpers such as truncs()/willtrunc() lower-case
// table names before looking them up in $tables when this flag is set.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');
|
||
|
||
/**
 * Prints a timestamped, level-tagged log message.
 *
 * Levels 0 (Debug) and 1 (Info) are echoed to standard output, levels 2
 * (Warning) and 3 (Error) are written to STDERR. No newline is appended:
 * callers are expected to terminate $msg themselves.
 *
 * @param int    $lev Index into ['Debug', 'Info', 'Warning', 'Error']
 * @param string $msg Message text
 */
function eecho($lev,$msg) {
	static $labels = ['Debug', 'Info', 'Warning', 'Error'];
	// microtime(false) returns "<fraction> <seconds>", e.g. "0.12345600 1700000000"
	list($fraction, $seconds) = explode(' ', microtime(false));
	// drop the leading "0." of the fraction and glue it after the formatted seconds
	$stamp = date('Y-m-d H:i:s', $seconds) . '.' . substr($fraction, 2);
	$line = $stamp . ' ' . $labels[$lev] . ': ' . $msg;
	if ($lev >= 2) {
		fwrite(STDERR, $line);
		return;
	}
	echo($line);
}
|
||
|
||
/**
 * Releases the script's global resources, logs a final message and exits.
 *
 * Closes the database connection ($link) and the JSON output file ($jsonf)
 * if they are open, removes the lock file ($lockfp) if it exists, then logs
 * $msg as Error when $code is non-zero or as Info otherwise.
 *
 * @param string $msg  Final message (callers append the newline)
 * @param int    $code Process exit status
 */
function mexit($msg,$code) {
	global $link, $jsonf, $lockfp;
	// "$x = @open(...) or mexit(...)" leaves false in $x on failure; isset(false)
	// is true, so test for a usable handle instead of mere existence, otherwise
	// mysqli_close()/fclose() raise a TypeError on PHP 8 while we are bailing out
	if ($link instanceof mysqli)
		mysqli_close($link);
	if (isset($jsonf) && is_resource($jsonf))
		fclose($jsonf);
	if (isset($lockfp) && is_file($lockfp))
		unlink($lockfp);
	if ($code!=0)
		eecho(3,$msg);
	else
		eecho(1,$msg);
	exit($code);
}
|
||
|
||
// Ticks are enabled so that handlers registered with pcntl_signal() below get dispatched.
declare(ticks=1);

// pcntl is optional: without it the script simply runs with default signal behaviour.
if (function_exists('pcntl_signal')) {
	/**
	 * Shuts the script down through mexit() (exit status 0) when a signal arrives.
	 *
	 * @param int $signal Number of the received signal
	 */
	function signalHandler($signal) {
		echo(N);
		mexit('received signal «'.$signal.'», shutting down.'.N,0);
	}
	// SIGTERM: termination ('kill' was called); SIGHUP: terminal log-out; SIGINT: interrupted (Ctrl-C)
	foreach ([SIGTERM, SIGHUP, SIGINT] as $signo)
		pcntl_signal($signo,'signalHandler');
}
|
||
|
||
// Default settings; most of them can be overridden from the command line.
$opts = [
	// timeout in seconds for every connection attempt
	'timeout'     => 10,
	// an instance not responding for longer than this many seconds (60 days) is declared dead
	'deadline'    => 60 * 86400,
	// an instance that has been new for longer than this many seconds (30 days) is no longer new
	'oldline'     => 30 * 86400,
	// number of toots to check with the automatic language detection function
	'ldtoots'     => 40,
	'setnew'      => true,
	'dryrun'      => false,
	'jsonfp'      => __DIR__ . '/instances.json',
	'jsonwrite'   => false,
	'peersfp'     => null,
	'dontrestore' => false,
	'ignorelock'  => false,
	'fetchusers'  => false,
	'moreclauses' => ''
];
|
||
|
||
$help='crawler.php
|
||
DESCRIPTION
|
||
This script updates mastostart’s database with the data it manages to
|
||
retrieve from instances already present in the database plus (optionally)
|
||
those listed in a specifiable file (typically the output file from a
|
||
peerscrawl.php run).
|
||
SYNOPSIS
|
||
crawler.php [options]
|
||
OPTIONS
|
||
-p, --peersfp <file>
|
||
Sets a file containing a list of instances to consider in addition to those
|
||
which are already present in the database.
|
||
Note that this option is ignored if the script will recover a previous
|
||
unfinished session.
|
||
-f, --fetchusers
|
||
*Currently experimental*: if this option is set, the script will try and
|
||
fetch users’ profiles infos from each considered instance’s user directory
|
||
and store them in the database.
|
||
-t, --timeout <seconds>
|
||
Sets the timeout in seconds for every connection attempt.
|
||
DEFAULT: «'.$opts['timeout'].'»
|
||
-N, --dontsetnew
|
||
If this option is set, the script won’t mark new instances as new. This can
|
||
be useful for a first run.
|
||
-I, --ignorelock
|
||
Normally, if its lockfile exists, the script will exit with an error.
|
||
If this option is set, the lockfile existence will be ignored.
|
||
Warning: check that the script is actually not running yet before using
|
||
this option.
|
||
-R, --dontrestore
|
||
If this option is set and «instances.job» and «currinst.job» files from
|
||
a previous unfinished session are present, the script will ignore them
|
||
and start a new session.
|
||
-d, --dryrun
|
||
If this option is set, the script won’t write anything in the database.
|
||
-j, --jsonwrite
|
||
If this option is set, the script will write an «instances.json» file
|
||
containing all the data it could retrieve from every considered instance.
|
||
-m, --moreclauses <more SQL clauses>
|
||
If this option is set, whatever one writes as argument to the option will
|
||
be added to the main query for instances’ records, which is
|
||
«SELECT URI FROM Instances WHERE Dead=0», so one can limit the crawl more.
|
||
-h, --help
|
||
If this option is set, the script will show this help text and exit.
|
||
|
||
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
|
||
This is free software, and you are welcome to redistribute it under
|
||
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
|
||
|
||
// Parse the command line into $opts. Arguments that do not start with «-» are
// skipped; unknown options and missing or invalid option arguments abort the
// script through mexit() with exit status 1.
for ($i=1; $i<$argc; $i++) {
	if (substr($argv[$i],0,1)!='-')
		continue;
	if (in_array($argv[$i],['-p','--peersfp'],true)) {
		if ($i+1>=$argc || !file_exists($argv[$i+1]) || !is_file($argv[$i+1]) || !is_readable($argv[$i+1]))
			mexit('option «'.$argv[$i].'» requires an existing and readable file as an argument (use «-h» to read help).'.N,1);
		$opts['peersfp']=$argv[++$i];
	} elseif (in_array($argv[$i],['-f','--fetchusers'],true)) {
		$opts['fetchusers']=true;
	} elseif (in_array($argv[$i],['-t','--timeout'],true)) {
		if ($i+1>=$argc || preg_match('/^[0-9]+$/',$argv[$i+1])!==1)
			mexit('option «'.$argv[$i].'» requires a numeric argument (use «-h» to read help).'.N,1);
		// "+0" turns the digit string into a number
		$opts['timeout']=$argv[++$i]+0;
	} elseif (in_array($argv[$i],['-N','--dontsetnew'],true)) {
		$opts['setnew']=false;
	} elseif (in_array($argv[$i],['-R','--dontrestore'],true)) {
		$opts['dontrestore']=true;
	} elseif (in_array($argv[$i],['-I','--ignorelock'],true)) {
		$opts['ignorelock']=true;
	} elseif (in_array($argv[$i],['-d','--dryrun'],true)) {
		$opts['dryrun']=true;
	} elseif (in_array($argv[$i],['-j','--jsonwrite'],true)) {
		$opts['jsonwrite']=true;
	} elseif (in_array($argv[$i],['-m','--moreclauses'],true)) {
		if ($i+1>=$argc)
			mexit('option «'.$argv[$i].'» requires some SQL clause as argument (use «-h» to read help).'.N,1);
		$opts['moreclauses']=$argv[++$i];
	} elseif (in_array($argv[$i],['-h','--help'],true)) {
		echo($help);
		exit(0);
	} else {
		mexit('option «'.$argv[$i].'» is unknown (use «-h» to read help).'.N,1);
	}
}
|
||
|
||
use function mysqli_real_escape_string as myesc;
|
||
|
||
/**
 * Runs a query and terminates the script through mexit() if it fails.
 *
 * @param mysqli $link  Open database connection
 * @param string $query SQL text to execute
 * @param int    $line  Source line of the call site (callers pass __LINE__); only used in the error message
 * @return mysqli_result|bool The value returned by mysqli_query() on success
 */
function myq(&$link,$query,$line) {
	try {
		$res=mysqli_query($link,$query);
	}
	catch (Exception $error) {
		mexit('query «'.$query.'» (line '.$line.') failed: '.$error->getMessage().N,3);
	}
	// when mysqli is not in exception mode a failure is signalled by a false
	// return value instead; never hand that to callers expecting a result set
	if ($res===false)
		mexit('query «'.$query.'» (line '.$line.') failed: '.mysqli_error($link).N,3);
	return($res);
}
|
||
|
||
// --- lock file -------------------------------------------------------------
$lockfp=__DIR__.'/crawler.lock';
if (file_exists($lockfp) && !$opts['ignorelock']) {
	// plain exit() instead of mexit(): mexit() would remove a lock we do not own
	eecho(3,'lock file «'.$lockfp.'» exists (if you are sure crawler.php is not already running you can use option «-I» to force execution).'.N);
	exit(1);
}
touch($lockfp);

// --- configuration ---------------------------------------------------------
$inifp=__DIR__.'/../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp)
	or mexit('could not open config file «'.$inifp.'»'.N,1);

// --- database connection ---------------------------------------------------
// mysqli may report failures either by throwing mysqli_sql_exception (which "@"
// does not suppress) or by returning false; handle both so that the lock file
// is always removed through mexit().
try {
	$link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']);
} catch (mysqli_sql_exception $error) {
	mexit('could not connect to MySQL server: '.$error->getMessage().N,1);
}
if (!$link) {
	// do not leave "false" in the global connection handle
	$link=null;
	mexit('could not connect to MySQL server: '.mysqli_connect_error().N,1);
}
try {
	$charsetok=mysqli_set_charset($link,'utf8mb4');
} catch (mysqli_sql_exception $error) {
	mexit('could not set «utf8mb4» charset for MySQL: '.$error->getMessage().N,1);
}
if (!$charsetok)
	mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).N,1);

// --- table metadata used by truncs(), truncn() and willtrunc() --------------
require(__DIR__.'/../site/mustard/include/tables.php');
$tables=tables($link);
|
||
//print_r($tables);
|
||
|
||
// Session recovery: «instances.job» holds the list of hosts of the interrupted
// run (one per line), «currinst.job» holds one tab-separated line with the
// current host, its index in the list and the "ok" / "good" counters.
$recover=false;
$instsjfp=__DIR__.'/instances.job';
$currinstjfp=__DIR__.'/currinst.job';

if (!$opts['dontrestore'] && file_exists($currinstjfp) && file_exists($instsjfp)) {
	eecho(0,'looks like previous session was interrupted, trying to recover it...'.N);
	$insts=@file($instsjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
		or mexit('could not open file «'.$instsjfp.'» for reading.'.N,1);
	$buf=@file($currinstjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
		or mexit('could not open file «'.$currinstjfp.'» for reading.'.N,1);
	$buf=explode("\t",$buf[0]);
	// a truncated or hand-edited state file would otherwise yield undefined
	// offsets and null counters, silently restarting the crawl with garbage
	if (count($buf)!=4 || preg_match('/^[0-9]+$/',$buf[1])!==1 || preg_match('/^[0-9]+$/',$buf[2])!==1 || preg_match('/^[0-9]+$/',$buf[3])!==1)
		mexit('file «'.$currinstjfp.'» has an unexpected format, could not recover previous session (use option «-R» to start a new one).'.N,1);
	$currinst=array('dom'=>$buf[0], 'i'=>(int)$buf[1], 'qok'=>(int)$buf[2], 'qgood'=>(int)$buf[3]);
	$recover=true;
	eecho(1,'recovered previous session.'.N);
}
|
||
|
||
/**
 * Shortens a string so that it fits the size recorded for a table column.
 *
 * The limit is read from the global $tables[$tab][$col]. When the string is
 * longer, it is cut to limit-1 characters, «…» is appended and a notification
 * of severity 3 is issued. On Windows the table name is lower-cased first.
 *
 * @param string|null $str String to check; null is returned unchanged
 * @param string      $tab Table name
 * @param string      $col Column name
 * @param string      $ctx Prefix for the notification text
 * @return string|null
 */
function truncs($str,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($str === null)
		return null;
	$tabname = $iswin ? strtolower($tab) : $tab;
	$limit = $tables[$tabname][$col];
	if (mb_strlen($str,'UTF-8') <= $limit)
		return $str;
	notify($ctx.': had to truncate string to '.$limit.' chars to be able to insert it into «'.$col.'» column in «'.$tabname.'» table.',3);
	return mb_substr($str,0,$limit-1,'UTF-8').'…';
}
|
||
|
||
/**
 * Clamps a number to the range recorded for a table column.
 *
 * The bounds are read from the global $tables[$tab][$col]['min'|'max'].
 * A notification of severity 3 is issued whenever the value is changed;
 * non-numeric input is replaced by 0. On Windows the table name is
 * lower-cased first.
 *
 * @param mixed  $num Value to check
 * @param string $tab Table name
 * @param string $col Column name
 * @param string $ctx Prefix for the notification text
 * @return mixed The (possibly clamped) number
 */
function truncn($num,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($iswin)
		$tab = strtolower($tab);
	if (!is_numeric($num)) {
		notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
		return 0;
	}
	$range = $tables[$tab][$col];
	if ($num > $range['max']) {
		notify($ctx.': had to ceil «'.$num.'» to «'.$range['max'].'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',3);
		return $range['max'];
	}
	if ($num < $range['min']) {
		notify($ctx.': had to floor «'.$num.'» to «'.$range['min'].'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'»).',3);
		return $range['min'];
	}
	return $num;
}
|
||
|
||
/*$contextopts=array(
|
||
'http'=>array(
|
||
'timeout'=>$opts['timeout']
|
||
),
|
||
'socket'=>array(
|
||
'tcp_nodelay'=>true
|
||
)
|
||
);
|
||
$context=stream_context_create($contextopts);*/
|
||
|
||
/**
 * Converts a date such as «2018-04-07T15:05:26.801Z» into a Unix timestamp.
 *
 * Date and time may be separated by «T» or a space; the fractional seconds
 * and the trailing «Z» are optional. The fields are interpreted as GMT.
 * When the input is not recognized, a notification of severity 3 is issued
 * and the current time is returned.
 *
 * @param string $pgdate Date string
 * @return int|float Timestamp; a float when fractional seconds were present
 */
function pgdatetomy($pgdate) {
	$pattern = '/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)(\.\d+)?Z?$/';
	if (preg_match($pattern,$pgdate,$m) !== 1) {
		notify('Function «pgdatetomy»: «'.$pgdate.'» has not a recognized date format; returning current date.',3);
		return time();
	}
	// $m: 1=year 2=month 3=day 4=hour 5=minute 6=second 7=".fraction" (only present when matched)
	$stamp = gmmktime($m[4],$m[5],$m[6],$m[2],$m[3],$m[1]);
	if (isset($m[7]))
		$stamp += floatval('0'.$m[7]);
	return $stamp;
}
|
||
|
||
// Fresh session: build the list of hosts to check from the database (alive
// instances) plus, optionally, the peers file, then save it to «instances.job»
// so that an interrupted run can be recovered.
if (!$recover) {

	$insts=array();
	// hostname => true; gives constant-time membership tests instead of
	// rescanning $insts with in_array() for every database row / peer
	$known=array();

	// the separating space keeps user-supplied clauses from fusing with «Dead=0»
	$res=myq($link,'SELECT URI FROM Instances WHERE Dead=0 '.$opts['moreclauses'],__LINE__);
	while ($row=mysqli_fetch_assoc($res)) {
		if (!isset($known[$row['URI']])) {
			$known[$row['URI']]=true;
			$insts[]=$row['URI'];
		}
	}
	eecho(1,'loaded known, alive instances from the database into the list of instances to be checked.'.N);

	$res=myq($link,'SELECT URI FROM Instances WHERE Dead=1',__LINE__);
	$deadinsts=array();
	while ($row=mysqli_fetch_assoc($res))
		$deadinsts[$row['URI']]=true;
	eecho(1,'loaded dead instances into the corresponding list.'.N);

	if (!is_null($opts['peersfp'])) {
		eecho(0,'loading other instances to be checked from «'.$opts['peersfp'].'».'.N);
		$peers=@file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
		if ($peers===false)
			mexit('could not open «'.$opts['peersfp'].'» for reading.'.N,1);
		foreach ($peers as $pdom) {
			// already in the list: nothing to do
			if (isset($known[$pdom]))
				continue;
			if (isset($deadinsts[$pdom])) {
				eecho(1,'ignoring instance «'.$pdom.'» because it is dead.'.N);
				continue;
			}
			if (willtrunc($pdom,'Instances','URI')) {
				eecho(2,'ignoring instance «'.$pdom.'» because its hostname is too long for column «URI» of table «Instances».'.N);
				continue;
			}
			$known[$pdom]=true;
			$insts[]=$pdom;
		}
	}

	unset($deadinsts,$known);
	sort($insts);
	// shuffle($insts);
	eecho(1,count($insts).' instances to be checked.'.N);

	$instsf=@fopen($instsjfp,'w')
		or mexit('could not open «'.$instsjfp.'» for writing.'.N,1);
	foreach ($insts as $host)
		fwrite($instsf,$host.N);
	fclose($instsf);
}
|
||
|
||
/**
 * Tells whether a string is longer than the size recorded for a table column.
 *
 * The limit is read from the global $tables[$tab][$col]; on Windows the table
 * name is lower-cased first.
 *
 * @param string $str String to check
 * @param string $tab Table name
 * @param string $col Column name
 * @return bool True when the string exceeds the column size
 */
function willtrunc($str,$tab,$col) {
	global $tables, $iswin;
	$tabname = $iswin ? strtolower($tab) : $tab;
	return mb_strlen($str,'UTF-8') > $tables[$tabname][$col];
}
|
||
|
||
/**
 * Converts a boolean to 1 or 0.
 *
 * Anything that is not a boolean yields 0 and triggers a notification of
 * severity 3.
 *
 * @param mixed  $bool Value to convert
 * @param string $pre  Prefix for the notification text
 * @return int 1 for true, 0 otherwise
 */
function b2i($bool,$pre) {
	if (!is_bool($bool)) {
		notify($pre.'«'.$bool.'» is not a boolean value, returning «0».',3);
		return 0;
	}
	return $bool ? 1 : 0;
}
|
||
|
||
/**
 * "Array Key Exists And Value Is Not Null".
 *
 * @param int|string $key Key to look up
 * @param mixed      $arr Value to inspect (passed by reference)
 * @return bool True only when $arr is an array, has $key and the value stored there is not null
 */
function akeavinn($key,&$arr) {
	return is_array($arr) && array_key_exists($key,$arr) && !is_null($arr[$key]);
}
|
||
|
||
/**
 * Maps empty or whitespace-only strings to null.
 *
 * @param string $str String to check
 * @return string|null null when $str contains nothing but whitespace, $str unchanged otherwise
 */
function nempty($str) {
	return (preg_match('/^\s*$/',$str) === 1) ? null : $str;
}
|
||
|
||
/**
 * "Sub-array implode": joins the values found under $key in every element of $arr.
 *
 * @param string     $glue Separator placed between consecutive values
 * @param int|string $key  Key to read from each inner array
 * @param array      $arr  Array of arrays (passed by reference, not modified)
 * @return string The joined values; an empty string for an empty $arr
 */
function subarimp($glue,$key,&$arr) {
	$picked = array_map(
		function ($inner) use ($key) {
			return $inner[$key];
		},
		$arr
	);
	return implode($glue,$picked);
}
|
||
|
||
/**
 * Logs a notification and, unless in dry-run mode, stores it in the «Notifications» table.
 *
 * The message is logged with HTML tags stripped; the stored text keeps them
 * and is cut to the size recorded for the «Notification» column.
 *
 * @param string $msg Notification text (may contain HTML)
 * @param int    $sev "Severity" (really an importance), same scale as the $lev
 *                    parameter of eecho(): 0=debug, 1=info, 2=warning, 3=error
 */
function notify($msg,$sev) {
	global $link, $tables, $iswin, $opts;
	eecho($sev,'*notification*: '.strip_tags($msg).N);
	if ($opts['dryrun'])
		return;
	// on Windows the keys of $tables are lower-case
	$tab = $iswin ? 'notifications' : 'Notifications';
	$text = myesc($link,mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8'));
	myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.$text.'\', '.$sev.', \''.microtime(true).'\', 0, 0)',__LINE__);
}
|
||
|
||
/** <LANGUAGE MANAGEMENT> */
|
||
/**
 * Executes a call to Mastodon API over https.
 *
 * Uses $opts['timeout'] as connection timeout and hands the response headers
 * to ckratelimit() before decoding the body.
 *
 * @param string $host Host to be called (e.g.: "mastodon.bida.im")
 * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The decoded JSON as returned by json_decode (objects as
 *               associative arrays), or NULL if the call fails
 */
function get_api($host, $path) {
	global $opts;
	$buf = @getfc('https://'.$host.$path,$opts['timeout']);
	if ($buf['cont'] === false)
		return NULL;
	ckratelimit($buf['headers']);
	return json_decode($buf['cont'], true);
}
|
||
|
||
/**
 * Returns the languages recognized for the toot that got passed to it, each
 * with the related probability.
 *
 * A language explicitly set in the toot is taken as certain (probability 1);
 * otherwise the language is detected from the toot's content. Regional
 * variants are merged into their base code (e.g.: "zh-CN" into "zh"), keeping
 * the highest probability.
 *
 * @param mixed $toot The toot to be checked, as returned by the API
 * @return array Associative array with language and related probability;
 *               empty when $toot is not an array or has neither language nor content
 */
function get_toot_languages($toot) {
	// start from an empty map so that toots without usable data (or entries
	// that are not toots at all) yield an empty result instead of failing
	$langs = array();
	if (is_array($toot)) {
		if (isset($toot['language'])) {
			// the language is explicitly set in the toot, so use that
			$langs[$toot['language']] = 1;
		} elseif (array_key_exists('content',$toot)) {
			// the language is not explicitly set in the toot, so try and recognize it
			$text = strip_tags($toot['content']);
			$ld = new Language;
			$langs = $ld->detect($text)->bestResults()->close();
		}
	}
	// group derived languages into their base language code (e.g.: "zh-CN" into "zh")
	$grouped_langs = array();
	foreach($langs as $key => $value) {
		$l = explode("-", $key)[0];
		if(array_key_exists($l, $grouped_langs)) {
			$grouped_langs[$l] = max($grouped_langs[$l], $value);
		} else {
			$grouped_langs[$l] = $value;
		}
	}
	return $grouped_langs;
}
|
||
|
||
/**
 * Given the probability of a language for every toot, calculate the average.
 *
 * The weights of each language are summed over all toots and divided by the
 * total number of toots, so a language missing from a toot counts as 0 there.
 *
 * @param array $detected_langs Array of mappings between language and probability (one per toot)
 * @return array Mapping between language and average probability
 */
function summary($detected_langs) {
	$totals = array();
	foreach ($detected_langs as $langs) {
		foreach ($langs as $code => $weight)
			$totals[$code] = (isset($totals[$code]) ? $totals[$code] : 0) + $weight;
	}
	$ntoots = count($detected_langs);
	foreach ($totals as $code => $sum)
		$totals[$code] = $sum / $ntoots;
	return $totals;
}
|
||
|
||
/**
 * Helper function for usort: compares two arrays using their first element,
 * so that the array sorts in descending order of that element.
 *
 * @param array $entry1 First array to be compared
 * @param array $entry2 Second array to be compared
 * @return int 1, 0 or -1 depending on $entry1[0] being less than, equal to or greater than $entry2[0]
 */
function sort_weights($entry1, $entry2) {
	// operands are swapped on purpose: bigger weights come first
	return $entry2[0] <=> $entry1[0];
}
|
||
|
||
/**
 * Given a language mapping, return a list of probable languages.
 *
 * Languages are ranked by descending probability; the list is cut at the
 * first language whose probability is lower than two thirds of the
 * probability of the one ranked just before it.
 *
 * @param array $summary Map between language and probability
 * @return string[] List of probable languages, most probable first
 */
function get_languages($summary) {
	$ranked = [];
	foreach ($summary as $code => $weight)
		$ranked[] = [$weight, $code];
	usort($ranked, 'sort_weights');
	$languages = [];
	$previous = 0;
	foreach ($ranked as list($weight, $code)) {
		if ($weight < $previous * 2 / 3)
			break;
		$languages[] = $code;
		$previous = $weight;
	}
	return $languages;
}
|
||
|
||
/**
 * Returns a list of probable languages for the given instance, detected from
 * the last $opts['ldtoots'] toots of its local public timeline.
 *
 * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
 * @return string[] List of probable languages; empty when the timeline could
 *                  not be fetched or did not contain any toot
 */
function get_instance_langs($host) {
	global $opts;
	$data = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
	// the API may answer with something that is not a list of toots (a bare
	// scalar, an «{"error": ...}» object, ...): only arrays can be examined
	if (!is_array($data))
		return [];
	$toots = array_filter($data, 'is_array');
	if (count($toots) == 0)
		return [];
	$detected_langs = array_map('get_toot_languages', $toots);
	$summary = summary($detected_langs);
	$languages = get_languages($summary);
	return $languages;
}
|
||
|
||
require(__DIR__.'/../site/mustard/include/mb_ucfirst.php');
|
||
|
||
/**
 * Determines the languages of an instance and makes sure each of them has a
 * record in the «Languages» table.
 *
 * Languages are taken from the global $info['languages'] (as declared by the
 * instance) or, when $auto is true, detected from the instance's last toots.
 * Unknown languages are inserted into «Languages» together with their names
 * in the supported locales (skipped in dry-run mode).
 *
 * @param mixed  $instid Instance id, copied into the «InstID» field of every returned entry
 * @param string $uri    Instance hostname, used for automatic detection
 * @param bool   $auto   Whether to detect languages automatically
 * @return array List of arrays with keys «InstID», «LangID», «Pos» (1-based
 *               position) and «Code»; «LangID» is 0 for languages that would
 *               have been inserted during a dry run
 */
function langs($instid, $uri, $auto) {
	global $info, $instrow, $link, $opts;
	$retlangs=array();
	$languages=array();
	// even if $auto is true, set it to false (don't do autodetection of languages based on last toots) if api/v1/instance returned a language different from the default "en": assume instead it is right, because it has been explicitly set
	if (isset($info['languages'][0]) && $info['languages'][0]!='en')
		$auto=false;
	if ($auto) {
		$languages = get_instance_langs($uri);
	} elseif (akeavinn('languages',$info)) {
		$languages = $info['languages'];
	}
	// a declared «languages» field that is not a list cannot be used
	if (!is_array($languages) || count($languages)==0)
		return($retlangs);

	foreach ($languages as $key=>$val)
		$languages[$key]=str_replace('-','_',$val);
	if ($auto)
		eecho(1,'detected languages: '.implode(', ',$languages).N);
	else
		eecho(1,'declared languages: '.implode(', ',$languages).N);

	// name column => locale the language name is rendered in (null: the language itself)
	$namecols=array(
		'NameOrig'=>null,
		'NamePT_BR'=>'pt_BR',
		'NameDE'=>'de',
		'NameUK'=>'uk',
		'NameCA'=>'ca',
		'NameEN'=>'en',
		'NameES'=>'es',
		'NameFR'=>'fr',
		'NameGL'=>'gl',
		'NameIT'=>'it'
	);
	$pos=0;
	foreach($languages as $lang) {
		$res=myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$lang).'\'',__LINE__);
		if (mysqli_num_rows($res)<1) {
			$ctx='«'.$instrow['URI'].'»';
			// build the value list alongside the column list so the two cannot get out of step
			$vals=array('NULL','\''.myesc($link,truncs($lang,'Languages','Code',$ctx)).'\'');
			foreach ($namecols as $col=>$locale) {
				$name=mb_ucfirst(locale_get_display_name($lang,is_null($locale) ? $lang : $locale));
				$vals[]='\''.myesc($link,truncs($name,'Languages',$col,$ctx)).'\'';
			}
			$q='INSERT INTO Languages (ID, Code, '.implode(', ',array_keys($namecols)).') VALUES ('.implode(', ',$vals).')';
			if (!$opts['dryrun']) {
				myq($link,$q,__LINE__);
				$langid=mysqli_insert_id($link);
			} else {
				$langid=0;
			}
		} else {
			$row=mysqli_fetch_assoc($res);
			$langid=$row['ID'];
		}
		$pos++;
		$retlangs[]=array('InstID'=>$instid,'LangID'=>$langid,'Pos'=>$pos,'Code'=>$lang);
	}
	return($retlangs);
}
|
||
|
||
/**
 * Returns what var_dump() would print for a value, instead of printing it.
 *
 * @param mixed $var Value to dump
 * @return string The captured var_dump() output
 */
function varbdump($var) {
	ob_start();
	var_dump($var);
	// fetch the buffer and discard it in one step
	return ob_get_clean();
}
|
||
|
||
/**
 * Sorts an array of arrays in place by the value stored under $key in each
 * inner array, preserving the outer keys.
 *
 * Entries with equal values are all kept and retain their relative order
 * (building a temporary array keyed by the scaled value would make them
 * overwrite each other, and would overflow for large values).
 *
 * @param array      $arr Array of arrays to sort (modified in place)
 * @param int|string $key Key of the value to sort by
 * @param bool       $rev Sort in descending order when true
 */
function mdasortbykey(&$arr,$key,$rev=false) {
	$src=$arr;
	// original position of every outer key, used as tie-breaker
	$pos=array_flip(array_keys($src));
	uksort($arr,function ($ka,$kb) use ($src,$pos,$key,$rev) {
		$cmp=($src[$ka][$key] <=> $src[$kb][$key]);
		if ($rev)
			$cmp=-$cmp;
		if ($cmp!==0)
			return $cmp;
		return ($pos[$ka] <=> $pos[$kb]);
	});
}
|
||
|
||
require(__DIR__.'/../site/mustard/include/ghs.php');
|
||
|
||
require(__DIR__.'/../site/mustard/include/ght.php');
|
||
|
||
/*
|
||
* Nodeinfo ('https://'.$host.'/nodeinfo/2.0.json') was added in v3.0.0
|
||
* Trends ('https://'.$host.'/api/v1/trends') was added in v3.0.0
|
||
* Activity ('https://'.$host.'/api/v1/instance/activity') was added in v2.1.2
|
||
*/
|
||
|
||
// When JSON output is requested, open the output file: a recovered session
// appends to the existing file, a fresh one truncates it and writes the
// opening brace of the top-level object.
if ($opts['jsonwrite']) {
	// [fopen() mode, mode name used in the error message]
	$mode = $recover ? array('a','append') : array('w','write');
	$jsonf=@fopen($opts['jsonfp'],$mode[0])
		or mexit('could not open file «'.$opts['jsonfp'].'» in '.$mode[1].' mode.',1);
	if (!$recover)
		fwrite($jsonf,'{'.N);
}
|
||
|
||
//$insts=['damze.umbrella.cafe'];
|
||
// Crawl state: start time, number of hosts, index of the next host to check
// and the "ok" / "good" counters; a recovered session resumes from the
// values saved in «currinst.job».
$tini = time();
$cinsts = count($insts);
if ($recover) {
	$i = $currinst['i'];
	$qok = $currinst['qok'];
	$qgood = $currinst['qgood'];
} else {
	$i = 0;
	$qok = 0;
	$qgood = 0;
}
// index this run starts from
$beg = $i;
|
||
while ($i<$cinsts) {
|
||
$now=time();
|
||
$host=$insts[$i];
|
||
@file_put_contents($currinstjfp,$host."\t".$i."\t".$qok."\t".$qgood.N)
|
||
or mexit('could not open «'.$currinstjfp.'» for writing.',1);
|
||
$i++;
|
||
$ismast=null;
|
||
$instans=true;
|
||
$info=null;
|
||
$tela=$now-$tini;
|
||
eecho(1,'working on «'.$host.'»; '.$i.'/'.$cinsts.'; '.$qok.' ok; '.$qgood.' good; '.round(100/$cinsts*$i).'%; elapsed time: '.ght($tela,null,0).'; estimated remaining time: '.ght($tela/$i*($cinsts-$beg)-$tela,null,0).'; mem.: '.ghs(memory_get_usage(true)).'; mem. peak: '.ghs(memory_get_peak_usage(true)).N);
|
||
if (willtrunc($host,'Instances','URI')) {
|
||
eecho(2,'«'.$host.'»: ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N);
|
||
} else {
|
||
eecho(0,'«'.$host.'»: trying to fetch instance info from API...'.N);
|
||
$buf=@getfc('https://'.$host.'/api/v1/instance',$opts['timeout']);
|
||
if ($buf['cont']!==false) {
|
||
ckratelimit($buf['headers']);
|
||
$info=@json_decode($buf['cont'],true);
|
||
if (is_array($info)) {
|
||
eecho(1,'«'.$host.'»: got instance info from API :-)'.N);
|
||
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on https...'.N);
|
||
$buf=@getfc('https://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
|
||
if ($buf['cont']===false) {
|
||
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on http...'.N);
|
||
$buf=@getfc('http://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
|
||
}
|
||
if ($buf['cont']!==false) {
|
||
$buf=@json_decode($buf['cont'],true);
|
||
if (is_array($buf) && array_key_exists('links',$buf) && is_array($buf['links']) && count($buf['links'])>0) {
|
||
$nirefs=[];
|
||
foreach ($buf['links'] as $key=>$niref)
|
||
if (isset($niref['rel']) && isset($niref['href']))
|
||
$nirefs[$niref['rel']]=$niref['href'];
|
||
else
|
||
eecho(2,'«'.$host.'»: nodeinfo specs link '.$key.' has unexpected format.'.N);
|
||
krsort($nirefs);
|
||
$niref=array_shift($nirefs);
|
||
eecho(0,'«'.$host.'»: got nodeinfo specs; trying to fetch nodeinfo...'.N);
|
||
$buf=@getfc($niref,$opts['timeout']);
|
||
if ($buf['cont']!==false) {
|
||
$buf=@json_decode($buf['cont'],true);
|
||
if (is_array($buf) && isset($buf['software']['name']) && isset($buf['software']['version'])) {
|
||
$info['x-nodeinfo']=$buf;
|
||
if (preg_match('/^mastodon|fedibird|ecko|hometown/',$info['x-nodeinfo']['software']['name'])===1)
|
||
$ismast=true;
|
||
$res=myq($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$info['x-nodeinfo']['software']['name']).'\'',__LINE__);
|
||
if (mysqli_num_rows($res)<1) {
|
||
if (!$opts['dryrun']) myq($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($info['x-nodeinfo']['software']['name'],'Platforms','Name','«'.$host.'»')).'\')',__LINE__)
|
||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||
notify('New software found: «'.$host.'» runs on «'.$info['x-nodeinfo']['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: nodeinfo was not good json or json had unexpected format.'.N);
|
||
}
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: nodeinfo specs where not good json or json had unexpected format.'.N);
|
||
}
|
||
}
|
||
if (array_key_exists('version',$info)) {
|
||
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».'.N);
|
||
if ($info['version']>='2.1.2') {
|
||
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
|
||
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
|
||
if ($buf['cont']!==false) {
|
||
ckratelimit($buf['headers']);
|
||
eecho(1,'«'.$host.'»: got instance activity info from API :-)'.N);
|
||
$info['x-activity']=json_decode($buf['cont'],true);
|
||
} else {
|
||
eecho(2,'«'.$host.'»: could not fetch instance activity from API: '.$buf['emsg'].N);
|
||
}
|
||
}
|
||
if ($info['version']>='3.0.0') {
|
||
eecho(0,'«'.$host.'»: trying to fetch instance trends info from API...'.N);
|
||
$buf=@getfc('https://'.$host.'/api/v1/trends',$opts['timeout']);
|
||
if ($buf['cont']!==false) {
|
||
ckratelimit($buf['headers']);
|
||
eecho(1,'«'.$host.'»: got instance trends info from API :-)'.N);
|
||
$info['x-trends']=json_decode($buf['cont'],true);
|
||
} else {
|
||
eecho(2,'«'.$host.'»: could not fetch instance trends from API: '.$buf['emsg'].N);
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
$instans=false;
|
||
eecho(2,'«'.$host.'»: fetched data were not good JSON.'.N);
|
||
}
|
||
} else {
|
||
$instans=false;
|
||
eecho(2,'«'.$host.'»: could not fetch instance info from API: '.$buf['emsg'].N);
|
||
}
|
||
if (!isset($info['uri']) || preg_match('#^\s*$#',$info['uri'])===1)
|
||
$instans=false;
|
||
if (is_array($info) && count($info)>0) {
|
||
//echo('json dump of all fetched info:'.N.json_encode($info,JSON_PRETTY_PRINT).N);
|
||
if ($opts['jsonwrite'])
|
||
fwrite($jsonf,'"'.$host.'": '.json_encode($info,JSON_PRETTY_PRINT).','.N);
|
||
}
|
||
if (!$instans) {
|
||
// this is the limbo of non-responding instances
|
||
$res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$host).'\'',__LINE__);
|
||
$nrows=mysqli_num_rows($res);
|
||
if ($nrows==1) {
|
||
eecho(1,'«'.$host.'»: didn’t respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New=0 and Instances.Dead=1.'.N);
|
||
$row=mysqli_fetch_assoc($res);
|
||
$instid=$row['ID'];
|
||
if (!$opts['dryrun']) mysq($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$instid,__LINE__);
|
||
if ($row['New']==1 && !is_null($row['FirstSeen']) && $now-$row['FirstSeen']>$opts['oldline']) {
|
||
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is no longer new.',2);
|
||
if (!$opts['dryrun']) myq($link,'UPDATE Instances SET New=0 WHERE ID='.$instid,__LINE__);
|
||
}
|
||
|
||
// we check the last time instance responded, if ever
|
||
$rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=1 ORDER BY Time DESC LIMIT 1',__LINE__);
|
||
// if instance never responded we consider the time of first check
|
||
if (mysqli_num_rows($rres)==0)
|
||
$rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=0 ORDER BY Time ASC LIMIT 1',__LINE__);
|
||
if (mysqli_num_rows($rres)>0) {
|
||
$rrow=mysqli_fetch_assoc($rres);
|
||
if ($now-$rrow['Time']>$opts['deadline']) {
|
||
if (!$opts['dryrun']) myq($link,'UPDATE Instances SET Dead=1 WHERE ID='.$instid,__LINE__);
|
||
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is dead!',2);
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: exists in the database but there’s no data about it in InstChecks!'.N);
|
||
}
|
||
} elseif ($nrows==0) {
|
||
eecho(1,'«'.$host.'»: doesn’t respond and is not in the database, adding it.'.N);
|
||
// "New=0" and "FirstSeen=NULL" because it's not new and not seen until it responds for the first time
|
||
if (!$opts['dryrun']) {
|
||
myq($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0, InsertTS='.$now,__LINE__);
|
||
$instid=mysqli_insert_id($link);
|
||
myq($link,'INSERT INTO InstChecks SET InstID='.$instid.', Time='.$now.', Status=0',__LINE__);
|
||
} else {
|
||
$instid=0;
|
||
}
|
||
} else {
|
||
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
|
||
}
|
||
if (!$opts['dryrun']) myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 0)',__LINE__);
|
||
} else {
|
||
|
||
// instance responded
|
||
|
||
if (is_null($ismast)) {
|
||
if (!array_key_exists('version',$info)) {
|
||
$ismast=null;// redundant, just to put there something
|
||
} elseif (array_key_exists('pleroma',$info)) {
|
||
$ismast=false;
|
||
} elseif (preg_match('#(compatible|pleroma|pixelfed)#i',$info['version'])==1) {
|
||
$ismast=false;
|
||
} elseif (preg_match('#^[0-9]+\.[0-9]+\.[0-9]+#',$info['version'])!==1) {
|
||
$ismast=false;
|
||
} else {
|
||
$ismast=true;
|
||
}
|
||
}
|
||
|
||
$qok++;
|
||
if (!is_null($ismast))
|
||
($ismast) ? $ismast=1 : $ismast=0;
|
||
$instrow=array('ID'=>null, 'FirstSeen'=>null, 'IsMastodon'=>$ismast, 'Dead'=>0, 'New'=>0, 'Good'=>0, 'Chosen'=>0, 'Priority'=>null, 'Visible'=>0, 'Noxious'=>0, 'NoxReason'=>null, 'NoxLastModTS'=>null, 'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null, 'OurDescEN'=> null, 'LocalityID'=>null, 'OurLangsLock'=>0, 'Email'=>null, 'Software'=>null, 'Version'=>null, 'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null, 'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null, 'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null, 'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null, 'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null, 'LastCheckOk'=>1, 'GuestID'=>null, 'LastGuestEdit'=>null);
|
||
$instrow['URI']=$host;
|
||
if (akeavinn('title',$info))
|
||
$instrow['Title']=nempty(truncs($info['title'],'Instances','Title','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('short_description',$info))
|
||
$instrow['ShortDesc']=nempty(truncs($info['short_description'],'Instances','ShortDesc','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('description',$info))
|
||
$instrow['LongDesc']=nempty(truncs($info['description'],'Instances','LongDesc','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('email',$info))
|
||
$instrow['Email']=nempty(truncs($info['email'],'Instances','Email','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('version',$info))
|
||
$instrow['Version']=nempty(truncs($info['version'],'Instances','Version','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('stats',$info)) {
|
||
if (akeavinn('user_count',$info['stats']))
|
||
$instrow['UserCount']=truncn($info['stats']['user_count'],'Instances','UserCount','«'.$instrow['URI'].'»');
|
||
if (akeavinn('status_count',$info['stats']))
|
||
$instrow['StatusCount']=truncn($info['stats']['status_count'],'Instances','StatusCount','«'.$instrow['URI'].'»');
|
||
if (akeavinn('domain_count',$info['stats']))
|
||
$instrow['DomainCount']=truncn($info['stats']['domain_count'],'Instances','DomainCount','«'.$instrow['URI'].'»');
|
||
}
|
||
if (akeavinn('thumbnail',$info))
|
||
$instrow['Thumb']=nempty(truncs($info['thumbnail'],'Instances','Thumb','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('max_toot_chars',$info))
|
||
$instrow['MaxTootChars']=truncn($info['max_toot_chars'],'Instances','MaxTootChars','«'.$instrow['URI'].'»');
|
||
if (akeavinn('registrations',$info))
|
||
$instrow['RegOpen']=b2i($info['registrations'],'Istanza «'.$instrow['URI'].'»: ');
|
||
if (akeavinn('approval_required',$info))
|
||
$instrow['RegReqApproval']=b2i($info['approval_required'],'Istanza «'.$instrow['URI'].'»: ');
|
||
if (akeavinn('contact_account',$info)) {
|
||
if (akeavinn('acct',$info['contact_account']))
|
||
$instrow['AdmAccount']=nempty(truncs($info['contact_account']['acct'],'Instances','AdmAccount','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('display_name',$info['contact_account']))
|
||
$instrow['AdmDisplayName']=nempty(truncs($info['contact_account']['display_name'],'Instances','AdmDisplayName','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('created_at',$info['contact_account']))
|
||
$instrow['AdmCreatedAt']=pgdatetomy($info['contact_account']['created_at']);
|
||
if (akeavinn('note',$info['contact_account']))
|
||
$instrow['AdmNote']=nempty(truncs($info['contact_account']['note'],'Instances','AdmNote','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('url',$info['contact_account']))
|
||
$instrow['AdmURL']=nempty(truncs($info['contact_account']['url'],'Instances','AdmURL','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('avatar',$info['contact_account']))
|
||
$instrow['AdmAvatar']=nempty(truncs($info['contact_account']['avatar'],'Instances','AdmAvatar','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('header',$info['contact_account']))
|
||
$instrow['AdmHeader']=nempty(truncs($info['contact_account']['header'],'Instances','AdmHeader','«'.$instrow['URI'].'»'));
|
||
}
|
||
if (akeavinn('x-nodeinfo',$info)) {
|
||
if (akeavinn('software',$info['x-nodeinfo']) && akeavinn('name',$info['x-nodeinfo']['software']))
|
||
$instrow['Software']=nempty(truncs($info['x-nodeinfo']['software']['name'],'Instances','Software','«'.$instrow['URI'].'»'));
|
||
if (akeavinn('usage',$info['x-nodeinfo']) && akeavinn('users',$info['x-nodeinfo']['usage'])) {
|
||
if (akeavinn('activeMonth',$info['x-nodeinfo']['usage']['users']))
|
||
$instrow['ActiveUsersMonth']=truncn($info['x-nodeinfo']['usage']['users']['activeMonth'],'Instances','ActiveUsersMonth','«'.$instrow['URI'].'»');
|
||
if (akeavinn('activeHalfyear',$info['x-nodeinfo']['usage']['users']))
|
||
$instrow['ActiveUsersHalfYear']=truncn($info['x-nodeinfo']['usage']['users']['activeHalfyear'],'Instances','ActiveUsersHalfYear','«'.$instrow['URI'].'»');
|
||
}
|
||
}
|
||
|
||
$whynot=array();
|
||
if (is_null($instrow['RegOpen'])) {
|
||
$whynot[]='we don’t know if it allows registrations';
|
||
} elseif ($instrow['RegOpen']==0) {
|
||
$whynot[]='it doesn’t allow registrations';
|
||
}
|
||
if (is_null($instrow['UserCount'])) {
|
||
$whynot[]='we don’t know its total users number';
|
||
} elseif ($instrow['UserCount']<10 || $instrow['UserCount']>30000) {
|
||
$whynot[]='total users number is not greater than 10 and less than 30000';
|
||
}
|
||
if (is_null($instrow['DomainCount'])) {
|
||
$whynot[]='we don’t know the number of other instances it knows';
|
||
} elseif ($instrow['DomainCount']<500) {
|
||
$whynot[]='the number of other instances it knows is less than 500';
|
||
}
|
||
if (!is_null($instrow['ActiveUsersMonth'])) {
|
||
if ($instrow['ActiveUsersMonth']<10)
|
||
$whynot[]='the number of active users for the last month is less than 10';
|
||
} elseif (!is_null($instrow['StatusCount']) && $instrow['UserCount']>0 && $instrow['StatusCount']/$instrow['UserCount']<10) {
|
||
$whynot[]='the average number of toots for user is less than 10';
|
||
} else {
|
||
$whynot[]='it was impossible to detect the number of active users for the last month or the average number of toots for user';
|
||
}
|
||
if (count($whynot)==0) {
|
||
$instrow['Good']=1;
|
||
eecho(1,'«'.$host.'»: this is a suitable instance! :-)'.N);
|
||
$qgood++;
|
||
} else {
|
||
eecho(1,'«'.$host.'»: this is not a suitable instance: '.implode('; ',$whynot).' :-('.N);
|
||
}
|
||
|
||
$res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'',__LINE__);
|
||
|
||
$nrows=mysqli_num_rows($res);
|
||
if ($nrows==1) {
|
||
eecho(1,'«'.$instrow['URI'].'»: is already present in the database, updating it...'.N);
|
||
$oldinstrow=mysqli_fetch_assoc($res);
|
||
$instid=$oldinstrow['ID'];
|
||
$instrow['ID']=$oldinstrow['ID'];
|
||
// if the instance already present in the db has FirstSeen=NULL, this means this is the first time it responds, so...
|
||
if (is_null($oldinstrow['FirstSeen'])) {
|
||
$instrow['FirstSeen']=$now;
|
||
$instrow['New']=1;
|
||
} else {
|
||
$instrow['FirstSeen']=$oldinstrow['FirstSeen'];
|
||
if ($oldinstrow['New']==1) {
|
||
$instrow['New']=1;
|
||
if ($now-$oldinstrow['FirstSeen']>$opts['oldline']) {
|
||
$instrow['New']=0;
|
||
notify('Instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» is no longer new.',2);
|
||
}
|
||
}
|
||
}
|
||
|
||
if ($instrow['Good']==1 && $oldinstrow['Good']==0) {
|
||
notify('Instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» wasn’t suitable, but it is now!',1);
|
||
} elseif ($instrow['Good']==0 && $oldinstrow['Good']==1) {
|
||
notify('Instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» was suitable, but it’s no longer for these reasons: '.implode('; ',$whynot),1);
|
||
}
|
||
$instrow['Chosen']=$oldinstrow['Chosen'];
|
||
$instrow['Priority']=$oldinstrow['Priority'];
|
||
$instrow['Visible']=$oldinstrow['Visible'];
|
||
$instrow['Noxious']=$oldinstrow['Noxious'];
|
||
$instrow['NoxReason']=$oldinstrow['NoxReason'];
|
||
$instrow['NoxLastModTS']=$oldinstrow['NoxLastModTS'];
|
||
if ($instrow['ShortDesc']!=$oldinstrow['ShortDesc'])
|
||
notify('«Short description» of instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» has changed.',1);
|
||
if ($instrow['LongDesc']!=$oldinstrow['LongDesc'])
|
||
notify('«Long description» of instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» has changed.',1);
|
||
$instrow['OurDesc']=$oldinstrow['OurDesc'];
|
||
$instrow['OurDescEN']=$oldinstrow['OurDescEN'];
|
||
$instrow['LocalityID']=$oldinstrow['LocalityID'];
|
||
$instrow['OurLangsLock']=$oldinstrow['OurLangsLock'];
|
||
$instrow['GuestID']=$oldinstrow['GuestID'];
|
||
$instrow['LastGuestEdit']=$oldinstrow['LastGuestEdit'];
|
||
$query='UPDATE Instances SET ';
|
||
foreach ($instrow as $field=>$value) {
|
||
if (!is_null($value))
|
||
$query.=$field.'=\''.myesc($link,$value).'\', ';
|
||
else
|
||
$query.=$field.'=NULL, ';
|
||
}
|
||
$query=substr($query,0,-2).' WHERE Instances.ID='.$instrow['ID'];
|
||
eecho(1,'«'.$host.'»: update query: «'.$query.'».'.N);
|
||
if (!$opts['dryrun']) myq($link,$query,__LINE__);
|
||
|
||
$res=myq($link,'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID='.$instrow['ID'].' ORDER BY Pos ASC',__LINE__);
|
||
$oldinstlangs=array();
|
||
while ($row=mysqli_fetch_assoc($res))
|
||
$oldinstlangs[]=$row;
|
||
$instlangs=langs($instrow['ID'], $instrow['URI'], false);
|
||
if ($instlangs!=$oldinstlangs) {
|
||
notify('The list of languages declared by instance «<a href="viewinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» has changed from «'.subarimp(', ','Code',$oldinstlangs).'» to «'.subarimp(', ','Code',$instlangs).'».',1);
|
||
if (!$opts['dryrun']) {
|
||
myq($link,'DELETE FROM InstLangs WHERE InstID='.$instrow['ID'],__LINE__);
|
||
foreach ($instlangs as $row)
|
||
myq($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
|
||
}
|
||
}
|
||
|
||
if ($instrow['OurLangsLock']==0) {
|
||
$instourlangs=langs($instrow['ID'], $instrow['URI'], true);
|
||
// if instourlangs is empty and instlangs is not, set instourlangs as instlangs
|
||
if (count($instourlangs)==0 && count($instlangs)>0)
|
||
$instourlangs=$instlangs;
|
||
if (count($instourlangs)>0) {
|
||
if (!$opts['dryrun']) {
|
||
myq($link,'DELETE FROM InstOurLangs WHERE InstID='.$instrow['ID'],__LINE__);
|
||
foreach ($instourlangs as $row)
|
||
myq($link,'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
|
||
}
|
||
}
|
||
}
|
||
|
||
} elseif ($nrows==0) {
|
||
eecho(1,'«'.$host.'» is not present in the database, adding it...'.N);
|
||
$instrow['FirstSeen']=$now;
|
||
if ($opts['setnew'])
|
||
$instrow['New']=1;
|
||
$fields=array();
|
||
$values='';
|
||
foreach ($instrow as $field=>$value) {
|
||
$fields[]=$field;
|
||
if (!is_null($value))
|
||
$values.='\''.myesc($link,$value).'\', ';
|
||
else
|
||
$values.='NULL, ';
|
||
}
|
||
$values=substr($values,0,-2);
|
||
$query='INSERT INTO Instances ('.implode(', ',$fields).', InsertTS) VALUES ('.$values.', '.$now.')';
|
||
eecho(1,'«'.$host.'»: insert query: «'.$query.'»'.N);
|
||
if (!$opts['dryrun']) {
|
||
myq($link,$query,__LINE__);
|
||
$instid=mysqli_insert_id($link);
|
||
} else {
|
||
$instid=0;
|
||
}
|
||
if ($opts['setnew'] && !$opts['dryrun'])
|
||
notify('New instance found: «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>».',1);
|
||
|
||
$instlangs=langs($instid, $instrow['URI'], false);
|
||
if (!$opts['dryrun']) {
|
||
foreach ($instlangs as $row)
|
||
myq($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
|
||
}
|
||
|
||
$instourlangs=langs($instid, $instrow['URI'], true);
|
||
// if instourlangs is empty and instlangs is not, set instourlangs as instlangs
|
||
if (count($instourlangs)==0 && count($instlangs)>0)
|
||
$instourlangs=$instlangs;
|
||
if (!$opts['dryrun']) {
|
||
foreach ($instourlangs as $row)
|
||
myq($link,'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')',__LINE__);
|
||
}
|
||
|
||
if ($instrow['Good']==1)
|
||
notify('New instance «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>» is suitable!',1);
|
||
|
||
} else {
|
||
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
|
||
}
|
||
|
||
if (array_key_exists('x-activity',$info) && is_array($info['x-activity'])) {
|
||
if (!$opts['dryrun']) {
|
||
myq($link,'DELETE FROM InstActivity WHERE InstID='.$instid,__LINE__);
|
||
$pos=0;
|
||
foreach ($info['x-activity'] as $buf) {
|
||
if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
|
||
$pos++;
|
||
$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
|
||
myq($link,$query,__LINE__);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if (array_key_exists('x-trends',$info) && is_array($info['x-trends'])) {
|
||
$trends=array();
|
||
foreach ($info['x-trends'] as $buf) {
|
||
if (akeavinn('name',$buf) && akeavinn('url',$buf) && akeavinn('history',$buf) && is_array($buf['history'])) {
|
||
$trend=0;
|
||
foreach ($buf['history'] as $row) {
|
||
if ($row['uses']>0)
|
||
$trend+=($row['accounts']/$row['uses']);
|
||
}
|
||
$trends[]=array(
|
||
'InstID'=>$instid,
|
||
'LastDay'=>$buf['history'][0]['day'],
|
||
'Name'=>$buf['name'],
|
||
'URL'=>$buf['url'],
|
||
'Pos'=>null,
|
||
'trend'=>$trend
|
||
);
|
||
}
|
||
}
|
||
mdasortbykey($trends,'trend',true);
|
||
//print_r($trends);
|
||
if (!$opts['dryrun']) myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
|
||
$pos=0;
|
||
foreach ($trends as $trend) {
|
||
$pos++;
|
||
$query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.$trend['LastDay'].'\', \''.myesc($link,truncs($trend['Name'],'InstTrends','Name','«'.$instrow['URI'].'»')).'\', \''.myesc($link,truncs($trend['URL'],'InstTrends','URL','«'.$instrow['URI'].'»')).'\', '.$pos.')';
|
||
if (!$opts['dryrun']) myq($link,$query,__LINE__);
|
||
}
|
||
}
|
||
if (!$opts['dryrun']) myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 1)',__LINE__);
|
||
|
||
if ($opts['fetchusers'] && $ismast && array_key_exists('version',$info) && $info['version']>='4.0.0') {
|
||
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
|
||
$exusers=[];// array of this instance's users already existing in the db
|
||
$res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
|
||
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
|
||
$users=[];// array of users in this instance's directory
|
||
$chunk=0;
|
||
$limit=80;
|
||
$end=false;
|
||
while (!$end) {
|
||
$offset=$chunk*$limit;
|
||
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
|
||
if ($buf['cont']!==false) {
|
||
ckratelimit($buf['headers']);
|
||
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
|
||
$buf=@json_decode($buf['cont'],true);
|
||
if (is_array($buf)) {
|
||
//print_r($buf);
|
||
if (count($buf)<$limit) $end=true;
|
||
/*if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
|
||
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint don’t have a “noindex” attribute; skipping directory fetching.'.N);
|
||
break;
|
||
} else {
|
||
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
|
||
}*/
|
||
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
|
||
foreach ($buf as $user) {
|
||
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
|
||
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
|
||
// disabled because it takes too long on instances with many users
|
||
/*if (!isset($user['noindex'])) {
|
||
$user['noindex']=true;
|
||
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
|
||
$page=getfc($user['url'],$opts['timeout']);
|
||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||
if ($page['cont']!==false) {
|
||
//<meta content='noindex, noarchive' name='robots'>
|
||
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
|
||
$user['noindex']=false;
|
||
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
|
||
} else {
|
||
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
|
||
}
|
||
}*/
|
||
$snote=strip_tags($user['note']);
|
||
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
|
||
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
|
||
// disabled; takes too long on instances with many users
|
||
/*$user['tags']=[];
|
||
if (!$user['noindex'] && $info['version']>='3.3.0') {
|
||
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
|
||
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
|
||
if ($tags['cont']!==false) {
|
||
ckratelimit($tags['headers']);
|
||
$tags=@json_decode($tags['cont'],true);
|
||
if (is_array($tags) && count($tags)>0) {
|
||
eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
|
||
foreach($tags as $tag) $user['tags'][]=$tag['name'];
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
|
||
}
|
||
}
|
||
$user['tags']=implode(';',$user['tags']);
|
||
if ($user['tags']=='') $user['tags']=null;*/
|
||
$user['tags']=null;
|
||
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
|
||
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
|
||
$users[$user['id']]=$user;
|
||
} else {
|
||
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
|
||
//print_r($user);
|
||
}
|
||
}
|
||
} else {
|
||
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
|
||
$end=true;
|
||
}
|
||
$chunk++;
|
||
} else {
|
||
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
|
||
$end=true;
|
||
}
|
||
}
|
||
foreach ($users as $locid=>$user) {
|
||
$query='SET InstID='.$instid.', host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
|
||
$uid=0;
|
||
if (!array_key_exists($user['id'],$exusers)) {
|
||
if (!$user['noindex']) {
|
||
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
|
||
$query='INSERT INTO Users '.$query;
|
||
if (!$opts['dryrun']) {
|
||
myq($link,$query,__LINE__);
|
||
$uid=mysqli_insert_id($link);
|
||
}
|
||
} else {
|
||
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
|
||
}
|
||
} else {
|
||
$uid=$exusers[$locid]['ID'];
|
||
if (!$user['noindex']) {
|
||
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
|
||
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
|
||
} else {
|
||
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
|
||
$query='DELETE FROM Users WHERE ID='.$uid;
|
||
}
|
||
if (!$opts['dryrun']) {
|
||
myq($link,$query,__LINE__);
|
||
myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
|
||
}
|
||
}
|
||
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
|
||
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
|
||
foreach ($user['fields'] as $field) {
|
||
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
|
||
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
|
||
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
|
||
if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at'],__LINE__);
|
||
}
|
||
}
|
||
}
|
||
foreach ($exusers as $locid=>$exuser) {
|
||
if (!array_key_exists($locid,$users)) {
|
||
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
|
||
if (!$opts['dryrun']) {
|
||
myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
|
||
myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
|
||
}
|
||
}
|
||
}
|
||
|
||
// Every instance has been processed: release the database connection and
// drop the handle (mexit() tests isset($link) before closing it again).
mysqli_close($link);
unset($link);

// When a JSON dump was requested, write its last member, close the
// top-level object and close the file.
if ($opts['jsonwrite']) {
	fwrite($jsonf,'"The end?": true'.N.'}'.N);
	fclose($jsonf);
}

// Remove the files this run left on disk, the lock file last.
// NOTE(review): $instsjfp and $currinstjfp are presumably the per-run JSON
// work files; confirm against the place where they are created.
foreach ([$instsjfp, $currinstjfp, $lockfp] as $leftover)
	unlink($leftover);

// Report the total running time and leave with a success status.
$elapsed=time()-$tini;
eecho(1,'Done (in '.ght($elapsed,null,0).') :-)'.N);

exit(0);
|
||
|
||
// "multi array_key_exists"
|
||
/**
 * Tells whether every key listed in $keys exists in $arr.
 *
 * Presence is checked with array_key_exists(), so a key whose value is null
 * still counts as present. An empty $keys list on an array yields true.
 *
 * $arr may come straight from decoded remote JSON and therefore may not be
 * an array at all (e.g. a string element of an error payload); in that case
 * false is returned instead of letting array_key_exists() raise a TypeError.
 *
 * @param array $keys keys that must all be present
 * @param mixed $arr  value to inspect (taken by reference, never modified)
 * @return bool true when $arr is an array holding all the keys, false otherwise
 */
function make($keys,&$arr) {
	// anything that is not an array cannot hold the required keys
	if (!is_array($arr))
		return(false);
	foreach ($keys as $key)
		if (!array_key_exists($key,$arr))
			return(false);
	return(true);
}
|
||
|
||
/**
 * Renders a PHP value as an SQL literal ready to be concatenated into a query.
 *
 *  - null                                  => NULL
 *  - boolean                               => 1 or 0
 *  - anything that trims to an empty string => NULL
 *  - everything else                       => the value escaped through the
 *                                             connection, wrapped in single quotes
 *
 * @param mysqli $link open database connection (only used for escaping)
 * @param mixed  $var  value to render
 * @return string SQL fragment
 */
function myv(&$link,$var) {
	if (is_null($var))
		return('NULL');

	if (is_bool($var))
		return($var ? '1' : '0');

	// blank strings are stored as NULL rather than as ''
	if (trim($var)=='')
		return('NULL');

	$escaped=mysqli_real_escape_string($link,$var);
	return('\''.$escaped.'\'');
}
|
||
|
||
/**
 * Converts a "YYYY-MM-DD" date into a Unix timestamp.
 *
 * Only the leading year-month-day part is read, so a full datetime such as
 * "2022-03-30T21:27:22.000Z" gives the same result as "2022-03-30". The
 * timestamp is built with mktime(), i.e. midnight of that day in PHP's
 * default timezone; out-of-range months/days are normalised by mktime().
 *
 * @param string $date date string starting with year-month-day
 * @return int|null the timestamp, or null when $date is not a string or
 *                  does not start with a year-month-day triple (previously
 *                  such input reached mktime() with missing/non-numeric
 *                  arguments)
 */
function datetomy($date) {
	if (!is_string($date))
		return(null);
	if (preg_match('/^\s*(\d{1,4})-(\d{1,2})-(\d{1,2})/',$date,$parts)!==1)
		return(null);
	// explicit casts: mktime() must receive integers, not numeric strings
	return(mktime(0,0,0,(int)$parts[2],(int)$parts[3],(int)$parts[1]));
}
|
||
|
||
/**
 * Pauses the script when the remote server reports that its rate limit is
 * almost used up.
 *
 * The raw HTTP response head is split into lines, the first line (the status
 * line) is dropped and every "Name: value" line is collected under its
 * lowercased name. The function then needs three headers:
 *  - «date»                  server time, e.g. "Wed, 30 Mar 2022 21:27:22 GMT"
 *  - «x-ratelimit-reset»     reset time, e.g. "2022-03-31T04:05:00.058705Z"
 *  - «x-ratelimit-remaining» number of requests still allowed
 * When fewer than 3 requests remain it sleeps until one second after the
 * reset time, measured on the server's own clock (reset - date + 1).
 *
 * If a header is missing or a time cannot be parsed, a warning is logged
 * through eecho() and no sleep happens. A reset time that is already in the
 * past never causes a sleep (a negative value would make sleep() throw).
 *
 * @param string $httpresphead raw HTTP response head
 * @return void
 */
function ckratelimit($httpresphead) {
	// accept CRLF as well as bare LF line endings
	$lines=preg_split('/\r?\n/',$httpresphead);
	// first line is the status line, not a header
	array_shift($lines);
	$headers=[];
	foreach ($lines as $line)
		if (preg_match('/^([^:]+):(.*)$/Uu',$line,$matches)===1)
			$headers[strtolower($matches[1])]=trim($matches[2]);

	if (!array_key_exists('x-ratelimit-reset',$headers)) {
		eecho(2,'ckratelimit: $httpresphead did not contain an «x-ratelimit-reset» header!'.N);
		return;
	}
	if (!array_key_exists('date',$headers)) {
		eecho(2,'ckratelimit: $httpresphead did not contain a «date» header!'.N);
		return;
	}
	// without this header we cannot tell whether the limit is close
	if (!array_key_exists('x-ratelimit-remaining',$headers)) {
		eecho(2,'ckratelimit: $httpresphead did not contain an «x-ratelimit-remaining» header!'.N);
		return;
	}

	$srvnow=strtotime($headers['date']);
	$srvrlr=strtotime($headers['x-ratelimit-reset']);
	// strtotime() yields false on garbage; doing arithmetic on it would give
	// a meaningless (possibly enormous) sleep time
	if ($srvnow===false || $srvrlr===false) {
		eecho(2,'ckratelimit: could not parse the «date» or the «x-ratelimit-reset» header!'.N);
		return;
	}

	if ((int)$headers['x-ratelimit-remaining']<3) {
		$stosl=$srvrlr-$srvnow+1;
		// reset time already passed: nothing to wait for
		if ($stosl>0) {
			eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
			sleep($stosl);
		}
	}
}
|
||
|
||
?>
|