1
0
Fork 0
MastodonStartpage/web/admin/crawler/crawler.php

832 lines
30 KiB
PHP
Raw Normal View History

2019-12-01 09:07:45 +01:00
#!/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2019-12-30 22:51:32 +01:00
/*
TODO
- input hardening: validate value types, truncn, truncs
*/
2019-12-01 09:07:45 +01:00
// Newline shorthand used when composing console and log messages.
define('N',"\n");

// Handles shared with signalHandler() and mexit(); each stays false until
// the corresponding resource has been opened.
$link=false;
$logf=false;
$jsonf=false;

// Ticks are declared so that the handlers registered below get dispatched
// while the script is running.
declare(ticks=1);
pcntl_signal(SIGTERM,'signalHandler'); // termination ('kill' was called)
pcntl_signal(SIGHUP,'signalHandler');  // terminal log-out
pcntl_signal(SIGINT,'signalHandler');  // interrupted (Ctrl-C was pressed)
/**
 * Signal handler: logs the interruption, releases whatever is currently
 * open (DB connection, JSON dump, log file) and terminates with status 2.
 *
 * @param int $signal Number of the signal received (not inspected).
 */
function signalHandler($signal) {
	global $link, $logf, $jsonf;

	lecho(N.'Sono stato interrotto.'.N);

	if ($link) {
		lecho('La connessione MySQL è aperta, la chiudo.'.N);
		mysqli_close($link);
	}

	if ($jsonf) {
		lecho('Il file di dump json è aperto, lo chiudo.'.N);
		// The closing '"Fine?": true }' trailer is deliberately NOT written
		// here: the dump is reopened in append mode when the job is resumed.
		fclose($jsonf);
	}

	// The log file goes last, since lecho() above may still write to it.
	if ($logf) {
		lecho('Il file di log è aperto, lo chiudo.'.N);
		fclose($logf);
	}

	exit(2);
}
// Runtime options.
$opts=[
	// 'timeout' is passed to the HTTP stream context used for every fetch
	'timeout'=>3,
	// when true, lecho() also writes every message to the log file
	'log'=>true,
	// path of the JSON dump of everything fetched
	'jsonfp'=>__DIR__.'/instances.json',
	// when true, the JSON dump is written
	'jsonwrite'=>true,
	'jsonread'=>false,
];
2019-12-01 09:07:45 +01:00
2019-12-26 21:57:36 +01:00
use function mysqli_real_escape_string as myesc;
2019-12-01 09:07:45 +01:00
/**
 * Converts a duration written as "<digits><unit>" into seconds.
 *
 * Units and their multipliers: none or 's' = 1, 'm' = 60, 'o' = 3600,
 * 'g' = 86400, 'S' = 7 days, 'M' = 30 days, 'A' = 365 days.
 * Only the start of the string is examined; trailing characters are ignored.
 *
 * @param string $str Duration string, e.g. "90", "15m", "2g".
 * @return int|float|false Number of seconds (always numeric, never a string),
 *                         or false when $str does not start with digits.
 */
function tosec($str) {
	static $multipliers=array(
		''=>1,
		's'=>1,
		'm'=>60,
		'o'=>3600,
		'g'=>86400,
		'S'=>604800,
		'M'=>2592000,
		'A'=>31536000
	);
	if (preg_match('/^([0-9]+)([smogSMA]?)/',$str,$buf)!==1)
		return(false);
	// Multiplying also for the plain-seconds case keeps the return type
	// numeric for every unit instead of leaking the matched string.
	return($buf[1]*$multipliers[$buf[2]]);
}
2019-12-01 09:07:45 +01:00
2019-12-28 22:51:41 +01:00
/**
 * Logs a final message, releases the open resources and terminates.
 *
 * @param string $msg  Message passed to lecho() before shutting down.
 * @param int    $code Process exit status.
 */
function mexit($msg,$code) {
	global $link, $jsonf, $logf;

	lecho($msg);

	if ($link) {
		mysqli_close($link);
	}
	if ($jsonf) {
		fclose($jsonf);
	}
	// Closed last because lecho() above writes to it.
	if ($logf) {
		fclose($logf);
	}

	exit($code);
}
2019-12-26 21:57:36 +01:00
/**
 * Prints a message to standard output and, when logging is enabled,
 * appends it to the log file.
 *
 * @param string $msg     Text to emit (no newline is added).
 * @param bool   $logonly When true the message goes to the log file only.
 */
function lecho($msg,$logonly=false) {
	global $opts, $logf;
	if (!$logonly)
		echo($msg);
	// $logf is false until the log file has been opened and stops being a
	// valid resource once closed; writing to it in those states would raise
	// an error, so the log write is simply skipped.
	if ($opts['log'] && is_resource($logf))
		fwrite($logf,$msg);
}
2019-12-30 22:51:32 +01:00
// Checkpoint files: the list of instances to visit and the position reached.
// When both exist the run is treated as a job to be resumed.
$instsjfp=__DIR__.'/instances.job';
$currinstjfp=__DIR__.'/currinst.job';
$riprendi=(file_exists($currinstjfp) && file_exists($instsjfp));

$logfp=__DIR__.'/crawler.log';
if ($opts['log']) {
	// Append to the existing log when resuming, start a fresh one otherwise.
	$mode=$riprendi ? ['a','aggiunta'] : ['w','scrittura'];
	$logf=@fopen($logfp,$mode[0]);
	if ($logf===false) {
		// lecho()/mexit() are not used here: the log itself is what failed.
		echo('Non ho potuto aprire in modalità '.$mode[1].' il file di log «'.$logfp.'».'.N);
		exit(1);
	}
}
2019-12-30 22:51:32 +01:00
// Load the DB credentials and open the connection.
$inifp=__DIR__.'/../sec/mastostartadmin.ini';
$iniarr=@parse_ini_file($inifp)
	or mexit('Impossibile aprire il file di configurazione «'.$inifp.'»'.N,1);

// This script checks mysqli return values ("... or mexit(...)") everywhere.
// PHP >= 8.1 makes mysqli throw exceptions by default, which would bypass
// all of those checks, so return-value error reporting is requested explicitly.
mysqli_report(MYSQLI_REPORT_OFF);

// On a failed connection $link is false, so the error text must come from
// mysqli_connect_error(): mysqli_error() needs a valid connection.
$link=mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket'])
	or mexit(mysqli_connect_error().N,1);
mysqli_set_charset($link,'utf8mb4')
	or mexit(mysqli_error($link).N,1);
/**
 * Turns a column type as reported by SHOW COLUMNS into the descriptor
 * stored in $tables:
 *  - char/varchar          => maximum length (int)
 *  - integer types/decimal => array('min'=>..., 'max'=>...)
 *  - anything else         => the raw type string
 *
 * Types are accepted with or without a parenthesised argument list
 * ("int(10) unsigned", "int unsigned", "text").
 * Only the types this script needs are covered.
 *
 * @param string $type Value of the "Type" column of SHOW COLUMNS.
 * @return int|array|string
 */
function coltypelimits($type) {
	// signed range first, unsigned range second
	static $intranges=array(
		'tinyint'=>array(array(-128,127),array(0,255)),
		'smallint'=>array(array(-32768,32767),array(0,65535)),
		'mediumint'=>array(array(-8388608,8388607),array(0,16777215)),
		'int'=>array(array(-2147483648,2147483647),array(0,4294967295)),
		// bigint does not fit in a PHP integer without bcmath/gmp, so its
		// limits are kept as strings; such values are only used inside the DB
		'bigint'=>array(array('-9223372036854775808','9223372036854775807'),array('0','18446744073709551615'))
	);
	if (preg_match('/^(\w+)(?:\(([^)]*)\))?(.*)$/',$type,$m)!==1)
		return($type);
	$base=strtolower($m[1]);
	$args=$m[2];
	$unsigned=(stripos($m[3],'unsigned')!==false);
	if ($base=='char' || $base=='varchar')
		return(is_numeric($args) ? (int)$args : $type);
	if (isset($intranges[$base])) {
		$range=$intranges[$base][$unsigned ? 1 : 0];
		return(array('min'=>$range[0],'max'=>$range[1]));
	}
	if ($base=='decimal') {
		// decimal(P,S): P total digits, S of them after the decimal point;
		// (10,0) is assumed when the definition carries no arguments
		$parts=($args==='') ? array(10,0) : explode(',',$args);
		$precision=(int)$parts[0];
		$scale=isset($parts[1]) ? (int)$parts[1] : 0;
		$max=floatval(str_repeat('9',max(0,$precision-$scale)).'.'.str_repeat('9',$scale));
		return(array('min'=>($unsigned ? 0 : -$max),'max'=>$max));
	}
	return($type);
}

// Build $tables[<table>][<column>] with the size/range descriptors that
// truncs(), truncn() and willtrunc() rely on.
$tables=array();
$res=mysqli_query($link,'SHOW TABLES')
	or mexit(mysqli_error($link).N,1);
while ($row=mysqli_fetch_row($res)) {
	// Identifier is backtick-quoted so unusual table names cannot break the statement.
	$resb=mysqli_query($link,'SHOW COLUMNS FROM `'.str_replace('`','``',$row[0]).'`')
		or mexit(mysqli_error($link).N,1);
	$fields=array();
	while ($rowb=mysqli_fetch_assoc($resb))
		$fields[$rowb['Field']]=coltypelimits($rowb['Type']);
	$tables[$row[0]]=$fields;
}
2019-12-30 22:51:32 +01:00
// Resume an interrupted job: reload the instance list and the checkpoint.
if ($riprendi) {
	lecho('Pare che ci sia un lavoro in sospeso, provo a riprenderlo...'.N);

	// One domain per line.
	$buf=@file($instsjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
		or mexit('Non ho potuto aprire in lettura il file «'.$instsjfp.'».'.N,1);
	$insts=array_values($buf);

	// The first line holds tab-separated: domain, index, OK count, good count.
	$buf=@file($currinstjfp,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES)
		or mexit('Non ho potuto aprire in lettura il file «'.$currinstjfp.'».'.N,1);
	$buf=explode("\t",$buf[0]);
	$currinst=[
		'dom'=>$buf[0],
		'i'=>$buf[1],
		'qok'=>$buf[2],
		'qgood'=>$buf[3],
	];
	$riprendi=true;
}
// Queue of truncations recorded by truncs() and not yet notified.
$tronconi=array();

/**
 * Emits one notification for every queued truncation, linking it to the
 * record with the given ID, then empties the queue.
 *
 * @param int|string $id ID used in the "edit" link of each notification.
 */
function flushtronc($id) {
	global $tronconi;
	// Edit page for each table. InstTrends has no ID of its own and is not
	// editable, so it links to the owning instance (the ID passed in that
	// case is the one from the Instances table).
	$editors=array(
		'Blacklist'=>'editblinst.php',
		'Instances'=>'editinst.php',
		'Languages'=>'editlang.php',
		'InstTrends'=>'editinst.php'
	);
	foreach ($tronconi as $row) {
		$msg='Ho dovuto troncare a '.$row['size'].' caratteri il valore da inserire nella colonna «'.$row['col'].'» della tabella «'.$row['tab'].'» perché troppo lungo ('.$row['len'].' caratteri).';
		// A table without a known edit page gets no link at all, rather than
		// an undefined one or the link left over from the previous row.
		if (isset($editors[$row['tab']]))
			$msg.=' Puoi <a href="'.$editors[$row['tab']].'?id='.$id.'">editarlo qui</a>.';
		notify($msg,2);
	}
	$tronconi=array();
}
/**
 * Shortens a string to the maximum length of a char/varchar column.
 *
 * When truncation happens the last kept character is replaced by "…" and
 * the event is queued in $tronconi for a later flushtronc() call.
 * Columns whose size is unknown or not numeric (e.g. text types, or a
 * column missing from $tables) are left untouched.
 *
 * @param string $str Value to store.
 * @param string $tab Table name (key of $tables).
 * @param string $col Column name.
 * @return string The value, possibly truncated.
 */
function truncs($str,$tab,$col) {
	global $tables, $tronconi;
	if (!isset($tables[$tab][$col]) || !is_numeric($tables[$tab][$col]))
		return($str);
	$size=(int)$tables[$tab][$col];
	if ($size<1)
		return($str);
	$len=mb_strlen($str,'UTF-8');
	if ($len>$size) {
		$tronconi[]=array('id'=>null,'tab'=>$tab,'col'=>$col,'len'=>$len,'size'=>$size);
		$str=mb_substr($str,0,$size-1,'UTF-8').'…';
	}
	return($str);
}
2019-12-26 21:57:36 +01:00
2019-12-30 22:51:32 +01:00
/**
 * Clamps a number to the range a numeric column can hold, emitting a
 * notification whenever the value had to be changed.
 *
 * Columns without a known min/max descriptor in $tables are left untouched.
 *
 * @param int|float $num Value to store.
 * @param string    $tab Table name (key of $tables).
 * @param string    $col Column name.
 * @return int|float|string The value, or the column limit it was clamped to.
 */
function truncn($num,$tab,$col) {
	global $tables;
	if (!isset($tables[$tab][$col]) || !is_array($tables[$tab][$col]) || !isset($tables[$tab][$col]['min'],$tables[$tab][$col]['max']))
		return($num);
	$lim=$tables[$tab][$col];
	if ($num>$lim['max']) {
		notify('Ho dovuto troncare «'.$num.'» al valore massimo «'.$lim['max'].'» che può avere nella colonna «'.$col.'» della tabella «'.$tab.'».',2);
		$num=$lim['max'];
	} elseif ($num<$lim['min']) {
		notify('Ho dovuto troncare «'.$num.'» al valore minimo «'.$lim['min'].'» che può avere nella colonna «'.$col.'» della tabella «'.$tab.'».',2);
		$num=$lim['min'];
	}
	return($num);
}
2019-12-26 21:57:36 +01:00
// Stream context shared by every HTTP request made by the crawler.
$contextopts=[
	'http'=>['timeout'=>$opts['timeout']],
	'socket'=>['tcp_nodelay'=>true],
];
$context=stream_context_create($contextopts);

// Load the blacklist already stored in the DB, indexed by domain.
$blacklist=[];
lecho('Carico la blacklist dal database...'.N);
$res=mysqli_query($link,'SELECT * FROM Blacklist')
	or mexit(mysqli_error($link).N,3);
lecho(mysqli_num_rows($res).' istanze nella blacklist.'.N);
while ($row=mysqli_fetch_assoc($res)) {
	$blacklist[$row['Domain']]=$row;
}
2019-12-26 21:57:36 +01:00
/**
 * Converts a "YYYY-MM-DD hh:mm:ss[.fraction][Z]" timestamp (space or "T"
 * between date and time) into a Unix timestamp.
 *
 * A trailing "Z" marks the time as UTC, so it is converted with gmmktime();
 * without it the time is interpreted in the server's timezone, as before.
 * NOTE(review): suffix-less timestamps may well be UTC too; confirm against
 * the data sources before changing that branch.
 *
 * On an unrecognised format a notification is emitted and the current
 * time is returned.
 *
 * @param string $pgdate Timestamp string.
 * @return int|float Unix timestamp; a float when a fractional part is present.
 */
function pgdatetomy($pgdate) {
	if (preg_match('/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)(\.\d+)?(Z)?$/',$pgdate,$buf)!==1) {
		notify('pgdatetomy: «'.$pgdate.'» non è un formato di data riconosciuto! Ritorno il magico momento attuale.',3);
		return(time());
	}
	$isutc=(isset($buf[8]) && $buf[8]==='Z');
	if ($isutc)
		$mtime=gmmktime((int)$buf[4],(int)$buf[5],(int)$buf[6],(int)$buf[2],(int)$buf[3],(int)$buf[1]);
	else
		$mtime=mktime((int)$buf[4],(int)$buf[5],(int)$buf[6],(int)$buf[2],(int)$buf[3],(int)$buf[1]);
	// Group 7 is present but empty when only the "Z" suffix matched.
	if (isset($buf[7]) && $buf[7]!=='')
		$mtime=$mtime+floatval('0'.$buf[7]);
	return($mtime);
}
2019-12-26 21:57:36 +01:00
/**
 * Parses one tab-separated line of a domain-block dump into a row shaped
 * like the Blacklist table.
 *
 * Field order: domain, created-at, modified-at, severity, reject-media
 * ('t'/'f'), reject-reports ('t'/'f'), public comment.
 *
 * @param string $line One line of the dump.
 * @return array Associative row; dates are converted with pgdatetomy(),
 *               the 't'/'f' flags to 1/0.
 */
function blpgdumplinetomy($line) {
	$flags=['f'=>0,'t'=>1];
	$cols=explode("\t",$line);
	return([
		'Domain'=>$cols[0],
		'CreatedAt'=>pgdatetomy($cols[1]),
		'ModifiedAt'=>pgdatetomy($cols[2]),
		'Severity'=>$cols[3],
		'RejectMedia'=>$flags[$cols[4]],
		'RejectReports'=>$flags[$cols[5]],
		'PublicComment'=>$cols[6],
	]);
}
2019-12-30 22:51:32 +01:00
// Fresh run: build the list of instances to visit from the start nodes,
// their peers and the instances already in the DB; merge the start nodes'
// blocklists into the blacklist; write the list to the job file.
if (!$riprendi) {
	$blacklistnew=array();
	$insts=array();
	// Domains already queued, used as a set: peer lists can be very large
	// and scanning $insts for every peer would be quadratic.
	$seen=array();
	lecho('Carico le istanze di partenza...'.N);
	$res=mysqli_query($link,'SELECT Domain FROM StartNodes')
		or mexit(mysqli_error($link).N,3);
	lecho(mysqli_num_rows($res).' istanze di partenza.'.N);
	while ($row=mysqli_fetch_assoc($res)) {
		// A start node may already be queued as a peer of a previous one.
		if (!isset($seen[$row['Domain']])) {
			$seen[$row['Domain']]=true;
			$insts[]=$row['Domain'];
		}
		lecho('Recupero la lista delle istanze note a «'.$row['Domain'].'» ... ');
		$buf=@file_get_contents('https://'.$row['Domain'].'/api/v1/instance/peers',false,$context);
		$peers=($buf!==false) ? json_decode($buf,true) : null;
		// A body that is not a JSON array is treated like a failed fetch.
		if (is_array($peers)) {
			lecho('OK :-)'.N);
			foreach ($peers as $pdom) {
				if (!is_string($pdom))
					continue;
				if (willtrunc($pdom,'Instances','URI')) {
					notify('L’istanza «'.$pdom.'» non sarà considerata perché il suo dominio è troppo lungo per il campo «URI» della tabella «Instances» nel DB',1);
					continue;
				}
				if (!isset($seen[$pdom])) {
					$seen[$pdom]=true;
					$insts[]=$pdom;
				}
			}
		} else {
			lecho('ERRORE :-('.N);
		}
		lecho('Recupero la blacklist di «'.$row['Domain'].'» ... ');
		$buf=@file_get_contents('https://'.$row['Domain'].'/domain_blocks.txt',false,$context);
		if ($buf!==false) {
			lecho('OK :-)'.N);
			foreach (explode(N,$buf) as $line) {
				// skip comments and blank lines
				if (preg_match('/(^#.*$)|(^\s*$)/',$line)!==0)
					continue;
				// blpgdumplinetomy() reads 7 tab-separated fields; anything
				// shorter (e.g. an HTML error page) is not a dump line
				if (substr_count($line,"\t")<6)
					continue;
				$brow=blpgdumplinetomy($line);
				if (!array_key_exists($brow['Domain'],$blacklist)) {
					$blacklistnew[$brow['Domain']]=$brow;
				}
				$blacklist[$brow['Domain']]=$brow;
			}
		} else {
			lecho('ERRORE :-('.N);
		}
	}
	// Store the newly discovered blacklist entries.
	foreach ($blacklistnew as $row) {
		if (!willtrunc($row['Domain'],'Blacklist','Domain')) {
			mysqli_query($link,'INSERT INTO Blacklist (ID, Domain, CreatedAt, ModifiedAt, Severity, RejectMedia, RejectReports, PrivateComment, PublicComment) VALUES (NULL, \''.myesc($link,$row['Domain']).'\', \''.myesc($link,$row['CreatedAt']).'\', \''.myesc($link,$row['ModifiedAt']).'\', \''.myesc($link,$row['Severity']).'\', \''.myesc($link,$row['RejectMedia']).'\', \''.myesc($link,$row['RejectReports']).'\', NULL, \''.myesc($link,truncs($row['PublicComment'],'Blacklist','PublicComment')).'\')')
				or mexit(mysqli_error($link).N,3);
			flushtronc(mysqli_insert_id($link));
		} else {
			notify('Non ho potuto inserire «'.$row['Domain'].'» nella tabella delle istanze blacklistate perché il dominio è troppo lungo.',2);
		}
	}
	// Add the instances already known to the DB that are not queued yet.
	$res=mysqli_query($link,'SELECT URI FROM Instances')
		or mexit(mysqli_error($link).N,3);
	while ($row=mysqli_fetch_assoc($res)) {
		if (!isset($seen[$row['URI']])) {
			$seen[$row['URI']]=true;
			$insts[]=$row['URI'];
		}
	}
	sort($insts);
	ksort($blacklist);
	ksort($blacklistnew);
	lecho('Istanze recuperate: '.count($insts).N);
	lecho('Istanze blacklistate: '.count($blacklist).', di cui '.count($blacklistnew).' nuove aggiunte al DB.'.N);
	// Persist the list so that an interrupted run can be resumed.
	$instsf=@fopen($instsjfp,'w')
		or mexit('Non ho potuto aprire in scrittura il file «'.$instsjfp.'».'.N,1);
	foreach ($insts as $dom)
		fwrite($instsf,$dom.N);
	fclose($instsf);
}
2019-12-15 17:06:02 +01:00
2019-12-30 22:51:32 +01:00
/**
 * Tells whether a string is longer than a char/varchar column can hold.
 *
 * When the column size is unknown or not numeric (text types, or a column
 * missing from $tables) nothing can be said about truncation, so false is
 * returned.
 *
 * @param string $str Value to check.
 * @param string $tab Table name (key of $tables).
 * @param string $col Column name.
 * @return bool True when the value exceeds the column length.
 */
function willtrunc($str,$tab,$col) {
	global $tables;
	if (!isset($tables[$tab][$col]) || !is_numeric($tables[$tab][$col]))
		return(false);
	return(mb_strlen($str,'UTF-8')>(int)$tables[$tab][$col]);
}
/**
 * Maps a boolean to 1/0. Anything that is not a real boolean is reported
 * through notify() and treated as false.
 *
 * @param mixed  $bool Value expected to be a boolean.
 * @param string $pre  Prefix for the notification text.
 * @return int 1 for true, 0 otherwise.
 */
function b2i($bool,$pre) {
	if (!is_bool($bool)) {
		notify($pre.'il valore «'.$bool.'» non è booleano, lo assumo come falso e ritorno «0».',2);
		return(0);
	}
	return($bool ? 1 : 0);
}
2019-12-28 22:51:41 +01:00
/**
 * "Array Key Exists And Value Is Not Null": true only when $arr is an
 * array that has $key set to a non-null value.
 *
 * @param string|int $key Key to look up.
 * @param mixed      $arr Candidate array (taken by reference).
 * @return bool
 */
function akeavinn($key,&$arr) {
	return(is_array($arr) && isset($arr[$key]));
}
2019-12-26 21:57:36 +01:00
/**
 * Normalises blank strings to null.
 *
 * @param string $str Value to check.
 * @return string|null Null when $str is empty or whitespace only,
 *                     otherwise $str unchanged.
 */
function nempty($str) {
	$blank=(preg_match('/^\s*$/',$str)===1);
	return($blank ? null : $str);
}
2019-12-15 17:06:02 +01:00
2019-12-30 22:51:32 +01:00
/**
 * Joins, with $glue, the value stored under $key in every sub-array
 * of $arr.
 *
 * @param string $glue Separator.
 * @param string $key  Key read from each sub-array.
 * @param array  $arr  Array of arrays (taken by reference, not modified).
 * @return string
 */
function subarimp($glue,$key,&$arr) {
	$pieces=array();
	foreach ($arr as $sub)
		$pieces[]=$sub[$key];
	return(implode($glue,$pieces));
}
2019-12-17 13:19:12 +01:00
2019-12-26 21:57:36 +01:00
/**
 * Logs a notification (with HTML tags stripped) and stores it in the
 * Notifications table; the script terminates through mexit() if the
 * insert fails.
 *
 * @param string $msg Notification text; may contain HTML.
 * @param int    $sev Severity level stored alongside the message.
 */
function notify($msg,$sev) {
	global $link, $tables;
	lecho('NOTIFICAZIÒ: '.strip_tags($msg).N);
	// Cut the text to the column length only when that length is known:
	// for non char/varchar columns the descriptor is not a number.
	if (isset($tables['Notifications']['Notification']) && is_numeric($tables['Notifications']['Notification']))
		$msg=mb_substr($msg,0,(int)$tables['Notifications']['Notification'],'UTF-8');
	// Severity is placed unquoted in the statement, so force it to an integer.
	mysqli_query($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen) VALUES (NULL, \''.myesc($link,$msg).'\', '.intval($sev).', \''.microtime(true).'\', 0)')
		or mexit(mysqli_error($link).N,3);
}
2019-12-30 22:51:32 +01:00
/**
 * Builds the list of languages declared by the instance currently held in
 * the global $info, adding to the Languages table any language code not
 * stored there yet.
 *
 * @param int|string $instid ID of the instance the languages belong to.
 * @return array List of array('InstID','LangID','Pos','Code'), where Pos is
 *               the 1-based position in the declared order.
 */
function langs($instid) {
	global $info, $instrow, $link;
	$found=[];
	if (!akeavinn('languages',$info))
		return($found);
	$rank=0;
	foreach ($info['languages'] as $code) {
		$lookup=mysqli_query($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$code).'\'')
			or mexit(mysqli_error($link).N,3);
		if (mysqli_num_rows($lookup)>=1) {
			$known=mysqli_fetch_assoc($lookup);
			$langid=$known['ID'];
		} else {
			// Unknown language: store it with its display name in Italian
			// and in the language itself.
			mysqli_query($link,'INSERT INTO Languages (ID, Code, ItName, OwnName) VALUES (NULL, \''.myesc($link,truncs($code,'Languages','Code')).'\', \''.myesc($link,truncs(ucfirst(locale_get_display_name($code,'it')),'Languages','ItName')).'\', \''.myesc($link,truncs(ucfirst(locale_get_display_name($code,$code)),'Languages','OwnName')).'\')')
				or mexit(mysqli_error($link).N,3);
			$langid=mysqli_insert_id($link);
			flushtronc($langid);
		}
		$rank++;
		$found[]=['InstID'=>$instid,'LangID'=>$langid,'Pos'=>$rank,'Code'=>$code];
	}
	return($found);
}
/**
 * Returns what var_dump() would print for $var instead of printing it.
 *
 * @param mixed $var Value to dump.
 * @return string The captured var_dump() output.
 */
function varbdump($var) {
	ob_start();
	var_dump($var);
	return(ob_get_clean());
}
/**
 * Sorts an array of arrays in place by the value each sub-array holds
 * under $key, keeping the outer keys.
 *
 * Rows are compared directly rather than being re-indexed by their sort
 * value, so rows with equal values (or float values that share the same
 * integer part) are all kept.
 * NOTE(review): the relative order of equal rows is only guaranteed on
 * PHP >= 8.0, where sorting is stable.
 *
 * @param array  $arr Array to sort (modified in place).
 * @param string $key Key of the sub-array value to sort by.
 * @param bool   $rev When true, sort in descending order.
 */
function mdasortbykey(&$arr,$key,$rev=false) {
	uasort($arr,function ($a,$b) use ($key,$rev) {
		if ($a[$key]==$b[$key])
			return(0);
		$cmp=($a[$key]<$b[$key]) ? -1 : 1;
		return($rev ? -$cmp : $cmp);
	});
}
2019-12-15 17:06:02 +01:00
2019-12-26 21:57:36 +01:00
/*
 * Endpoint availability by Mastodon version:
 * Nodeinfo ('https://'.$dom.'/nodeinfo/2.0') was added in 3.0.0
 * Trends ('https://'.$dom.'/api/v1/trends') was added in 3.0.0
 * Activity ('https://'.$dom.'/api/v1/instance/activity') was added in 2.1.2
 */

// Open the JSON dump: appended to when resuming, started afresh otherwise.
if ($opts['jsonwrite']) {
	$mode=$riprendi ? ['a','aggiunta'] : ['w','scrittura'];
	$jsonf=@fopen($opts['jsonfp'],$mode[0])
		or mexit('Non ho potuto aprire in modalità '.$mode[1].' il file di dump delle info json «'.$opts['jsonfp'].'».',1);
	// A fresh dump starts with the opening brace of the JSON object.
	if ($mode[0]=='w')
		fwrite($jsonf,'{'.N);
}

// Progress counters: current index, instances that answered with usable
// data, and instances judged "good".
$cinsts=count($insts);
$i=0;
$qok=0;
$qgood=0;
if ($riprendi) {
	// Continue from the saved checkpoint.
	$i=$currinst['i'];
	$qok=$currinst['qok'];
	$qgood=$currinst['qgood'];
}
// Main crawl loop: for every queued domain fetch its public API data,
// decide whether it is a usable ("good") Mastodon instance and store or
// update it, together with its languages, activity and trends.
while ($i<$cinsts) {
	$dom=$insts[$i];
	// Checkpoint written before processing, so an interrupted run resumes
	// from this very instance.
	@file_put_contents($currinstjfp,$dom."\t".$i."\t".$qok."\t".$qgood.N)
		or mexit('Non ho potuto aprire in scrittura il file «'.$currinstjfp.'».',1);
	$i++;
	$ok=true;
	$info=null;
	lecho('~~~~~~~~~~~~~~~'.N);
	lecho('Provo a recuperare info su «'.$dom.'» ['.$i.'/'.$cinsts.' ('.$qok.' OK; '.$qgood.' BUONE) - '.round(100/$cinsts*$i).'%]'.N);
	lecho('Provo a recuperare le informazioni API sull’istanza ... ');
	$buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context);
	if ($buf!==false) {
		$info=json_decode($buf,true);
		if (is_array($info)) {
			lecho('OK :-)'.N);
			lecho('Provo a recuperare le informazioni Nodeinfo sull’istanza ... ');
			$buf=@file_get_contents('https://'.$dom.'/nodeinfo/2.0',false,$context);
			if ($buf!==false) {
				lecho('OK :-)'.N);
				$info['x-nodeinfo']=json_decode($buf,true);
				// For now only instances that, when they identify themselves,
				// do so as mastodon or corgidon (a mastodon derivative) are kept.
				// The notifications below help spotting other derivatives worth
				// including. Seen so far, to be checked: epicyon.
				$swname=null;
				if (is_array($info['x-nodeinfo'])
					&& isset($info['x-nodeinfo']['software'])
					&& is_array($info['x-nodeinfo']['software'])
					&& isset($info['x-nodeinfo']['software']['name'])
					&& is_string($info['x-nodeinfo']['software']['name']))
					$swname=$info['x-nodeinfo']['software']['name'];
				// The alternation is grouped so that the anchor applies to both names.
				if (!is_null($swname) && preg_match('/^(mastodon|corgidon)/',$swname)===0) {
					$ok=false;
					notify('Il software «'.$swname.'» non è mastodon o derivati.',1);
				}
			} else {
				lecho('ERRORE :-('.N);
			}
			if ($ok && akeavinn('version',$info) && is_string($info['version'])) {
				// version_compare() instead of string comparison: as strings,
				// "10.0.0" sorts before "2.1.2".
				if (version_compare($info['version'],'2.1.2','>=')) {
					lecho('Provo a recuperare le informazioni API sull’attività dell’istanza ... ');
					$buf=@file_get_contents('https://'.$dom.'/api/v1/instance/activity',false,$context);
					if ($buf!==false) {
						lecho('OK :-)'.N);
						$info['x-activity']=json_decode($buf,true);
					} else {
						lecho('ERRORE :-('.N);
					}
				}
				if (version_compare($info['version'],'3.0.0','>=')) {
					lecho('Provo a recuperare le informazioni API sui trends dell’istanza ... ');
					$buf=@file_get_contents('https://'.$dom.'/api/v1/trends',false,$context);
					if ($buf!==false) {
						lecho('OK :-)'.N);
						$info['x-trends']=json_decode($buf,true);
					} else {
						lecho('ERRORE :-('.N);
					}
				}
			}
		} else {
			$ok=false;
			lecho('ERRORE :-('.N);
		}
	} else {
		$ok=false;
		lecho('ERRORE :-('.N);
		// This is also where unresponsive instances end up: if the instance
		// is already in the DB, record the failed check in InstChecks.
		$res=mysqli_query($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,mb_substr($dom,0,$tables['Instances']['URI'],'UTF-8')).'\'')
			or mexit(mysqli_error($link).N,3);
		if (mysqli_num_rows($res)>0) {
			lecho('«'.$dom.'» non risponde, ma è presente nel database; aggiorno InstChecks.'.N);
			$row=mysqli_fetch_assoc($res);
			mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.time().', 0)')
				or mexit(mysqli_error($link).N,3);
		}
	}
	if (is_array($info) && count($info)>0) {
		$dump=json_encode($info,JSON_PRETTY_PRINT);
		lecho('Dumpone json di tutte le info recuperate:'.N.$dump.N,true);
		// The domain is JSON-encoded so the key is always a valid JSON string.
		if ($opts['jsonwrite'])
			fwrite($jsonf,json_encode((string)$dom).': '.$dump.','.N);
	}
	if ($ok && !is_null($info)
		&& akeavinn('uri',$info) && is_string($info['uri']) && !is_null(nempty($info['uri'])) && !willtrunc($info['uri'],'Instances','URI')
		&& akeavinn('version',$info) && is_string($info['version']) && preg_match('/pleroma|pixelfed/i',$info['version'])===0) {
		$qok++;
		$instrow=array('ID'=>null, 'New'=>0, 'Good'=>0, 'Chosen'=>0, 'Visible'=>0, 'Blacklisted'=>0, 'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null, 'PlaceID'=>null, 'Email'=>null, 'Software'=>null, 'Version'=>null, 'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null, 'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null, 'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null, 'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null, 'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null);
		if (array_key_exists($info['uri'],$blacklist))
			$instrow['Blacklisted']=1;
		$instrow['URI']=$info['uri'];
		if (akeavinn('title',$info))
			$instrow['Title']=nempty(truncs($info['title'],'Instances','Title'));
		if (akeavinn('short_description',$info))
			$instrow['ShortDesc']=nempty(truncs($info['short_description'],'Instances','ShortDesc'));
		if (akeavinn('description',$info))
			$instrow['LongDesc']=nempty(truncs($info['description'],'Instances','LongDesc'));
		if (akeavinn('email',$info))
			$instrow['Email']=nempty(truncs($info['email'],'Instances','Email'));
		if (akeavinn('version',$info))
			$instrow['Version']=nempty(truncs($info['version'],'Instances','Version'));
		if (akeavinn('stats',$info)) {
			if (akeavinn('user_count',$info['stats']))
				$instrow['UserCount']=truncn($info['stats']['user_count'],'Instances','UserCount');
			if (akeavinn('status_count',$info['stats']))
				$instrow['StatusCount']=truncn($info['stats']['status_count'],'Instances','StatusCount');
			if (akeavinn('domain_count',$info['stats']))
				$instrow['DomainCount']=truncn($info['stats']['domain_count'],'Instances','DomainCount');
		}
		if (akeavinn('thumbnail',$info))
			$instrow['Thumb']=nempty(truncs($info['thumbnail'],'Instances','Thumb'));
		if (akeavinn('max_toot_chars',$info))
			$instrow['MaxTootChars']=truncn($info['max_toot_chars'],'Instances','MaxTootChars');
		if (akeavinn('registrations',$info))
			$instrow['RegOpen']=b2i($info['registrations'],'Istanza «'.$instrow['URI'].'»: ');
		if (akeavinn('approval_required',$info))
			$instrow['RegReqApproval']=b2i($info['approval_required'],'Istanza «'.$instrow['URI'].'»: ');
		if (akeavinn('contact_account',$info)) {
			if (akeavinn('acct',$info['contact_account']))
				$instrow['AdmAccount']=nempty(truncs($info['contact_account']['acct'],'Instances','AdmAccount'));
			// truncated against its own column, not AdmAccount
			if (akeavinn('display_name',$info['contact_account']))
				$instrow['AdmDisplayName']=nempty(truncs($info['contact_account']['display_name'],'Instances','AdmDisplayName'));
			if (akeavinn('created_at',$info['contact_account']))
				$instrow['AdmCreatedAt']=pgdatetomy($info['contact_account']['created_at']);
			if (akeavinn('note',$info['contact_account']))
				$instrow['AdmNote']=nempty(truncs(strip_tags($info['contact_account']['note'],'<a>'),'Instances','AdmNote'));
			if (akeavinn('url',$info['contact_account']))
				$instrow['AdmURL']=nempty(truncs($info['contact_account']['url'],'Instances','AdmURL'));
			if (akeavinn('avatar',$info['contact_account']))
				$instrow['AdmAvatar']=nempty(truncs($info['contact_account']['avatar'],'Instances','AdmAvatar'));
			if (akeavinn('header',$info['contact_account']))
				$instrow['AdmHeader']=nempty(truncs($info['contact_account']['header'],'Instances','AdmHeader'));
		}
		if (akeavinn('x-nodeinfo',$info)) {
			if (akeavinn('software',$info['x-nodeinfo']) && akeavinn('name',$info['x-nodeinfo']['software']))
				$instrow['Software']=nempty(truncs($info['x-nodeinfo']['software']['name'],'Instances','Software'));
			if (akeavinn('usage',$info['x-nodeinfo']) && akeavinn('users',$info['x-nodeinfo']['usage'])) {
				if (akeavinn('activeMonth',$info['x-nodeinfo']['usage']['users']))
					$instrow['ActiveUsersMonth']=truncn($info['x-nodeinfo']['usage']['users']['activeMonth'],'Instances','ActiveUsersMonth');
				if (akeavinn('activeHalfyear',$info['x-nodeinfo']['usage']['users']))
					$instrow['ActiveUsersHalfYear']=truncn($info['x-nodeinfo']['usage']['users']['activeHalfyear'],'Instances','ActiveUsersHalfYear');
			}
		}
		// Collect every reason why the instance does not qualify as "good".
		$whynot=array();
		if ($instrow['Blacklisted']==1)
			$whynot[]='è nella blacklist';
		if (is_null($instrow['RegOpen'])) {
			$whynot[]='non se ne conosce lo stato delle registrazioni (aperte/chiuse)';
		} elseif ($instrow['RegOpen']==0) {
			$whynot[]='ha le registrazioni chiuse';
		}
		if (is_null($instrow['UserCount'])) {
			$whynot[]='non se ne conosce il numero di utenti';
		} elseif ($instrow['UserCount']<10 || $instrow['UserCount']>30000) {
			$whynot[]='il numero di utenti non è compreso tra 10 e 30.000';
		}
		if (is_null($instrow['DomainCount'])) {
			$whynot[]='non se ne conosce il numero di istanze note';
		} elseif ($instrow['DomainCount']<500) {
			$whynot[]='il numero di istanze note è minore di 500';
		}
		if (!is_null($instrow['ActiveUsersMonth'])) {
			if ($instrow['ActiveUsersMonth']<10)
				$whynot[]='il numero di utenti attivi nell’ultimo mese è minore di 10';
		} elseif (!is_null($instrow['StatusCount']) && $instrow['UserCount']>0 && $instrow['StatusCount']/$instrow['UserCount']<10) {
			// The average is computed only with a positive user count; a
			// missing or zero count has already been reported above and
			// would otherwise mean a division by zero.
			$whynot[]='il numero medio di toots per utente è minore di 10';
		}
		if (count($whynot)==0) {
			$instrow['Good']=1;
			lecho('Siamo in presenza di un’istanza BUONA! :-)'.N);
			$qgood++;
		} else {
			lecho('Siamo in presenza di un’istanza CATTIVA: '.implode('; ',$whynot).' :-('.N);
		}
		$res=mysqli_query($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'')
			or mexit(mysqli_error($link).N,3);

		if (mysqli_num_rows($res)>0) {
			lecho('«'.$instrow['URI'].'» è già presente nel DB, la aggiorno...'.N);
			$oldinstrow=mysqli_fetch_assoc($res);
			flushtronc($oldinstrow['ID']);
			$instid=$oldinstrow['ID'];
			$instrow['ID']=$oldinstrow['ID'];
			// Values managed by the administrators are carried over unchanged.
			$instrow['New']=$oldinstrow['New'];
			if ($instrow['Good']==1 && $oldinstrow['Good']==0) {
				notify('L’istanza «<a href="editinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» non era papabile, ma lo è diventata!',1);
			} elseif ($instrow['Good']==0 && $oldinstrow['Good']==1) {
				notify('L’istanza «<a href="editinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» era papabile, ma non lo è più per i seguenti motivi: '.implode('; ',$whynot),3);
			}
			$instrow['Chosen']=$oldinstrow['Chosen'];
			$instrow['Visible']=$oldinstrow['Visible'];
			// NOTE(review): the descriptions come from the remote instance
			// and are embedded as-is in the notification HTML; confirm the
			// admin UI is meant to render them unescaped.
			if ($instrow['ShortDesc']!=$oldinstrow['ShortDesc'])
				notify('<p>La «Descrizione breve» dell’istanza «<a href="editinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» è cambiata. La vecchia era...</p><div class="valdesc">'.$oldinstrow['ShortDesc'].'</div><p>La nuova è...</p><div class="valdesc">'.$instrow['ShortDesc'].'</div>',1);
			if ($instrow['LongDesc']!=$oldinstrow['LongDesc'])
				notify('<p>La «Descrizione lunga» dell’istanza «<a href="editinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» è cambiata. La vecchia era...</p><div class="valdesc">'.$oldinstrow['LongDesc'].'</div><p>La nuova è...</p><div class="valdesc">'.$instrow['LongDesc'].'</div>',1);
			$instrow['OurDesc']=$oldinstrow['OurDesc'];
			$instrow['PlaceID']=$oldinstrow['PlaceID'];
			$query='UPDATE Instances SET ';
			foreach ($instrow as $field=>$value) {
				if (!is_null($value))
					$query.=$field.'=\''.myesc($link,$value).'\', ';
				else
					$query.=$field.'=NULL, ';
			}
			$query=substr($query,0,-2).' WHERE Instances.ID='.$instrow['ID'];
			lecho('QUERONA DI UPDATE: «'.$query.'».'.N);
			mysqli_query($link,$query)
				or mexit(mysqli_error($link).N,3);
			$res=mysqli_query($link,'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID='.$instrow['ID'].' ORDER BY Pos ASC')
				or mexit(mysqli_error($link).N,3);
			$oldinstlangs=array();
			while ($row=mysqli_fetch_assoc($res))
				$oldinstlangs[]=$row;
			$instlangs=langs($instrow['ID']);
			if ($instlangs!=$oldinstlangs) {
				notify('La lista delle lingue utilizzate dichiarate dall’istanza «<a href="editinst.php?id='.$instrow['ID'].'">'.$instrow['URI'].'</a>» è cambiata da «'.subarimp(', ','Code',$oldinstlangs).'» a «'.subarimp(', ','Code',$instlangs).'».',1);
				mysqli_query($link,'DELETE FROM InstLangs WHERE InstID='.$instrow['ID'])
					or mexit(mysqli_error($link).N,3);
				foreach ($instlangs as $row) {
					mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
						or mexit(mysqli_error($link).N,3);
				}
			}
		} else {
			lecho('«'.$info['uri'].'» non è già presente nel DB, la aggiungo...'.N);
			$instrow['New']=1;
			$fields=array();
			$values='';
			foreach ($instrow as $field=>$value) {
				$fields[]=$field;
				if (!is_null($value))
					$values.='\''.myesc($link,$value).'\', ';
				else
					$values.='NULL, ';
			}
			$values=substr($values,0,-2);
			$query='INSERT INTO Instances ('.implode(', ',$fields).') VALUES ('.$values.')';
			lecho('QUERONA DI INSERT: «'.$query.'»'.N);
			mysqli_query($link,$query)
				or mexit(mysqli_error($link).N,3);
			$instid=mysqli_insert_id($link);
			flushtronc($instid);
			$instlangs=langs($instid);
			foreach ($instlangs as $row) {
				mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
					or mexit(mysqli_error($link).N,3);
			}
			// the link carries the ID just assigned to the new instance
			if ($instrow['Good']==1)
				notify('La nuova istanza «<a href="editinst.php?id='.$instid.'">'.$info['uri'].'</a>» è papabile!',1);
		}
		// Weekly activity: replace whatever was stored for this instance.
		if (array_key_exists('x-activity',$info) && is_array($info['x-activity'])) {
			mysqli_query($link,'DELETE FROM InstActivity WHERE InstID='.$instid)
				or mexit(mysqli_error($link).N,3);
			foreach ($info['x-activity'] as $buf) {
				if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
					$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\')';
					mysqli_query($link,$query)
						or mexit(mysqli_error($link).N,3);
				}
			}
		}
		// Trends: score every tag by the sum of accounts/uses over its
		// history, then store them ranked by descending score.
		if (array_key_exists('x-trends',$info) && is_array($info['x-trends'])) {
			$trends=array();
			foreach ($info['x-trends'] as $buf) {
				if (akeavinn('name',$buf) && akeavinn('url',$buf) && akeavinn('history',$buf) && is_array($buf['history'])
					&& isset($buf['history'][0]['day']) && is_scalar($buf['history'][0]['day'])) {
					$trend=0;
					foreach ($buf['history'] as $row) {
						if (is_array($row) && isset($row['uses'],$row['accounts']) && is_numeric($row['uses']) && is_numeric($row['accounts']) && $row['uses']>0)
							$trend+=($row['accounts']/$row['uses']);
					}
					$trends[]=array(
						'InstID'=>$instid,
						'LastDay'=>$buf['history'][0]['day'],
						'Name'=>$buf['name'],
						'URL'=>$buf['url'],
						'Pos'=>null,
						'trend'=>$trend
					);
				}
			}
			mdasortbykey($trends,'trend',true);
			mysqli_query($link,'DELETE FROM InstTrends WHERE InstID='.$instid)
				or mexit(mysqli_error($link).N,3);
			$pos=0;
			foreach ($trends as $trend) {
				$pos++;
				// LastDay comes from the remote instance, so it is escaped
				// like every other remote value.
				$query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.myesc($link,$trend['LastDay']).'\', \''.myesc($link,truncs($trend['Name'],'InstTrends','Name')).'\', \''.myesc($link,truncs($trend['URL'],'InstTrends','URL')).'\', '.$pos.')';
				mysqli_query($link,$query)
					or mexit(mysqli_error($link).N,3);
				// Passing the instance ID is intentional: InstTrends rows have
				// no ID of their own, so notifications link to the instance.
				flushtronc($instid);
			}
		}
		mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.time().', 1)')
			or mexit(mysqli_error($link).N,3);
	}
}
2019-12-26 21:57:36 +01:00
// Normal completion: close the DB connection, terminate the JSON dump and
// remove the checkpoint files so the next run starts from scratch.
mysqli_close($link);

if ($opts['jsonwrite']) {
	// Closing entry and brace of the JSON object opened at the start of the dump.
	fwrite($jsonf,'"Fine?": true'.N.'}'.N);
	fclose($jsonf);
}

foreach ([$instsjfp,$currinstjfp] as $jobfile) {
	unlink($jobfile);
}

exit(0);
2019-12-01 09:07:45 +01:00
?>