2020-10-13 08:21:26 +02:00
< ? php
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program . If not , see < http :// www . gnu . org / licenses />.
define ( 'N' , " \n " );
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/mustard/include/getfc.php' );
2020-10-13 08:21:26 +02:00
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/vendor/autoload.php' );
2020-10-13 08:21:26 +02:00
use LanguageDetection\Language ;
// True when the script runs on Windows (PHP_OS starts with "WIN").
// Computed in a single expression so that a default assignment can never
// overwrite a positive detection. Other functions in this file read this
// flag to lowercase table names before looking them up in $tables.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');
$link = false ;
$jsonf = false ;
declare ( ticks = 1 );
// Signal handling is installed only when pcntl_signal() is available.
if (function_exists('pcntl_signal')) {
    /**
     * Releases the resources held by the script and terminates it when a
     * signal is received.
     *
     * Closes the MySQL link and the JSON dump file when they are open,
     * removes the lock file when it exists, then exits with status 2.
     *
     * @param int $signal Number of the received signal (not used).
     */
    function signalHandler($signal) {
        global $link, $jsonf, $lockfp;

        echo(N . 'Sono stato interrotto.' . N);

        if ($link) {
            echo('La connessione MySQL è aperta, la chiudo.' . N);
            mysqli_close($link);
        }

        if ($jsonf) {
            echo('Il file di dump json è aperto, lo chiudo.' . N);
            // The closing JSON footer is intentionally NOT written here:
            // per the original author's note, doing so would mess up a
            // later resumed run.
            fclose($jsonf);
        }

        if (isset($lockfp) && file_exists($lockfp)) {
            echo('Il file di lock esiste, lo elimino.' . N);
            unlink($lockfp);
        }

        exit(2);
    }

    // SIGTERM: termination ('kill' was called); SIGHUP: terminal log-out;
    // SIGINT: interrupted (Ctrl-C was pressed).
    foreach (array(SIGTERM, SIGHUP, SIGINT) as $signo) {
        pcntl_signal($signo, 'signalHandler');
    }
}
$opts = array (
'timeout' => 5 ,
'deadline' => 60 * 24 * 60 * 60 , // se un'istanza non risponde da 60 giorni dichiararla morta
'ldtoots' => 40 , // numero di toots da passare alla funzione di rilevamento automatico della lingua
'setnew' => true ,
'dryrun' => false ,
'jsonfp' => __DIR__ . '/instances.json' ,
'jsonwrite' => false ,
2020-10-14 00:03:40 +02:00
'peersfp' => null ,
2020-10-14 08:37:41 +02:00
'restore' => false ,
'dontrestore' => false ,
'ignorelock' => false
2020-10-13 08:21:26 +02:00
$help = ' crawler . php
Popola / aggiorna il database di mastostart con i dati che riesce
a recuperare da una lista di istanze composta da quelle già presenti
nel database più quelle di un file specificabile ( tipicamente il file
di output di peerscrawl . php ) .
peerscrawl . php [ options ]
- p , -- peersfp < file >
2020-10-13 17:48:55 +02:00
Definisce un file da cui caricare la lista delle istanze di cui cercare
di recuperare i dati . Per default non è definito alcun file , quindi
il programma si limita a controllare le istanze già presenti del db .
Nota : questa opzione è ininfluente se il programma viene lanciato
per riprendere un’ esecuzione precedente interrotta .
2020-10-13 08:21:26 +02:00
- t , -- timeout < secondi >
Definisce il timeout in secondi di ogni tentativo di connessione .
DEFAULT : « '.$opts[' timeout '].' »
- N , -- dontsetnew
Non marca le istanze come nuove , neanche quando lo sono . Può essere utile
per il primo crawl .
2020-10-14 08:37:41 +02:00
- I , -- ignorelock
Normalmente , se il suo lockfile esiste , il programma esce con un errore .
Questa opzione fa sì che il lockfile sia ignorato . Attenzione : verifica
che effettivamente il programma non stia già girando prima di usarla .
2020-10-14 00:03:40 +02:00
- r , -- restore
2020-10-14 08:37:41 +02:00
Prova a riprendere l’ esecuzione di una sessione precedente non completata
2020-10-14 00:03:40 +02:00
( perché funzioni devono essere presenti i file «instances . job»
e «currinst . job» ) .
2020-10-14 08:37:41 +02:00
- R , -- dontrestore
Se sono presenti i file di una sessione precedente non completata
( «instances . job» e «currinst . job» ) ignorali e prosegui ( verranno
sovrascritti ) .
2020-10-13 08:21:26 +02:00
- d , -- dryrun
Non scrive nulla nel database .
- j , -- jsonwrite
Attiva la scrittura di un file «instances . json» nella stessa directory
di crawler . php , contenente tutti i dati recuperati da tutte le istanze .
2020-10-13 21:32:58 +02:00
- h , -- help
Mostra questo aiuto ed esce .
2020-10-13 08:21:26 +02:00
This program comes with ABSOLUTELY NO WARRANTY ; for details see the source .
This is free software , and you are welcome to redistribute it under
certain conditions ; see < http :// www . gnu . org / licenses /> for details . ' . N ;
for ( $i = 1 ; $i < $argc ; $i ++ ) {
if ( substr ( $argv [ $i ], 0 , 1 ) == '-' ) {
switch ( $argv [ $i ]) {
case '-p' :
case '--peersfp' :
if ( $i + 1 >= $argc || ! file_exists ( $argv [ $i + 1 ]) || ! is_file ( $argv [ $i + 1 ]) || ! is_readable ( $argv [ $i + 1 ]))
mexit ( 'L’ opzione «' . $argv [ $i ] . '» richiede come parametro un file esistente e leggibile (usa «-h» per vedere la guida).' . N , 1 );
$i ++ ;
$opts [ 'peersfp' ] = $argv [ $i ];
break ;
case '-t' :
case '--timeout' :
if ( $i + 1 >= $argc || preg_match ( '/^[0-9]+$/' , $argv [ $i + 1 ]) !== 1 )
mexit ( 'L’ opzione «' . $argv [ $i ] . '» richiede un parametro numerico (usa «-h» per vedere la guida).' . N , 1 );
$i ++ ;
$opts [ 'timeout' ] = $argv [ $i ] + 0 ;
break ;
case '-N' :
case '--dontsetnew' :
$opts [ 'setnew' ] = false ;
break ;
2020-10-14 00:03:40 +02:00
case '-r' :
case '--restore' :
2020-10-14 08:37:41 +02:00
$opts [ 'restore' ] = true ;
break ;
case '-R' :
case '--dontrestore' :
$opts [ 'dontrestore' ] = true ;
break ;
case '-I' :
case '--ignorelock' :
$opts [ 'ignorelock' ] = true ;
2020-10-14 00:03:40 +02:00
break ;
2020-10-13 08:21:26 +02:00
case '-d' :
case '--dryrun' :
$opts [ 'dryrun' ] = true ;
break ;
case '-j' :
case '--jsonwrite' :
$opts [ 'jsonwrite' ] = true ;
break ;
case '-h' :
case '--help' :
mexit ( $help , 0 );
break ;
default :
mexit ( 'L’ opzione «' . $argv [ $i ] . '» è sconosciuta (usa «-h» per vedere la guida).' . N , 1 );
break ;
use function mysqli_real_escape_string as myesc ;
/**
 * Prints a message, releases the resources held by the script and exits.
 *
 * The global MySQL link and JSON dump handle are closed when they are
 * truthy; the lock file is removed when its path is set and the file exists.
 *
 * @param string $msg  Text echoed before exiting.
 * @param int    $code Process exit status.
 */
function mexit($msg, $code) {
    global $link, $jsonf, $lockfp;

    echo($msg);

    if ($link) {
        mysqli_close($link);
    }
    if ($jsonf) {
        fclose($jsonf);
    }

    $lockPresent = isset($lockfp) && file_exists($lockfp);
    if ($lockPresent) {
        unlink($lockfp);
    }

    exit($code);
}
2020-10-14 08:37:41 +02:00
// Path of the lock file that marks a running crawler session.
$lockfp = __DIR__ . '/crawler.lock';
if (file_exists($lockfp) && !$opts['ignorelock']) {
    // Do NOT go through mexit() here: $lockfp is already set, so mexit()
    // would unlink the lock file that belongs to the other running session.
    // Nothing else needs cleaning up at this point ($link and $jsonf are
    // still false), so print the message and exit directly.
    echo('Il file di lock esiste: pare che sia già in corso una sessione; se sei sicur@ che non è così usa «-I» per forzare l’ esecuzione.' . N);
    exit(2);
}
touch($lockfp);
// Set to true further down when a previous interrupted session is resumed.
$riprendi = false;
2020-10-13 08:21:26 +02:00
$instsjfp = __DIR__ . '/instances.job' ;
$currinstjfp = __DIR__ . '/currinst.job' ;
2020-10-14 00:03:40 +02:00
if ( $opts [ 'restore' ]) {
if ( file_exists ( $currinstjfp ) && file_exists ( $instsjfp )) {
$riprendi = true ;
} else {
mexit ( 'Impossibile riprendere la sessione precedente: «instances.job» e/o «currinst.job» non esistono.' . N , 2 );
2020-10-13 08:21:26 +02:00
} else {
2020-10-14 00:03:40 +02:00
if ( file_exists ( $currinstjfp ) && file_exists ( $instsjfp )) {
2020-10-14 08:37:41 +02:00
if ( ! $opts [ 'dontrestore' ])
mexit ( 'La sessione precedente non è stata conclusa: «instances.job» e «currinst.job» esistono; usa «-r» per ripristinarla o «-R» per ignorarla.' . N , 2 );
2020-10-14 00:03:40 +02:00
2020-10-13 08:21:26 +02:00
2020-10-18 06:53:27 +02:00
$inifp = __DIR__ . '/../conf/mustard.ini' ;
2020-10-13 08:21:26 +02:00
$iniarr =@ parse_ini_file ( $inifp )
or mexit ( 'Impossibile aprire il file di configurazione «' . $inifp . '»' . N , 1 );
$link =@ mysqli_connect ( $iniarr [ 'db_host' ], $iniarr [ 'db_admin_name' ], $iniarr [ 'db_admin_password' ], $iniarr [ 'db_name' ], $iniarr [ 'db_port' ], $iniarr [ 'db_socket' ])
or mexit ( 'Impossibile connettersi al server MySQL: ' . mysqli_connect_error () . N , 1 );
mysqli_set_charset ( $link , 'utf8mb4' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 1 );
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/mustard/include/tables.php' );
2020-10-13 08:21:26 +02:00
$tables = tables ( $link );
if ( $riprendi ) {
echo ( 'Pare che ci sia un lavoro in sospeso, provo a riprenderlo...' . N );
$buf =@ file ( $instsjfp , FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES )
or mexit ( 'Non ho potuto aprire in lettura il file «' . $instsjfp . '».' . N , 1 );
$insts = array ();
foreach ( $buf as $line )
$insts [] = $line ;
$buf =@ file ( $currinstjfp , FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES )
or mexit ( 'Non ho potuto aprire in lettura il file «' . $currinstjfp . '».' . N , 1 );
$buf = explode ( " \t " , $buf [ 0 ]);
$currinst = array ( 'dom' => $buf [ 0 ], 'i' => $buf [ 1 ], 'qok' => $buf [ 2 ], 'qgood' => $buf [ 3 ]);
$riprendi = true ;
/**
 * Shortens a string so that it fits the given column.
 *
 * The maximum length is read from the global $tables[$tab][$col]. When the
 * string is longer, a notification with severity 2 is raised and the value
 * is cut to (size - 1) characters followed by an ellipsis.
 *
 * @param string $str Value to check.
 * @param string $tab Table name (lowercased when $iswin is set).
 * @param string $col Column name.
 * @param string $ctx Prefix used in the notification text.
 * @return string The original or the truncated string.
 */
function truncs($str, $tab, $col, $ctx) {
    // $tronconi is declared global by the original code but is not used here.
    global $tables, $tronconi, $iswin;

    if ($iswin) {
        $tab = strtolower($tab);
    }

    $limit = $tables[$tab][$col];
    $length = mb_strlen($str, 'UTF-8');

    if ($length <= $limit) {
        return($str);
    }

    notify($ctx . ': ho dovuto troncare a ' . $limit . ' caratteri il valore da inserire nella colonna «' . $col . '» della tabella «' . $tab . '» perché troppo lungo (' . $length . ' caratteri).', 2);

    return(mb_substr($str, 0, $limit - 1, 'UTF-8') . '…');
}
/**
 * Clamps a number to the range allowed by the given column.
 *
 * The bounds are read from the global $tables[$tab][$col]['min'|'max'].
 * Out-of-range values raise a notification with severity 2 and are replaced
 * by the violated bound; non-numeric values raise a notification with
 * severity 3 and are replaced by 0.
 *
 * @param mixed  $num Value to check.
 * @param string $tab Table name (lowercased when $iswin is set).
 * @param string $col Column name.
 * @param string $ctx Prefix used in the notification text.
 * @return mixed The value itself, the bound it exceeded, or 0.
 */
function truncn($num, $tab, $col, $ctx) {
    global $tables, $iswin;

    if ($iswin) {
        $tab = strtolower($tab);
    }

    if (!is_numeric($num)) {
        notify($ctx . ': truncn(): mi aspettavo un numero, invece non lo era; ritorno «0».', 3);
        return(0);
    }

    $bounds = $tables[$tab][$col];

    if ($num > $bounds['max']) {
        notify($ctx . ': ho dovuto troncare «' . $num . '» al valore massimo «' . $bounds['max'] . '» che può avere nella colonna «' . $col . '» della tabella «' . $tab . '»).', 2);
        return($bounds['max']);
    }

    if ($num < $bounds['min']) {
        notify($ctx . ': ho dovuto troncare «' . $num . '» al valore minimo «' . $bounds['min'] . '» che può avere nella colonna «' . $col . '» della tabella «' . $tab . '»).', 2);
        return($bounds['min']);
    }

    return($num);
}
/* $contextopts = array (
'http' => array (
'timeout' => $opts [ 'timeout' ]
'socket' => array (
'tcp_nodelay' => true
$context = stream_context_create ( $contextopts ); */
// In-memory blacklist: domain => full row as fetched from the Blacklist table.
$blacklist = array();
echo('Carico la blacklist dal database...' . N);

$res = mysqli_query($link, 'SELECT * FROM Blacklist');
if (!$res) {
    // Query failed: report the line number and the MySQL error, then quit.
    mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
}

echo(mysqli_num_rows($res) . ' istanze nella blacklist.' . N);

for ($row = mysqli_fetch_assoc($res); $row; $row = mysqli_fetch_assoc($res)) {
    $blacklist[$row['Domain']] = $row;
}
/**
 * Converts a PostgreSQL-style timestamp into a Unix timestamp.
 *
 * Accepted format: "YYYY-MM-DD hh:mm:ss" or "YYYY-MM-DDThh:mm:ss", with an
 * optional fractional part and an optional trailing "Z".
 *
 * A trailing "Z" marks the value as UTC, so it is converted with gmmktime();
 * values without it keep being interpreted in the script's default timezone
 * via mktime(), as before.
 *
 * @param string $pgdate Timestamp text to parse.
 * @return int|float Seconds since the epoch (float when a fractional part is
 *                   present). When the text is not recognised a notification
 *                   with severity 3 is raised and the current time is returned.
 */
function pgdatetomy($pgdate) {
    if (preg_match('/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)(\.\d+)?(Z)?$/', $pgdate, $buf) !== 1) {
        notify('pgdatetomy: «' . $pgdate . '» non è un formato di data riconosciuto! Ritorno il magico momento attuale.', 3);
        return(time());
    }

    $year = (int)$buf[1];
    $month = (int)$buf[2];
    $day = (int)$buf[3];
    $hour = (int)$buf[4];
    $minute = (int)$buf[5];
    $second = (int)$buf[6];

    // Group 8 is present only when the explicit UTC marker was matched.
    $isUtc = isset($buf[8]) && $buf[8] === 'Z';
    if ($isUtc) {
        $mtime = gmmktime($hour, $minute, $second, $month, $day, $year);
    } else {
        $mtime = mktime($hour, $minute, $second, $month, $day, $year);
    }

    // Group 7 is an empty string when only the "Z" matched, so test for content.
    if (isset($buf[7]) && $buf[7] !== '') {
        $mtime = $mtime + floatval('0' . $buf[7]);
    }

    return($mtime);
}
/**
 * Turns one line of a tab-separated domain-block dump into a Blacklist row.
 *
 * Expected column order: domain, created-at, modified-at, severity,
 * reject-media flag, reject-reports flag, public comment. The two dates go
 * through pgdatetomy(); the flags are mapped from "t"/"f" to 1/0.
 *
 * Columns are split on a bare tab character. Lines with fewer than seven
 * columns are padded with empty strings, and any flag other than "t"/"f"
 * is treated as 0, so malformed lines do not hit undefined offsets.
 *
 * @param string $line One data line of the dump.
 * @return array Row with the keys Domain, CreatedAt, ModifiedAt, Severity,
 *               RejectMedia, RejectReports and PublicComment.
 */
function blpgdumplinetomy($line) {
    $truefalse = array('f' => 0, 't' => 1);
    $cols = array_pad(explode("\t", $line), 7, '');

    return(array(
        'Domain' => $cols[0],
        'CreatedAt' => pgdatetomy($cols[1]),
        'ModifiedAt' => pgdatetomy($cols[2]),
        'Severity' => $cols[3],
        'RejectMedia' => isset($truefalse[$cols[4]]) ? $truefalse[$cols[4]] : 0,
        'RejectReports' => isset($truefalse[$cols[5]]) ? $truefalse[$cols[5]] : 0,
        'PublicComment' => $cols[6],
    ));
}
if ( ! $riprendi ) {
$blacklistnew = array ();
$insts = array ();
echo ( 'Carico le istanze di riferimento per le blacklist...' . N );
$res = mysqli_query ( $link , 'SELECT Domain FROM StartNodes' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
echo ( mysqli_num_rows ( $res ) . ' istanze di riferimento.' . N );
while ( $row = mysqli_fetch_assoc ( $res )) {
$insts [] = $row [ 'Domain' ];
/* echo ( 'Recupero la lista delle istanze note a «' . $row [ 'Domain' ] . '» ... ' );
$buf =@ getfc ( 'https://' . $row [ 'Domain' ] . '/api/v1/instance/peers' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
echo ( 'OK :-)' . N );
$peers = json_decode ( $buf [ 'cont' ], true );
foreach ( $peers as $pdom ) {
if ( willtrunc ( $pdom , 'Instances' , 'URI' ))
notify ( 'L’ istanza «' . $pdom . '» non sarà considerata perché il suo dominio è troppo lungo per il campo «URI» della tabella «Instances» nel DB' , 2 );
if ( ! in_array ( $pdom , $insts ) && ! willtrunc ( $pdom , 'Instances' , 'URI' ))
$insts [] = $pdom ;
} else {
echo ( 'ERRORE: ' . $buf [ 'cont' ] . N );
} */
echo ( 'Recupero la blacklist di «' . $row [ 'Domain' ] . '» ... ' );
$buf =@ getfc ( 'https://' . $row [ 'Domain' ] . '/domain_blocks.txt' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
echo ( 'OK :-)' . N );
$buf = explode ( N , $buf [ 'cont' ]);
foreach ( $buf as $line ) {
if ( preg_match ( '/(^#.*$)|(^\s*$)/' , $line ) === 0 ) {
$brow = blpgdumplinetomy ( $line );
if ( ! array_key_exists ( $brow [ 'Domain' ], $blacklist )) {
$blacklistnew [ $brow [ 'Domain' ]] = $brow ;
$blacklist [ $brow [ 'Domain' ]] = $brow ;
} else {
echo ( 'ERRORE: ' . $buf [ 'emsg' ] . N );
foreach ( $blacklistnew as $row ) {
if ( ! willtrunc ( $row [ 'Domain' ], 'Blacklist' , 'Domain' )) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO Blacklist (ID, Domain, CreatedAt, ModifiedAt, Severity, RejectMedia, RejectReports, PrivateComment, PublicComment) VALUES (NULL, \'' . myesc ( $link , $row [ 'Domain' ]) . '\', \'' . myesc ( $link , $row [ 'CreatedAt' ]) . '\', \'' . myesc ( $link , $row [ 'ModifiedAt' ]) . '\', \'' . myesc ( $link , $row [ 'Severity' ]) . '\', \'' . myesc ( $link , $row [ 'RejectMedia' ]) . '\', \'' . myesc ( $link , $row [ 'RejectReports' ]) . '\', NULL, \'' . myesc ( $link , $row [ 'Domain' ]) . '\')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
} else {
echo ( 'Non ho potuto inserire «' . $row [ 'Domain' ] . '» nella tabella delle istanze blacklistate perché il dominio è troppo lungo per il campo corrispondente nel DB.' );
2020-10-13 17:48:55 +02:00
echo ( 'Carico le istanze note e vive dal DB e le metto nella lista di quelle da controllare.' . N );
2020-10-13 08:21:26 +02:00
$res = mysqli_query ( $link , 'SELECT URI FROM Instances WHERE Dead=0' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
while ( $row = mysqli_fetch_assoc ( $res ))
if ( ! in_array ( $row [ 'URI' ], $insts ))
$insts [] = $row [ 'URI' ];
echo ( 'Creo la lista delle istanze morte.' . N );
$res = mysqli_query ( $link , 'SELECT URI FROM Instances WHERE Dead=1' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
$deadinsts = array ();
while ( $row = mysqli_fetch_assoc ( $res ))
$deadinsts [] = $row [ 'URI' ];
2020-10-13 17:48:55 +02:00
// Optionally extend the list of instances to check with the domains read
// from the peers file given on the command line.
if (!is_null($opts['peersfp'])) {
    echo('Carico le istanze dalla lista «' . $opts['peersfp'] . '» e aggiungo alla lista di quelle da controllare quelle che non ci sono già e che non risultano morte.' . N);
    $peers = @file($opts['peersfp'], FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($peers === false) {
        mexit('Non ho potuto aprire in lettura «' . $opts['peersfp'] . '».' . N, 1);
    }
    foreach ($peers as $pdom) {
        // Already queued: nothing to do and nothing to report.
        if (in_array($pdom, $insts)) {
            continue;
        }
        // Known dead instances are reported and skipped.
        if (in_array($pdom, $deadinsts)) {
            echo('L’ istanza «' . $pdom . '» non sarà considerata perché È MORTA!' . N);
            continue;
        }
        // Domains that do not fit the Instances.URI column are reported and skipped.
        if (willtrunc($pdom, 'Instances', 'URI')) {
            echo('L’ istanza «' . $pdom . '» non sarà considerata perché il suo dominio è troppo lungo per il campo «URI» della tabella «Instances» nel DB.' . N);
            continue;
        }
        $insts[] = $pdom;
    }
}
2020-10-13 08:21:26 +02:00
2020-10-13 17:48:55 +02:00
2020-10-13 08:21:26 +02:00
sort ( $insts );
// shuffle($insts);
ksort ( $blacklist );
ksort ( $blacklistnew );
echo ( 'Istanze recuperate: ' . count ( $insts ) . N );
echo ( 'Istanze blacklistate: ' . count ( $blacklist ) . ', di cui ' . count ( $blacklistnew ) . ' nuove aggiunte al DB.' . N );
$instsf =@ fopen ( $instsjfp , 'w' )
or mexit ( 'Non ho potuto aprire in scrittura il file «' . $instsjfp . '».' . N , 1 );
foreach ( $insts as $dom )
fwrite ( $instsf , $dom . N );
fclose ( $instsf );
/**
 * Tells whether a string is longer than the size recorded for a column.
 *
 * The size is read from the global $tables[$tab][$col]; the length is
 * counted in UTF-8 characters.
 *
 * @param string $str Value to measure.
 * @param string $tab Table name (lowercased when $iswin is set).
 * @param string $col Column name.
 * @return bool True when the value exceeds the column size.
 */
function willtrunc($str, $tab, $col) {
    global $tables, $iswin;

    $table = $iswin ? strtolower($tab) : $tab;

    return(mb_strlen($str, 'UTF-8') > $tables[$table][$col]);
}
/**
 * Converts a boolean into 1 or 0.
 *
 * A non-boolean value raises a notification with severity 3 and is treated
 * as false.
 *
 * @param mixed  $bool Value to convert.
 * @param string $pre  Prefix prepended to the notification text.
 * @return int 1 for true, 0 for false or for any non-boolean value.
 */
function b2i($bool, $pre) {
    if (!is_bool($bool)) {
        notify($pre . 'il valore «' . $bool . '» non è booleano, lo assumo come falso e ritorno «0».', 3);
        return(0);
    }

    return($bool ? 1 : 0);
}
/**
 * "Array Key Exists And Value Is Not Null": true only when $arr is an array
 * that contains $key with a non-null value.
 *
 * @param int|string $key Key to look up.
 * @param mixed      $arr Candidate array (taken by reference).
 * @return bool
 */
function akeavinn($key, &$arr) {
    // On an array, isset() is true exactly when the key exists and its value is not null.
    return(is_array($arr) && isset($arr[$key]));
}
/**
 * Maps empty or whitespace-only strings to null.
 *
 * @param string $str Value to check.
 * @return string|null Null when the value is empty or only whitespace,
 *                     otherwise the value unchanged.
 */
function nempty($str) {
    $blank = (preg_match('/^\s*$/', $str) === 1);

    return($blank ? null : $str);
}
/**
 * Joins the values found under one key of every sub-array.
 *
 * @param string     $glue Separator placed between the values.
 * @param int|string $key  Key read from each sub-array.
 * @param array      $arr  List of sub-arrays (taken by reference, not modified).
 * @return string The joined values; an empty string for an empty list.
 */
function subarimp($glue, $key, &$arr) {
    $picked = array_map(
        function ($inner) use ($key) {
            return $inner[$key];
        },
        $arr
    );

    return(implode($glue, $picked));
}
/**
 * Prints a notification and, unless running in dry-run mode, stores it in
 * the Notifications table.
 *
 * The console copy has HTML tags stripped. The stored copy keeps them and is
 * cut to the size recorded in $tables for the "Notification" column.
 *
 * @param string $msg Notification text (may contain HTML).
 * @param int    $sev Severity, written into the query as-is.
 */
function notify($msg, $sev) {
    global $link, $tables, $iswin, $opts;

    echo('NOTIFICAZIÒ: ' . strip_tags($msg) . N);

    if ($opts['dryrun']) {
        return;
    }

    // Only the $tables lookup uses the lowercase name; the query does not.
    $tab = $iswin ? 'notifications' : 'Notifications';
    $stored = mb_substr($msg, 0, $tables[$tab]['Notification'], 'UTF-8');

    $query = 'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \'' . myesc($link, $stored) . '\', ' . $sev . ', \'' . microtime(true) . '\', 0, 0)';

    if (!mysqli_query($link, $query)) {
        mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
    }
}
/**
 * Performs a call to the Mastodon API of a host.
 *
 * The request goes over HTTPS through getfc(), using the timeout configured
 * in $opts['timeout'].
 *
 * @param string $host Host to call (e.g. "mastodon.bida.im").
 * @param string $path API path (e.g. "/api/v1/timelines/public?local=true").
 * @return mixed The response decoded by json_decode() with associative
 *               arrays, or NULL when the call fails.
 */
function get_api($host, $path) {
    global $opts;

    $reply = @getfc('https://' . $host . $path, $opts['timeout']);

    if ($reply['cont'] === false) {
        return NULL;
    }

    return json_decode($reply['cont'], true);
}
/**
 * Returns the languages recognised in a toot, with their probability.
 *
 * When the toot declares its language, that language is returned with
 * weight 1. Otherwise the language is detected from the toot content
 * (HTML tags stripped) with the LanguageDetection library.
 *
 * Derived codes are merged into their base language (e.g. "zh-CN" counts
 * as "zh"), keeping the highest weight seen for each base code.
 *
 * @param mixed $toot The toot to analyse, as returned by the API.
 * @return array Map from language code to probability. Empty when $toot is
 *               not an array (e.g. an element of an API error payload).
 */
function get_toot_languages($toot) {
    // Anything that is not a decoded toot object carries no language information.
    if (!is_array($toot)) {
        return array();
    }

    $langs = array();
    if (isset($toot['language'])) {
        // The language is already declared in the toot: use it.
        $langs[$toot['language']] = 1;
    } else {
        // The language is not declared (or is null): detect it from the text.
        $text = strip_tags(isset($toot['content']) ? $toot['content'] : '');
        $ld = new Language;
        $langs = $ld->detect($text)->bestResults()->close();
    }

    // Group derived languages under their base code, e.g. "zh" and "zh-CN".
    $grouped_langs = array();
    foreach ($langs as $key => $value) {
        $base = explode('-', $key)[0];
        if (array_key_exists($base, $grouped_langs)) {
            $grouped_langs[$base] = max($grouped_langs[$base], $value);
        } else {
            $grouped_langs[$base] = $value;
        }
    }

    return $grouped_langs;
}
/**
 * Averages the per-toot language probabilities.
 *
 * The weights of each language are summed over all toots and divided by the
 * number of toots, so a language missing from a toot counts as 0 there.
 *
 * @param array $detected_langs List of maps from language code to probability.
 * @return array Map from language code to average probability.
 */
function summary($detected_langs) {
    $totals = array();
    foreach ($detected_langs as $langs) {
        foreach ($langs as $code => $weight) {
            $soFar = isset($totals[$code]) ? $totals[$code] : 0;
            $totals[$code] = $soFar + $weight;
        }
    }

    $toots = count($detected_langs);

    // The callback is never invoked for an empty $totals, so $toots cannot be 0 here.
    return array_map(
        function ($sum) use ($toots) {
            return $sum / $toots;
        },
        $totals
    );
}
/**
 * usort() helper: compares two arrays by their first element, in DESCENDING
 * order (entries with the higher first element sort first).
 *
 * @param array $entry1 First array to compare.
 * @param array $entry2 Second array to compare.
 * @return int 1 when $entry1[0] is lower than $entry2[0], 0 when they are
 *             equal, -1 when $entry1[0] is higher.
 */
function sort_weights($entry1, $entry2) {
    $w1 = $entry1[0];
    $w2 = $entry2[0];

    if ($w1 < $w2) {
        return 1;
    }
    if ($w1 == $w2) {
        return 0;
    }
    return -1;
}
/**
 * Given a map of languages, returns the list of languages considered likely.
 *
 * The languages are ordered with the sort_weights() comparator and collected
 * until one weighs less than two thirds of the previously accepted one.
 *
 * @param array $summary Map from language code to probability.
 * @return string[] Codes of the languages considered likely.
 */
function get_languages($summary) {
    $ranked = array();
    foreach ($summary as $code => $weight) {
        $ranked[] = array($weight, $code);
    }
    usort($ranked, 'sort_weights');

    $picked = array();
    $previous = 0;
    foreach ($ranked as $pair) {
        list($weight, $code) = $pair;
        // Stop at the first language that is markedly weaker than the one before it.
        if ($weight < $previous * 2 / 3) {
            break;
        }
        $picked[] = $code;
        $previous = $weight;
    }

    return $picked;
}
/**
 * Returns the list of likely languages for an instance.
 *
 * Fetches the local public timeline (as many toots as $opts['ldtoots']),
 * evaluates the languages of each toot, averages them and keeps the likely ones.
 *
 * @param string $host Hostname of the instance (e.g. "mastodon.bida.im").
 * @return string[] Likely language codes; empty when the API call yields
 *                  nothing usable.
 */
function get_instance_langs($host) {
    global $opts;

    $toots = get_api($host, '/api/v1/timelines/public?local=true&limit=' . $opts['ldtoots']);

    // Loose comparison on purpose: an empty timeline is treated like a failed call.
    if ($toots == NULL) {
        return [];
    }

    $perToot = array_map('get_toot_languages', $toots);

    return get_languages(summary($perToot));
}
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/mustard/include/mb_ucfirst.php' );
2020-10-13 08:21:26 +02:00
/**
 * Builds the list of languages of an instance, registering unknown language
 * codes in the Languages table.
 *
 * The languages are either detected from the instance timeline ($auto true)
 * or taken from the "languages" entry of the global $info. Each code is
 * looked up in Languages; a missing code is inserted with its display names
 * in the original language, Catalan, English, Spanish, French and Italian
 * (skipped in dry-run mode, where the language ID is reported as 0).
 *
 * @param mixed  $instid ID of the instance, copied into every returned row.
 * @param string $uri    Host of the instance, used for automatic detection.
 * @param bool   $auto   True to detect the languages, false to use the declared ones.
 * @return array List of rows with the keys InstID, LangID, Pos (1-based) and Code.
 */
function langs($instid, $uri, $auto) {
    global $info, $instrow, $link, $opts;

    $retlangs = array();
    $languages = array();
    if ($auto) {
        $languages = get_instance_langs($uri);
    } elseif (akeavinn('languages', $info)) {
        $languages = $info['languages'];
    }

    // A non-array "languages" value cannot be processed: treat it as no languages.
    if (!is_array($languages) || count($languages) == 0) {
        return($retlangs);
    }

    // Exactly one of the two headings applies, depending on where the list came from.
    if ($auto) {
        echo('Lingue rilevate: ' . implode(', ', $languages) . N);
    } else {
        echo('Lingue dichiarate: ' . implode(', ', $languages) . N);
    }

    $pos = 0;
    foreach ($languages as $lang) {
        $res = mysqli_query($link, 'SELECT * FROM Languages WHERE Code=\'' . myesc($link, $lang) . '\'')
            or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);

        if (mysqli_num_rows($res) < 1) {
            // Unknown language: prepare the escaped, size-checked values for a new row.
            $ctx = '«' . $instrow['URI'] . '»';
            $code = myesc($link, truncs($lang, 'Languages', 'Code', $ctx));

            // Column name => locale in which the language name is rendered.
            $namecols = array(
                'NameOrig' => $lang,
                'NameCA' => 'ca',
                'NameEN' => 'en',
                'NameES' => 'es',
                'NameFR' => 'fr',
                'NameIT' => 'it',
            );
            $names = array();
            foreach ($namecols as $col => $locale) {
                $names[] = myesc($link, truncs(mb_ucfirst(locale_get_display_name($lang, $locale)), 'Languages', $col, $ctx));
            }

            $q = 'INSERT INTO Languages (ID, Code, NameOrig, NameCA, NameEN, NameES, NameFR, NameIT) VALUES (NULL, \'' . $code . '\', \'' . implode('\', \'', $names) . '\')';

            if (!$opts['dryrun']) {
                mysqli_query($link, $q) or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
                $langid = mysqli_insert_id($link);
            } else {
                $langid = 0;
            }
        } else {
            $row = mysqli_fetch_assoc($res);
            $langid = $row['ID'];
        }

        $pos++;
        $retlangs[] = array('InstID' => $instid, 'LangID' => $langid, 'Pos' => $pos, 'Code' => $lang);
    }

    return($retlangs);
}
/**
 * Returns what var_dump() would print for a value, instead of printing it.
 *
 * @param mixed $var Value to dump.
 * @return string The captured var_dump() output.
 */
function varbdump($var) {
    ob_start();
    var_dump($var);

    // Fetches the buffer contents and discards the buffer in one call.
    return(ob_get_clean());
}
/**
 * Sorts an array of sub-arrays, in place, by the value each sub-array holds
 * under $key. The outer keys are preserved.
 *
 * Sub-arrays that share the same sort value are all kept (the sort value is
 * only compared, never used as an array key).
 *
 * @param array      $arr Array of sub-arrays to sort (modified in place).
 * @param int|string $key Key of the sub-array value to sort by.
 * @param bool       $rev False for ascending order (default), true for descending.
 */
function mdasortbykey(&$arr, $key, $rev = false) {
    $direction = $rev ? -1 : 1;

    uasort($arr, function ($a, $b) use ($key, $direction) {
        if ($a[$key] == $b[$key]) {
            return 0;
        }
        return ($a[$key] < $b[$key]) ? -$direction : $direction;
    });
}
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/mustard/include/ghs.php' );
2020-10-13 08:21:26 +02:00
2020-10-21 15:26:31 +02:00
require ( __DIR__ . '/../site/mustard/include/ght.php' );
2020-10-13 08:21:26 +02:00
* Nodeinfo ( 'https://' . $dom . '/nodeinfo/2.0.json' ) è stato aggiunto nella 3.0 . 0
* Trends ( 'https://' . $dom . '/api/v1/trends' ) è stato aggiunto nella 3.0 . 0
* Activity ( 'https://' . $dom . '/api/v1/instance/activity' ) è stato aggiunto nella 2.1 . 2
// Open the JSON dump file when requested on the command line.
if ($opts['jsonwrite']) {
    // A resumed session appends to the existing dump; a fresh one starts a
    // new file. The second element is the mode name used in the error message.
    if ($riprendi) {
        $mode = array('a', 'aggiunta');
    } else {
        $mode = array('w', 'scrittura');
    }

    $jsonf = @fopen($opts['jsonfp'], $mode[0])
        or mexit('Non ho potuto aprire in modalità ' . $mode[1] . ' il file di dump delle info json «' . $opts['jsonfp'] . '».', 1);

    // Only a freshly created dump needs the opening brace of the JSON object.
    if ($mode[0] == 'w') {
        fwrite($jsonf, '{' . N);
    }
}
$tini = time ();
$cinsts = count ( $insts );
$i = 0 ;
$qok = 0 ;
$qgood = 0 ;
if ( $riprendi ) {
$i = $currinst [ 'i' ];
$qok = $currinst [ 'qok' ];
$qgood = $currinst [ 'qgood' ];
$beg = $i ;
while ( $i < $cinsts ) {
$dom = $insts [ $i ];
@ file_put_contents ( $currinstjfp , $dom . " \t " . $i . " \t " . $qok . " \t " . $qgood . N )
or mexit ( 'Non ho potuto aprire in scrittura il file «' . $currinstjfp . '».' , 1 );
$i ++ ;
$ismast = null ;
$instans = true ;
$info = null ;
$tela = time () - $tini ;
echo ( '~~~~ ' . $dom . ' - ' . $i . '/' . $cinsts . '; ' . $qok . ' OK; ' . $qgood . ' BUONE; ' . round ( 100 / $cinsts * $i ) . '%; tempo trascorso: ' . ght ( $tela , null , 0 ) . '; stima tempo rimanente: ' . ght ( $tela / $i * ( $cinsts - $beg ) - $tela , null , 0 ) . '; mem.: ' . ghs ( memory_get_usage ( true )) . ' picco mem.: ' . ghs ( memory_get_peak_usage ( true )) . ' ~~~~' . N );
if ( willtrunc ( $dom , 'Instances' , 'URI' )) {
echo ( 'ATTENZIONE: la lunghezza di «' . $dom . '» eccede quella del campo URI della tabella Instances, perciò lo ignoro.' );
} else {
echo ( 'Provo a recuperare le informazioni API sull’ istanza ... ' );
$buf =@ getfc ( 'https://' . $dom . '/api/v1/instance' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
$info = json_decode ( $buf [ 'cont' ], true );
if ( is_array ( $info )) {
echo ( 'OK :-)' . N );
echo ( 'Provo a recuperare le informazioni Nodeinfo sull’ istanza ... ' );
$buf =@ getfc ( 'https://' . $dom . '/nodeinfo/2.0.json' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
echo ( 'OK :-)' . N );
$info [ 'x-nodeinfo' ] = json_decode ( $buf [ 'cont' ], true );
// teniamo d'occhio le notifiche di cui sotto per includere eventualmente altri derivati di mastodon?
// visti fin qui, verificare cosa sono: epicyon
if ( isset ( $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ]) && ! is_null ( $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ])) {
if ( preg_match ( '/^mastodon|corgidon/' , $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ]) === 1 )
$ismast = true ;
$res = mysqli_query ( $link , 'SELECT Name FROM Platforms WHERE Name=\'' . myesc ( $link , $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ]) . '\'' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
if ( mysqli_num_rows ( $res ) < 1 ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO Platforms (Name) VALUES (\'' . myesc ( $link , truncs ( $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ], 'Platforms' , 'Name' , '«' . $info [ 'uri' ] . '»' )) . '\')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
notify ( '«' . $info [ 'uri' ] . '» utilizza come software «' . $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ] . '»; l’ ho aggiunto alla tabella delle piattaforme incontrate. Se non si tratta di mastodon o corgidon, che già vengono accettati, sarebbe buona cosa verificare se è una variante di mastodon e quanto è compatibile, per valutare se accettare le istanze che lo utilizzano.' , 1 );
} else {
echo ( 'ERRORE: ' . $buf [ 'emsg' ] . N );
if ( array_key_exists ( 'version' , $info )) {
if ( $info [ 'version' ] >= '2.1.2' ) {
echo ( 'Provo a recuperare le informazioni API sull’ attività dell’ istanza ... ' );
$buf =@ getfc ( 'https://' . $dom . '/api/v1/instance/activity' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
echo ( 'OK :-)' . N );
$info [ 'x-activity' ] = json_decode ( $buf [ 'cont' ], true );
} else {
echo ( 'ERRORE: ' . $buf [ 'emsg' ] . N );
if ( $info [ 'version' ] >= '3.0.0' ) {
echo ( 'Provo a recuperare le informazioni API sui trends dell’ istanza ... ' );
$buf =@ getfc ( 'https://' . $dom . '/api/v1/trends' , $opts [ 'timeout' ]);
if ( $buf [ 'cont' ] !== false ) {
echo ( 'OK :-)' . N );
$info [ 'x-trends' ] = json_decode ( $buf [ 'cont' ], true );
} else {
echo ( 'ERRORE: ' . $buf [ 'emsg' ] . N );
} else {
$instans = false ;
echo ( 'ERRORE: i dati recuperati non erano un array' . N );
} else {
$instans = false ;
echo ( 'ERRORE: ' . $buf [ 'emsg' ] . N );
if ( ! isset ( $info [ 'uri' ]) || preg_match ( '#^\s*$#' , $info [ 'uri' ]) === 1 )
$instans = false ;
if ( is_array ( $info ) && count ( $info ) > 0 ) {
//echo('Dumpone json di tutte le info recuperate:'.N.json_encode($info,JSON_PRETTY_PRINT).N);
if ( $opts [ 'jsonwrite' ])
fwrite ( $jsonf , '"' . $dom . '": ' . json_encode ( $info , JSON_PRETTY_PRINT ) . ',' . N );
2020-10-14 08:37:41 +02:00
$now = time ();
2020-10-13 08:21:26 +02:00
if ( ! $instans ) {
// questo è il limbo delle istanze che non rispondono
$res = mysqli_query ( $link , 'SELECT * FROM Instances WHERE URI=\'' . myesc ( $link , $dom ) . '\'' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
if ( mysqli_num_rows ( $res ) > 0 ) {
echo ( '«' . $dom . '» non risponde, ma è presente nel database; aggiorno InstChecks, Instances.LastCheckOk ed eventualmente Instances.Dead.' . N );
$row = mysqli_fetch_assoc ( $res );
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstChecks (InstID, Time, Status) VALUES (' . $row [ 'ID' ] . ', ' . $now . ', 0)' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'UPDATE Instances SET LastCheckOk=0 WHERE ID=' . $row [ 'ID' ])
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
// vediamo se ha mai risposto e nel caso ritorniamo per primo il momento dell'ultima risposta
$rres = mysqli_query ( $link , 'SELECT Time FROM InstChecks WHERE InstID=' . $row [ 'ID' ] . ' AND Status=1 ORDER BY Time DESC' ) or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
// se non ha mai risposto ritorniamo per primo il momento del primo check
if ( mysqli_num_rows ( $rres ) == 0 ) {
$rres = mysqli_query ( $link , 'SELECT Time FROM InstChecks WHERE InstID=' . $row [ 'ID' ] . ' AND Status=0 ORDER BY Time ASC' ) or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
if ( mysqli_num_rows ( $rres ) > 0 ) {
$rrow = mysqli_fetch_assoc ( $rres );
if ( $now - $rrow [ 'Time' ] > $opts [ 'deadline' ]) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'UPDATE Instances SET Dead=1 WHERE ID=' . $row [ 'ID' ])
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
notify ( 'L’ istanza «<a href="viewinst.php?id=' . $row [ 'ID' ] . '">' . $row [ 'URI' ] . '</a>» è MORTA!' , 0 );
} else {
echo ( 'PAZZESCO! «' . $dom . '» esiste nel database ma non ci sono dati relativi in InstChecks! Rimedio.' . N );
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstChecks SET InstID=' . $row [ 'ID' ] . ', Time=' . $now . ', Status=0' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
} else {
echo ( '«' . $dom . '» non risponde e non è nel database, la aggiungo.' . N );
( array_key_exists ( $dom , $blacklist )) ? $black = '1' : $black = '0' ;
// "New=0" e nessun FirstSeen (quindi NULL) perché non è nuova e non è vista per la prima volta finché non risponde la prima volta
if ( ! $opts [ 'dryrun' ]) {
mysqli_query ( $link , 'INSERT INTO Instances SET New=0, Good=0, Chosen=0, Visible=0, Blacklisted=' . $black . ', URI=\'' . myesc ( $link , $dom ) . '\', LastCheckOk=0' ) or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
$instid = mysqli_insert_id ( $link );
mysqli_query ( $link , 'INSERT INTO InstChecks SET InstID=' . $instid . ', Time=' . $now . ', Status=0' ) or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
} else {
$instid = 0 ;
} else {
// l'istanza ha risposto occhei...
// Decide whether the responding instance is Mastodon, unless $ismast was
// already set before this point. Without a 'version' key the flag stays
// null (unknown). Otherwise the instance counts as Mastodon only when all
// of the following hold, checked in this order:
//   - $info has no 'pleroma' key,
//   - the version string does not mention compatible/pleroma/pixelfed,
//   - the version string starts with a dotted x.y.z number.
if ($ismast === null && array_key_exists('version', $info)) {
	$ismast = !array_key_exists('pleroma', $info)
		&& preg_match('#(compatible|pleroma|pixelfed)#i', $info['version']) != 1
		&& preg_match('#^[0-9]+\.[0-9]+\.[0-9]+#', $info['version']) === 1;
}
// One more instance answered correctly.
$qok++;
// Turn a known boolean into 1/0; an unknown (null) value is left as null.
if ($ismast !== null) {
	$ismast = $ismast ? 1 : 0;
}
// Build the row destined for the Instances table. Every column starts from
// a default and is then overwritten with whatever the instance reported in
// $info. Key order is kept as-is because the INSERT/UPDATE queries built
// later iterate over this array.
$instrow = array(
	'ID' => null,
	'FirstSeen' => null,
	'IsMastodon' => $ismast,
	'Dead' => 0,
	'New' => 0,
	'Good' => 0,
	'Chosen' => 0,
	'Priority' => null,
	'Visible' => 0,
	'Blacklisted' => 0,
	'URI' => null,
	'Title' => null,
	'ShortDesc' => null,
	'LongDesc' => null,
	'OurDesc' => null,
	'OurDescEN' => null,
	'LocalityID' => null,
	'OurLangsLock' => 0,
	'Email' => null,
	'Software' => null,
	'Version' => null,
	'UserCount' => null,
	'StatusCount' => null,
	'DomainCount' => null,
	'ActiveUsersMonth' => null,
	'ActiveUsersHalfYear' => null,
	'Thumb' => null,
	'RegOpen' => null,
	'RegReqApproval' => null,
	'MaxTootChars' => null,
	'AdmAccount' => null,
	'AdmDisplayName' => null,
	'AdmCreatedAt' => null,
	'AdmNote' => null,
	'AdmURL' => null,
	'AdmAvatar' => null,
	'AdmHeader' => null,
	'LastCheckOk' => 1,
	'GuestID' => null,
	'LastGuestEdit' => null,
);
if (array_key_exists($info['uri'], $blacklist)) {
	$instrow['Blacklisted'] = 1;
}
$instrow['URI'] = $info['uri'];

// Label passed as last argument to truncs()/truncn(), and prefix passed to b2i().
$instTag = '«' . $instrow['URI'] . '»';
$b2iTag = 'Istanza «' . $instrow['URI'] . '»: ';

// Top-level text fields: API key => Instances column.
foreach (array(
	'title' => 'Title',
	'short_description' => 'ShortDesc',
	'description' => 'LongDesc',
	'email' => 'Email',
	'version' => 'Version',
) as $srcKey => $dstCol) {
	if (akeavinn($srcKey, $info)) {
		$instrow[$dstCol] = nempty(truncs($info[$srcKey], 'Instances', $dstCol, $instTag));
	}
}

// Numeric counters nested under 'stats'.
if (akeavinn('stats', $info)) {
	foreach (array(
		'user_count' => 'UserCount',
		'status_count' => 'StatusCount',
		'domain_count' => 'DomainCount',
	) as $srcKey => $dstCol) {
		if (akeavinn($srcKey, $info['stats'])) {
			$instrow[$dstCol] = truncn($info['stats'][$srcKey], 'Instances', $dstCol, $instTag);
		}
	}
}

if (akeavinn('thumbnail', $info)) {
	$instrow['Thumb'] = nempty(truncs($info['thumbnail'], 'Instances', 'Thumb', $instTag));
}
if (akeavinn('max_toot_chars', $info)) {
	$instrow['MaxTootChars'] = truncn($info['max_toot_chars'], 'Instances', 'MaxTootChars', $instTag);
}
if (akeavinn('registrations', $info)) {
	$instrow['RegOpen'] = b2i($info['registrations'], $b2iTag);
}
if (akeavinn('approval_required', $info)) {
	$instrow['RegReqApproval'] = b2i($info['approval_required'], $b2iTag);
}

// Administrator ("contact") account details. 'created_at' is the only
// field converted with pgdatetomy() instead of truncs()/nempty().
if (akeavinn('contact_account', $info)) {
	foreach (array(
		'acct' => 'AdmAccount',
		'display_name' => 'AdmDisplayName',
		'created_at' => 'AdmCreatedAt',
		'note' => 'AdmNote',
		'url' => 'AdmURL',
		'avatar' => 'AdmAvatar',
		'header' => 'AdmHeader',
	) as $srcKey => $dstCol) {
		if (!akeavinn($srcKey, $info['contact_account'])) {
			continue;
		}
		if ($srcKey === 'created_at') {
			$instrow[$dstCol] = pgdatetomy($info['contact_account'][$srcKey]);
		} else {
			$instrow[$dstCol] = nempty(truncs($info['contact_account'][$srcKey], 'Instances', $dstCol, $instTag));
		}
	}
}

// Data found under the 'x-nodeinfo' key: software name and active-user counters.
if (akeavinn('x-nodeinfo', $info)) {
	if (akeavinn('software', $info['x-nodeinfo']) && akeavinn('name', $info['x-nodeinfo']['software'])) {
		$instrow['Software'] = nempty(truncs($info['x-nodeinfo']['software']['name'], 'Instances', 'Software', $instTag));
	}
	if (akeavinn('usage', $info['x-nodeinfo']) && akeavinn('users', $info['x-nodeinfo']['usage'])) {
		foreach (array(
			'activeMonth' => 'ActiveUsersMonth',
			'activeHalfyear' => 'ActiveUsersHalfYear',
		) as $srcKey => $dstCol) {
			if (akeavinn($srcKey, $info['x-nodeinfo']['usage']['users'])) {
				$instrow[$dstCol] = truncn($info['x-nodeinfo']['usage']['users'][$srcKey], 'Instances', $dstCol, $instTag);
			}
		}
	}
}
// Collect every reason that prevents this instance from being flagged as
// "good". An empty list means the instance qualifies.
$whynot = array();
if ($instrow['Blacklisted'] == 1) {
	$whynot[] = 'è nella blacklist';
}
// Registrations must be known and open.
if (is_null($instrow['RegOpen'])) {
	$whynot[] = 'non se ne conosce lo stato delle registrazioni (aperte/chiuse)';
} elseif ($instrow['RegOpen'] == 0) {
	$whynot[] = 'ha le registrazioni chiuse';
}
// User count must be known and between 10 and 30000.
if (is_null($instrow['UserCount'])) {
	$whynot[] = 'non se ne conosce il numero di utenti';
} elseif ($instrow['UserCount'] < 10 || $instrow['UserCount'] > 30000) {
	$whynot[] = 'il numero di utenti non è compreso tra 10 e 30.000';
}
// Number of known peer instances must be known and at least 500.
if (is_null($instrow['DomainCount'])) {
	$whynot[] = 'non se ne conosce il numero di istanze note';
} elseif ($instrow['DomainCount'] < 500) {
	$whynot[] = 'il numero di istanze note è minore di 500';
}
// Activity check: prefer the monthly active users figure; when it is not
// available fall back to the average number of statuses per user.
if (!is_null($instrow['ActiveUsersMonth'])) {
	if ($instrow['ActiveUsersMonth'] < 10) {
		$whynot[] = 'il numero di utenti attivi nell’ ultimo mese è minore di 10';
	}
} elseif (!is_null($instrow['StatusCount']) && $instrow['UserCount'] > 0) {
	// The ratio is computable here (UserCount > 0 also rules out a division
	// by zero). Only a ratio below 10 is a reason; previously a ratio of 10
	// or more fell through to the "impossible to determine" branch below and
	// wrongly disqualified the instance.
	if ($instrow['StatusCount'] / $instrow['UserCount'] < 10) {
		$whynot[] = 'il numero medio di toots per utente è minore di 10';
	}
} else {
	$whynot[] = 'è stato impossibile determinare il numero di utenti attivi nell’ ultimo mese o il numero medio di toots per utente';
}
if (count($whynot) == 0) {
	$instrow['Good'] = 1;
	echo('Siamo in presenza di un’ istanza BUONA! :-)' . N);
	$qgood++;
} else {
	echo('Siamo in presenza di un’ istanza CATTIVA: ' . implode('; ', $whynot) . ' :-(' . N);
}
// Persist the freshly built $instrow: look the instance up by URI, then either
// update the stored row (first branch) or insert a new one (branch after the
// "} else {" further down), together with its language lists.
// NOTE(review): in this listing the lines that held only a closing brace or a
// bare "else" appear to be missing (e.g. the two consecutive appends inside the
// field loops below are presumably if/else alternatives) — confirm the exact
// nesting against the real file before changing any code here.
$res = mysqli_query ( $link , 'SELECT * FROM Instances WHERE URI=\'' . myesc ( $link , $instrow [ 'URI' ]) . '\'' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
if ( mysqli_num_rows ( $res ) > 0 ) {
echo ( '«' . $instrow [ 'URI' ] . '» è già presente nel DB, la aggiorno...' . N );
// Reuse the ID of the stored row for all the following queries.
$oldinstrow = mysqli_fetch_assoc ( $res );
$instid = $oldinstrow [ 'ID' ];
$instrow [ 'ID' ] = $oldinstrow [ 'ID' ];
// if the instance already in the db has FirstSeen=NULL, it was added without ever having responded and this is the first time it responds, so...
if ( is_null ( $oldinstrow [ 'FirstSeen' ])) {
$instrow [ 'FirstSeen' ] = time ();
$instrow [ 'New' ] = 1 ;
} else {
$instrow [ 'FirstSeen' ] = $oldinstrow [ 'FirstSeen' ];
$instrow [ 'New' ] = $oldinstrow [ 'New' ];
// Notify when the newly computed Good flag differs from the stored one.
if ( $instrow [ 'Good' ] == 1 && $oldinstrow [ 'Good' ] == 0 ) {
notify ( 'L’ istanza «<a href="viewinst.php?id=' . $instrow [ 'ID' ] . '">' . $instrow [ 'URI' ] . '</a>» non era papabile, ma lo è diventata!' , 1 );
} elseif ( $instrow [ 'Good' ] == 0 && $oldinstrow [ 'Good' ] == 1 ) {
notify ( 'L’ istanza «<a href="viewinst.php?id=' . $instrow [ 'ID' ] . '">' . $instrow [ 'URI' ] . '</a>» era papabile, ma non lo è più per i seguenti motivi: ' . implode ( '; ' , $whynot ), 3 );
// Chosen, Priority and Visible are carried over from the stored row.
$instrow [ 'Chosen' ] = $oldinstrow [ 'Chosen' ];
$instrow [ 'Priority' ] = $oldinstrow [ 'Priority' ];
$instrow [ 'Visible' ] = $oldinstrow [ 'Visible' ];
// Notify when the short or long description differs from the stored one.
if ( $instrow [ 'ShortDesc' ] != $oldinstrow [ 'ShortDesc' ])
notify ( 'La «Descrizione breve» dell’ istanza «<a href="viewinst.php?id=' . $instrow [ 'ID' ] . '">' . $instrow [ 'URI' ] . '</a>» è cambiata.' , 2 );
if ( $instrow [ 'LongDesc' ] != $oldinstrow [ 'LongDesc' ])
notify ( 'La «Descrizione lunga» dell’ istanza «<a href="viewinst.php?id=' . $instrow [ 'ID' ] . '">' . $instrow [ 'URI' ] . '</a>» è cambiata.' , 2 );
// These columns are also carried over unchanged from the stored row.
$instrow [ 'OurDesc' ] = $oldinstrow [ 'OurDesc' ];
$instrow [ 'OurDescEN' ] = $oldinstrow [ 'OurDescEN' ];
$instrow [ 'LocalityID' ] = $oldinstrow [ 'LocalityID' ];
$instrow [ 'OurLangsLock' ] = $oldinstrow [ 'OurLangsLock' ];
$instrow [ 'GuestID' ] = $oldinstrow [ 'GuestID' ];
$instrow [ 'LastGuestEdit' ] = $oldinstrow [ 'LastGuestEdit' ];
// Build the UPDATE from every $instrow field (escaped value or NULL); the
// final substr() drops the trailing ", " before the WHERE clause is appended.
$query = 'UPDATE Instances SET ' ;
foreach ( $instrow as $field => $value ) {
if ( ! is_null ( $value ))
$query .= $field . '=\'' . myesc ( $link , $value ) . '\', ' ;
$query .= $field . '=NULL, ' ;
$query = substr ( $query , 0 , - 2 ) . ' WHERE Instances.ID=' . $instrow [ 'ID' ];
echo ( 'QUERONA DI UPDATE: «' . $query . '».' . N );
// The query is printed in any case but executed only when not in dry-run mode.
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , $query )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
// Load the languages currently stored for this instance, ordered by position.
$res = mysqli_query ( $link , 'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID=' . $instrow [ 'ID' ] . ' ORDER BY Pos ASC' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
$oldinstlangs = array ();
while ( $row = mysqli_fetch_assoc ( $res ))
$oldinstlangs [] = $row ;
// langs() is defined elsewhere; it is called with false here and with true
// for the "our" list below. Its result is compared with the stored rows and,
// when different, the stored rows are replaced.
$instlangs = langs ( $instrow [ 'ID' ], $instrow [ 'URI' ], false );
if ( $instlangs != $oldinstlangs ) {
notify ( 'La lista delle lingue utilizzate dichiarate dall’ istanza «<a href="viewinst.php?id=' . $instrow [ 'ID' ] . '">' . $instrow [ 'URI' ] . '</a>» è cambiata da «' . subarimp ( ', ' , 'Code' , $oldinstlangs ) . '» a «' . subarimp ( ', ' , 'Code' , $instlangs ) . '».' , 2 );
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'DELETE FROM InstLangs WHERE InstID=' . $instrow [ 'ID' ])
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
foreach ( $instlangs as $row ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row [ 'InstID' ] . ', ' . $row [ 'LangID' ] . ', ' . $row [ 'Pos' ] . ')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
// InstOurLangs is rewritten only when OurLangsLock is 0 for this instance.
if ( $instrow [ 'OurLangsLock' ] == 0 ) {
$instourlangs = langs ( $instrow [ 'ID' ], $instrow [ 'URI' ], true );
// if instourlangs is empty and instlangs is not, set instourlangs to instlangs
if ( count ( $instourlangs ) == 0 && count ( $instlangs ) > 0 )
$instourlangs = $instlangs ;
if ( count ( $instourlangs ) > 0 ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'DELETE FROM InstOurLangs WHERE InstID=' . $instrow [ 'ID' ])
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
foreach ( $instourlangs as $row ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row [ 'InstID' ] . ', ' . $row [ 'LangID' ] . ', ' . $row [ 'Pos' ] . ')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
} else {
// The URI is not in the Instances table yet: insert a new row.
echo ( '«' . $info [ 'uri' ] . '» non è già presente nel DB, la aggiungo...' . N );
$instrow [ 'FirstSeen' ] = $now ;
// The New flag is raised only when the 'setnew' option is enabled.
if ( $opts [ 'setnew' ])
$instrow [ 'New' ] = 1 ;
// Build the column list and the matching list of escaped values / NULLs;
// the final substr() drops the trailing ", ".
$fields = array ();
$values = '' ;
foreach ( $instrow as $field => $value ) {
$fields [] = $field ;
if ( ! is_null ( $value ))
$values .= '\'' . myesc ( $link , $value ) . '\', ' ;
$values .= 'NULL, ' ;
$values = substr ( $values , 0 , - 2 );
$query = 'INSERT INTO Instances (' . implode ( ', ' , $fields ) . ') VALUES (' . $values . ')' ;
echo ( 'QUERONA DI INSERT: «' . $query . '»' . N );
// In dry-run mode nothing is inserted, so the instance ID falls back to 0.
if ( ! $opts [ 'dryrun' ]) {
mysqli_query ( $link , $query ) or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
$instid = mysqli_insert_id ( $link );
} else {
$instid = 0 ;
if ( $opts [ 'setnew' ])
notify ( 'Ho trovato una nuova istanza: «<a href="viewinst.php?id=' . $instid . '">' . $instrow [ 'URI' ] . '</a>».' , 1 );
// Store the language list returned by langs() for the new instance.
$instlangs = langs ( $instid , $instrow [ 'URI' ], false );
foreach ( $instlangs as $row ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row [ 'InstID' ] . ', ' . $row [ 'LangID' ] . ', ' . $row [ 'Pos' ] . ')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
$instourlangs = langs ( $instid , $instrow [ 'URI' ], true );
// if instourlangs is empty and instlangs is not, set instourlangs to instlangs
if ( count ( $instourlangs ) == 0 && count ( $instlangs ) > 0 )
$instourlangs = $instlangs ;
foreach ( $instourlangs as $row ) {
if ( ! $opts [ 'dryrun' ]) mysqli_query ( $link , 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row [ 'InstID' ] . ', ' . $row [ 'LangID' ] . ', ' . $row [ 'Pos' ] . ')' )
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
// A new instance that already qualifies as good gets its own notification.
if ( $instrow [ 'Good' ] == 1 )
notify ( 'La nuova istanza «<a href="viewinst.php?id=' . $instid . '">' . $instrow [ 'URI' ] . '</a>» è papabile!' , 1 );
// Replace the stored weekly activity of this instance with the entries found
// under 'x-activity'. Entries that are not arrays or lack one of the four
// expected fields are skipped; Pos numbers the accepted entries from 1.
if (array_key_exists('x-activity', $info) && is_array($info['x-activity'])) {
	// A failed DELETE now aborts like every other query in this script,
	// instead of being silently ignored before the re-insert.
	if (!$opts['dryrun']) {
		mysqli_query($link, 'DELETE FROM InstActivity WHERE InstID=' . $instid)
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	}
	$pos = 0;
	foreach ($info['x-activity'] as $buf) {
		if (is_array($buf) && akeavinn('week', $buf) && akeavinn('statuses', $buf) && akeavinn('logins', $buf) && akeavinn('registrations', $buf)) {
			$pos++;
			$query = 'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\'' . $instid . '\', \'' . myesc($link, $buf['week']) . '\', \'' . myesc($link, $buf['statuses']) . '\', \'' . myesc($link, $buf['logins']) . '\', \'' . myesc($link, $buf['registrations']) . '\', ' . $pos . ')';
			if (!$opts['dryrun']) {
				mysqli_query($link, $query)
					or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
			}
		}
	}
}

// Replace the stored trends of this instance with the entries found under
// 'x-trends', ordered through mdasortbykey() on a score computed as the sum,
// over the history entries, of accounts / uses.
if (array_key_exists('x-trends', $info) && is_array($info['x-trends'])) {
	$trends = array();
	foreach ($info['x-trends'] as $buf) {
		if (is_array($buf) && akeavinn('name', $buf) && akeavinn('url', $buf) && akeavinn('history', $buf) && is_array($buf['history'])) {
			// The first history entry supplies LastDay: skip trends whose
			// history is empty or has no usable 'day' instead of reading an
			// undefined offset.
			if (!isset($buf['history'][0]['day']) || !is_scalar($buf['history'][0]['day'])) {
				continue;
			}
			$trend = 0;
			foreach ($buf['history'] as $row) {
				// Remote data: only use entries where both counters are
				// present and numeric, and 'uses' is positive (no division by zero).
				if (is_array($row) && isset($row['uses'], $row['accounts']) && is_numeric($row['uses']) && is_numeric($row['accounts']) && $row['uses'] > 0) {
					$trend += ($row['accounts'] / $row['uses']);
				}
			}
			$trends[] = array(
				'InstID' => $instid,
				'LastDay' => $buf['history'][0]['day'],
				'Name' => $buf['name'],
				'URL' => $buf['url'],
				'Pos' => null,
				'trend' => $trend
			);
		}
	}
	mdasortbykey($trends, 'trend', true);
	if (!$opts['dryrun']) {
		mysqli_query($link, 'DELETE FROM InstTrends WHERE InstID=' . $instid)
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	}
	$pos = 0;
	foreach ($trends as $trend) {
		$pos++;
		// LastDay comes from the remote instance like Name and URL, so it is
		// escaped too (it used to be interpolated into the query as-is).
		$query = 'INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES (' . $trend['InstID'] . ', \'' . myesc($link, $trend['LastDay']) . '\', \'' . myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«' . $instrow['URI'] . '»')) . '\', \'' . myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«' . $instrow['URI'] . '»')) . '\', ' . $pos . ')';
		if (!$opts['dryrun']) {
			mysqli_query($link, $query)
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		}
	}
}

// Record this successful check (Status 1) for the instance.
if (!$opts['dryrun']) {
	mysqli_query($link, 'INSERT INTO InstChecks (InstID, Time, Status) VALUES (' . $instid . ', ' . $now . ', 1)')
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
}
echo ( N );
// Regular end of the run: close the MySQL connection, finish the JSON dump
// when one was being written, remove the lock file and exit with status 0.
mysqli_close($link);
if ($opts['jsonwrite']) {
	// Write the final member and the closing brace of the JSON object, then
	// close the dump and delete the two files named by $instsjfp and $currinstjfp.
	$jsonTail = '"Fine?": true' . N . '}' . N;
	fwrite($jsonf, $jsonTail);
	fclose($jsonf);
	unlink($instsjfp);
	unlink($currinstjfp);
}
// NOTE(review): the closing-brace line of the branch above is not visible in
// this listing; the lock file is assumed to be removed regardless of the
// 'jsonwrite' option — confirm against the real file.
unlink($lockfp);
exit(0);