#!/usr/bin/php
<?php
/*
	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2020-03-24 23:22:07 +01:00
// Line terminator appended to every console/log message and to the lines
// written to the job files.
define('N', "\n");

// Composer autoloader (provides LanguageDetection\Language).
require __DIR__ . '/../../vendor/autoload.php';
use LanguageDetection\Language;

// True when running on Windows; the helpers below then look up table
// metadata under the lower-cased table name.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');

// Shared handles, all "not open" (false) until the script opens them.
// signalHandler() and mexit() close whichever of them is truthy.
$link = false;   // MySQL connection
$logf = false;   // log file
$jsonf = false;  // JSON dump file

// Ticks are required for handlers registered with pcntl_signal() to be dispatched.
declare(ticks=1);
2020-04-03 10:59:02 +02:00
// Signal handling is only installed when the pcntl extension is available.
if (function_exists('pcntl_signal')) {
	/**
	 * Handles SIGTERM/SIGHUP/SIGINT: reports the interruption, closes the
	 * MySQL connection, the JSON dump file and the log file if they are
	 * open, then exits with status 2.
	 *
	 * @param int $signal The received signal number (not used).
	 */
	function signalHandler($signal) {
		global $link, $logf, $jsonf;
		lecho(N . 'Sono stato interrotto.' . N);
		if ($link) {
			lecho('La connessione MySQL è aperta, la chiudo.' . N);
			mysqli_close($link);
		}
		if ($jsonf) {
			lecho('Il file di dump json è aperto, lo chiudo.' . N);
			// The closing '"Fine?": true' / '}' footer is deliberately NOT written
			// here, otherwise resuming the job would mess up the dump file.
			fclose($jsonf);
		}
		// The log file is closed last because lecho() may still write to it above.
		if ($logf) {
			lecho('Il file di log è aperto, lo chiudo.' . N);
			fclose($logf);
		}
		exit(2);
	}
	pcntl_signal(SIGTERM, 'signalHandler'); // Termination ('kill' was called)
	pcntl_signal(SIGHUP, 'signalHandler'); // Terminal log-out
	pcntl_signal(SIGINT, 'signalHandler'); // Interrupted (Ctrl-C is pressed)
}
2019-12-06 08:28:36 +01:00
// Runtime options.
$opts = array(
	// Timeout handed to the HTTP stream context used for every remote fetch.
	'timeout' => 3,
	// When true, lecho() also writes every message to the log file.
	'log' => false,
	// Path of the JSON dump of everything fetched from the instances.
	'jsonfp' => __DIR__ . '/instances.json',
	// When true, the JSON dump above is actually written.
	'jsonwrite' => false,
	// File holding the start instances, one domain per line.
	'peersfp' => 'peers'
);
2019-12-01 09:07:45 +01:00
2019-12-26 21:57:36 +01:00
use function mysqli_real_escape_string as myesc ;
2019-12-01 09:07:45 +01:00
2019-12-06 08:28:36 +01:00
/**
 * Converts a duration string made of digits plus an optional unit suffix
 * into seconds.
 *
 * Suffixes: none or "s" = seconds, "m" = minutes, "o" = hours, "g" = days,
 * "S" = weeks, "M" = months of 30 days, "A" = years of 365 days.
 * Only the start of the string is matched; anything after the optional
 * suffix is ignored.
 *
 * @param string $str The duration, e.g. "90", "15m", "2g".
 * @return int|float|false Number of seconds, or false when $str does not
 *                         start with digits.
 */
function tosec($str) {
	$multipliers = array(
		'' => 1,
		's' => 1,
		'm' => 60,
		'o' => 60 * 60,
		'g' => 60 * 60 * 24,
		'S' => 60 * 60 * 24 * 7,
		'M' => 60 * 60 * 24 * 30,
		'A' => 60 * 60 * 24 * 365,
	);
	if (preg_match('/^([0-9]+)([smogSMA]?)/', $str, $buf) !== 1)
		return false;
	// The regex only lets through suffixes that are keys of $multipliers.
	// Multiplying (even by 1) always yields a number, never the raw digit string.
	return $buf[1] * $multipliers[$buf[2]];
}
2019-12-01 09:07:45 +01:00
2019-12-28 22:51:41 +01:00
/**
 * Emits a final message, closes every open shared handle and terminates
 * the script.
 *
 * @param string $msg  Message passed to lecho() before shutting down.
 * @param int    $code Exit status of the script.
 */
function mexit($msg, $code) {
	global $link, $jsonf, $logf;
	lecho($msg);
	if ($link)
		mysqli_close($link);
	if ($jsonf)
		fclose($jsonf);
	// Closed last: lecho() above may still have needed it.
	if ($logf)
		fclose($logf);
	exit($code);
}
2019-12-26 21:57:36 +01:00
/**
 * Prints a message on standard output and, when logging is enabled,
 * appends it to the log file.
 *
 * @param string $msg     The message (no line terminator is added).
 * @param bool   $logonly When true the message is not printed, only logged.
 */
function lecho($msg, $logonly = false) {
	global $opts, $logf;
	if (!$logonly)
		echo($msg);
	// Logging may be enabled while the log file is not (or no longer) open;
	// in that case skip the write instead of calling fwrite() on a non-resource.
	if ($opts['log'] && is_resource($logf))
		fwrite($logf, $msg);
}
2019-12-30 22:51:32 +01:00
// Job files: the list of instances to crawl and the position reached.
$instsjfp = __DIR__ . '/instances.job';
$currinstjfp = __DIR__ . '/currinst.job';
// A previous run is resumed only when both job files exist.
$riprendi = (file_exists($currinstjfp) && file_exists($instsjfp));
$logfp = __DIR__ . '/crawler.log';
if ($opts['log']) {
	// Append to the existing log when resuming, start a new one otherwise.
	// The second element is the mode name used in the error message.
	if ($riprendi)
		$mode = array('a', 'aggiunta');
	else
		$mode = array('w', 'scrittura');
	$logf = @fopen($logfp, $mode[0]);
	if ($logf === false) {
		// Plain echo: lecho()/mexit() would try to use the log file that could not be opened.
		echo('Non ho potuto aprire in modalità ' . $mode[1] . ' il file di log «' . $logfp . '».' . N);
		exit(1);
	}
}
2020-05-08 17:38:54 +02:00
// Read the configuration and connect to MySQL; any failure here is fatal.
$inifp = __DIR__ . '/../sec/mustard.ini';
$iniarr = @parse_ini_file($inifp)
	or mexit('Impossibile aprire il file di configurazione «' . $inifp . '»' . N, 1);
$link = @mysqli_connect($iniarr['db_host'], $iniarr['db_admin_name'], $iniarr['db_admin_password'], $iniarr['db_name'], $iniarr['db_port'], $iniarr['db_socket'])
	or mexit('Impossibile connettersi al server MySQL: ' . mysqli_connect_error() . N, 1);
mysqli_set_charset($link, 'utf8mb4')
	or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 1);
// tables() is defined in the included file; its result is the $tables map
// that truncs(), truncn(), willtrunc() and notify() read column limits from.
require(__DIR__ . '/../include/tables.php');
$tables = tables($link);
//print_r($tables);
2019-12-28 22:51:41 +01:00
2019-12-30 22:51:32 +01:00
// Resume an interrupted job: reload the instance list and the saved position.
if ($riprendi) {
	lecho('Pare che ci sia un lavoro in sospeso, provo a riprenderlo...' . N);

	// Instance list written by a previous run, one domain per line.
	$buf = @file($instsjfp, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
	if (!$buf) {
		mexit('Non ho potuto aprire in lettura il file «' . $instsjfp . '».' . N, 1);
	}
	$insts = array();
	foreach ($buf as $line) {
		$insts[] = $line;
	}

	// Saved position: a single record with domain, index and the two counters.
	$buf = @file($currinstjfp, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
	if (!$buf) {
		mexit('Non ho potuto aprire in lettura il file «' . $currinstjfp . '».' . N, 1);
	}
	$buf = explode(" \t ", $buf[0]);
	$currinst = array(
		'dom' => $buf[0],
		'i' => $buf[1],
		'qok' => $buf[2],
		'qgood' => $buf[3]
	);
	$riprendi = true;
}
2020-01-02 13:19:38 +01:00
/**
 * Shortens a string so that it fits the given table column, notifying
 * when truncation was necessary.
 *
 * A truncated value keeps its first (size - 1) characters followed by an
 * ellipsis character, so the result is exactly "size" characters long.
 *
 * @param string $str The value to store.
 * @param string $tab Table name (lower-cased on Windows before the lookup).
 * @param string $col Column name; $tables[$tab][$col] is its maximum length.
 * @param string $ctx Context prefix for the notification text.
 * @return string The value, truncated if it was too long.
 */
function truncs($str, $tab, $col, $ctx) {
	global $tables, $iswin;
	if ($iswin)
		$tab = strtolower($tab);
	$size = $tables[$tab][$col];
	$len = mb_strlen($str, 'UTF-8');
	if ($len > $size) {
		notify($ctx . ': ho dovuto troncare a ' . $size . ' caratteri il valore da inserire nella colonna «' . $col . '» della tabella «' . $tab . '» perché troppo lungo (' . $len . ' caratteri).', 2);
		$str = mb_substr($str, 0, $size - 1, 'UTF-8') . '…';
	}
	return $str;
}
2019-12-26 21:57:36 +01:00
2020-01-02 13:19:38 +01:00
/**
 * Clamps a number to the range allowed by the given table column,
 * notifying when the value had to be changed.
 *
 * @param mixed  $num The value to store.
 * @param string $tab Table name (lower-cased on Windows before the lookup).
 * @param string $col Column name; $tables[$tab][$col] holds 'min' and 'max'.
 * @param string $ctx Context prefix for the notification text.
 * @return mixed $num when in range, the column 'min'/'max' when out of
 *               range, 0 when $num is not numeric.
 */
function truncn($num, $tab, $col, $ctx) {
	global $tables, $iswin;
	if ($iswin)
		$tab = strtolower($tab);
	if (!is_numeric($num)) {
		notify($ctx . ': truncn(): mi aspettavo un numero, invece non lo era; ritorno «0».', 3);
		return 0;
	}
	$limits = $tables[$tab][$col];
	if ($num > $limits['max']) {
		notify($ctx . ': ho dovuto troncare «' . $num . '» al valore massimo «' . $limits['max'] . '» che può avere nella colonna «' . $col . '» della tabella «' . $tab . '»).', 2);
		$num = $limits['max'];
	} elseif ($num < $limits['min']) {
		notify($ctx . ': ho dovuto troncare «' . $num . '» al valore minimo «' . $limits['min'] . '» che può avere nella colonna «' . $col . '» della tabella «' . $tab . '»).', 2);
		$num = $limits['min'];
	}
	return $num;
}
2019-12-26 21:57:36 +01:00
// Stream context shared by every remote fetch.
$contextopts = array(
	'http' => array(
		'timeout' => $opts['timeout']
	),
	'socket' => array(
		'tcp_nodelay' => true
	)
);
$context = stream_context_create($contextopts);

// Load the blacklist already stored in the database, indexed by domain.
$blacklist = array();
lecho('Carico la blacklist dal database...' . N);
$res = mysqli_query($link, 'SELECT * FROM Blacklist')
	or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
lecho(mysqli_num_rows($res) . ' istanze nella blacklist.' . N);
while ($row = mysqli_fetch_assoc($res)) {
	$blacklist[$row['Domain']] = $row;
}
2019-12-26 21:57:36 +01:00
/**
 * Converts a "YYYY-MM-DD hh:mm:ss[.fraction][Z]" timestamp (date and time
 * separated by a space or "T") into a Unix timestamp.
 *
 * NOTE(review): the time is interpreted with mktime(), i.e. in the
 * script's default timezone, even when the input carries a trailing "Z" —
 * confirm whether gmmktime() is what is wanted.
 *
 * @param string $pgdate The timestamp text.
 * @return int|float Unix time (float when a fractional part is present);
 *                   the current time, after a notification, when the
 *                   format is not recognised.
 */
function pgdatetomy($pgdate) {
	if (preg_match('/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)(\.\d+)?Z?$/', $pgdate, $buf) !== 1) {
		notify('pgdatetomy: «' . $pgdate . '» non è un formato di data riconosciuto! Ritorno il magico momento attuale.', 3);
		return time();
	}
	// Explicit casts: the captures are digit strings such as "09".
	$mtime = mktime((int) $buf[4], (int) $buf[5], (int) $buf[6], (int) $buf[2], (int) $buf[3], (int) $buf[1]);
	// Group 7 (".fraction") is only present in $buf when it matched.
	if (array_key_exists(7, $buf))
		$mtime = $mtime + floatval('0' . $buf[7]);
	return $mtime;
}
2019-12-26 21:57:36 +01:00
/**
 * Parses one tab-separated line of a domain-blocks dump into a Blacklist row.
 *
 * Field order: domain, created-at, modified-at, severity, reject-media
 * ("t"/"f"), reject-reports ("t"/"f"), public comment.
 * Missing trailing fields are treated as absent; a flag that is neither
 * "t" nor "f" counts as 0.
 *
 * @param string $line One data line of the dump.
 * @return array Map with keys Domain, CreatedAt, ModifiedAt, Severity,
 *               RejectMedia, RejectReports, PublicComment.
 */
function blpgdumplinetomy($line) {
	$truefalse = array('f' => 0, 't' => 1);
	// Pad so that short lines do not raise undefined-offset notices.
	$fields = array_pad(explode("\t", $line), 7, null);
	$flag = function ($value) use ($truefalse) {
		return (is_string($value) && isset($truefalse[$value])) ? $truefalse[$value] : 0;
	};
	return array(
		'Domain' => $fields[0],
		'CreatedAt' => pgdatetomy((string) $fields[1]),
		'ModifiedAt' => pgdatetomy((string) $fields[2]),
		'Severity' => $fields[3],
		'RejectMedia' => $flag($fields[4]),
		'RejectReports' => $flag($fields[5]),
		'PublicComment' => $fields[6]
	);
}
2019-12-30 22:51:32 +01:00
// Fresh run (nothing to resume): refresh the blacklist from the reference
// instances, build the list of instances to crawl and save it to the job file.
if (!$riprendi) {
	$blacklistnew = array();
	$insts = array();
	lecho('Carico le istanze di riferimento per le blacklist...' . N);
	$res = mysqli_query($link, 'SELECT Domain FROM StartNodes')
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	lecho(mysqli_num_rows($res) . ' istanze di riferimento.' . N);
	while ($row = mysqli_fetch_assoc($res)) {
		$insts[] = $row['Domain'];
		// Peer discovery through each reference instance's
		// /api/v1/instance/peers endpoint is disabled; start instances are
		// read from the $opts['peersfp'] file further down.
		lecho('Recupero la blacklist di «' . $row['Domain'] . '» ... ');
		$buf = @file_get_contents('https://' . $row['Domain'] . '/domain_blocks.txt', false, $context);
		if ($buf !== false) {
			lecho('OK :-)' . N);
			// Split on a literal newline: this is fetched data, not script output.
			foreach (explode("\n", $buf) as $line) {
				// Skip comment lines and blank lines.
				if (preg_match('/(^#.*$)|(^\s*$)/', $line) === 0) {
					$brow = blpgdumplinetomy($line);
					if (!array_key_exists($brow['Domain'], $blacklist)) {
						$blacklistnew[$brow['Domain']] = $brow;
					}
					$blacklist[$brow['Domain']] = $brow;
				}
			}
		} else {
			lecho('ERRORE :-(' . N);
		}
	}

	// Store the newly discovered blacklist entries.
	foreach ($blacklistnew as $row) {
		if (!willtrunc($row['Domain'], 'Blacklist', 'Domain')) {
			// PublicComment receives the entry's public comment (not its domain).
			mysqli_query($link, 'INSERT INTO Blacklist (ID, Domain, CreatedAt, ModifiedAt, Severity, RejectMedia, RejectReports, PrivateComment, PublicComment) VALUES (NULL, \'' . myesc($link, $row['Domain']) . '\', \'' . myesc($link, $row['CreatedAt']) . '\', \'' . myesc($link, $row['ModifiedAt']) . '\', \'' . myesc($link, $row['Severity']) . '\', \'' . myesc($link, $row['RejectMedia']) . '\', \'' . myesc($link, $row['RejectReports']) . '\', NULL, \'' . myesc($link, (string) $row['PublicComment']) . '\')')
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		} else {
			// Severity 2 is a notify() argument; passed to lecho() it would
			// only have switched the message to "log only".
			notify('Non ho potuto inserire «' . $row['Domain'] . '» nella tabella delle istanze blacklistate perché il dominio è troppo lungo per il campo corrispondente nel DB.', 2);
		}
	}

	// Start instances from the peers file.
	lecho('Carico le istanze di partenza da «' . $opts['peersfp'] . '»...' . N);
	$peers = @file($opts['peersfp'], FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
	if ($peers === false)
		mexit('Non ho potuto aprire in lettura «' . $opts['peersfp'] . '».' . N, 1);
	foreach ($peers as $pdom) {
		if (!in_array($pdom, $insts)) {
			if (!willtrunc($pdom, 'Instances', 'URI')) {
				$insts[] = $pdom;
			} else {
				lecho('L’ istanza «' . $pdom . '» non sarà considerata perché il suo dominio è troppo lungo per il campo «URI» della tabella «Instances» nel DB' . N);
			}
		}
	}

	// Instances already known to the database.
	lecho('Carico le istanze note dal DB e aggiungo alla lista di quelle da controllare quelle che non ci sono già.' . N);
	$res = mysqli_query($link, 'SELECT URI FROM Instances')
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	while ($row = mysqli_fetch_assoc($res)) {
		if (!in_array($row['URI'], $insts))
			$insts[] = $row['URI'];
	}

	sort($insts);
	ksort($blacklist);
	ksort($blacklistnew);
	lecho('Istanze recuperate: ' . count($insts) . N);
	lecho('Istanze blacklistate: ' . count($blacklist) . ', di cui ' . count($blacklistnew) . ' nuove aggiunte al DB.' . N);

	// Persist the work list so that an interrupted run can be resumed.
	$instsf = @fopen($instsjfp, 'w')
		or mexit('Non ho potuto aprire in scrittura il file «' . $instsjfp . '».' . N, 1);
	foreach ($insts as $dom)
		fwrite($instsf, $dom . N);
	fclose($instsf);
}
2019-12-06 08:28:36 +01:00
2019-12-30 22:51:32 +01:00
/**
 * Tells whether a string is longer than the given table column allows.
 *
 * @param string $str The value to check.
 * @param string $tab Table name (lower-cased on Windows before the lookup).
 * @param string $col Column name; $tables[$tab][$col] is its maximum length.
 * @return bool True when $str has more characters than the column can hold.
 */
function willtrunc($str, $tab, $col) {
	global $tables, $iswin;
	if ($iswin)
		$tab = strtolower($tab);
	return mb_strlen($str, 'UTF-8') > $tables[$tab][$col];
}
/**
 * Converts a boolean to 1 or 0; anything that is not a boolean is
 * reported through notify() and treated as false.
 *
 * @param mixed  $bool The value to convert.
 * @param string $pre  Prefix for the notification text.
 * @return int 1 for true, 0 for false or for a non-boolean value.
 */
function b2i($bool, $pre) {
	if (!is_bool($bool)) {
		notify($pre . 'il valore «' . $bool . '» non è booleano, lo assumo come falso e ritorno «0».', 3);
		return 0;
	}
	return $bool ? 1 : 0;
}
2019-12-28 22:51:41 +01:00
/**
 * "Array Key Exists And Value Is Not Null": true only when $arr is an
 * array, contains $key, and the value stored there is not null.
 *
 * @param int|string $key The key to look for.
 * @param mixed      $arr The candidate array (taken by reference).
 * @return bool
 */
function akeavinn($key, &$arr) {
	return is_array($arr) && array_key_exists($key, $arr) && !is_null($arr[$key]);
}
2019-12-26 21:57:36 +01:00
/**
 * Maps empty or whitespace-only strings to null; any other string is
 * returned untouched.
 *
 * @param string $str The text to check.
 * @return string|null
 */
function nempty($str) {
	$blank = (preg_match('/^\s*$/', $str) === 1);
	return $blank ? null : $str;
}
2019-12-15 17:06:02 +01:00
2019-12-30 22:51:32 +01:00
/**
 * Joins, with $glue, the value stored under $key in each sub-array of $arr
 * ("sub-array implode").
 *
 * @param string     $glue Separator placed between the values.
 * @param int|string $key  Key to read from every sub-array.
 * @param array      $arr  List of arrays (taken by reference, not modified).
 * @return string The joined values; '' for an empty list.
 */
function subarimp($glue, $key, &$arr) {
	$values = array_map(function ($inarr) use ($key) {
		return $inarr[$key];
	}, $arr);
	return implode($glue, $values);
}
2019-12-17 13:19:12 +01:00
2019-12-26 21:57:36 +01:00
/**
 * Reports a notification: prints/logs it (HTML tags stripped) and stores
 * it in the Notifications table, cut to the length of the Notification
 * column. A failed INSERT terminates the script through mexit().
 *
 * @param string $msg Notification text.
 * @param int    $sev Severity (callers in this file pass 1, 2 or 3).
 */
function notify($msg, $sev) {
	global $link, $tables, $iswin;
	lecho('NOTIFICAZIÒ: ' . strip_tags($msg) . N);
	// Key of the table metadata; lower-cased on Windows.
	$tab = $iswin ? 'notifications' : 'Notifications';
	$text = mb_substr($msg, 0, $tables[$tab]['Notification'], 'UTF-8');
	// The severity is interpolated unquoted, so force it to an integer.
	mysqli_query($link, 'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen) VALUES (NULL, \'' . myesc($link, $text) . '\', ' . (int) $sev . ', \'' . microtime(true) . '\', 0)')
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
}
2020-03-11 13:57:20 +01:00
/** <LANGUAGE MANAGEMENT> */
/**
 * Performs a call to the Mastodon API of an instance.
 *
 * @param string $host The host to call (e.g.: "mastodon.bida.im")
 * @param string $path The API path (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The response decoded by json_decode (objects as arrays), or NULL when the call fails
 */
function get_api($host, $path) {
	global $context;
	$url = 'https://' . $host . $path;
	try {
		$raw = @file_get_contents($url, false, $context);
	} catch (Exception $e) {
		echo " error: ";
		echo $e;
		return NULL;
	}
	// A failed fetch yields false; report it as NULL.
	if ($raw === false) {
		return NULL;
	}
	return json_decode($raw, true);
}
/**
 * Returns the languages recognised in a toot, each with its probability.
 *
 * When the toot declares its language, that language gets probability 1;
 * otherwise the language is detected from the tag-stripped content.
 * Regional variants are merged into their base language (e.g. "zh-CN"
 * into "zh"), keeping the highest probability.
 *
 * @param mixed $toot The toot to analyse, as returned by the API
 * @return array Map from language code to the probability that the toot is in that language
 */
function get_toot_languages($toot) {
	$l = isset($toot['language']) ? $toot['language'] : NULL;
	if ($l !== NULL) {
		// the language is already declared in the toot: use it
		$langs = array($l => 1);
	} else {
		// the language is not declared: detect it from the text
		$text = strip_tags(isset($toot['content']) ? $toot['content'] : '');
		$ld = new Language;
		$langs = $ld->detect($text)->bestResults()->close();
	}
	// group derived languages under their base code, e.g.: "zh" and "zh-CN"
	$grouped_langs = array();
	foreach ($langs as $key => $value) {
		$l = explode('-', $key)[0];
		if (array_key_exists($l, $grouped_langs)) {
			$grouped_langs[$l] = max($grouped_langs[$l], $value);
		} else {
			$grouped_langs[$l] = $value;
		}
	}
	return $grouped_langs;
}
/**
 * Averages the per-toot language probabilities.
 *
 * Each language's weights are summed over all toots and divided by the
 * number of toots (toots where the language is absent count as 0).
 *
 * @param array $detected_langs Array of maps from language to probability
 * @return array Map from language to average probability
 */
function summary($detected_langs) {
	// First pass: total weight per language.
	$totals = array();
	foreach ($detected_langs as $langs) {
		foreach ($langs as $code => $weight) {
			$totals[$code] = (array_key_exists($code, $totals) ? $totals[$code] : 0) + $weight;
		}
	}
	// Second pass: divide every total by the number of toots.
	return array_map(function ($total) use ($detected_langs) {
		return $total / count($detected_langs);
	}, $totals);
}
/**
 * usort() helper: compares two arrays on their first element so that the
 * result is sorted in DESCENDING order.
 *
 * @param array $entry1 First array to compare
 * @param array $entry2 Second array to compare
 * @return int 1 when $entry1[0] is smaller than $entry2[0], 0 when they are equal, -1 when it is greater
 */
function sort_weights($entry1, $entry2) {
	$w1 = $entry1[0];
	$w2 = $entry2[0];
	if ($w1 == $w2)
		return 0;
	return ($w1 < $w2) ? 1 : -1;
}
/**
 * Picks the languages considered likely from a language/probability map.
 *
 * Languages are visited from the heaviest to the lightest; the visit stops
 * at the first one whose weight is below two thirds of the previously
 * accepted weight.
 *
 * @param array $summary Map from language to probability
 * @return string[] Languages considered likely, most probable first
 */
function get_languages($summary) {
	// Build [weight, code] pairs and order them with sort_weights.
	$ranked = array();
	foreach ($summary as $code => $weight) {
		array_push($ranked, array($weight, $code));
	}
	usort($ranked, 'sort_weights');

	$picked = array();
	$previous = 0;
	for ($k = 0, $n = count($ranked); $k < $n; $k++) {
		list($weight, $code) = $ranked[$k];
		if ($weight < $previous * 2 / 3) {
			break;
		}
		$picked[] = $code;
		$previous = $weight;
	}
	return $picked;
}
/**
 * Returns the languages that are likely used on the given instance,
 * judging from the toots of its local public timeline.
 *
 * @param string $host Hostname of the instance (e.g.: "mastodon.bida.im")
 * @return string[] Likely languages; empty when the timeline cannot be read or holds no toots
 */
function get_instance_langs($host) {
	$data = get_api($host, '/api/v1/timelines/public?local=true');
	// The timeline must decode to an array; anything else (NULL on failure,
	// a scalar) yields no languages.
	if (!is_array($data)) {
		return [];
	}
	// Keep only entries that are arrays: get_toot_languages() indexes each
	// toot by key, so scalar entries (e.g. the message of an error object)
	// must not reach it.
	$toots = array_filter($data, 'is_array');
	if (count($toots) === 0) {
		return [];
	}
	$detected_langs = array_map('get_toot_languages', $toots);
	return get_languages(summary($detected_langs));
}
2020-04-07 19:39:06 +02:00
require ( __DIR__ . '/../include/mb_ucfirst.php' );
2020-03-11 13:57:20 +01:00
2020-04-03 10:59:02 +02:00
/**
 * Determines the languages of an instance and makes sure each of them has
 * a row in the Languages table, creating missing rows with the language
 * name rendered in the language itself and in ca/en/es/fr/it.
 *
 * Reads the globals $info (instance data; its 'languages' entry is used
 * when $auto is false) and $instrow (its 'URI' is the context of
 * truncation notifications). A failed query ends the script via mexit().
 *
 * @param mixed  $instid Instance ID copied into every returned entry.
 * @param string $uri    Host passed to get_instance_langs() when $auto is true.
 * @param bool   $auto   True to detect languages from the instance timeline,
 *                       false to use the languages declared in $info.
 * @return array List of arrays with keys InstID, LangID, Pos (1-based
 *               position) and Code; empty when no language is known.
 */
function langs($instid, $uri, $auto) {
	global $info, $instrow, $link;
	$retlangs = array();
	$languages = array();
	if ($auto) {
		$languages = get_instance_langs($uri);
	} elseif (akeavinn('languages', $info)) {
		$languages = $info['languages'];
	}
	// The declared 'languages' comes from remote JSON: ignore anything that is not a list.
	if (!is_array($languages) || count($languages) == 0)
		return $retlangs;

	lecho('Lingue trovate: ' . implode(', ', $languages) . N);
	// Name columns of the Languages table => locale the name is rendered in
	// (null means "in the language itself").
	$namecols = array(
		'NameOrig' => null,
		'NameCA' => 'ca',
		'NameEN' => 'en',
		'NameES' => 'es',
		'NameFR' => 'fr',
		'NameIT' => 'it'
	);
	$pos = 0;
	foreach ($languages as $lang) {
		$res = mysqli_query($link, 'SELECT * FROM Languages WHERE Code=\'' . myesc($link, $lang) . '\'')
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		if (mysqli_num_rows($res) < 1) {
			// Unknown language: insert it.
			$ctx = '«' . $instrow['URI'] . '»';
			$values = array(myesc($link, truncs($lang, 'Languages', 'Code', $ctx)));
			foreach ($namecols as $col => $locale) {
				$name = mb_ucfirst(locale_get_display_name($lang, $locale === null ? $lang : $locale));
				$values[] = myesc($link, truncs($name, 'Languages', $col, $ctx));
			}
			$q = 'INSERT INTO Languages (ID, Code, ' . implode(', ', array_keys($namecols)) . ') VALUES (NULL, \'' . implode('\', \'', $values) . '\')';
			mysqli_query($link, $q)
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
			$langid = mysqli_insert_id($link);
		} else {
			$row = mysqli_fetch_assoc($res);
			$langid = $row['ID'];
		}
		mysqli_free_result($res);
		$pos++;
		$retlangs[] = array('InstID' => $instid, 'LangID' => $langid, 'Pos' => $pos, 'Code' => $lang);
	}
	return $retlangs;
}
/**
 * Returns what var_dump() would print for the given value, as a string
 * instead of sending it to the output.
 *
 * @param mixed $var The value to dump.
 * @return string The captured var_dump() output.
 */
function varbdump($var) {
	ob_start();
	var_dump($var);
	// Fetch the captured text and discard the buffer in one step.
	return ob_get_clean();
}
/**
 * Sorts an array of arrays, in place, by the value each sub-array holds
 * under $key, keeping the outer keys.
 *
 * Sub-arrays that share the same sort value are all kept.
 *
 * @param array      $arr The array of arrays to sort (modified in place).
 * @param int|string $key Key of the sub-array value to sort by.
 * @param bool       $rev False for ascending order, true for descending.
 */
function mdasortbykey(&$arr, $key, $rev = false) {
	uasort($arr, function ($a, $b) use ($key, $rev) {
		if ($a[$key] == $b[$key])
			return 0;
		$order = ($a[$key] < $b[$key]) ? -1 : 1;
		return $rev ? -$order : $order;
	});
}
2019-12-15 17:06:02 +01:00
2019-12-26 21:57:36 +01:00
/*
 * Nodeinfo ('https://'.$dom.'/nodeinfo/2.0') was added in 3.0.0
 * Trends ('https://'.$dom.'/api/v1/trends') was added in 3.0.0
 * Activity ('https://'.$dom.'/api/v1/instance/activity') was added in 2.1.2
 */
2019-12-17 13:19:12 +01:00
2019-12-26 21:57:36 +01:00
// Open the JSON dump file when requested: append when resuming, otherwise
// start a new file and write the opening brace of the top-level object.
if ($opts['jsonwrite']) {
	if ($riprendi)
		$mode = array('a', 'aggiunta');
	else
		$mode = array('w', 'scrittura');
	$jsonf = @fopen($opts['jsonfp'], $mode[0])
		or mexit('Non ho potuto aprire in modalità ' . $mode[1] . ' il file di dump delle info json «' . $opts['jsonfp'] . '».', 1);
	if ($mode[0] == 'w')
		fwrite($jsonf, '{' . N);
}

// Loop state: total number of instances, index of the next one to process,
// and the "OK" / "good" counters shown in the progress line.
$cinsts = count($insts);
$i = 0;
$qok = 0;
$qgood = 0;
if ($riprendi) {
	// The saved values were read from a text file: turn them back into integers.
	$i = (int) $currinst['i'];
	$qok = (int) $currinst['qok'];
	$qgood = (int) $currinst['qgood'];
}
while ( $i < $cinsts ) {
$dom = $insts [ $i ];
@ file_put_contents ( $currinstjfp , $dom . " \t " . $i . " \t " . $qok . " \t " . $qgood . N )
or mexit ( 'Non ho potuto aprire in scrittura il file «' . $currinstjfp . '».' , 1 );
2019-12-26 21:57:36 +01:00
$i ++ ;
2019-12-28 22:51:41 +01:00
$ok = true ;
2019-12-26 21:57:36 +01:00
$info = null ;
lecho ( '~~~~~~~~~~~~~~~' . N );
2019-12-28 22:51:41 +01:00
lecho ( 'Provo a recuperare info su «' . $dom . '» [' . $i . '/' . $cinsts . ' (' . $qok . ' OK; ' . $qgood . ' BUONE) - ' . round ( 100 / $cinsts * $i ) . '%]' . N );
2019-12-26 21:57:36 +01:00
lecho ( 'Provo a recuperare le informazioni API sull’ istanza ... ' );
$buf =@ file_get_contents ( 'https://' . $dom . '/api/v1/instance' , false , $context );
if ( $buf !== false ) {
$info = json_decode ( $buf , true );
2019-12-28 22:51:41 +01:00
if ( is_array ( $info )) {
lecho ( 'OK :-)' . N );
lecho ( 'Provo a recuperare le informazioni Nodeinfo sull’ istanza ... ' );
$buf =@ file_get_contents ( 'https://' . $dom . '/nodeinfo/2.0' , false , $context );
if ( $buf !== false ) {
lecho ( 'OK :-)' . N );
$info [ 'x-nodeinfo' ] = json_decode ( $buf , true );
2019-12-30 22:51:32 +01:00
// per ora teniamo solo quelle che, se si identificano, si identificano come mastodon o corgidon (derivato di mastodon)
// teniamo d'occhio le notifiche di cui sotto per includere eventualmente altri derivati di mastodon?
// visti fin qui, verificare cosa sono: epicyon
2020-01-02 13:19:38 +01:00
if ( is_array ( $info [ 'x-nodeinfo' ]) && array_key_exists ( 'software' , $info [ 'x-nodeinfo' ]) && array_key_exists ( 'name' , $info [ 'x-nodeinfo' ][ 'software' ]) &&! is_null ( $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ])) {
if ( preg_match ( '/^mastodon|corgidon/' , $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ]) === 0 )
$ok = false ;
$res = mysqli_query ( $link , 'SELECT Name FROM Platforms WHERE Name=\'' . myesc ( $link , $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ]) . '\'' )
2020-04-03 10:59:02 +02:00
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
2020-01-02 13:19:38 +01:00
if ( mysqli_num_rows ( $res ) < 1 ) {
$res = mysqli_query ( $link , 'INSERT INTO Platforms (Name) VALUES (\'' . myesc ( $link , truncs ( $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ], 'Platforms' , 'Name' , '«' . $info [ 'uri' ] . '»' )) . '\')' )
2020-04-03 10:59:02 +02:00
or mexit ( __LINE__ . ': ' . mysqli_error ( $link ) . N , 3 );
2020-05-02 22:49:30 +02:00
notify ( '«' . $info [ 'uri' ] . '» utilizza come software «' . $info [ 'x-nodeinfo' ][ 'software' ][ 'name' ] . '»; l’ ho aggiunto alla tabella delle piattaforme incontrate. Se non si tratta di mastodon o corgidon, che già vengono accettati, sarebbe buona cosa verificare se è una variante di mastodon e quanto è compatibile, per valutare se accettare le istanze che lo utilizzano.' , 1 );
2020-01-02 13:19:38 +01:00
}
2019-12-17 13:19:12 +01:00
}
2019-12-28 22:51:41 +01:00
} else {
lecho ( 'ERRORE :-(' . N );
2019-12-17 13:19:12 +01:00
}
2019-12-28 22:51:41 +01:00
if ( $ok && array_key_exists ( 'version' , $info )) {
if ( $info [ 'version' ] >= '2.1.2' ) {
lecho ( 'Provo a recuperare le informazioni API sull’ attività dell’ istanza ... ' );
$buf =@ file_get_contents ( 'https://' . $dom . '/api/v1/instance/activity' , false , $context );
if ( $buf !== false ) {
lecho ( 'OK :-)' . N );
$info [ 'x-activity' ] = json_decode ( $buf , true );
} else {
lecho ( 'ERRORE :-(' . N );
}
2019-12-26 21:57:36 +01:00
}
2019-12-28 22:51:41 +01:00
if ( $info [ 'version' ] >= '3.0.0' ) {
lecho ( 'Provo a recuperare le informazioni API sui trends dell’ istanza ... ' );
$buf =@ file_get_contents ( 'https://' . $dom . '/api/v1/trends' , false , $context );
if ( $buf !== false ) {
lecho ( 'OK :-)' . N );
$info [ 'x-trends' ] = json_decode ( $buf , true );
} else {
lecho ( 'ERRORE :-(' . N );
}
2019-12-26 21:57:36 +01:00
}
2019-12-06 08:28:36 +01:00
}
2019-12-28 22:51:41 +01:00
} else {
$ok = false ;
lecho ( 'ERRORE :-(' . N );
2019-12-01 09:07:45 +01:00
}
} else {
2019-12-28 22:51:41 +01:00
// The instance did not answer at all. This is also the limbo of instances
// that no longer respond, so check whether the domain is already in the
// database and, if it is, record a failed check (Status 0) in InstChecks.
$ok = false;
lecho('ERRORE :-(' . N);

// $tables is indexed with a lower-case table name on Windows; its 'URI' entry
// is used as the maximum number of characters of the domain to look up.
$uritable = $iswin ? 'instances' : 'Instances';
$knownuri = mb_substr($dom, 0, $tables[$uritable]['URI'], 'UTF-8');
$lookupsql = 'SELECT * FROM Instances WHERE URI=\'' . myesc($link, $knownuri) . '\'';
$res = mysqli_query($link, $lookupsql);
if (!$res)
	mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);

if (mysqli_num_rows($res) > 0) {
	lecho('«' . $dom . '» non risponde, ma è presente nel database; aggiorno InstChecks.' . N);
	$row = mysqli_fetch_assoc($res);
	$failedchecksql = 'INSERT INTO InstChecks (InstID, Time, Status) VALUES (' . $row['ID'] . ', ' . time() . ', 0)';
	if (!mysqli_query($link, $failedchecksql))
		mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
}
2019-12-06 08:28:36 +01:00
}
2019-12-30 22:51:32 +01:00
// Log everything that was retrieved for this domain and, when the
// 'jsonwrite' option is set, append it to the JSON dump file as one
// "domain": {...} member.
if (is_array($info) && count($info) > 0) {
	// Encode once and reuse. JSON_PARTIAL_OUTPUT_ON_ERROR replaces values that
	// cannot be encoded (e.g. invalid UTF-8 sent by the remote server) with
	// null instead of making the whole encoding fail.
	$infojson = json_encode($info, JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR);
	lecho('Dumpone json di tutte le info recuperate:' . N . $infojson . N, true);
	if ($opts['jsonwrite']) {
		// The domain is JSON-encoded as well, so that a quote or backslash in
		// it cannot break the dump; for ordinary host names the output is
		// the same as the plain quoted form.
		$domkey = json_encode((string) $dom);
		if ($domkey === false)
			$domkey = '"' . $dom . '"';
		fwrite($jsonf, $domkey . ': ' . $infojson . ',' . N);
	}
}
2020-05-02 19:59:53 +02:00
if ( $ok && ! is_null ( $info ) && akeavinn ( 'uri' , $info ) && ! is_null ( nempty ( $info [ 'uri' ])) && ! willtrunc ( $info [ 'uri' ], 'Instances' , 'URI' ) && akeavinn ( 'version' , $info ) && preg_match ( '/pleroma|pixelfed/i' , $info [ 'version' ]) !== 1 ) {
2019-12-30 22:51:32 +01:00
// Count the instance among the accepted ones and build $instrow, the row for
// the Instances table, from the data in $info.
// Helpers used here are defined elsewhere in this file: akeavinn() gates each
// source key, truncs()/truncn() receive table and column names plus a label
// for the instance, nempty() wraps the string values, b2i() converts the
// boolean flags — NOTE(review): semantics inferred from names, confirm
// against their definitions.
$qok++;

// Every column starts from its default; the keys are in the order later used
// to build the INSERT/UPDATE statement.
$instrow = array(
	'ID' => null,
	'FirstSeen' => null,
	'New' => 0,
	'Good' => 0,
	'Chosen' => 0,
	'Visible' => 0,
	'Blacklisted' => 0,
	'URI' => null,
	'Title' => null,
	'ShortDesc' => null,
	'LongDesc' => null,
	'OurDesc' => null,
	'OurDescEN' => null,
	'LocalityID' => null,
	'OurLangsLock' => 0,
	'Email' => null,
	'Software' => null,
	'Version' => null,
	'UserCount' => null,
	'StatusCount' => null,
	'DomainCount' => null,
	'ActiveUsersMonth' => null,
	'ActiveUsersHalfYear' => null,
	'Thumb' => null,
	'RegOpen' => null,
	'RegReqApproval' => null,
	'MaxTootChars' => null,
	'AdmAccount' => null,
	'AdmDisplayName' => null,
	'AdmCreatedAt' => null,
	'AdmNote' => null,
	'AdmURL' => null,
	'AdmAvatar' => null,
	'AdmHeader' => null,
	'GuestID' => null,
	'LastGuestEdit' => null
);

$instrow['Blacklisted'] = array_key_exists($info['uri'], $blacklist) ? 1 : 0;
$instrow['URI'] = $info['uri'];
// Label handed to the truncation helpers to identify the instance.
$instlabel = '«' . $instrow['URI'] . '»';

// Top-level text fields: source key => Instances column.
foreach (array('title' => 'Title', 'short_description' => 'ShortDesc', 'description' => 'LongDesc', 'email' => 'Email', 'version' => 'Version') as $srckey => $dstcol) {
	if (akeavinn($srckey, $info))
		$instrow[$dstcol] = nempty(truncs($info[$srckey], 'Instances', $dstcol, $instlabel));
}

// Numeric counters under "stats".
if (akeavinn('stats', $info)) {
	foreach (array('user_count' => 'UserCount', 'status_count' => 'StatusCount', 'domain_count' => 'DomainCount') as $srckey => $dstcol) {
		if (akeavinn($srckey, $info['stats']))
			$instrow[$dstcol] = truncn($info['stats'][$srckey], 'Instances', $dstcol, $instlabel);
	}
}

if (akeavinn('thumbnail', $info))
	$instrow['Thumb'] = nempty(truncs($info['thumbnail'], 'Instances', 'Thumb', $instlabel));
if (akeavinn('max_toot_chars', $info))
	$instrow['MaxTootChars'] = truncn($info['max_toot_chars'], 'Instances', 'MaxTootChars', $instlabel);

// Registration flags.
foreach (array('registrations' => 'RegOpen', 'approval_required' => 'RegReqApproval') as $srckey => $dstcol) {
	if (akeavinn($srckey, $info))
		$instrow[$dstcol] = b2i($info[$srckey], 'Istanza ' . $instlabel . ': ');
}

// Administrator ("contact_account") details.
if (akeavinn('contact_account', $info)) {
	foreach (array('acct' => 'AdmAccount', 'display_name' => 'AdmDisplayName') as $srckey => $dstcol) {
		if (akeavinn($srckey, $info['contact_account']))
			$instrow[$dstcol] = nempty(truncs($info['contact_account'][$srckey], 'Instances', $dstcol, $instlabel));
	}
	if (akeavinn('created_at', $info['contact_account']))
		$instrow['AdmCreatedAt'] = pgdatetomy($info['contact_account']['created_at']);
	// The note keeps only <a> tags.
	if (akeavinn('note', $info['contact_account']))
		$instrow['AdmNote'] = nempty(truncs(strip_tags($info['contact_account']['note'], '<a>'), 'Instances', 'AdmNote', $instlabel));
	foreach (array('url' => 'AdmURL', 'avatar' => 'AdmAvatar', 'header' => 'AdmHeader') as $srckey => $dstcol) {
		if (akeavinn($srckey, $info['contact_account']))
			$instrow[$dstcol] = nempty(truncs($info['contact_account'][$srckey], 'Instances', $dstcol, $instlabel));
	}
}

// Software name and active-user figures from the nodeinfo document.
if (akeavinn('x-nodeinfo', $info)) {
	if (akeavinn('software', $info['x-nodeinfo']) && akeavinn('name', $info['x-nodeinfo']['software']))
		$instrow['Software'] = nempty(truncs($info['x-nodeinfo']['software']['name'], 'Instances', 'Software', $instlabel));
	if (akeavinn('usage', $info['x-nodeinfo']) && akeavinn('users', $info['x-nodeinfo']['usage'])) {
		foreach (array('activeMonth' => 'ActiveUsersMonth', 'activeHalfyear' => 'ActiveUsersHalfYear') as $srckey => $dstcol) {
			if (akeavinn($srckey, $info['x-nodeinfo']['usage']['users']))
				$instrow[$dstcol] = truncn($info['x-nodeinfo']['usage']['users'][$srckey], 'Instances', $dstcol, $instlabel);
		}
	}
}
2019-12-28 22:51:41 +01:00
// Decide whether the instance is "good" (eligible). $whynot collects one
// human-readable reason for every criterion that is not met; the instance is
// good only when the list stays empty.
$whynot = array();

if ($instrow['Blacklisted'] == 1)
	$whynot[] = 'è nella blacklist';

// Registrations must be known and open.
if (is_null($instrow['RegOpen'])) {
	$whynot[] = 'non se ne conosce lo stato delle registrazioni (aperte/chiuse)';
} elseif ($instrow['RegOpen'] == 0) {
	$whynot[] = 'ha le registrazioni chiuse';
}

// The user count must be known and between 10 and 30000.
if (is_null($instrow['UserCount'])) {
	$whynot[] = 'non se ne conosce il numero di utenti';
} elseif ($instrow['UserCount'] < 10 || $instrow['UserCount'] > 30000) {
	$whynot[] = 'il numero di utenti non è compreso tra 10 e 30.000';
}

// At least 500 known peer instances.
if (is_null($instrow['DomainCount'])) {
	$whynot[] = 'non se ne conosce il numero di istanze note';
} elseif ($instrow['DomainCount'] < 500) {
	$whynot[] = 'il numero di istanze note è minore di 500';
}

// Activity: use the monthly active users when available, otherwise fall back
// to the average number of statuses per user.
if (!is_null($instrow['ActiveUsersMonth'])) {
	if ($instrow['ActiveUsersMonth'] < 10)
		$whynot[] = 'il numero di utenti attivi nell’ ultimo mese è minore di 10';
} elseif (is_numeric($instrow['StatusCount']) && is_numeric($instrow['UserCount']) && $instrow['UserCount'] > 0) {
	// The average is computed only with a known, positive user count:
	// dividing by a null or zero UserCount raises DivisionByZeroError on
	// PHP 8. Such an instance is already rejected by the user-count check.
	if ($instrow['StatusCount'] / $instrow['UserCount'] < 10)
		$whynot[] = 'il numero medio di toots per utente è minore di 10';
}

if (count($whynot) == 0) {
	$instrow['Good'] = 1;
	lecho('Siamo in presenza di un’ istanza BUONA! :-)' . N);
	$qgood++;
} else {
	lecho('Siamo in presenza di un’ istanza CATTIVA: ' . implode('; ', $whynot) . ' :-(' . N);
}
2019-12-26 21:57:36 +01:00
// Store the instance: UPDATE the existing Instances row when the URI is
// already known, INSERT a new one otherwise, and keep InstLangs and
// InstOurLangs in sync. $instid is set to the row ID in both cases.
$res = mysqli_query($link, 'SELECT * FROM Instances WHERE URI=\'' . myesc($link, $instrow['URI']) . '\'')
	or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);

// The URI is supplied by the remote server and ends up inside HTML links in
// the notifications, so it is HTML-escaped there (a no-op for ordinary host
// names). Log messages keep the raw value.
$htmluri = htmlspecialchars((string) $instrow['URI'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

if (mysqli_num_rows($res) > 0) {
	lecho('«' . $instrow['URI'] . '» è già presente nel DB, la aggiorno...' . N);
	$oldinstrow = mysqli_fetch_assoc($res);
	$instid = $oldinstrow['ID'];

	// These columns are not derived from the crawl: carry them over from the
	// stored row so that the UPDATE below does not reset them.
	foreach (array('ID', 'FirstSeen', 'New', 'Chosen', 'Visible', 'OurDesc', 'OurDescEN', 'LocalityID', 'OurLangsLock', 'GuestID', 'LastGuestEdit') as $keepcol)
		$instrow[$keepcol] = $oldinstrow[$keepcol];

	$instlink = '«<a href="viewinst.php?id=' . $instrow['ID'] . '">' . $htmluri . '</a>»';

	// Notify changes of eligibility and of the descriptions.
	if ($instrow['Good'] == 1 && $oldinstrow['Good'] == 0) {
		notify('L’ istanza ' . $instlink . ' non era papabile, ma lo è diventata!', 1);
	} elseif ($instrow['Good'] == 0 && $oldinstrow['Good'] == 1) {
		notify('L’ istanza ' . $instlink . ' era papabile, ma non lo è più per i seguenti motivi: ' . implode('; ', $whynot), 3);
	}
	if ($instrow['ShortDesc'] != $oldinstrow['ShortDesc'])
		notify('La «Descrizione breve» dell’ istanza ' . $instlink . ' è cambiata.', 2);
	if ($instrow['LongDesc'] != $oldinstrow['LongDesc'])
		notify('La «Descrizione lunga» dell’ istanza ' . $instlink . ' è cambiata.', 2);

	// Build "col='value'" / "col=NULL" assignments for every column.
	$assignments = array();
	foreach ($instrow as $field => $value)
		$assignments[] = $field . '=' . (is_null($value) ? 'NULL' : '\'' . myesc($link, $value) . '\'');
	$query = 'UPDATE Instances SET ' . implode(', ', $assignments) . ' WHERE Instances.ID=' . $instrow['ID'];
	lecho('QUERONA DI UPDATE: «' . $query . '».' . N);
	mysqli_query($link, $query)
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);

	// Languages declared by the instance: rewrite InstLangs only when the
	// list returned by langs() differs from the stored one.
	$res = mysqli_query($link, 'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID=' . $instrow['ID'] . ' ORDER BY Pos ASC')
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	$oldinstlangs = array();
	while ($row = mysqli_fetch_assoc($res))
		$oldinstlangs[] = $row;
	$instlangs = langs($instrow['ID'], $instrow['URI'], false);
	if ($instlangs != $oldinstlangs) {
		notify('La lista delle lingue utilizzate dichiarate dall’ istanza ' . $instlink . ' è cambiata da «' . subarimp(', ', 'Code', $oldinstlangs) . '» a «' . subarimp(', ', 'Code', $instlangs) . '».', 2);
		mysqli_query($link, 'DELETE FROM InstLangs WHERE InstID=' . $instrow['ID'])
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		foreach ($instlangs as $row) {
			mysqli_query($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')')
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		}
	}

	// Languages from the second langs() mode (third argument true): refresh
	// InstOurLangs unless the list is locked, and only when something was
	// returned.
	if ($instrow['OurLangsLock'] == 0) {
		$instourlangs = langs($instrow['ID'], $instrow['URI'], true);
		if (count($instourlangs) > 0) {
			// Clear InstOurLangs, the table re-filled just below. Deleting
			// from InstLangs here would wipe the declared languages and
			// leave the old InstOurLangs rows in place.
			mysqli_query($link, 'DELETE FROM InstOurLangs WHERE InstID=' . $instrow['ID'])
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
			foreach ($instourlangs as $row) {
				mysqli_query($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')')
					or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
			}
		}
	}
} else {
	lecho('«' . $info['uri'] . '» non è già presente nel DB, la aggiungo...' . N);
	$instrow['FirstSeen'] = time();
	$instrow['New'] = 1;

	// Column list and matching quoted/NULL value list for the INSERT.
	$fields = array_keys($instrow);
	$valuelist = array();
	foreach ($instrow as $field => $value)
		$valuelist[] = is_null($value) ? 'NULL' : '\'' . myesc($link, $value) . '\'';
	$values = implode(', ', $valuelist);
	$query = 'INSERT INTO Instances (' . implode(', ', $fields) . ') VALUES (' . $values . ')';
	lecho('QUERONA DI INSERT: «' . $query . '»' . N);
	mysqli_query($link, $query)
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	$instid = mysqli_insert_id($link);
	notify('Ho trovato una nuova istanza: «<a href="viewinst.php?id=' . $instid . '">' . $htmluri . '</a>».', 1);

	// A new instance has no language rows yet: insert both lists as returned.
	$instlangs = langs($instid, $instrow['URI'], false);
	foreach ($instlangs as $row) {
		mysqli_query($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')')
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	}
	$instourlangs = langs($instid, $instrow['URI'], true);
	foreach ($instourlangs as $row) {
		mysqli_query($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')')
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	}

	if ($instrow['Good'] == 1)
		notify('La nuova istanza «<a href="viewinst.php?id=' . $instid . '">' . $htmluri . '</a>» è papabile!', 1);
}
2019-12-28 22:51:41 +01:00
// Replace the stored activity rows of this instance with the ones just
// downloaded. Pos numbers the stored rows starting from 1, in the order the
// API returned them.
if (array_key_exists('x-activity', $info) && is_array($info['x-activity'])) {
	// Checked like every other query here: if the DELETE failed silently,
	// the INSERTs below would pile new rows on top of the stale ones.
	mysqli_query($link, 'DELETE FROM InstActivity WHERE InstID=' . $instid)
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	$pos = 0;
	foreach ($info['x-activity'] as $buf) {
		// The data comes from the remote server: skip items that are not
		// arrays or lack any of the four expected fields.
		if (is_array($buf) && akeavinn('week', $buf) && akeavinn('statuses', $buf) && akeavinn('logins', $buf) && akeavinn('registrations', $buf)) {
			$pos++;
			$query = 'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\'' . $instid . '\', \'' . myesc($link, $buf['week']) . '\', \'' . myesc($link, $buf['statuses']) . '\', \'' . myesc($link, $buf['logins']) . '\', \'' . myesc($link, $buf['registrations']) . '\', ' . $pos . ')';
			mysqli_query($link, $query)
				or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
		}
	}
}
// Replace the stored trends of this instance with the ones just downloaded.
// Each trend gets a score (sum over its history of accounts/uses), the list
// is ordered with mdasortbykey() on that score, and Pos records the
// resulting position starting from 1.
if (array_key_exists('x-trends', $info) && is_array($info['x-trends'])) {
	$trends = array();
	foreach ($info['x-trends'] as $buf) {
		// The data comes from the remote server: a trend is kept only if it
		// is an array with name, url and a history whose first entry has a
		// scalar 'day'. Previously an empty history produced an
		// undefined-offset notice and an empty LastDay.
		if (is_array($buf) && akeavinn('name', $buf) && akeavinn('url', $buf) && akeavinn('history', $buf) && is_array($buf['history'])
			&& isset($buf['history'][0]['day']) && is_scalar($buf['history'][0]['day'])) {
			$trend = 0;
			foreach ($buf['history'] as $row) {
				// Ignore history entries without usable numbers; uses > 0
				// also avoids a division by zero.
				if (is_array($row) && isset($row['uses'], $row['accounts']) && is_numeric($row['uses']) && is_numeric($row['accounts']) && $row['uses'] > 0)
					$trend += ($row['accounts'] / $row['uses']);
			}
			$trends[] = array(
				'InstID' => $instid,
				'LastDay' => $buf['history'][0]['day'],
				'Name' => $buf['name'],
				'URL' => $buf['url'],
				'Pos' => null,
				'trend' => $trend
			);
		}
	}
	mdasortbykey($trends, 'trend', true);

	// Checked like every other query here, so stale rows are never mixed
	// with the new ones.
	mysqli_query($link, 'DELETE FROM InstTrends WHERE InstID=' . $instid)
		or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	$pos = 0;
	foreach ($trends as $trend) {
		$pos++;
		// LastDay is remote data too and must be escaped like Name and URL.
		$query = 'INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES (' . $trend['InstID'] . ', \'' . myesc($link, $trend['LastDay']) . '\', \'' . myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«' . $instrow['URI'] . '»')) . '\', \'' . myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«' . $instrow['URI'] . '»')) . '\', ' . $pos . ')';
		mysqli_query($link, $query)
			or mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
	}
}
// Record a successful check (Status 1) for this instance at the current time.
$okchecksql = 'INSERT INTO InstChecks (InstID, Time, Status) VALUES (' . $instid . ', ' . time() . ', 1)';
if (!mysqli_query($link, $okchecksql))
	mexit(__LINE__ . ': ' . mysqli_error($link) . N, 3);
2019-12-01 09:07:45 +01:00
}
}
2019-12-26 21:57:36 +01:00
// Normal end of the crawl: close the database connection, terminate the JSON
// dump, remove the two state files and exit with status 0.
mysqli_close($link);
// The signal handler closes $link and $jsonf whenever they are truthy; reset
// them once closed so that a signal arriving during shutdown cannot close
// the same handle twice.
$link = false;

if ($opts['jsonwrite']) {
	// Final member and closing brace of the JSON dump.
	fwrite($jsonf, '"Fine?": true' . N . '}' . N);
	fclose($jsonf);
	$jsonf = false;
}

unlink($instsjfp);
unlink($currinstjfp);

exit(0);
2019-12-01 09:07:45 +01:00
?>