2022-12-17 15:00:36 +01:00
#!/usr/bin/php
< ? php
/*
This program is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program . If not , see < http :// www . gnu . org / licenses />.
*/
define ( 'N' , " \n " );
define ( 'SNAME' , basename ( __FILE__ ));
define ( 'LIBDP' , '/../site/mustard/include' );
require ( __DIR__ . LIBDP . '/parsetime.php' );
require ( __DIR__ . LIBDP . '/gurl.php' );
require ( __DIR__ . LIBDP . '/tables.php' );
require ( __DIR__ . LIBDP . '/mb_ucfirst.php' );
2022-12-20 23:00:22 +01:00
require ( __DIR__ . LIBDP . '/mb_lcfirst.php' );
2022-12-17 15:00:36 +01:00
require ( __DIR__ . LIBDP . '/ghs.php' );
require ( __DIR__ . LIBDP . '/ght.php' );
require ( __DIR__ . '/lib/vendor/autoload.php' );
use LanguageDetection\Language ;
use function mysqli_real_escape_string as myesc ;
// True when PHP_OS starts with "WIN" (compared case-insensitively), false otherwise.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');
// Ticks let handlers registered through pcntl_signal() be dispatched while the script runs.
declare(ticks=1);
if (function_exists('pcntl_signal')) {
    /**
     * Common handler for the termination signals registered below: prints a
     * newline, then hands a shutdown message and the value 0 over to mexit().
     *
     * @param int $signal number of the received signal
     */
    function signalHandler($signal)
    {
        echo N;
        mexit('received signal «' . $signal . '», shutting down.' . N, 0);
    }

    // SIGTERM: termination ('kill' was called)
    // SIGHUP:  terminal log-out
    // SIGINT:  interrupted (Ctrl-C is pressed)
    foreach ([SIGTERM, SIGHUP, SIGINT] as $signo) {
        pcntl_signal($signo, 'signalHandler');
    }
}
// Default settings; the command-line parser that follows overrides them.
$opts = [
    'hostname'     => null,
    'timeout'      => 10,
    // if an instance has not been responding for more than this value of seconds, declare it dead
    'deadline'     => 62 * 24 * 60 * 60,
    // number of toots to check with the automatic language detection function
    'ldtoots'      => 40,
    'dryrun'       => false,
    'fetchusers'   => false,
    'udiratts'     => 5,
    'udirfailst'   => 90,
    'logminmsglev' => 1,
    'tuiminmsglev' => 1,
];
// Message importance levels; the *minmsglev options above hold indexes into this list.
$msglevs = ['debug', 'info', 'warning', 'error', 'none'];
// Singular/plural unit labels passed to ght() when rendering a time span.
$ghtsa = [
    [' day', ' days'],
    [' hour', ' hours'],
    [' minute', ' minutes'],
    [' second', ' seconds'],
];
// Help text echoed by «-h» / «--help». The DEFAULT lines are built from the
// current values in $opts: time values are rendered through ght(), message
// levels through $msglevs. The option keys must match those defined in $opts
// exactly (no padding spaces), otherwise the defaults cannot be looked up.
$help = ' SYNOPSIS
'.SNAME.' < hostname > [ options ]
DESCRIPTION
This script tries to fetch info about the fediverse instance at the given
hostname and insert or update them in mastostart’ s database .
OPTIONS
- D , -- deadline < time >
If an instance has not been responding for longer than this time , declare
it dead . See section «TIME SPECIFICATION» below to see how to specify time .
DEFAULT : '.ght($opts['deadline'],$ghtsa).'
- l , -- ldtoots < number >
This option defines the number of toots the script will try to fetch from
the local public timelines , to try and guess the most used languages of each
instance . Its minimum value is 10 , its maximum value is 40.
DEFAULT : '.$opts['ldtoots'].'
- f , -- fetchusers
If this option is set , the script will try to fetch users’ info from the
considered instance’ s users directory , and store them in the database .
- r , -- udiratts < number >
This option defines how many attempts the script will do at fetching a chunk
of users’ info from the profile directory , before giving up .
DEFAULT : '.$opts['udiratts'].'
- s , -- udirfailst < time >
This option defines how long the script will wait after each failed attempt
at fetching a chunk of users’ info from the profile directory ( see above )
before retrying .
DEFAULT : '.ght($opts['udirfailst'],$ghtsa).'
- t , -- timeout < time >
Sets the timeout for every connection attempt . See section «TIME
SPECIFICATION» below to see how to specify time .
DEFAULT : '.ght($opts['timeout'],$ghtsa).'
- d , -- dryrun
If this option is set , the script won’ t write anything in the database .
- L , -- logminmsglev < «debug» | «info» | «warning» | «error» | «none» >
Defines the minimum “importance level” of messages to be written into the
log file «run / [ instance hostname ] . log» . There are 4 “importance levels” , in
this order of importance : «debug» , «info» , «warning» , «error» .
Setting this option to any of these values will write into the logfile all
the messages with the specified or a greater level ; setting it to the
special value «none» will completely disable logging to file .
DEFAULT : '.$msglevs[$opts['logminmsglev']].'
- T , -- tuiminmsglev < «debug» | «info» | «warning» | «error» | «none» >
Defines the minimum “importance level” of messages to be written to the
terminal . See the option above to understand how this works .
DEFAULT : '.$msglevs[$opts['tuiminmsglev']].'
- h , -- help
If this option is set , the script will show this help text and exit .
TIME SPECIFICATION
An example is better than ~ 5148 words :- )
To specify 1 year , 6 months ( made of 31 days ), 2 weeks , 3 days , 5 hours ,
7 minutes and 12 seconds you can use «1y , 6 M , 2 w , 3 d , 5 h , 7 m , 12 s» ; but you can
also use «12s , 7 m , 5 h , 3 d , 2 w , 6 M , 1 y» , or even «18M , 1 w , 1 w , 2 d , 1 d , 3 h , 2 h , 7 m , 12 s» .
LICENSE
This program comes with ABSOLUTELY NO WARRANTY ; for details see the source .
This is free software , and you are welcome to redistribute it under certain
conditions ; see < http :// www . gnu . org / licenses /> for details . ' . N ;
// Command-line parsing. Options that take a value consume the following
// argument ($i is advanced past it); the first argument that is not an option
// is taken as the hostname. Any problem is reported through mexit().
for ($i = 1; $i < $argc; $i++) {
    switch ($argv[$i]) {
        case '-f':
        case '--fetchusers':
            $opts['fetchusers'] = true;
            break;

        case '-r':
        case '--udiratts':
            // the check accepts any integer >= 1, and the message says so
            if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 < 1) {
                mexit('option «' . $argv[$i] . '» requires a number >= 1 as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['udiratts'] = $argv[$i] + 0;
            break;

        case '-s':
        case '--udirfailst':
            if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false) {
                mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['udirfailst'] = parsetime($argv[$i]);
            break;

        case '-t':
        case '--timeout':
            if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false) {
                mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['timeout'] = parsetime($argv[$i]);
            break;

        case '-D':
        case '--deadline':
            if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false) {
                mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['deadline'] = parsetime($argv[$i]);
            break;

        case '-l':
        case '--ldtoots':
            if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 > 40 || $argv[$i + 1] + 0 < 10) {
                mexit('option «' . $argv[$i] . '» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['ldtoots'] = $argv[$i] + 0;
            break;

        case '-d':
        case '--dryrun':
            $opts['dryrun'] = true;
            break;

        case '-L':
        case '--logminmsglev':
            if ($i + 1 >= $argc || !in_array(strtolower($argv[$i + 1]), $msglevs, true)) {
                mexit('option «' . $argv[$i] . '» requires a “message importance level” value as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            // the level is stored as its index in $msglevs
            $opts['logminmsglev'] = array_search(strtolower($argv[$i]), $msglevs, true);
            break;

        case '-T':
        case '--tuiminmsglev':
            if ($i + 1 >= $argc || !in_array(strtolower($argv[$i + 1]), $msglevs, true)) {
                mexit('option «' . $argv[$i] . '» requires a “message importance level” value as an argument (use «-h» to read help).' . N, 1);
            }
            $i++;
            $opts['tuiminmsglev'] = array_search(strtolower($argv[$i]), $msglevs, true);
            break;

        case '-h':
        case '--help':
            echo $help;
            exit(0);

        default:
            // An empty argument is rejected explicitly: reading its first
            // character would hit an uninitialised string offset, and an empty
            // hostname is of no use anyway.
            if (is_null($opts['hostname']) && $argv[$i] !== '' && $argv[$i][0] !== '-') {
                $opts['hostname'] = $argv[$i];
            } else {
                mexit('don’ t know how to interpret «' . $argv[$i] . '», please read the help text using «-h» or «--help».' . N, 1);
            }
    }
}
if (is_null($opts['hostname'])) {
    mexit('you didn’ t specify an hostname (you can read the help text using «-h» or «--help»).' . N, 1);
}
foreach ( $msglevs as $key => $val ) $msglevs [ $key ] = ucfirst ( $val );
// Read the configuration file; a missing, unreadable or empty file is fatal.
$inifp = __DIR__ . '/../conf/mustard.ini';
$iniarr = @parse_ini_file($inifp);
if (!$iniarr) {
    mexit('could not open config file «' . $inifp . '»' . N, 1);
}

// Connect to MySQL with the admin credentials found in the configuration.
try {
    $link = @mysqli_connect(
        $iniarr['db_host'],
        $iniarr['db_admin_name'],
        $iniarr['db_admin_password'],
        $iniarr['db_name'],
        $iniarr['db_port'],
        $iniarr['db_socket']
    );
} catch (Exception $error) {
    mexit('could not connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);
}
// for php versions < 8
if ($link === false) {
    mexit('could not connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);
}

try {
    $res = mysqli_set_charset($link, 'utf8mb4');
} catch (Exception $error) {
    mexit('could not set «utf8mb4» charset for MySQL: ' . mysqli_error($link) . '.' . N, 1, true);
}
// for php versions < 8
if ($res === false) {
    mexit('could not set MySQL charset: ' . mysqli_errno($link) . ': ' . mysqli_error($link) . '.' . N, 1, true);
}

// Build a regex alternation ("a|b|c") out of the names of the platforms
// flagged with Consider=1; each name is quoted for use between «/» delimiters.
$mastodons = [];
$res = myq($link, 'SELECT Name FROM Platforms WHERE Consider=1', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $mastodons[] = preg_quote($row['Name'], '/');
}
if (empty($mastodons)) {
    mexit('in table «Platforms», there is no platform to be considered!' . N, 1);
}
$mastodons = implode('|', $mastodons);

$tables = tables($link);
//print_r($tables);
2022-12-17 17:33:46 +01:00
// Open (and truncate) the per-instance log file, unless logging to file is
// disabled (level index 4, i.e. «none»).
if ($opts['logminmsglev'] < 4) {
    // The hostname becomes part of a path opened in write mode, so refuse
    // anything that could escape the «run» directory (path separators) or
    // make fopen() throw (NUL byte). No valid hostname contains these.
    if (strpbrk($opts['hostname'], "/\\\0") !== false) {
        mexit('«' . $opts['hostname'] . '»: hostname contains characters not allowed in a log file name.' . N, 1);
    }
    $logfp = __DIR__ . '/run/' . $opts['hostname'] . '.log';
    $logf = @fopen($logfp, 'w');
    if ($logf === false) {
        mexit('could not open file «' . $logfp . '» in write mode.' . N, 1);
    }
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// Columns of the «Instances» table that hold integers.
$instints = ['ID', 'FirstSeen', 'IsMastodon', 'Dead', 'Priority', 'Visible', 'Noxious', 'NoxLastModTS', 'LocalityID', 'OurLangsLock', 'UserCount', 'StatusCount', 'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'RegOpen', 'RegReqApproval', 'MaxTootChars', 'AdmCreatedAt', 'LastCheckOk', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos'];

// $idata starts out as «column name => column default» for every column of «Instances».
$idata = [];
// pass __LINE__ like every other myq() call does (this one used to pass __FILE__)
$res = myq($link, 'SHOW COLUMNS FROM Instances', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $idata[$row['Field']] = $row['Default'];
}
// since we later need to determine if a value is an integer, and mysql returns integers as strings...
setint($instints, $idata);
$idata['URI'] = $opts['hostname'];
// becomes true as soon as the instance returns usable data
$instanswered = false;
$now = time();
2022-12-20 23:00:22 +01:00
/*
 * Nodeinfo ('https://' . $opts['hostname'] . '/nodeinfo/2.0.json') was added in v3.0.0
 * Trends ('https://' . $opts['hostname'] . '/api/v1/trends') was added in v3.0.0
 * Activity ('https://' . $opts['hostname'] . '/api/v1/instance/activity') was added in v2.1.2
 */
2022-12-17 15:00:36 +01:00
eecho(1, '[[[ Working on «' . $opts['hostname'] . '» ]]]' . N);

// A hostname that would not fit the «URI» column is not processed at all.
if (willtrunc($opts['hostname'], 'Instances', 'URI')) {
    mexit('«' . $opts['hostname'] . '»: ignoring it because hostname is too long for the «URI» column of «Instances» table.' . N, 2);
}

// Load the record already stored for this hostname, if any, into $oidata
// (null when the instance is not in the database yet).
eecho(0, '«' . $opts['hostname'] . '»: trying to fetch its info from the database...' . N);
$res = myq($link, "SELECT * FROM Instances WHERE URI='" . myesc($link, $opts['hostname']) . "'", __LINE__);
$count = mysqli_num_rows($res);
if ($count < 1) {
    eecho(1, '«' . $opts['hostname'] . '»: found no record with this URI in Instances table.' . N);
    $oidata = null;
} elseif ($count == 1) {
    eecho(1, '«' . $opts['hostname'] . '»: found 1 record with this URI in Instances table.' . N);
    $oidata = mysqli_fetch_assoc($res);
    setint($instints, $oidata);
} else {
    // duplicate URIs: notify and give up
    $msg = '«' . $opts['hostname'] . '»: there are ' . $count . ' records with this URI in Instances table.';
    notify($msg, 3, false);
    mexit($msg . N, 3);
}
// Nodeinfo discovery: fetch «/.well-known/nodeinfo» (https first, then http),
// pick one of the advertised documents and read software and usage data from it.
eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo specs on https...' . N);
$buf = @gurl('https://' . $opts['hostname'] . '/.well-known/nodeinfo', $opts['timeout']);
if ($buf['cont'] === false) {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo specs on http...' . N);
    $buf = @gurl('http://' . $opts['hostname'] . '/.well-known/nodeinfo', $opts['timeout']);
}
if ($buf['cont'] !== false) {
    $buf = @json_decode($buf['cont'], true);
    if (is_array($buf)) {
        if (isset($buf['links']) && is_array($buf['links']) && count($buf['links']) > 0) {
            $ok = true;
            $nirefs = [];
            foreach ($buf['links'] as $key => $niref) {
                // «rel» is used as an array key and «href» as a URL, so both
                // must be strings; anything else is treated as malformed.
                if (isset($niref['rel'], $niref['href']) && is_string($niref['rel']) && is_string($niref['href'])) {
                    $nirefs[$niref['rel']] = $niref['href'];
                } else {
                    eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs “links” entitity ' . $key . ' has unexpected format.' . N);
                    $ok = false;
                }
            }
            if ($ok) {
                // sort by «rel» in descending order and take the first document
                krsort($nirefs);
                $niref = array_shift($nirefs);
                eecho(1, '«' . $opts['hostname'] . '»: got and successfully parsed nodeinfo specs :-)' . N);
                eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo data...' . N);
                $buf = @gurl($niref, $opts['timeout']);
                if ($buf['cont'] !== false) {
                    $buf = @json_decode($buf['cont'], true);
                    if (is_array($buf)) {
                        eecho(1, '«' . $opts['hostname'] . '»: got nodeinfo data :-)' . N);
                        $instanswered = true;
                        if (isset($buf['software']['name']) && is_string($buf['software']['name']) && !isempty($buf['software']['name'])) {
                            $idata['Software'] = trim($buf['software']['name']);
                            // $mastodons is an «a|b|c» alternation: it must be grouped,
                            // otherwise «^» only anchors the first alternative and the
                            // others match anywhere in the software name.
                            $idata['IsMastodon'] = (preg_match('/^(?:' . $mastodons . ')/', $idata['Software']) === 1);
                            // platforms seen for the first time are recorded and reported
                            $res = myq($link, 'SELECT Name FROM Platforms WHERE Name=\'' . myesc($link, $idata['Software']) . '\'', __LINE__);
                            if (mysqli_num_rows($res) < 1) {
                                if (!$opts['dryrun']) {
                                    myq($link, 'INSERT INTO Platforms (Name) VALUES (\'' . myesc($link, truncs($idata['Software'], 'Platforms', 'Name', '«' . $opts['hostname'] . '»')) . '\')', __LINE__);
                                }
                                notify('«' . $opts['hostname'] . '» runs on «' . $idata['Software'] . '», which was not present in the «Platforms» table, so it was added there. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances by setting the «Consider» field of its record to «1».', 2);
                            }
                        }
                        if (isset($buf['software']['version']) && is_string($buf['software']['version']) && !isempty($buf['software']['version'])) {
                            $idata['Version'] = trim($buf['software']['version']);
                        }
                        if (isset($buf['usage']['users']['total']) && is_int($buf['usage']['users']['total'])) {
                            $idata['UserCount'] = $buf['usage']['users']['total'];
                        }
                        if (isset($buf['usage']['users']['activeMonth']) && is_int($buf['usage']['users']['activeMonth'])) {
                            $idata['ActiveUsersMonth'] = $buf['usage']['users']['activeMonth'];
                        }
                        if (isset($buf['usage']['users']['activeHalfyear']) && is_int($buf['usage']['users']['activeHalfyear'])) {
                            $idata['ActiveUsersHalfYear'] = $buf['usage']['users']['activeHalfyear'];
                        }
                        if (isset($buf['usage']['localPosts']) && is_int($buf['usage']['localPosts'])) {
                            $idata['StatusCount'] = $buf['usage']['localPosts'];
                        }
                        if (isset($buf['openRegistrations']) && is_bool($buf['openRegistrations'])) {
                            $idata['RegOpen'] = b2i($buf['openRegistrations']);
                        }
                    } else {
                        eecho(2, '«' . $opts['hostname'] . '»: nodeinfo data was not good JSON.' . N);
                    }
                } else {
                    eecho(2, '«' . $opts['hostname'] . '»: could not fetch nodeinfo data: ' . $buf['emsg'] . '.' . N);
                }
            }
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs had unexpected format.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs where not good JSON.' . N);
    }
} else {
    eecho(2, '«' . $opts['hostname'] . '»: could not fetch nodeinfo specs: ' . $buf['emsg'] . '.' . N);
}
// Instance info: API v2 (plus the extended description endpoint) for Mastodon
// >= 4.0.0, API v1 for everything else. Each value is copied into $idata only
// when it is present and has the expected type.
// NOTE(review): versions are compared as strings, so a future «10.x» would
// sort before «4.0.0» — confirm whether that needs handling.
if ($idata['IsMastodon'] && $idata['Version'] >= '4.0.0') {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance info from API v2...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v2/instance', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance info from API v2 :-)' . N);
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title'])) {
                $idata['Title'] = trim($buf['title']);
            }
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description'])) {
                $idata['ShortDesc'] = trim($buf['description']);
            }
            if (isset($buf['thumbnail']['url']) && is_string($buf['thumbnail']['url']) && !isempty($buf['thumbnail']['url'])) {
                $idata['Thumb'] = trim($buf['thumbnail']['url']);
            }
            if (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters'])) {
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            }
            if (isset($buf['registrations']['approval_required']) && is_bool($buf['registrations']['approval_required'])) {
                $idata['RegReqApproval'] = b2i($buf['registrations']['approval_required']);
            }
            if (isset($buf['contact']['email']) && is_string($buf['contact']['email'])) {
                $idata['Email'] = trim($buf['contact']['email']);
            }
            // the contact account's details are stored only when its «noindex» flag is explicitly false
            if (isset($buf['contact']['account']['noindex']) && is_bool($buf['contact']['account']['noindex']) && !$buf['contact']['account']['noindex']) {
                if (isset($buf['contact']['account']['acct']) && is_string($buf['contact']['account']['acct']) && !isempty($buf['contact']['account']['acct'])) {
                    $idata['AdmAccount'] = trim($buf['contact']['account']['acct']);
                }
                if (isset($buf['contact']['account']['display_name']) && is_string($buf['contact']['account']['display_name']) && !isempty($buf['contact']['account']['display_name'])) {
                    $idata['AdmDisplayName'] = trim($buf['contact']['account']['display_name']);
                }
                if (isset($buf['contact']['account']['created_at']) && is_string($buf['contact']['account']['created_at']) && ($ts = strtotime($buf['contact']['account']['created_at'])) !== false) {
                    $idata['AdmCreatedAt'] = $ts;
                }
                if (isset($buf['contact']['account']['note']) && is_string($buf['contact']['account']['note']) && !isempty($buf['contact']['account']['note'])) {
                    $idata['AdmNote'] = trim($buf['contact']['account']['note']);
                }
                if (isset($buf['contact']['account']['url']) && is_string($buf['contact']['account']['url']) && !isempty($buf['contact']['account']['url'])) {
                    $idata['AdmURL'] = trim($buf['contact']['account']['url']);
                }
                if (isset($buf['contact']['account']['avatar']) && is_string($buf['contact']['account']['avatar']) && !isempty($buf['contact']['account']['avatar'])) {
                    $idata['AdmAvatar'] = trim($buf['contact']['account']['avatar']);
                }
                if (isset($buf['contact']['account']['header']) && is_string($buf['contact']['account']['header']) && !isempty($buf['contact']['account']['header'])) {
                    $idata['AdmHeader'] = trim($buf['contact']['account']['header']);
                }
            }
            // domain_count is gone from api v2, and we won't resort to api v1 just to get it when ver. >= 4.0.0
            if (isset($buf['languages']) && is_array($buf['languages'])) {
                $idata['languages'] = $buf['languages'];
            }
            if (isset($buf['rules']) && is_array($buf['rules'])) {
                foreach ($buf['rules'] as $rule) {
                    if (isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text'])) {
                        $idata['rules'][] = $rule['text'];
                    }
                }
            }
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance info fetched from API v2 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance info from API v2: ' . $buf['emsg'] . '.' . N);
    }

    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance extended description from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance/extended_description', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance extended description from API v1 :-)' . N);
            // isset() instead of !is_null(): the key may be missing altogether
            if (isset($buf['content']) && is_string($buf['content']) && !isempty($buf['content'])) {
                $idata['LongDesc'] = trim($buf['content']);
            }
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance extended description fetched from API v1 was not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance extended description from API v1: ' . $buf['emsg'] . '.' . N);
    }
} else {
    // we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be
    // a mastodon instance older than 3.0.0, when nodeinfo was introduced
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance info from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance info from API v1 :-)' . N);
            $instanswered = true;
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title'])) {
                $idata['Title'] = trim($buf['title']);
            }
            // the short description comes from «short_description» (the field being
            // checked), not from «description», which is the long one stored below
            if (isset($buf['short_description']) && is_string($buf['short_description']) && !isempty($buf['short_description'])) {
                $idata['ShortDesc'] = trim($buf['short_description']);
            }
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description'])) {
                $idata['LongDesc'] = trim($buf['description']);
            }
            if (isset($buf['email']) && is_string($buf['email'])) {
                $idata['Email'] = trim($buf['email']);
            }
            // if nodeinfo did not respond, it could be mastodon < 3.0.0, and we would not have $idata['Version'] yet, so...
            if (!isset($idata['Version']) && isset($buf['version']) && is_string($buf['version']) && !isempty($buf['version'])) {
                $idata['Version'] = trim($buf['version']);
            }
            // if nodeinfo responded we should already have these 2 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['stats']['user_count']) && is_int($buf['stats']['user_count'])) {
                $idata['UserCount'] = $buf['stats']['user_count'];
            }
            if (isset($buf['stats']['status_count']) && is_int($buf['stats']['status_count'])) {
                $idata['StatusCount'] = $buf['stats']['status_count'];
            }
            if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count'])) {
                $idata['DomainCount'] = $buf['stats']['domain_count'];
            }
            if (isset($buf['thumbnail']) && is_string($buf['thumbnail']) && !isempty($buf['thumbnail'])) {
                $idata['Thumb'] = trim($buf['thumbnail']);
            }
            if (isset($buf['max_toot_chars']) && is_int($buf['max_toot_chars'])) {
                $idata['MaxTootChars'] = $buf['max_toot_chars'];
            } elseif (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters'])) {
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            }
            // if nodeinfo responded we should already have this 1 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['registrations']) && is_bool($buf['registrations'])) {
                $idata['RegOpen'] = b2i($buf['registrations']);
            }
            if (isset($buf['approval_required']) && is_bool($buf['approval_required'])) {
                $idata['RegReqApproval'] = b2i($buf['approval_required']);
            }
            if (isset($buf['contact_account']['acct']) && is_string($buf['contact_account']['acct']) && !isempty($buf['contact_account']['acct'])) {
                $idata['AdmAccount'] = trim($buf['contact_account']['acct']);
            }
            if (isset($buf['contact_account']['display_name']) && is_string($buf['contact_account']['display_name']) && !isempty($buf['contact_account']['display_name'])) {
                $idata['AdmDisplayName'] = trim($buf['contact_account']['display_name']);
            }
            if (isset($buf['contact_account']['created_at']) && is_string($buf['contact_account']['created_at']) && ($ts = strtotime($buf['contact_account']['created_at'])) !== false) {
                $idata['AdmCreatedAt'] = $ts;
            }
            if (isset($buf['contact_account']['note']) && is_string($buf['contact_account']['note']) && !isempty($buf['contact_account']['note'])) {
                $idata['AdmNote'] = trim($buf['contact_account']['note']);
            }
            if (isset($buf['contact_account']['url']) && is_string($buf['contact_account']['url']) && !isempty($buf['contact_account']['url'])) {
                $idata['AdmURL'] = trim($buf['contact_account']['url']);
            }
            if (isset($buf['contact_account']['avatar']) && is_string($buf['contact_account']['avatar']) && !isempty($buf['contact_account']['avatar'])) {
                $idata['AdmAvatar'] = trim($buf['contact_account']['avatar']);
            }
            if (isset($buf['contact_account']['header']) && is_string($buf['contact_account']['header']) && !isempty($buf['contact_account']['header'])) {
                $idata['AdmHeader'] = trim($buf['contact_account']['header']);
            }
            if (isset($buf['languages']) && is_array($buf['languages'])) {
                $idata['languages'] = $buf['languages'];
            }
            if (isset($buf['rules']) && is_array($buf['rules'])) {
                foreach ($buf['rules'] as $rule) {
                    if (isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text'])) {
                        $idata['rules'][] = $rule['text'];
                    }
                }
            }
            // some falsing: a «pleroma» key, or a version string naming pleroma or
            // pixelfed, means this is not Mastodon
            if (isset($buf['pleroma'])) {
                $idata['IsMastodon'] = false;
            }
            if (isset($buf['version']) && is_string($buf['version']) && preg_match('#(pleroma|pixelfed)#i', $buf['version']) === 1) {
                $idata['IsMastodon'] = false;
            }
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance info fetched from API v1 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance info from API v1: ' . $buf['emsg'] . '.' . N);
    }
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// Weekly activity, only asked of Mastodon instances whose version string
// compares >= '2.1.2'. The decoded response is stored as-is in $idata['activity'].
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version'] >= '2.1.2') {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance activity info from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance/activity', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance activity info from API v1 :-)' . N);
            $idata['activity'] = $buf;
        } else {
            // $buf now holds the failed json_decode() result, not gurl()'s
            // answer, so there is no «emsg» to append here
            eecho(2, '«' . $opts['hostname'] . '»: instance activity info from API v1 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance activity info from API v1: ' . $buf['emsg'] . '.' . N);
    }
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// Trending tags, only asked of Mastodon instances whose version string compares
// >= '3.0.0'; from '3.5.0' on the «/tags» sub-path is used. The decoded
// response is stored as-is in $idata['trends'].
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version'] >= '3.0.0') {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance tags trends info from API v1...' . N);
    $url = 'https://' . $opts['hostname'] . '/api/v1/trends';
    if ($idata['Version'] >= '3.5.0') {
        $url .= '/tags';
    }
    $buf = @gurl($url, $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance tags trends info from API v1 :-)' . N);
            $idata['trends'] = $buf;
        } else {
            // $buf now holds the failed json_decode() result, not gurl()'s
            // answer, so there is no «emsg» to append here
            eecho(2, '«' . $opts['hostname'] . '»: instance tags trends from API v1 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance tags trends from API v1: ' . $buf['emsg'] . '.' . N);
    }
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// finished fetching
// A known IsMastodon value is passed through b2i(); an unknown one stays null.
if ($idata['IsMastodon'] !== null) {
    $idata['IsMastodon'] = b2i($idata['IsMastodon']);
}
// 1 when the instance answered during this run, 0 otherwise
$idata['LastCheckOk'] = $instanswered ? 1 : 0;
if ( is_null ( $oidata )) {
$query = 'INSERT INTO Instances SET ' ;
if ( $instanswered ) {
$idata [ 'FirstSeen' ] = $now ;
$idata [ 'InsertTS' ] = $now ;
}
} else {
$query = 'UPDATE Instances SET ' ;
$idata [ 'FirstSeen' ] = $oidata [ 'FirstSeen' ];
if ( $instanswered && is_null ( $oidata [ 'FirstSeen' ])) $idata [ 'FirstSeen' ] = $now ;
// Decide the «Dead» flag for an instance that is already in the database:
// when it did not answer now and is not yet marked dead, it is declared dead
// once the reference check time is more than $opts['deadline'] seconds old;
// in every other case the stored flag is carried over.
if (!$instanswered && $oidata['Dead'] == 0) {
    // we check the last time instance responded, if ever
    $res = myq($link, 'SELECT Time FROM InstChecks WHERE InstID=' . $oidata['ID'] . ' AND Status=1 ORDER BY Time DESC LIMIT 1', __LINE__);
    // if instance never responded we consider the time of first check
    if (mysqli_num_rows($res) == 0) {
        $res = myq($link, 'SELECT Time FROM InstChecks WHERE InstID=' . $oidata['ID'] . ' ORDER BY Time ASC LIMIT 1', __LINE__);
    }
    if (mysqli_num_rows($res) > 0) {
        $row = mysqli_fetch_assoc($res);
        if ($now - $row['Time'] > $opts['deadline']) {
            $idata['Dead'] = 1;
            // link to the instance through its record ID ($instid was never defined here)
            notify('«<a href="viewinst.php?id=' . $oidata['ID'] . '">' . $opts['hostname'] . '</a>» just died!', 2);
        }
    }
    // the «no data in InstChecks» warning is disabled since now we insert instances from peerscrawl.php directly:
    // else {
    //     eecho ( 2 , '«' . $opts [ 'hostname' ] . '»: it exists in Instances table but there’ s no data about it in InstChecks!' . N );
    // }
} else {
    $idata['Dead'] = $oidata['Dead'];
}
$idata [ 'Priority' ] = $oidata [ 'Priority' ];
$idata [ 'Visible' ] = $oidata [ 'Visible' ];
$idata [ 'Noxious' ] = $oidata [ 'Noxious' ];
$idata [ 'NoxReason' ] = $oidata [ 'NoxReason' ];
$idata [ 'NoxLastModTS' ] = $oidata [ 'NoxLastModTS' ];
$idata [ 'OurDesc' ] = $oidata [ 'OurDesc' ];
$idata [ 'OurDescEN' ] = $oidata [ 'OurDescEN' ];
$idata [ 'LocalityID' ] = $oidata [ 'LocalityID' ];
$idata [ 'OurLangsLock' ] = $oidata [ 'OurLangsLock' ];
$idata [ 'GuestID' ] = $oidata [ 'GuestID' ];
$idata [ 'LastGuestEdit' ] = $oidata [ 'LastGuestEdit' ];
$idata [ 'InsertTS' ] = $oidata [ 'InsertTS' ];
$idata [ 'RPos' ] = $oidata [ 'RPos' ];
if ( ! $instanswered ) {
$idata [ 'IsMastodon' ] = $oidata [ 'IsMastodon' ];
$idata [ 'Title' ] = $oidata [ 'Title' ];
$idata [ 'ShortDesc' ] = $oidata [ 'ShortDesc' ];
$idata [ 'LongDesc' ] = $oidata [ 'LongDesc' ];
$idata [ 'Email' ] = $oidata [ 'Email' ];
$idata [ 'Software' ] = $oidata [ 'Software' ];
$idata [ 'Version' ] = $oidata [ 'Version' ];
$idata [ 'UserCount' ] = $oidata [ 'UserCount' ];
$idata [ 'StatusCount' ] = $oidata [ 'StatusCount' ];
$idata [ 'DomainCount' ] = $oidata [ 'DomainCount' ];
$idata [ 'ActiveUsersMonth' ] = $oidata [ 'ActiveUsersMonth' ];
$idata [ 'ActiveUsersHalfYear' ] = $oidata [ 'ActiveUsersHalfYear' ];
$idata [ 'Thumb' ] = $oidata [ 'Thumb' ];
$idata [ 'RegOpen' ] = $oidata [ 'RegOpen' ];
$idata [ 'RegReqApproval' ] = $oidata [ 'RegReqApproval' ];
$idata [ 'MaxTootChars' ] = $oidata [ 'MaxTootChars' ];
$idata [ 'AdmAccount' ] = $oidata [ 'AdmAccount' ];
$idata [ 'AdmDisplayName' ] = $oidata [ 'AdmDisplayName' ];
$idata [ 'AdmCreatedAt' ] = $oidata [ 'AdmCreatedAt' ];
$idata [ 'AdmNote' ] = $oidata [ 'AdmNote' ];
$idata [ 'AdmURL' ] = $oidata [ 'AdmURL' ];
$idata [ 'AdmAvatar' ] = $oidata [ 'AdmAvatar' ];
$idata [ 'AdmHeader' ] = $oidata [ 'AdmHeader' ];
}
}
$set = [];
foreach ( $idata as $key => $val ) {
if ( in_array ( $key ,[ 'ID' , 'languages' , 'rules' , 'activity' , 'trends' ])) {
true ; // do nothing
} elseif ( is_null ( $val )) {
$set [] = $key . '=NULL' ;
} elseif ( is_int ( $val )) {
if ( willtrunc ( $val , 'Instances' , $key )) {
$msg = '«' . $opts [ 'hostname' ] . '»: value «' . $val . '» is less than min. admitted value or greater than max. admitted value for column «' . $key . '» of table «Instances». Shutting down.' ;
notify ( $msg , 3 , false );
mexit ( $msg . N , 2 );
2022-12-17 15:00:36 +01:00
}
2022-12-20 23:00:22 +01:00
$set [] = $key . '=' . $val ;
} elseif ( is_string ( $val )) {
if ( willtrunc ( $val , 'Instances' , $key )) {
$msg = '«' . $opts [ 'hostname' ] . '»: value «' . nocrnl ( $val ) . '» is too long for column «' . $key . '» of table «Instances». Shutting down.' ;
notify ( $msg , 3 , false );
mexit ( $msg . N , 2 );
2022-12-17 15:00:36 +01:00
}
2022-12-20 23:00:22 +01:00
$set [] = $key . '=\'' . myesc ( $link , $val ) . '\'' ;
} else {
mexit ( '$idata[\'' . $key . '\'] value has unmanaged type, see code around line ' . __LINE__ . '.' . N , 3 );
}
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
$query .= implode ( ', ' , $set );
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
if ( ! is_null ( $oidata )) $query .= ' WHERE ID=' . $oidata [ 'ID' ];
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
eecho ( 1 , 'query: «' . $query . '».' . N );
if ( ! $opts [ 'dryrun' ]) myq ( $link , $query , __LINE__ );
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// Store the instance's declared languages (InstLangs) and the ones we attribute
// to it (InstOurLangs), either for a brand new instance or for a known one.
if (is_null($oidata)) {
	// new instance: its ID is the one just generated by the INSERT (0 on dry runs)
	if ($opts['dryrun']) {
		$instid = 0;
	} else {
		$instid = mysqli_insert_id($link);
	}

	notify('«<a href="viewinst.php?id=' . $instid . '">' . $opts['hostname'] . '</a>» is a NEW instance! :-)', 1);
	$instlangs = langs($instid, $opts['hostname'], false, __LINE__);
	if (!$opts['dryrun']) {
		foreach ($instlangs as $row) {
			myq($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
		}
	}
	$instourlangs = langs($instid, $opts['hostname'], true, __LINE__);
	// if instourlangs is empty and instlangs is not, set instourlangs as instlangs
	if (count($instourlangs) == 0 && count($instlangs) > 0) {
		$instourlangs = $instlangs;
	}
	if (!$opts['dryrun']) {
		foreach ($instourlangs as $row) {
			myq($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
		}
	}
} else {
	// known instance: read the stored declared languages to compare them with the fresh ones
	$instid = $oidata['ID'];
	$res = myq($link, 'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID=' . $instid . ' ORDER BY Pos ASC', __LINE__);
	$oldinstlangs = [];
	while ($row = mysqli_fetch_assoc($res)) {
		$oldinstlangs[] = $row;
	}
	$instlangs = langs($instid, $opts['hostname'], false, __LINE__);
	// rewrite the declared languages only when they changed
	if ($instlangs != $oldinstlangs && !$opts['dryrun']) {
		myq($link, 'DELETE FROM InstLangs WHERE InstID=' . $instid, __LINE__);
		foreach ($instlangs as $row) {
			myq($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
		}
	}
	// our own languages are refreshed only when OurLangsLock is 0
	if ($idata['OurLangsLock'] == 0) {
		$instourlangs = langs($instid, $opts['hostname'], true, __LINE__);
		// if instourlangs is empty and instlangs is not, set instourlangs as instlangs
		if (count($instourlangs) == 0 && count($instlangs) > 0) {
			$instourlangs = $instlangs;
		}
		if (count($instourlangs) > 0 && !$opts['dryrun']) {
			myq($link, 'DELETE FROM InstOurLangs WHERE InstID=' . $instid, __LINE__);
			foreach ($instourlangs as $row) {
				myq($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
			}
		}
	}
}
// from here we know for sure $instid
// record the outcome of this check
if (!$opts['dryrun']) {
	myq($link, 'INSERT INTO InstChecks (InstID, Time, Status) VALUES (' . $instid . ', ' . $now . ', ' . $idata['LastCheckOk'] . ')', __LINE__);
}
// replace the stored weekly activity with the one just fetched
if (isset($idata['activity']) && is_array($idata['activity']) && !$opts['dryrun']) {
	myq($link, 'DELETE FROM InstActivity WHERE InstID=' . $instid, __LINE__);
	$pos = 0;
	foreach ($idata['activity'] as $buf) {
		// these should all be int, but mastodon represents them as strings
		$weekok = isset($buf['week']) && is_string($buf['week']) && preg_match('/^\d+$/', $buf['week']) === 1;
		$statusesok = $weekok && isset($buf['statuses']) && is_string($buf['statuses']) && preg_match('/^\d+$/', $buf['statuses']) === 1;
		$loginsok = $statusesok && isset($buf['logins']) && is_string($buf['logins']) && preg_match('/^\d+$/', $buf['logins']) === 1;
		$regsok = $loginsok && isset($buf['registrations']) && is_string($buf['registrations']) && preg_match('/^\d+$/', $buf['registrations']) === 1;
		// weeks with missing or non-numeric values are skipped and do not consume a position
		if (!$regsok) {
			continue;
		}
		$pos++;
		myq($link, 'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (' . $instid . ', ' . $buf['week'] . ', ' . $buf['statuses'] . ', ' . $buf['logins'] . ', ' . $buf['registrations'] . ', ' . $pos . ')', __LINE__);
	}
}
2022-12-17 15:00:36 +01:00
2022-12-20 23:00:22 +01:00
// Replace the stored trending tags with the ones just fetched, ranked by the
// total number of accounts that used each tag over the reported history.
if (isset($idata['trends']) && is_array($idata['trends'])) {
	$trends = [];
	foreach ($idata['trends'] as $buf) {
		// malformed entries are skipped as a whole: storing them would need values they do not have
		if (!is_array($buf) || !isset($buf['name'], $buf['url'], $buf['history']) || !is_string($buf['name']) || !is_string($buf['url']) || !is_array($buf['history']))
			continue;
		// the most recent day is taken from the first history row, which must therefore exist
		if (!isset($buf['history'][0]['day']) || !is_scalar($buf['history'][0]['day']))
			continue;
		$trend = 0;
		foreach ($buf['history'] as $row) {
			// these should be integers, but mastodon represents them as strings: accept both
			if (is_array($row) && isset($row['day'], $row['uses'], $row['accounts']) && is_numeric($row['day']) && is_numeric($row['uses']) && is_numeric($row['accounts']))
				$trend += $row['accounts'] + 0;
		}
		$trends[] = [
			'InstID' => $instid,
			'LastDay' => $buf['history'][0]['day'],
			'Name' => $buf['name'],
			'URL' => $buf['url'],
			'Pos' => null,
			'trend' => $trend
		];
	}
	mdasortbykey($trends, 'trend', true);
	if (!$opts['dryrun']) myq($link, 'DELETE FROM InstTrends WHERE InstID=' . $instid, __LINE__);
	$pos = 0;
	foreach ($trends as $trend) {
		$pos++;
		// «LastDay» comes straight from the remote API, so it is escaped like the other strings
		$query = 'INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES (' . $trend['InstID'] . ', \'' . myesc($link, (string)$trend['LastDay']) . '\', \'' . myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«' . $opts['hostname'] . '»')) . '\', \'' . myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«' . $opts['hostname'] . '»')) . '\', ' . $pos . ')';
		if (!$opts['dryrun']) myq($link, $query, __LINE__);
	}
}
// Fetch the instance's users directory and mirror it in tables «Users» and «UsersFields».
// This is done for Mastodon >= 4.0.0 only, because from that version on account
// entities report «noindex»; guessing it by fetching every user's profile page took
// too long on instances with many users, so that fallback was dropped.
// Versions are compared with version_compare() on their numeric part only: a plain
// string comparison would rank «10.0.0» below «4.0.0».
$udirver = null;
if (is_string($idata['Version']) && preg_match('/^\d+(?:\.\d+)*/', $idata['Version'], $udirvm) === 1)
	$udirver = $udirvm[0];
if ($opts['fetchusers'] && $idata['IsMastodon'] && !is_null($udirver) && version_compare($udirver, '4.0.0', '>=')) {
	eecho(0, '«' . $opts['hostname'] . '»: trying to fetch users info from directory API...' . N);
	$users = []; // array of users in this instance's directory
	$chunk = 0;
	$limit = 40;
	$end = false;
	// becomes false when a chunk could not be fetched: the directory is then only partially known
	$udircomplete = true;
	// at least one attempt per chunk, otherwise the loop below would never end
	$udiratts = max(1, (int)$opts['udiratts']);
	while (!$end) {
		$offset = $chunk * $limit;
		for ($att = 0; $att < $udiratts; $att++) {
			eecho(0, '«' . $opts['hostname'] . '»: trying to fetch chunk ' . ($chunk + 1) . ' of users info from directory API (attempt ' . ($att + 1) . '/' . $udiratts . ')...' . N);
			$buf = @gurl('https://' . $opts['hostname'] . '/api/v1/directory?local=1&order=new&limit=' . $limit . '&offset=' . $offset, $opts['timeout']);
			if ($buf['cont'] !== false) {
				$xrlr = ckratelimit($buf['headers']);
				eecho(1, '«' . $opts['hostname'] . '»: got chunk ' . ($chunk + 1) . ' of users info from directory API on attempt ' . ($att + 1) . '/' . $udiratts . ' (xrlr: ' . $xrlr . ') :-)' . N);
				$buf = @json_decode($buf['cont'], true);
				if (is_array($buf)) {
					// a chunk shorter than requested is the last one
					if (count($buf) < $limit) $end = true;
					foreach ($buf as $user) {
						if (is_array($user) && make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
							eecho(0, '«' . $opts['hostname'] . '»: working on user «' . $user['username'] . '»...' . N);
							// hashtags in the bio override the «noindex» flag reported by the API
							$snote = strip_tags((string)$user['note']);
							if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu', $snote) === 1) $user['noindex'] = true;
							if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu', $snote) === 1) $user['noindex'] = false;
							// fetching each user's featured tags is disabled (one more request per user takes too long)
							$user['tags'] = null;
							if (!is_null($user['created_at'])) $user['created_at'] = strtotime($user['created_at']);
							if (!is_null($user['last_status_at'])) $user['last_status_at'] = datetots($user['last_status_at']);
							$users[$user['id']] = $user;
						} else {
							eecho(2, '«' . $opts['hostname'] . '»: user record missed some required keys :-(' . N);
						}
					}
					// chunk done, no more attempts needed
					break;
				} else {
					eecho(2, '«' . $opts['hostname'] . '»: ... but the chunk was not good JSON :-(' . N);
					if ($att == $udiratts - 1) {
						$end = true;
						$udircomplete = false;
					}
				}
			} else {
				eecho(2, '«' . $opts['hostname'] . '»: could not fetch chunk ' . ($chunk + 1) . ' of users info from directory API: ' . $buf['emsg'] . N);
				if ($att == $udiratts - 1) {
					eecho(2, '«' . $opts['hostname'] . '»: last attempt (' . ($att + 1) . '/' . $udiratts . ') on chunk ' . ($chunk + 1) . ' failed; i give up.' . N);
					$end = true;
					$udircomplete = false;
				} else {
					eecho(2, '«' . $opts['hostname'] . '»: attempt ' . ($att + 1) . '/' . $udiratts . ' on chunk ' . ($chunk + 1) . ' failed; sleeping for ' . ght($opts['udirfailst'], $ghtsa) . ' before retrying.' . N);
					sleep($opts['udirfailst']);
				}
			}
		}
		$chunk++;
	}
	$totusers = count($users);
	eecho(1, '«' . $opts['hostname'] . '»: got ' . $totusers . ' users’ profiles.' . N);
	if ($totusers > 0) {
		eecho(1, '«' . $opts['hostname'] . '»: inserting/updating ' . $totusers . ' users’ profiles in the database.' . N);
		$exusers = []; // array of this instance's users already existing in the db
		$res = myq($link, 'SELECT ID, locid, username FROM Users WHERE InstID=' . $instid, __LINE__);
		while ($row = mysqli_fetch_assoc($res)) $exusers[$row['locid']] = $row;
		foreach ($users as $locid => $user) {
			// context shown in the notifications about truncated values
			$uctx = '«' . $opts['hostname'] . '»: «' . $user['username'] . '»';
			$query = 'SET InstID=' . $instid
				. ', host=' . myv($link, $opts['hostname'])
				. ', locid=' . myv($link, $user['id'])
				. ', username=' . myv($link, truncs($user['username'], 'Users', 'username', $uctx))
				. ', display_name=' . myv($link, truncs($user['display_name'], 'Users', 'display_name', $uctx))
				. ', locked=' . myv($link, $user['locked'])
				. ', bot=' . myv($link, $user['bot'])
				. ', created_at=' . myv($link, $user['created_at'])
				. ', note=' . myv($link, truncs($user['note'], 'Users', 'note', $uctx))
				. ', url=' . myv($link, truncs($user['url'], 'Users', 'url', $uctx))
				. ', avatar=' . myv($link, truncs($user['avatar'], 'Users', 'avatar', $uctx))
				. ', header=' . myv($link, truncs($user['header'], 'Users', 'header', $uctx))
				. ', statuses_count=' . myv($link, $user['statuses_count'])
				. ', last_status_at=' . myv($link, $user['last_status_at'])
				. ', tags=' . myv($link, truncs($user['tags'], 'Users', 'tags', $uctx));
			$uid = 0;
			if (!array_key_exists($user['id'], $exusers)) {
				if (!$user['noindex']) {
					eecho(0, '«' . $opts['hostname'] . '»: inserting new user «' . $user['username'] . '»...' . N);
					$query = 'INSERT INTO Users ' . $query;
					if (!$opts['dryrun']) {
						myq($link, $query, __LINE__);
						$uid = mysqli_insert_id($link);
					}
				} else {
					eecho(0, '«' . $opts['hostname'] . '»: NOT inserting user «' . $user['username'] . '» because they don’t want to be indexed...' . N);
				}
			} else {
				$uid = $exusers[$locid]['ID'];
				if (!$user['noindex']) {
					eecho(0, '«' . $opts['hostname'] . '»: updating existing user «' . $user['username'] . '» (' . $uid . ')...' . N);
					$query = 'UPDATE Users ' . $query . ' WHERE ID=' . $uid;
				} else {
					eecho(0, '«' . $opts['hostname'] . '»: deleting existing user «' . $user['username'] . '» (' . $uid . ') because they don’t want to be indexed...' . N);
					$query = 'DELETE FROM Users WHERE ID=' . $uid;
				}
				if (!$opts['dryrun']) {
					myq($link, $query, __LINE__);
					// fields are always dropped: they are re-inserted below for users that stay indexed
					myq($link, 'DELETE FROM UsersFields WHERE UserID=' . $uid, __LINE__);
				}
			}
			if ($uid != 0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields']) > 0) {
				eecho(0, '«' . $opts['hostname'] . '»: saving user fields for user «' . $user['username'] . '» (' . $uid . ')...' . N);
				foreach ($user['fields'] as $field) {
					// a field is stored as verified when it has a verification date; absent keys count as NULL
					$fverified = (isset($field['verified_at'])) ? 1 : 0;
					$fname = truncs($field['name'] ?? null, 'UsersFields', 'name', $uctx);
					$fvalue = truncs($field['value'] ?? null, 'UsersFields', 'value', $uctx);
					if (!$opts['dryrun']) myq($link, 'INSERT INTO UsersFields SET UserID=' . $uid . ', name=' . myv($link, $fname) . ', value=' . myv($link, $fvalue) . ', verified=' . $fverified, __LINE__);
				}
			}
		}
		if ($udircomplete) {
			eecho(1, '«' . $opts['hostname'] . '»: deleting possible users’ profiles which are in the database but no longer in the directory.' . N);
			foreach ($exusers as $locid => $exuser) {
				if (!array_key_exists($locid, $users)) {
					eecho(0, '«' . $opts['hostname'] . '»: user «' . $exusers[$locid]['username'] . '» opted out of the directory, deleting their record (' . $exuser['ID'] . ')...' . N);
					if (!$opts['dryrun']) {
						myq($link, 'DELETE FROM Users WHERE ID=' . $exuser['ID'], __LINE__);
						myq($link, 'DELETE FROM UsersFields WHERE UserID=' . $exuser['ID'], __LINE__);
					}
				}
			}
		} else {
			// users beyond the failed chunk are unknown, not gone: deleting them would lose valid records
			eecho(2, '«' . $opts['hostname'] . '»: the directory was only partially fetched, NOT deleting users’ profiles which are in the database but were not seen in the directory.' . N);
		}
	}
}
mexit('«' . $opts['hostname'] . '»: done in ' . ght(time() - $now, null, 0) . ' :-)' . N, 0);
2022-12-17 15:00:36 +01:00
// functions
/**
 * Runs a query and returns its result; on failure the script is terminated
 * through mexit() with exit code 3.
 *
 * @param mixed $link Connection passed to mysqli_query()
 * @param string $query The query to run
 * @param int $line Line number of the caller, reported in the error message
 * @return mixed Whatever mysqli_query() returned
 */
function myq(&$link, $query, $line) {
	try {
		$result = mysqli_query($link, $query);
	} catch (Exception $e) {
		$failure = $e->getMessage() . ' (' . $e->getCode() . ')';
		mexit('query «' . $query . '» on line ' . $line . ' failed: ' . $failure . '.' . N, 3);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	if ($result === false) {
		$failure = mysqli_error($link) . ' (' . mysqli_errno($link) . ')';
		mexit('query «' . $query . '» on line ' . $line . ' failed: ' . $failure . '.' . N, 3);
	}
	return ($result);
}
/**
 * Prefixes a message with a timestamp and its level name, then prints it
 * (stdout below level 2, stderr otherwise) and/or appends it to the log file,
 * depending on the minimum levels set in $opts.
 *
 * @param int $lev Message level, used as index into the global $msglevs
 * @param string $msg The message, written as is (no newline is added)
 */
function eecho($lev, $msg) {
	global $logf, $opts, $msglevs;
	// microtime(false) gives "0.uuuuuu ssssssssss": fraction first, then seconds
	$parts = explode(' ', microtime(false));
	$stamp = date('Y-m-d H:i:s', $parts[1]) . '.' . substr($parts[0], 2);
	$line = $stamp . ' ' . $msglevs[$lev] . ': ' . $msg;
	if ($lev >= $opts['tuiminmsglev']) {
		if ($lev >= 2) {
			fwrite(STDERR, $line);
		} else {
			echo($line);
		}
	}
	$dolog = $lev >= $opts['logminmsglev'] && isset($logf) && $logf !== false;
	if ($dolog) {
		fwrite($logf, $line);
	}
}
/**
 * Closes the database connection, emits a last message (as an error when the
 * exit code is not 0, as info otherwise), closes the log file and exits.
 *
 * @param string $msg Message to emit through eecho()
 * @param int $code Exit code of the script
 */
function mexit($msg, $code) {
	global $link, $logf;
	if (isset($link) && $link !== false) {
		mysqli_close($link);
	}
	$lev = ($code != 0) ? 3 : 1;
	eecho($lev, $msg);
	if (isset($logf) && $logf !== false) {
		fclose($logf);
	}
	exit($code);
}
2022-12-20 23:00:22 +01:00
/**
 * Converts to numbers, in place, the non-null elements of an array found
 * under the given keys (by adding 0 to them).
 *
 * @param array $keys Keys of the elements to convert
 * @param array $arr Array to modify
 */
function setint($keys, &$arr) {
	foreach ($keys as $k) {
		if (is_null($arr[$k])) {
			continue;
		}
		$arr[$k] += 0;
	}
}
/**
 * Tells whether a value would not fit a column, according to the global
 * $tables: strings are checked against the column's size in characters,
 * integers against the column's «min» and «max».
 *
 * @param mixed $val Value to check
 * @param string $tab Table name (lowercased when $iswin is set)
 * @param string $col Column name
 * @return bool true if the value does not fit; false otherwise, and for any other type
 */
function willtrunc($val, $tab, $col) {
	global $tables, $iswin;
	if ($iswin) {
		$tab = strtolower($tab);
	}
	if (is_string($val)) {
		return (mb_strlen($val, 'UTF-8') > $tables[$tab][$col]);
	}
	if (is_int($val)) {
		$range = $tables[$tab][$col];
		return ($val < $range['min'] || $val > $range['max']);
	}
	return (false);
}
2022-12-21 07:54:11 +01:00
/**
 * Shortens a string so that it fits a column, according to the sizes in the
 * global $tables; a shortened string ends with «…» and a notification is sent.
 *
 * @param string|null $str String to check; null is returned untouched
 * @param string $tab Table name (lowercased when $iswin is set)
 * @param string $col Column name
 * @param string $ctx Context prepended to the notification
 * @return string|null The string, shortened if needed
 */
function truncs($str, $tab, $col, $ctx) {
	global $tables, $iswin;
	if (is_null($str)) {
		return (null);
	}
	if ($iswin) {
		$tab = strtolower($tab);
	}
	$size = $tables[$tab][$col];
	if (mb_strlen($str, 'UTF-8') <= $size) {
		return ($str);
	}
	// keep one character less than the size, to make room for the ellipsis
	$short = mb_substr($str, 0, $size - 1, 'UTF-8') . '…';
	notify($ctx . ': had to truncate string to ' . $size . ' chars to be able to insert it into «' . $col . '» column in «' . $tab . '» table.', 3);
	return ($short);
}
/**
 * Clamps a number to the «min» and «max» of a column, according to the
 * global $tables; a notification is sent whenever the value had to be changed.
 *
 * @param mixed $num Number to check
 * @param string $tab Table name (lowercased when $iswin is set)
 * @param string $col Column name
 * @param string $ctx Context prepended to the notification
 * @return mixed The number, clamped if needed; 0 if $num was not numeric
 */
function truncn($num, $tab, $col, $ctx) {
	global $tables, $iswin;
	if ($iswin) {
		$tab = strtolower($tab);
	}
	if (!is_numeric($num)) {
		notify($ctx . ': function «truncn»: expecting a number, got something else; returning «0».', 3);
		return (0);
	}
	$range = $tables[$tab][$col];
	if ($num > $range['max']) {
		notify($ctx . ': had to ceil «' . $num . '» to «' . $range['max'] . '», ie the maximum value it can have in column «' . $col . '» of table «' . $tab . '».', 3);
		return ($range['max']);
	}
	if ($num < $range['min']) {
		notify($ctx . ': had to floor «' . $num . '» to «' . $range['min'] . '», ie the minimum value it can have in column «' . $col . '» of table «' . $tab . '»).', 3);
		return ($range['min']);
	}
	return ($num);
}
2022-12-20 23:00:22 +01:00
/**
 * Makes a string printable on a single line by replacing carriage returns
 * and newlines with the literal sequences «\r» and «\n».
 *
 * @param string $str String to convert
 * @return string The converted string
 */
function nocrnl($str) {
	// the search strings must be the bare control characters: with blanks around
	// them, plain line breaks would never match
	return (str_replace(["\r", "\n"], ['\\r', '\\n'], $str));
}
/**
 * Converts a truthy/falsy value into the integer 1 or 0.
 *
 * @param mixed $bool Value to convert
 * @return int 1 if $bool is truthy, 0 otherwise
 */
function b2i($bool) {
	if ($bool) {
		return (1);
	}
	return (0);
}
2022-12-20 23:00:22 +01:00
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $str String to check
 * @return bool true if $str matches /^\s*$/, false otherwise
 */
function isempty($str) {
	return (preg_match('/^\s*$/', $str) === 1);
}
/**
 * Stores a notification in table «Notifications» (unless on a dry run) and,
 * optionally, echoes it first, stripped of its tags.
 *
 * @param string $msg Text of the notification, possibly containing HTML
 * @param int $lev Severity, to be thought of as the «$lev» param of eecho(): 0=debug, 1=info, 2=warning, 3=error
 * @param bool $doecho Whether to echo the notification too
 */
function notify($msg, $lev, $doecho = true) {
	global $link, $tables, $iswin, $opts;
	if ($doecho) {
		eecho($lev, '*notification*: ' . mb_lcfirst(strip_tags($msg)) . N);
	}
	if ($opts['dryrun']) {
		return;
	}
	// the key under which the table is listed in $tables is lowercase when $iswin is set
	$tab = ($iswin) ? 'notifications' : 'Notifications';
	// cut the text to the size of its column
	$text = mb_substr($msg, 0, $tables[$tab]['Notification'], 'UTF-8');
	myq($link, 'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \'' . myesc($link, $text) . '\', ' . $lev . ', \'' . microtime(true) . '\', 0, 0)', __LINE__);
}
2022-12-21 07:54:11 +01:00
/**
 * Sorts, in place, an array of arrays by the value each sub-array holds under
 * a given key. The keys of the outer array are preserved, and sub-arrays
 * sharing the same value are all kept (indexing them by that value, as done
 * previously, silently dropped all but one of them).
 *
 * @param array $arr Array of arrays to sort
 * @param mixed $key Key, present in every sub-array, whose value drives the sort
 * @param bool $rev false for ascending order, true for descending order
 */
function mdasortbykey(&$arr, $key, $rev = false) {
	uasort($arr, function ($a, $b) use ($key, $rev) {
		$cmp = $a[$key] <=> $b[$key];
		return ($rev) ? -$cmp : $cmp;
	});
}
/**
 * "Multi array_key_exists": tells whether an array has all of the given keys.
 *
 * @param array $keys Keys to look for
 * @param array $arr Array to check
 * @return bool true if every key exists in $arr (even with a null value), false otherwise
 */
function make($keys, &$arr) {
	foreach ($keys as $wanted) {
		if (array_key_exists($wanted, $arr)) {
			continue;
		}
		return (false);
	}
	return (true);
}
/**
 * Formats a value for use in a query: NULL for null and for blank strings,
 * 1 or 0 for booleans, an escaped and quoted string for anything else.
 *
 * @param mixed $link Connection passed to mysqli_real_escape_string()
 * @param mixed $var Value to format
 * @return string The value, ready to be concatenated into a query
 */
function myv(&$link, $var) {
	if (is_null($var)) {
		return ('NULL');
	}
	if (is_bool($var)) {
		return (($var) ? '1' : '0');
	}
	if (trim($var) == '') {
		return ('NULL');
	}
	return ('\'' . mysqli_real_escape_string($link, $var) . '\'');
}
/**
 * Converts a date written as «year-month-day» into the timestamp of its
 * midnight, as computed by mktime().
 *
 * @param string $date Date to convert (e.g.: "2022-12-17")
 * @return mixed Whatever mktime() returns for that day
 */
function datetots($date) {
	$parts = explode('-', $date);
	$year = $parts[0];
	$month = $parts[1];
	$day = $parts[2];
	return (mktime(0, 0, 0, $month, $day, $year));
}
/**
 * Looks at the rate limit headers of an HTTP response and, when no requests
 * are left, sleeps until the limit is reset.
 *
 * @param string $httpresphead Raw head of the HTTP response (status line and headers)
 * @return mixed Value of the «x-ratelimit-remaining» header, or false if any of the
 *               «date», «x-ratelimit-reset» and «x-ratelimit-remaining» headers is missing
 */
function ckratelimit($httpresphead) {
	// header lines end with CRLF; a bare LF is accepted too
	$lines = preg_split('/\r?\n/', (string)$httpresphead);
	// the first line is the status line, not a header
	array_shift($lines);
	$headers = [];
	foreach ($lines as $line)
		if (preg_match('/^([^:]+):(.*)$/Uu', $line, $matches) === 1)
			$headers[strtolower($matches[1])] = trim($matches[2]);
	$missing = [];
	foreach (['date', 'x-ratelimit-reset', 'x-ratelimit-remaining'] as $name)
		if (!isset($headers[$name])) $missing[] = $name;
	if (count($missing) > 0) {
		eecho(2, 'ckratelimit: $httpresphead did not contain «' . implode('», «', $missing) . '» header(s)!' . N);
		return (false);
	}
	if ($headers['x-ratelimit-remaining'] == 0) {
		$reset = strtotime($headers['x-ratelimit-reset']);
		$date = strtotime($headers['date']);
		// sleep() refuses negative values, which a reset time in the past (or an
		// unparsable date) would give: wait one second in those cases
		$stosl = ($reset !== false && $date !== false) ? max(1, $reset - $date + 1) : 1;
		eecho(2, 'reached rate limit, sleeping for ' . ght($stosl) . ' ...' . N);
		sleep($stosl);
	}
	return ($headers['x-ratelimit-remaining']);
}
2022-12-17 15:00:36 +01:00
/** <LANGUAGE MANAGEMENT> */
/**
 * Calls an endpoint of the Mastodon API over HTTPS and decodes its answer.
 *
 * @param string $host Host to be called (e.g.: "mastodon.bida.im")
 * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The JSON answer as decoded by json_decode into arrays, or NULL if the call fails
 */
function get_api($host, $path) {
	global $opts;
	$resp = @gurl('https://' . $host . $path, $opts['timeout']);
	if ($resp['cont'] === false) {
		return NULL;
	}
	// honour the rate limit before going on
	ckratelimit($resp['headers']);
	return json_decode($resp['cont'], true);
}
/**
 * Returns the languages recognized for the toot that got passed to it, each
 * with its probability.
 *
 * A language explicitly set in the toot is taken as certain (probability 1);
 * otherwise the language is guessed from the toot's content. Anything that is
 * not a toot, or has neither language nor content, gives an empty array.
 *
 * @param mixed $toot The toot to be checked, as returned by the API
 * @return array Associative array with language and related probability
 */
function get_toot_languages($toot) {
	$langs = [];
	if (is_array($toot)) {
		if (isset($toot['language']) && is_string($toot['language'])) {
			// the language is explicitly set in the toot, so use that
			$langs[$toot['language']] = 1;
		} elseif (array_key_exists('content', $toot)) {
			// the language is not explicitly set in the toot, so try and recognize it
			$text = strip_tags((string)$toot['content']);
			$ld = new Language;
			$langs = $ld->detect($text)->bestResults()->close();
		}
	}
	// group derived languages into their main language code (e.g.: "zh-CN" into "zh"),
	// keeping the highest probability of each group
	$grouped_langs = [];
	foreach ($langs as $key => $value) {
		$l = explode('-', $key)[0];
		if (array_key_exists($l, $grouped_langs)) {
			$grouped_langs[$l] = max($grouped_langs[$l], $value);
		} else {
			$grouped_langs[$l] = $value;
		}
	}
	return $grouped_langs;
}
/**
 * Averages, language by language, the probabilities detected for every toot:
 * the probabilities of a language are summed up and divided by the number of toots.
 *
 * @param array $detected_langs Array of mappings between language and probability, one per toot
 * @return array Mapping between language and average probability
 */
function summary($detected_langs) {
	$totals = [];
	foreach ($detected_langs as $tootlangs) {
		foreach ($tootlangs as $code => $weight) {
			$totals[$code] = (array_key_exists($code, $totals) ? $totals[$code] : 0) + $weight;
		}
	}
	$averages = [];
	foreach ($totals as $code => $sumweight) {
		$averages[$code] = $sumweight / count($detected_langs);
	}
	return $averages;
}
/**
 * Helper function for usort: compares two arrays by their first element, so
 * that bigger first elements come first.
 *
 * @param array $entry1 First array to be compared
 * @param array $entry2 Second array to be compared
 * @return number 1, 0 or -1 depending on $entry1[0] being less than, equal to or greater than $entry2[0]
 */
function sort_weights($entry1, $entry2) {
	if ($entry1[0] < $entry2[0]) {
		return 1;
	}
	return ($entry1[0] == $entry2[0]) ? 0 : -1;
}
/**
 * Given a language mapping, return a list of probable languages: languages
 * are taken by decreasing probability, until one falls below two thirds of
 * the probability of the one before it.
 *
 * @param array $summary Map between language and probability
 * @return string[] List of probable languages
 */
function get_languages($summary) {
	$ranked = [];
	foreach ($summary as $code => $weight) {
		$ranked[] = [$weight, $code];
	}
	usort($ranked, 'sort_weights');
	$picked = [];
	$previous = 0;
	foreach ($ranked as $pair) {
		if ($pair[0] < $previous * 2 / 3) {
			break;
		}
		$picked[] = $pair[1];
		$previous = $pair[0];
	}
	return $picked;
}
/**
 * Returns a list of probable languages for the given instance, guessed from
 * the most recent toots of its local public timeline.
 *
 * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
 * @return string[] List of probable languages; empty if the timeline could not be read
 */
function get_instance_langs($host) {
	global $opts;
	$data = get_api($host, '/api/v1/timelines/public?local=true&limit=' . $opts['ldtoots']);
	// a failed call gives NULL, a bad answer may decode to anything
	if (!is_array($data)) {
		return [];
	}
	// keep toots only: an error answer (e.g. a timeline that is not public) is an
	// object holding a message, not a list of toots
	$toots = array_values(array_filter($data, 'is_array'));
	if (count($toots) == 0) {
		return [];
	}
	$detected_langs = array_map('get_toot_languages', $toots);
	$summary = summary($detected_langs);
	$languages = get_languages($summary);
	return $languages;
}
2022-12-20 23:00:22 +01:00
/**
 * Resolves the languages of an instance to rows of the «Languages» table
 *
 * Language codes are taken from get_instance_langs() when auto-detection is in
 * effect, otherwise from the global $idata['languages'] when it is an array.
 * Codes are normalised ('-' becomes '_'), non-string or empty entries are
 * dropped, duplicates are removed and at most 5 codes are kept. Every code
 * that has no row in «Languages» yet gets one inserted, with its display name
 * in each supported locale (skipped when $opts['dryrun'] is set, in which case
 * the reported LangID is 0).
 *
 * @param mixed $instid Instance identifier, copied as is into every returned entry
 * @param string $hostname Instance's hostname, used for detection and in log messages
 * @param bool $auto Whether to detect languages from the latest toots instead of trusting the declared ones
 * @param mixed $line Passed through to myq() together with each query
 * @return array[] List of ['InstID' => ..., 'LangID' => ..., 'Pos' => ..., 'Code' => ...], Pos starting at 1
 */
function langs($instid, $hostname, $auto, $line) {
	global $idata, $link, $opts;
	$retlangs = [];
	$languages = [];
	// even if $auto is true, set it to false (don't do autodetection of languages based on last toots) if api/v1/instance returned a language different from the default "en": assume instead it is right, because it has been explicitly set
	if (isset($idata['languages'][0]) && $idata['languages'][0] != 'en')
		$auto = false;
	if ($auto) {
		$languages = get_instance_langs($hostname);
	} elseif (isset($idata['languages']) && is_array($idata['languages'])) {
		$languages = $idata['languages'];
	}
	// The codes come from a remote server, so they cannot be trusted to be strings:
	// anything else would blow up later in myesc(). Normalise before de-duplicating,
	// so that e.g. "pt-BR" and "pt_BR" do not yield two entries for the same language.
	$codes = [];
	foreach ($languages as $val) {
		if (!is_string($val) || $val === '')
			continue;
		$val = str_replace('-', '_', $val);
		if (!in_array($val, $codes, true))
			$codes[] = $val;
	}
	$languages = array_slice($codes, 0, 5);
	if (count($languages) == 0)
		return ($retlangs);
	if ($auto)
		eecho(1, '«' . $hostname . '»: detected languages: ' . implode(', ', $languages) . N);
	else
		eecho(1, '«' . $hostname . '»: declared languages: ' . implode(', ', $languages) . N);
	// Name column of «Languages» => locale the display name is rendered in
	// (null: render the name in the language itself). Order matters: it is
	// the column order of the INSERT below.
	$namecols = [
		'NameOrig' => null,
		'NamePT_BR' => 'pt_BR',
		'NameDE' => 'de',
		'NameUK' => 'uk',
		'NameCA' => 'ca',
		'NameEN' => 'en',
		'NameES' => 'es',
		'NameFR' => 'fr',
		'NameGL' => 'gl',
		'NameIT' => 'it',
	];
	$label = '«' . $hostname . '»';
	$pos = 0;
	foreach ($languages as $lang) {
		$res = myq($link, 'SELECT * FROM Languages WHERE Code=\'' . myesc($link, $lang) . '\'', $line);
		if (mysqli_num_rows($res) < 1) {
			// Unknown language: build the row (already escaped), column by column.
			$values = ['Code' => myesc($link, truncs($lang, 'Languages', 'Code', $label))];
			foreach ($namecols as $col => $locale) {
				$name = mb_ucfirst(locale_get_display_name($lang, $locale === null ? $lang : $locale));
				$values[$col] = myesc($link, truncs($name, 'Languages', $col, $label));
			}
			$q = 'INSERT INTO Languages (ID, ' . implode(', ', array_keys($values)) . ') VALUES (NULL, \'' . implode('\', \'', $values) . '\')';
			if (!$opts['dryrun']) {
				myq($link, $q, $line);
				$langid = mysqli_insert_id($link);
			} else {
				// Nothing was written, so there is no real ID to report.
				$langid = 0;
			}
		} else {
			$row = mysqli_fetch_assoc($res);
			$langid = $row['ID'];
		}
		$pos++;
		$retlangs[] = ['InstID' => $instid, 'LangID' => $langid, 'Pos' => $pos, 'Code' => $lang];
	}
	return ($retlangs);
}
?>