1440 lines
67 KiB
PHP
Executable file
1440 lines
67 KiB
PHP
Executable file
#!/usr/bin/php
|
||
<?php
|
||
|
||
/*
|
||
This program is free software: you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation, either version 3 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
*/
|
||
|
||
// Line-feed and carriage-return + line-feed sequences.
const N = "\n";
const RN = "\r\n";

// File name of this script (shown in the help text).
define('SNAME', basename(__FILE__));

// Path of the shared library directory, relative to this script's directory.
const LIBDP = '/../lib';
|
||
|
||
require __DIR__.LIBDP.'/parsetime.php';
|
||
require __DIR__.LIBDP.'/gurl.php';
|
||
require __DIR__.LIBDP.'/gethttpcode.php';
|
||
require __DIR__.LIBDP.'/tables.php';
|
||
require __DIR__.LIBDP.'/mb_ucfirst.php';
|
||
require __DIR__.LIBDP.'/mb_lcfirst.php';
|
||
require __DIR__.LIBDP.'/ghs.php';
|
||
require __DIR__.LIBDP.'/ght.php';
|
||
require __DIR__.LIBDP.'/fnum.php';
|
||
require __DIR__.LIBDP.'/supplangs.php';
|
||
require __DIR__.LIBDP.'/gettlds.php';
|
||
require __DIR__.LIBDP.'/mastodon_postlen.php';
|
||
require __DIR__.LIBDP.'/vendor/autoload.php';
|
||
use LanguageDetection\Language;
|
||
|
||
use function mysqli_real_escape_string as myesc;
|
||
|
||
// True when PHP_OS starts with "WIN" (case-insensitively), false otherwise.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');
|
||
|
||
// Ticks are enabled so that signal handlers registered with pcntl_signal() get a chance to run.
declare(ticks=1);

// Signal handling is only set up when the pcntl extension is available.
if (function_exists('pcntl_signal')) {
    /**
     * Handles a termination request: prints a newline, then leaves the script
     * through mexit() with a message naming the received signal and exit code 0.
     *
     * @param int $signal number of the received signal
     */
    function signalHandler($signal) {
        echo N;
        mexit('received signal «'.$signal.'», shutting down.'.N, 0);
    }

    // SIGTERM: termination ('kill' was called)
    // SIGHUP:  terminal log-out
    // SIGINT:  interrupted (Ctrl-C is pressed)
    foreach ([SIGTERM, SIGHUP, SIGINT] as $sigtohandle) {
        pcntl_signal($sigtohandle, 'signalHandler');
    }
}
|
||
|
||
// Run-time options with their defaults; they are later overridden by the
// «getinstinfo.ini» file and by the command line arguments.
$opts = [
    // hostname of the instance to work on (first non-option argument)
    'hostname'     => null,
    // timeout for every connection attempt (see «-t» in the help text)
    'conntimeout'  => 10,
    // timeout for every download (see «-T» in the help text)
    'functimeout'  => 20,
    // number of toots to check with the automatic language detection function
    'ldtoots'      => 40,
    // when true, nothing gets written to the database
    'dryrun'       => false,
    // when true, users' info is fetched from the instance's users directory
    'fetchusers'   => false,
    // attempts at fetching a chunk of the users directory before giving up
    'udiratts'     => 5,
    // wait time after each failed attempt on the users directory
    'udirfailst'   => 90,
    // minimum message importance level, as an index into $msglevs
    'minmsgimplev' => 1,
    // announcement bot settings, read from «getinstinfo.ini»
    'bothost'      => null,
    'bottoken'     => null,
    'botmaxchars'  => null,
    // set to true once bothost, bottoken and botmaxchars are all defined
    '_sendtoot'    => false,
    // proxy specification, read from «getinstinfo.ini»
    'proxy'        => null,
];

// Message importance levels, ordered from the least to the most important;
// «None» is the special value that disables messages.
$msglevs = [
    'Debug',
    'Info',
    'Warning',
    'Error',
    'None',
];
|
||
|
||
$help='SYNOPSIS
|
||
|
||
'.SNAME.' <hostname> [options]
|
||
|
||
DESCRIPTION
|
||
|
||
This script tries to fetch info about the fediverse instance at the given
|
||
hostname and insert or update them in mastostart’s database.
|
||
|
||
OPTIONS
|
||
|
||
-l, --ldtoots <number>
|
||
This option defines the number of toots the script will try to fetch from
|
||
the local public timelines, to try and guess the most used languages of each
|
||
instance. Its minimum value is 10, its maximum value is 40.
|
||
DEFAULT: '.$opts['ldtoots'].'
|
||
-f, --fetchusers
|
||
If this option is set, the script will try to fetch users’ info from the
|
||
considered instance’s users directory, and store them in the database.
|
||
-r, --udiratts <number>
|
||
This option defines how many attempts the script will do at fetching a chunk
|
||
of users’ info from the profile directory, before giving up.
|
||
DEFAULT: '.$opts['udiratts'].'
|
||
-s, --udirfailst <time>
|
||
This option defines how long the script will wait after each failed attempt
|
||
at fetching a chunk of users’ info from the profile directory (see above)
|
||
before retrying.
|
||
DEFAULT: '.ght($opts['udirfailst'],null,0).'
|
||
-t, --conntimeout <time>
|
||
Sets the timeout for every connection attempt. See section «TIME
|
||
SPECIFICATION» below to see how to specify time.
|
||
DEFAULT: '.ght($opts['conntimeout'],null,0).'
|
||
-T, --functimeout <time>
|
||
Sets the timeout for every download. See section «TIME SPECIFICATION» below
|
||
to see how to specify time.
|
||
DEFAULT: '.ght($opts['functimeout'],null,0).'
|
||
-d, --dryrun
|
||
If this option is set, the script won’t write anything in the database.
|
||
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
|
||
Defines the minimum “importance level” of messages to be written to the
|
||
text user interface. There are 4 “importance levels”, in this order of
|
||
importance: «debug», «info», «warning», «error». Setting this option to any
|
||
of these values will make the script write to the text user interface all
|
||
the messages with the specified or a greater level; setting it to the
|
||
special value «none» will completely disable messages.
|
||
DEFAULT: '.lcfirst($msglevs[$opts['minmsgimplev']]).'
|
||
-h, --help
|
||
If this option is set, the script will show this help text and exit.
|
||
|
||
TIME SPECIFICATION
|
||
|
||
An example is better than ~5148 words :-)
|
||
To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
|
||
7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
|
||
also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
|
||
|
||
NEW INSTANCE ANNOUNCEMENT TOOT
|
||
|
||
This script can send an announcement toot when the instance it checks is new.
|
||
It will try to do it if it finds a readable «getinstinfo.ini» file in the
|
||
same directory it lives in, with a «bothost» parameter defining the instance
|
||
to use to send the toot, a «bottoken» parameter defining the token to be used
|
||
to post, and a «botmaxchars» parameter defining the maximum number of
|
||
characters allowed for toots on the defined instance (must be >= 10).
|
||
|
||
PROXY SUPPORT
|
||
|
||
This script supports socks5 proxy to connect to an instance. It will try to
|
||
use a proxy if it finds a readable «getinstinfo.ini» file in the same
|
||
directory it lives in, with a «proxy» parameter with this syntax:
|
||
[user:pass@]<host>[:port].
|
||
|
||
LICENSE
|
||
|
||
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
|
||
This is free software, and you are welcome to redistribute it under certain
|
||
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
|
||
|
||
// Show the help text and quit as soon as «-h» or «--help» is found anywhere
// among the command line arguments, before any other processing is done.
if (in_array('-h', $argv, true) || in_array('--help', $argv, true)) {
    echo $help;
    exit(0);
}
|
||
|
||
// Optional settings file living next to this script: it can define the
// announcement bot parameters and the proxy.
$inifp = __DIR__.'/getinstinfo.ini';
$iniarr = @parse_ini_file($inifp);
if (!is_array($iniarr)) {
    eecho(1, "Could not open «{$inifp}» (it does not exist or is not readable).\n");
} else {
    if (isset($iniarr['bothost']) && !isempty($iniarr['bothost'])) {
        $opts['bothost'] = $iniarr['bothost'];
    }
    if (isset($iniarr['bottoken']) && !isempty($iniarr['bottoken'])) {
        $opts['bottoken'] = $iniarr['bottoken'];
    }
    if (isset($iniarr['botmaxchars'])) {
        // only digits are accepted, and the resulting number must be at least 10
        $badmaxchars = (preg_match('/^[0-9]+$/', $iniarr['botmaxchars'], $matches) != 1 || $matches[0] + 0 < 10);
        if ($badmaxchars) {
            mexit('config file «'.$inifp.'»: value for «botmaxchars» must be an integer >= 10.'.N, 1);
        }
        $opts['botmaxchars'] = $iniarr['botmaxchars'] + 0;
    }
    // the announcement toot can be sent only when all three bot parameters are defined
    if (!is_null($opts['bothost']) && !is_null($opts['bottoken']) && !is_null($opts['botmaxchars'])) {
        $opts['_sendtoot'] = true;
    }
    if (isset($iniarr['proxy'])) {
        $opts['proxy'] = $iniarr['proxy'];
    }
}

// Mandatory configuration file: the script cannot go on without it.
$inifp = __DIR__.'/../conf/mustard.ini';
$iniarr = @parse_ini_file($inifp);
if (!$iniarr) {
    mexit('could not open config file «'.$inifp.'».'.N, 1);
}
|
||
|
||
// Parse the command line: every recognised option updates $opts, and the first
// argument that is not an option is taken as the hostname to work on. Anything
// that cannot be interpreted stops the script through mexit().

// options taking a time specification, mapped to the $opts key they set
$timeopts = [
    '-s' => 'udirfailst',  '--udirfailst'  => 'udirfailst',
    '-t' => 'conntimeout', '--conntimeout' => 'conntimeout',
    '-T' => 'functimeout', '--functimeout' => 'functimeout',
];

for ($i = 1; $i < $argc; $i++) {
    if ($argv[$i] === '-f' || $argv[$i] === '--fetchusers') {
        $opts['fetchusers'] = true;
    } elseif ($argv[$i] === '-r' || $argv[$i] === '--udiratts') {
        // the value must be made of digits only and be at least 1
        if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 < 1) {
            mexit('option «'.$argv[$i].'» requires a number >= 1 as an argument (use «-h» to read help).'.N, 1);
        }
        $i++;
        $opts['udiratts'] = $argv[$i] + 0;
    } elseif (isset($timeopts[$argv[$i]])) {
        // parsetime() signals an invalid specification by returning false
        $timeval = ($i + 1 < $argc) ? parsetime($argv[$i + 1]) : false;
        if ($timeval === false) {
            mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N, 1);
        }
        $opts[$timeopts[$argv[$i]]] = $timeval;
        $i++;
    } elseif ($argv[$i] === '-l' || $argv[$i] === '--ldtoots') {
        if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 > 40 || $argv[$i + 1] + 0 < 10) {
            mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N, 1);
        }
        $i++;
        $opts['ldtoots'] = $argv[$i] + 0;
    } elseif ($argv[$i] === '-d' || $argv[$i] === '--dryrun') {
        $opts['dryrun'] = true;
    } elseif ($argv[$i] === '-m' || $argv[$i] === '--minmsgimplev') {
        // the level is matched case-insensitively and stored as its index in $msglevs
        $msglev = ($i + 1 < $argc) ? array_search(ucfirst(strtolower($argv[$i + 1])), $msglevs, true) : false;
        if ($msglev === false) {
            mexit('option «'.$argv[$i].'» requires a “message importance level” value as an argument (use «-h» to read help).'.N, 1);
        }
        $opts['minmsgimplev'] = $msglev;
        $i++;
    } elseif (is_null($opts['hostname']) && $argv[$i] !== '' && $argv[$i][0] !== '-') {
        // an empty argument is rejected below instead of being read at offset 0
        $opts['hostname'] = $argv[$i];
    } else {
        mexit('don’t know how to interpret «'.$argv[$i].'», please read the help text using «-h» or «--help».'.N, 1);
    }
}

if (is_null($opts['hostname'])) {
    mexit('you didn’t specify an hostname (you can read the help text using «-h» or «--help»).'.N, 1);
}
|
||
|
||
// Connect to the MySQL server using the parameters read from the configuration file.
try {
    $link = @mysqli_connect(
        $iniarr['db_host'],
        $iniarr['db_admin_name'],
        $iniarr['db_admin_password'],
        $iniarr['db_name'],
        $iniarr['db_port'],
        $iniarr['db_socket']
    );
} catch (Exception $error) {
    mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N, 1, true);
}
// for php versions < 8
if ($link === false) {
    mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N, 1, true);
}

// Switch the connection to the «utf8mb4» charset.
try {
    $res = mysqli_set_charset($link, 'utf8mb4');
} catch (Exception $error) {
    mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N, 1, true);
}
// for php versions < 8
if ($res === false) {
    mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N, 1, true);
}
|
||
|
||
// Collect the names of the platforms flagged with «Consider=1», quote them for
// use inside a «/»-delimited regular expression, and join them as alternatives.
$mastodons = [];
$res = myq($link, 'SELECT Name FROM Platforms WHERE Consider=1', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $mastodons[] = preg_quote($row['Name'], '/');
}
if (count($mastodons) < 1) {
    mexit('in table «Platforms», there is no platform to be considered!'.N, 1);
}
$mastodons = implode('|', $mastodons);
|
||
|
||
$tables = tables($link);

// Columns of the «Instances» table that hold integer values.
$instints = [
    'ID', 'FirstSeen', 'IsMastodon', 'Priority', 'Visible', 'Noxious', 'NoxLastModTS',
    'LocalityID', 'OurLangsLock', 'UserCount', 'StatusCount', 'DomainCount',
    'ActiveUsersMonth', 'ActiveUsersHalfYear', 'RegOpen', 'RegReqApproval', 'MaxTootChars',
    'AdmCreatedAt', 'PublicBlocksList', 'TotChecks', 'OkChecks', 'WasLastCheckOk',
    'LastOkCheckTS', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos',
];

// Start the new instance data from the default value of every column of «Instances».
$idata = [];
// __LINE__ is passed as the third argument, like in every other myq() call of this script
$res = myq($link, 'SHOW COLUMNS FROM Instances', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $idata[$row['Field']] = $row['Default'];
}
// since we later need to determine if a value is an integer, and mysql returns integers as strings...
setint($instints, $idata);
$idata['URI'] = $opts['hostname'];
|
||
|
||
// Becomes true as soon as the instance answers with good instance info.
$instanswered = false;

// Reference timestamp for this run.
$now = time();

/*
 * Nodeinfo ('https://'.$opts['hostname'].'/nodeinfo/2.0.json') was added in v3.0.0
 * Trends ('https://'.$opts['hostname'].'/api/v1/trends') was added in v3.0.0 and became /api/v1/trends/tags with v3.5.0
 * Activity ('https://'.$opts['hostname'].'/api/v1/instance/activity') was added in v2.1.2
 */

waituntilonline();

eecho(1, '[[[ Working on «'.$opts['hostname'].'» ]]]'.N);

// Give up immediately if the hostname would not fit into the «URI» column.
if (willtrunc($opts['hostname'], 'Instances', 'URI')) {
    mexit('«'.$opts['hostname'].'»: ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N, 2);
}

// Look for a record of this instance already stored in the database:
// $oidata holds it when there is exactly one, or null when there is none.
eecho(0, '«'.$opts['hostname'].'»: trying to fetch its info from the database...'.N);
$res = myq($link, 'SELECT * FROM Instances WHERE URI=\''.myesc($link, $opts['hostname']).'\'', __LINE__);
$count = mysqli_num_rows($res);
if ($count < 1) {
    eecho(1, '«'.$opts['hostname'].'»: found no record with this URI in Instances table.'.N);
    $oidata = null;
} elseif ($count == 1) {
    eecho(1, '«'.$opts['hostname'].'»: found 1 record with this URI in Instances table.'.N);
    $oidata = mysqli_fetch_assoc($res);
    setint($instints, $oidata);
} else {
    // more than one record for the same URI: notify it and stop
    $msg = '«'.$opts['hostname'].'»: there are '.$count.' records with this URI in Instances table.';
    notify($msg, 3, false);
    mexit($msg.N, 3);
}
|
||
// Nodeinfo discovery: fetch the «/.well-known/nodeinfo» document (on https
// first, then on http), choose one of the links it advertises, download the
// nodeinfo data it points to, and copy software name, version, usage counters
// and registrations status into $idata.
$hostq = '«'.$opts['hostname'].'»';
$jsonhdr = ['Accept: application/json'];

eecho(0, $hostq.': trying to fetch nodeinfo specs on https...'.N);
$buf = @gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
if ($buf['cont'] === false) {
    eecho(0, $hostq.': trying to fetch nodeinfo specs on http...'.N);
    $buf = @gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
}

if ($buf['cont'] === false) {
    eecho(2, $hostq.': could not fetch nodeinfo specs: '.$buf['emsg'].'.'.N);
} else {
    $buf = @json_decode($buf['cont'], true);
    if (!is_array($buf)) {
        eecho(2, $hostq.': nodeinfo specs where not good JSON.'.N);
    } elseif (!isset($buf['links']) || !is_array($buf['links']) || count($buf['links']) < 1) {
        eecho(2, $hostq.': nodeinfo specs had unexpected format.'.N);
    } else {
        $ok = true;
        $nirefs = [];
        foreach ($buf['links'] as $key => $niref) {
            // «rel» becomes an array key and «href» an URL, so both must be strings
            if (isset($niref['rel'], $niref['href']) && is_string($niref['rel']) && is_string($niref['href'])) {
                $nirefs[$niref['rel']] = $niref['href'];
            } else {
                eecho(2, $hostq.': nodeinfo specs “links” entitity '.$key.' has unexpected format.'.N);
                $ok = false;
            }
        }
        if ($ok) {
            // use the link whose «rel» sorts last (presumably the most recent schema version)
            krsort($nirefs);
            $niref = array_shift($nirefs);
            eecho(1, $hostq.': got and successfully parsed nodeinfo specs :-)'.N);
            eecho(0, $hostq.': trying to fetch nodeinfo data...'.N);
            $buf = @gurl($niref, $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
            if ($buf['cont'] === false) {
                eecho(2, $hostq.': could not fetch nodeinfo data: '.$buf['emsg'].'.'.N);
            } else {
                $buf = @json_decode($buf['cont'], true);
                if (!is_array($buf)) {
                    eecho(2, $hostq.': nodeinfo data was not good JSON.'.N);
                } else {
                    eecho(1, $hostq.': got nodeinfo data :-)'.N);
                    if (isset($buf['software']['name']) && is_string($buf['software']['name']) && !isempty($buf['software']['name'])) {
                        $idata['Software'] = trim($buf['software']['name']);
                        // the alternatives are grouped so that «^» anchors each of them, not just the first one
                        $idata['IsMastodon'] = (preg_match('/^(?:'.$mastodons.')/', $idata['Software']) === 1);
                        $res = myq($link, 'SELECT Name FROM Platforms WHERE Name=\''.myesc($link, $idata['Software']).'\'', __LINE__);
                        if (mysqli_num_rows($res) < 1) {
                            // unknown platform: store it (unless this is a dry run) and notify about it
                            if (!$opts['dryrun']) {
                                myq($link, 'INSERT INTO Platforms (Name) VALUES (\''.myesc($link, truncs($idata['Software'], 'Platforms', 'Name', $hostq)).'\')', __LINE__);
                            }
                            notify($hostq.' runs on «'.$idata['Software'].'», which was not present in the «Platforms» table, so it was added there. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances by setting the «Consider» field of its record to «1».', 2);
                        }
                    }
                    if (isset($buf['software']['version']) && is_string($buf['software']['version']) && !isempty($buf['software']['version'])) {
                        $idata['Version'] = trim($buf['software']['version']);
                    }
                    if (isset($buf['usage']['users']['total']) && is_int($buf['usage']['users']['total'])) {
                        $idata['UserCount'] = $buf['usage']['users']['total'];
                    }
                    if (isset($buf['usage']['users']['activeMonth']) && is_int($buf['usage']['users']['activeMonth'])) {
                        $idata['ActiveUsersMonth'] = $buf['usage']['users']['activeMonth'];
                    }
                    if (isset($buf['usage']['users']['activeHalfyear']) && is_int($buf['usage']['users']['activeHalfyear'])) {
                        $idata['ActiveUsersHalfYear'] = $buf['usage']['users']['activeHalfyear'];
                    }
                    if (isset($buf['usage']['localPosts']) && is_int($buf['usage']['localPosts'])) {
                        $idata['StatusCount'] = $buf['usage']['localPosts'];
                    }
                    if (isset($buf['openRegistrations']) && is_bool($buf['openRegistrations'])) {
                        $idata['RegOpen'] = b2i($buf['openRegistrations']);
                    }
                }
            }
        }
    }
}
|
||
// Instance info: Mastodon instances at version 4.0.0 or later are queried
// through API v2 (plus extended description and domain blocks from API v1);
// everything else is queried through API v1.
$hostq = '«'.$opts['hostname'].'»';
$jsonhdr = ['Accept: application/json'];

// Versions are compared on their leading numeric part (an optional «v» prefix
// and any suffix such as «rc1» or «+glitch» are ignored) with version_compare(),
// because a plain string comparison would rank «10.0.0» below «4.0.0».
$vernum = null;
if (isset($idata['Version']) && is_string($idata['Version']) && preg_match('/^v?(\d+(?:\.\d+)*)/i', $idata['Version'], $vmatch) === 1) {
    $vernum = $vmatch[1];
}

if ($idata['IsMastodon'] && !is_null($vernum) && version_compare($vernum, '4.0.0', '>=')) {
    eecho(0, $hostq.': trying to fetch instance info from API v2...'.N);
    $buf = @gurl('https://'.$opts['hostname'].'/api/v2/instance', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, $hostq.': could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
    } else {
        ckratelimit($buf['headers'], $hostq);
        $buf = @json_decode($buf['cont'], true);
        if (!is_array($buf)) {
            eecho(2, $hostq.': instance info fetched from API v2 were not good JSON.'.N);
        } elseif (!make(['domain', 'title', 'version', 'source_url', 'description', 'usage', 'thumbnail', 'languages', 'configuration', 'registrations', 'contact', 'rules'], $buf)) {
            eecho(2, $hostq.': instance info fetched from API v2 had unexpected format.'.N);
        } else {
            eecho(1, $hostq.': got good instance info from API v2 :-)'.N);
            $instanswered = true;
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title'])) {
                $idata['Title'] = trim($buf['title']);
            }
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description'])) {
                $idata['ShortDesc'] = trim($buf['description']);
            }
            if (isset($buf['thumbnail']['url']) && is_string($buf['thumbnail']['url']) && !isempty($buf['thumbnail']['url'])) {
                $idata['Thumb'] = trim($buf['thumbnail']['url']);
                // the thumbnail is kept only if its first bytes can actually be read
                if (!@file_get_contents($idata['Thumb'], false, null, 0, 512)) {
                    $idata['Thumb'] = 'unavailable';
                }
            }
            if (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters'])) {
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            }
            if (isset($buf['registrations']['approval_required']) && is_bool($buf['registrations']['approval_required'])) {
                $idata['RegReqApproval'] = b2i($buf['registrations']['approval_required']);
            }
            if (isset($buf['contact']['email']) && is_string($buf['contact']['email'])) {
                $idata['Email'] = trim($buf['contact']['email']);
            }

            // contact account: its details are stored only when «noindex» is missing, null or false
            $adm = $buf['contact']['account'] ?? null;
            $noindex = $adm['noindex'] ?? null;
            if ($noindex === null || $noindex === false) {
                if (isset($adm['acct']) && is_string($adm['acct']) && !isempty($adm['acct'])) {
                    $idata['AdmAccount'] = trim($adm['acct']);
                }
                if (isset($adm['display_name']) && is_string($adm['display_name']) && !isempty($adm['display_name'])) {
                    $idata['AdmDisplayName'] = trim($adm['display_name']);
                }
                if (isset($adm['created_at']) && is_string($adm['created_at']) && ($ts = strtotime($adm['created_at'])) !== false) {
                    $idata['AdmCreatedAt'] = $ts;
                }
                if (isset($adm['note']) && is_string($adm['note']) && !isempty($adm['note'])) {
                    $idata['AdmNote'] = trim($adm['note']);
                }
                if (isset($adm['url']) && is_string($adm['url']) && !isempty($adm['url'])) {
                    $idata['AdmURL'] = trim($adm['url']);
                }
                if (isset($adm['avatar']) && is_string($adm['avatar']) && !isempty($adm['avatar'])) {
                    $idata['AdmAvatar'] = trim($adm['avatar']);
                    if (!@file_get_contents($idata['AdmAvatar'], false, null, 0, 512)) {
                        $idata['AdmAvatar'] = 'unavailable';
                    }
                }
                if (isset($adm['header']) && is_string($adm['header']) && !isempty($adm['header'])) {
                    $idata['AdmHeader'] = trim($adm['header']);
                }
            } else {
                if ($noindex === true) {
                    // here we rely on the fact that nobody could set "acct" to "OPTED OUT" since it doesn't allow spaces
                    $idata['AdmAccount'] = 'OPTED OUT';
                }
                $idata['AdmAvatar'] = 'unavailable';
            }

            // domain_count is gone from api v2, and we won't resort to api v1 just to get it when ver. >= 4.0.0
            if (isset($buf['languages']) && is_array($buf['languages'])) {
                $idata['languages'] = $buf['languages'];
            }
            if (isset($buf['rules']) && is_array($buf['rules'])) {
                foreach ($buf['rules'] as $rule) {
                    if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text'])) {
                        $idata['rules'][$rule['id']] = $rule['text'];
                    }
                }
            }
        }
    }

    eecho(0, $hostq.': trying to fetch instance extended description from API v1...'.N);
    $buf = @gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, $hostq.': could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
    } else {
        ckratelimit($buf['headers'], $hostq);
        $buf = @json_decode($buf['cont'], true);
        if (!is_array($buf)) {
            eecho(2, $hostq.': instance extended description fetched from API v1 was not good JSON.'.N);
        } else {
            eecho(1, $hostq.': got instance extended description from API v1 :-)'.N);
            // isset() also covers a response that has no «content» key at all
            if (isset($buf['content']) && is_string($buf['content']) && !isempty($buf['content'])) {
                $idata['LongDesc'] = trim($buf['content']);
            }
        }
    }

    eecho(0, $hostq.': trying to fetch instance domain blocks from API v1...'.N);
    $buf = @gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, $hostq.': could not fetch instance domain blocks from API v1: '.$buf['emsg'].'.'.N);
        $idata['Threads'] = null;
    } else {
        ckratelimit($buf['headers'], $hostq);
        $buf = @json_decode($buf['cont'], true);
        if (!is_array($buf)) {
            eecho(2, $hostq.': instance domain blocks fetched from API v1 were not good JSON.'.N);
            $idata['Threads'] = null;
        } else {
            eecho(1, $hostq.': got instance domain blocks from API v1 :-)'.N);
            $idata['blocks'] = [];
            $idata['PublicBlocksList'] = 1;
            // threads.net is considered accessible unless a block for it is found below
            $idata['Threads'] = 'accessible';
            foreach ($buf as $key => $block) {
                $goodblock = make(['domain', 'severity', 'comment'], $block)
                    && is_string($block['domain']) && !isempty($block['domain'])
                    && is_string($block['severity']) && in_array($block['severity'], ['silence', 'suspend'], true)
                    && (is_null($block['comment']) || is_string($block['comment']));
                if (!$goodblock) {
                    eecho(2, $hostq.': domain block '.$key.' has unexpected format.'.N);
                    continue;
                }
                // empty or missing comments are stored as null
                $comment = is_string($block['comment']) ? trim($block['comment']) : null;
                if ($comment === '') {
                    $comment = null;
                }
                $idata['blocks'][] = ['dom' => $block['domain'], 'sev' => $block['severity'], 'comm' => $comment];
                // threads.net itself or any of its subdomains (dots are escaped to match literally)
                if (preg_match('#^(?:.*\.)?threads\.net$#i', $block['domain']) === 1) {
                    // severity can only be «suspend» or «silence» here
                    $idata['Threads'] = ($block['severity'] === 'suspend') ? 'suspended' : 'limited';
                }
            }
        }
    }
} else {
    // we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be
    // a mastodon instance older than 3.0.0, when nodeinfo was introduced
    eecho(0, $hostq.': trying to fetch instance info from API v1...'.N);
    $buf = @gurl('https://'.$opts['hostname'].'/api/v1/instance', $opts['conntimeout'], $opts['functimeout'], $jsonhdr, $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, $hostq.': could not fetch instance info from API v1: '.$buf['emsg'].'.'.N);
    } else {
        ckratelimit($buf['headers'], $hostq);
        $buf = @json_decode($buf['cont'], true);
        if (!is_array($buf)) {
            eecho(2, $hostq.': instance info fetched from API v1 were not good JSON.'.N);
        } elseif (!make(['uri', 'title', 'short_description', 'description', 'email', 'version', 'urls', 'stats', 'thumbnail', 'languages', 'registrations', 'approval_required', 'contact_account'], $buf)) {
            eecho(2, $hostq.': instance info fetched from API v1 had unexpected format.'.N);
        } else {
            eecho(1, $hostq.': got good instance info from API v1 :-)'.N);
            $instanswered = true;
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title'])) {
                $idata['Title'] = trim($buf['title']);
            }
            // the short description comes from «short_description», the long one from «description»
            if (isset($buf['short_description']) && is_string($buf['short_description']) && !isempty($buf['short_description'])) {
                $idata['ShortDesc'] = trim($buf['short_description']);
            }
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description'])) {
                $idata['LongDesc'] = trim($buf['description']);
            }
            if (isset($buf['email']) && is_string($buf['email'])) {
                $idata['Email'] = trim($buf['email']);
            }
            // if nodeinfo did not respond, it could be mastodon < 3.0.0, and we would not have $idata['Version'] yet, so...
            if (!isset($idata['Version']) && isset($buf['version']) && is_string($buf['version']) && !isempty($buf['version'])) {
                $idata['Version'] = trim($buf['version']);
            }
            // if nodeinfo responded we should already have these 2 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['stats']['user_count']) && is_int($buf['stats']['user_count'])) {
                $idata['UserCount'] = $buf['stats']['user_count'];
            }
            if (isset($buf['stats']['status_count']) && is_int($buf['stats']['status_count'])) {
                $idata['StatusCount'] = $buf['stats']['status_count'];
            }
            if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count'])) {
                $idata['DomainCount'] = $buf['stats']['domain_count'];
            }
            if (isset($buf['thumbnail']) && is_string($buf['thumbnail']) && !isempty($buf['thumbnail'])) {
                $idata['Thumb'] = trim($buf['thumbnail']);
                // the thumbnail is kept only if its first bytes can actually be read
                if (!@file_get_contents($idata['Thumb'], false, null, 0, 512)) {
                    $idata['Thumb'] = 'unavailable';
                }
            }
            if (isset($buf['max_toot_chars']) && is_int($buf['max_toot_chars'])) {
                $idata['MaxTootChars'] = $buf['max_toot_chars'];
            } elseif (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters'])) {
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            }
            // if nodeinfo responded we should already have this 1 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['registrations']) && is_bool($buf['registrations'])) {
                $idata['RegOpen'] = b2i($buf['registrations']);
            }
            if (isset($buf['approval_required']) && is_bool($buf['approval_required'])) {
                $idata['RegReqApproval'] = b2i($buf['approval_required']);
            }

            // contact account
            $adm = $buf['contact_account'] ?? null;
            if (isset($adm['acct']) && is_string($adm['acct']) && !isempty($adm['acct'])) {
                $idata['AdmAccount'] = trim($adm['acct']);
            }
            if (isset($adm['display_name']) && is_string($adm['display_name']) && !isempty($adm['display_name'])) {
                $idata['AdmDisplayName'] = trim($adm['display_name']);
            }
            if (isset($adm['created_at']) && is_string($adm['created_at']) && ($ts = strtotime($adm['created_at'])) !== false) {
                $idata['AdmCreatedAt'] = $ts;
            }
            if (isset($adm['note']) && is_string($adm['note']) && !isempty($adm['note'])) {
                $idata['AdmNote'] = trim($adm['note']);
            }
            if (isset($adm['url']) && is_string($adm['url']) && !isempty($adm['url'])) {
                $idata['AdmURL'] = trim($adm['url']);
            }
            if (isset($adm['avatar']) && is_string($adm['avatar']) && !isempty($adm['avatar'])) {
                $idata['AdmAvatar'] = trim($adm['avatar']);
                if (!@file_get_contents($idata['AdmAvatar'], false, null, 0, 512)) {
                    $idata['AdmAvatar'] = 'unavailable';
                }
            }
            if (isset($adm['header']) && is_string($adm['header']) && !isempty($adm['header'])) {
                $idata['AdmHeader'] = trim($adm['header']);
            }

            if (isset($buf['languages']) && is_array($buf['languages'])) {
                $idata['languages'] = $buf['languages'];
            }
            if (isset($buf['rules']) && is_array($buf['rules'])) {
                foreach ($buf['rules'] as $rule) {
                    if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text'])) {
                        $idata['rules'][$rule['id']] = $rule['text'];
                    }
                }
            }

            // some falsing: pleroma and pixelfed answer to this endpoint too, but they are not mastodon
            if (isset($buf['pleroma'])) {
                $idata['IsMastodon'] = false;
            }
            if (isset($buf['version']) && is_string($buf['version']) && preg_match('#(pleroma|pixelfed)#i', $buf['version']) === 1) {
                $idata['IsMastodon'] = false;
            }
        }
    }
}
|
||
|
||
// Instance activity: fetched only from Mastodon instances at version 2.1.2 or
// later. The version is compared on its leading numeric part with
// version_compare(), since a plain string comparison would rank «10.0.0» below «2.1.2».
$actvernum = null;
if (isset($idata['Version']) && is_string($idata['Version']) && preg_match('/^v?(\d+(?:\.\d+)*)/i', $idata['Version'], $actvmatch) === 1) {
    $actvernum = $actvmatch[1];
}

if ($idata['IsMastodon'] && !is_null($actvernum) && version_compare($actvernum, '2.1.2', '>=')) {
    eecho(0, '«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
    $buf = @gurl('https://'.$opts['hostname'].'/api/v1/instance/activity', $opts['conntimeout'], $opts['functimeout'], ['Accept: application/json'], $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, '«'.$opts['hostname'].'»: could not fetch instance activity info from API v1: '.$buf['emsg'].'.'.N);
    } else {
        ckratelimit($buf['headers'], "«{$opts['hostname']}»");
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«'.$opts['hostname'].'»: got instance activity info from API v1 :-)'.N);
            $idata['activity'] = $buf;
        } else {
            // $buf is no longer the gurl() result here, so there is no «emsg» to report
            eecho(2, '«'.$opts['hostname'].'»: instance activity info from API v1 were not good JSON.'.N);
        }
    }
}
|
||
|
||
// Tags trends: fetched only from Mastodon instances at version 3.0.0 or later;
// from version 3.5.0 the endpoint is «/api/v1/trends/tags». The version is
// compared on its leading numeric part with version_compare(), since a plain
// string comparison would rank «10.0.0» below «3.0.0».
$trvernum = null;
if (isset($idata['Version']) && is_string($idata['Version']) && preg_match('/^v?(\d+(?:\.\d+)*)/i', $idata['Version'], $trvmatch) === 1) {
    $trvernum = $trvmatch[1];
}

if ($idata['IsMastodon'] && !is_null($trvernum) && version_compare($trvernum, '3.0.0', '>=')) {
    eecho(0, '«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
    $url = 'https://'.$opts['hostname'].'/api/v1/trends';
    if (version_compare($trvernum, '3.5.0', '>=')) {
        $url .= '/tags';
    }
    $buf = @gurl($url, $opts['conntimeout'], $opts['functimeout'], ['Accept: application/json'], $opts['proxy']);
    if ($buf['cont'] === false) {
        eecho(2, '«'.$opts['hostname'].'»: could not fetch instance tags trends from API v1: '.$buf['emsg'].'.'.N);
    } else {
        ckratelimit($buf['headers'], "«{$opts['hostname']}»");
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«'.$opts['hostname'].'»: got instance tags trends info from API v1 :-)'.N);
            $idata['trends'] = $buf;
        } else {
            // $buf is no longer the gurl() result here, so there is no «emsg» to report
            eecho(2, '«'.$opts['hostname'].'»: instance tags trends from API v1 were not good JSON.'.N);
        }
    }
}
|
||
|
||
// finished fetching: from here on the collected data is only prepared and stored

if (!is_null($idata['IsMastodon'])) {
	$idata['IsMastodon'] = b2i($idata['IsMastodon']);
}
$idata['WasLastCheckOk'] = ($instanswered) ? 1 : 0;

if (is_null($oidata)) {
	// no stored record for this instance: prepare an INSERT
	$query = 'INSERT INTO Instances SET ';
	$idata['InsertTS'] = $now;
	$idata['TotChecks'] = 1;
	if ($instanswered) {
		$idata['FirstSeen'] = $now;
		$idata['LastOkCheckTS'] = $now;
		$idata['OkChecks'] = 1;
	} else {
		$idata['Thumb'] = 'unavailable';
		$idata['AdmAvatar'] = 'unavailable';
		$idata['OkChecks'] = 0;
	}
} else {
	// a stored record exists: prepare an UPDATE
	$query = 'UPDATE Instances SET ';
	if ($instanswered && is_null($oidata['FirstSeen'])) {
		$idata['FirstSeen'] = $now;
	} else {
		$idata['FirstSeen'] = $oidata['FirstSeen'];
	}
	$idata['LastOkCheckTS'] = ($instanswered) ? $now : $oidata['LastOkCheckTS'];
	$idata['TotChecks'] = $oidata['TotChecks'] + 1;
	$idata['OkChecks'] = $oidata['OkChecks'];
	if ($instanswered) {
		$idata['OkChecks']++;
	}
	// these columns are always copied unchanged from the stored record
	foreach (['Priority', 'Visible', 'Noxious', 'NoxReason', 'NoxLastModTS', 'OurDesc', 'OurDescEN', 'LocalityID', 'OurLangsLock', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos'] as $keptcol) {
		$idata[$keptcol] = $oidata[$keptcol];
	}
	if (!$instanswered) {
		// the instance did not answer: keep what was stored, except for the two images, marked as unavailable
		foreach (['IsMastodon', 'Title', 'ShortDesc', 'LongDesc', 'Email', 'Software', 'Version', 'UserCount', 'StatusCount', 'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear'] as $keptcol) {
			$idata[$keptcol] = $oidata[$keptcol];
		}
		$idata['Thumb'] = 'unavailable';
		foreach (['RegOpen', 'RegReqApproval', 'MaxTootChars', 'AdmAccount', 'AdmDisplayName', 'AdmCreatedAt', 'AdmNote', 'AdmURL'] as $keptcol) {
			$idata[$keptcol] = $oidata[$keptcol];
		}
		$idata['AdmAvatar'] = 'unavailable';
		foreach (['AdmHeader', 'PublicBlocksList', 'Threads'] as $keptcol) {
			$idata[$keptcol] = $oidata[$keptcol];
		}
	}
}
|
||
|
||
// Turn $idata into the «col=value, ...» list completing the INSERT/UPDATE statement, then run it.
$set = [];
$hostctx = '«'.$opts['hostname'].'»';

foreach ($idata as $key => $val) {
	if (in_array($key, ['ID', 'languages', 'rules', 'activity', 'trends', 'blocks'])) {
		// these keys are not written by this statement
		continue;
	}
	if (is_null($val)) {
		$set[] = $key.'=NULL';
		continue;
	}
	if (is_int($val)) {
		$set[] = $key.'='.truncn($val, 'Instances', $key, $hostctx);
		continue;
	}
	if (is_string($val)) {
		$set[] = $key.'=\''.myesc($link, truncs($val, 'Instances', $key, $hostctx)).'\'';
		continue;
	}
	// any other type (float, bool, array...) is a programming error
	mexit('$idata[\''.$key.'\'] value has unmanaged type, see code around line '.__LINE__.'.'.N, 3);
}

$query .= implode(', ', $set);

if (!is_null($oidata)) {
	$query .= ' WHERE ID='.$oidata['ID'];
}

eecho(1, 'query: «'.$query.'».'.N);
if (!$opts['dryrun']) {
	if (is_null($oidata) && !$instanswered) {
		// never create a record for an unknown instance that did not even respond
		mexit('«'.$opts['hostname'].'»: not inserting unknown instance because it did not respond; shutting down after '.ght(time()-$now, null, 0).' :-)'.N, 0);
	}
	myq($link, $query, __LINE__);
}
|
||
|
||
// Work out the database ID of the instance: the stored one, or the one just created by the INSERT
// (0 on a dry run, when nothing gets inserted).
if (!is_null($oidata)) {
	$instid = $oidata['ID'];
} else {
	$instid = ($opts['dryrun']) ? 0 : mysqli_insert_id($link);
	notify('«<a href="viewinst.php?id='.$instid.'">'.$opts['hostname'].'</a>» is a NEW instance! :-)', 1);
}

// from here we know for sure $instid

// log the outcome of this check
if (!$opts['dryrun']) {
	myq($link, 'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', '.$idata['WasLastCheckOk'].')', __LINE__);
}
|
||
|
||
// Store the languages declared by the instance (InstLangs) and, unless they are locked,
// the languages we attribute to it (InstOurLangs).
$hasdeclaredlangs = $instanswered
	&& isset($idata['languages'])
	&& is_array($idata['languages'])
	&& count($idata['languages']) > 0;

if ($hasdeclaredlangs) {
	eecho(1, '«'.$opts['hostname'].'»: declared languages: '.implode(', ', $idata['languages']).N);
	if (!$opts['dryrun']) {
		myq($link, 'DELETE FROM InstLangs WHERE InstID='.$instid, __LINE__);
	}
	$langids = getlangsidsarr($idata['languages'], $supplangs, $link, $opts['hostname'], $opts['dryrun'], __LINE__);
	if (!$opts['dryrun']) {
		$pos = 0;
		foreach ($langids as $langid) {
			$pos++;
			myq($link, 'INSERT INTO InstLangs SET InstID='.$instid.', LangID='.$langid.', Pos='.$pos, __LINE__);
		}
	}

	if (!is_null($oidata) && $oidata['OurLangsLock'] == 1) {
		eecho(1, '«'.$opts['hostname'].'»: won’t touch “our languages” because they are locked.'.N);
	} else {
		// we try to detect languages only if first declared language (the only one currently definable by admins)
		// is equal to the default "en", otherwise we assume it's been set to the actual mostly used language on the instance
		if ($idata['languages'][0] != 'en') {
			$idata['ourlanguages'] = $idata['languages'];
			eecho(1, '«'.$opts['hostname'].'»: copied declared languages to detected languages.'.N);
		} else {
			$idata['ourlanguages'] = get_instance_langs($opts['hostname']);
			if (count($idata['ourlanguages']) > 0) {
				eecho(1, '«'.$opts['hostname'].'»: detected languages: '.implode(', ', $idata['ourlanguages']).N);
			} else {
				// detection gave nothing: fall back to the declared languages
				$idata['ourlanguages'] = $idata['languages'];
				eecho(1, '«'.$opts['hostname'].'»: detected languages: NONE; copied declared languages to detected languages.'.N);
			}
		}
		if (!$opts['dryrun']) {
			myq($link, 'DELETE FROM InstOurLangs WHERE InstID='.$instid, __LINE__);
		}
		$langids = getlangsidsarr($idata['ourlanguages'], $supplangs, $link, $opts['hostname'], $opts['dryrun'], __LINE__);
		if (!$opts['dryrun']) {
			$pos = 0;
			foreach ($langids as $langid) {
				$pos++;
				myq($link, 'INSERT INTO InstOurLangs SET InstID='.$instid.', OurLangID='.$langid.', Pos='.$pos, __LINE__);
			}
		}
	}
}
|
||
|
||
// Replace the stored weekly activity rows of this instance with the freshly fetched ones.
if ($instanswered && !$opts['dryrun']) {
	myq($link, 'DELETE FROM InstActivity WHERE InstID='.$instid, __LINE__);
}
if (isset($idata['activity']) && is_array($idata['activity'])) {
	$pos = 0;
	foreach ($idata['activity'] as $buf) {
		// these should all be int, but mastodon represents them as strings:
		// a row is stored only when every field is a string made of digits
		$rowok = true;
		foreach (['week', 'statuses', 'logins', 'registrations'] as $actfield) {
			if (!(isset($buf[$actfield]) && is_string($buf[$actfield]) && preg_match('/^\d+$/', $buf[$actfield]) === 1)) {
				$rowok = false;
				break;
			}
		}
		if (!$rowok) {
			continue;
		}
		$pos++;
		if (!$opts['dryrun']) {
			myq($link, 'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES ('.$instid.', '.$buf['week'].', '.$buf['statuses'].', '.$buf['logins'].', '.$buf['registrations'].', '.$pos.')', __LINE__);
		}
	}
}
|
||
|
||
// Replace the stored trending tags of this instance with the freshly fetched ones,
// ranked by the total number of accounts found in each tag's history.
if ($instanswered && !$opts['dryrun'])
	myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
if (isset($idata['trends']) && is_array($idata['trends'])) {
	$trends=[];
	foreach ($idata['trends'] as $buf) {
		// malformed entries are skipped altogether: appending them would use undefined values
		if (!is_array($buf) || !isset($buf['name'],$buf['url'],$buf['history']) || !is_string($buf['name']) || !is_string($buf['url']) || !is_array($buf['history']))
			continue;
		$trend=0;
		$lastday=null;
		foreach ($buf['history'] as $row) {
			// below, we check for "stringness" because they should be integers, but they are strings
			if (is_array($row)
				&& isset($row['day']) && is_string($row['day']) && preg_match('/^\d+$/',$row['day'])===1
				&& isset($row['uses']) && is_string($row['uses']) && preg_match('/^\d+$/',$row['uses'])===1
				&& isset($row['accounts']) && is_string($row['accounts']) && preg_match('/^\d+$/',$row['accounts'])===1) {
				// the day of the first valid row is kept, cast to int because it ends up in the query
				if (is_null($lastday)) $lastday=(int)$row['day'];
				$trend+=(int)$row['accounts'];
			}
		}
		// without at least one valid history row there is neither a day nor a score to store
		if (is_null($lastday))
			continue;
		$trends[]=[
			'InstID'=>$instid,
			'LastDay'=>$lastday,
			'Name'=>$buf['name'],
			'URL'=>$buf['url'],
			'Pos'=>null,
			'trend'=>$trend
		];
	}
	// highest score first; a comparison sort keeps tags sharing the same score,
	// which a sort keyed on the score itself would silently drop
	usort($trends,function ($a,$b) {
		return $b['trend']<=>$a['trend'];
	});
	$pos=0;
	foreach ($trends as $trend) {
		$pos++;
		$query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.$trend['LastDay'].'\', \''.myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«'.$opts['hostname'].'»')).'\', \''.myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«'.$opts['hostname'].'»')).'\', '.$pos.')';
		if (!$opts['dryrun'])
			myq($link,$query,__LINE__);
	}
}
|
||
|
||
// Replace the stored rules of this instance with the fetched ones, in key order.
if (isset($idata['rules']) && is_array($idata['rules'])) {
	ksort($idata['rules']);
	if (!$opts['dryrun']) {
		myq($link, 'DELETE FROM InstRules WHERE InstID='.$instid, __LINE__);
		foreach ($idata['rules'] as $rule) {
			$ruletext = myesc($link, truncs($rule, 'InstRules', 'Text', '«'.$opts['hostname'].'»'));
			myq($link, 'INSERT INTO InstRules SET InstID='.$instid.', Text=\''.$ruletext.'\'', __LINE__);
		}
	}
}
|
||
|
||
// Replace the stored list of blocked domains of this instance with the fetched one.
// The whole section is skipped on dry runs, so no further dry-run check is needed inside it.
if ($instanswered && !$opts['dryrun'] && isset($idata['blocks'])) {
	$blockctx = '«'.$opts['hostname'].'»';
	myq($link, 'DELETE FROM InstBlocks WHERE InstID='.$instid, __LINE__);
	foreach ($idata['blocks'] as $block) {
		// a missing comment is stored as SQL NULL, not as an empty string
		if (is_null($block['comm'])) {
			$blockcomm = 'NULL';
		} else {
			$blockcomm = "'".myesc($link, truncs($block['comm'], 'InstBlocks', 'Comment', $blockctx))."'";
		}
		$blockdom = myesc($link, truncs($block['dom'], 'InstBlocks', 'Domain', $blockctx));
		$blocksev = myesc($link, truncs($block['sev'], 'InstBlocks', 'Severity', $blockctx));
		myq($link, 'INSERT INTO InstBlocks SET InstID='.$instid.', Domain=\''.$blockdom.'\', Severity=\''.$blocksev.'\', Comment='.$blockcomm, __LINE__);
	}
}
|
||
|
||
// Announce a Mastodon instance seen for the first time in this run by posting a status
// through the bot account on «bothost».
if ($opts['_sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) {
	$lc=['decimal_point'=>'.', 'thousands_sep'=>','];
	gettlds();
	eecho(0,'«'.$opts['hostname'].'»: this instance is new, trying to send an announcement toot about it...'.N);
	$endpoint='https://'.$opts['bothost'].'/api/v1/statuses';

	// compose the text of the announcement
	$toot='A new Mastodon instance, https://'.$opts['hostname'].', has been found by the crawler at https://mastodon.help'.N.N.'Name: ';
	$toot.=(is_null($idata['Title'])) ? 'unspecified' : $idata['Title'];
	$toot.=N.N.'Languages: ';
	// implode() needs an explicit separator, otherwise the language codes get glued together ("enit")
	if (array_key_exists('languages',$idata) && is_array($idata['languages']) && count($idata['languages'])>0 && $idata['languages'][0]!='en')
		$toot.=implode(', ',$idata['languages']);
	elseif (array_key_exists('ourlanguages',$idata) && is_array($idata['ourlanguages']) && count($idata['ourlanguages'])>0)
		$toot.=implode(', ',$idata['ourlanguages']);
	else
		$toot.='unspecified/undetectable';
	$toot.=N.N.'Users: ';
	$toot.=(is_null($idata['UserCount'])) ? 'unspecified' : fnum($idata['UserCount'],$lc);
	$toot.=N.N.'Max. characters per post: ';
	$toot.=(is_null($idata['MaxTootChars']) || $idata['MaxTootChars']==500) ? '500' : fnum($idata['MaxTootChars'],$lc);
	$toot.=N.N.'Registrations: ';
	if (is_null($idata['RegOpen'])) {
		$toot.='unspecified';
	} elseif ($idata['RegOpen']==1) {
		$toot.='open';
		// an unknown (null) approval setting must not be announced as "don’t require"
		if (!is_null($idata['RegReqApproval'])) {
			if ($idata['RegReqApproval']==1)
				$toot.=' (require admins approval)';
			elseif ($idata['RegReqApproval']==0)
				$toot.=' (don’t require admins approval)';
		}
	} elseif ($idata['RegOpen']==0) {
		$toot.='closed';
	}
	$toot.=N.N.'Short description: ';
	$toot.=(is_null($idata['ShortDesc'])) ? 'unspecified' : $idata['ShortDesc'];
	$toot.=N.N.'Long description: ';
	$toot.=(is_null($idata['LongDesc'])) ? 'unspecified' : html2text($idata['LongDesc']);

	// shorten the text word by word until it fits, leaving room for the 13 characters of the suffix
	if (postlen($toot)>$opts['botmaxchars']) {
		while (postlen($toot)+13>$opts['botmaxchars']) {
			$shorter=preg_replace('#\s+(\S|\n)+$#u','',$toot);
			if (!is_string($shorter) || $shorter===$toot) {
				// nothing left to cut at a whitespace boundary (or the regex failed):
				// cut hard instead of looping forever
				$toot=mb_substr($toot,0,max(0,$opts['botmaxchars']-13),'UTF-8');
				break;
			}
			$toot=$shorter;
		}
		$toot.=' [continues…]';
	}

	$context=[
		'http'=>[
			// every header needs its «Name: value» form; the random part makes the key unique per attempt
			'header'=>'Authorization: Bearer '.$opts['bottoken'].RN.
				'Idempotency-Key: '.md5($opts['bothost'].$now.rand(1000,9999)).RN.
				'Content-type: application/x-www-form-urlencoded'.RN,
			'method'=>'POST',
			'content'=>http_build_query([
				'status'=>$toot,
				'visibility'=>'public',
				'language'=>'en'
			]),
			'timeout'=>$opts['conntimeout']
		]
	];
	$context=stream_context_create($context);
	$res=@file_get_contents($endpoint,false,$context);
	if ($res===false) {
		eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$opts['bothost'].'».'.N);
	} else {
		$httprsc=gethttpcode($http_response_header);
		$res=@json_decode($res,true);
		if ($httprsc!=200)
			eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N);
		elseif (!is_array($res))
			// json_decode() reports failure with null, never with false
			eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has not returned valid JSON data.'.N);
		else
			eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$opts['bothost'].'» :-)'.N);
	}
}
|
||
|
||
// Mirror the public profile directory of the instance into the Users and UsersFields tables.
// Only Mastodon >= 4.0.0 is handled: those versions report «noindex» on account entities, which is
// why «noindex» is among the required keys below. Fetching every profile page to derive «noindex»,
// and fetching the featured tags of every user, were disabled because they take too long on
// instances with many users.
// Only the leading numeric part of the version is compared, through version_compare(): a plain string
// comparison misorders multi-digit components (e.g. '10.0.0' would sort before '4.0.0').
$udver=null;
if (is_scalar($idata['Version']) && preg_match('/^v?(\d+(?:\.\d+)*)/i',(string)$idata['Version'],$udverm)===1)
	$udver=$udverm[1];
if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($udver) && version_compare($udver,'4.0.0','>=')) {
	eecho(0,'«'.$opts['hostname'].'»: trying to fetch users info from directory API...'.N);
	$users=[];// array of users in this instance's directory
	$chunk=0;
	$limit=40;
	$end=false;
	$fetchok=true;// turns false when a chunk could not be retrieved, i.e. when $users is incomplete
	$maxatts=max(1,(int)$opts['udiratts']);// at least one attempt, or the outer loop would never end
	while (!$end) {
		$offset=$chunk*$limit;
		for ($att=0; $att<$maxatts; $att++) {
			$lastatt=($att==$maxatts-1);
			eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$maxatts.')...'.N);
			$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
			if ($buf['cont']===false) {
				eecho(2,'«'.$opts['hostname'].'»: could not fetch chunk '.($chunk+1).' of users info from directory API: '.$buf['emsg'].N);
				if ($lastatt) {
					eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$maxatts.') on chunk '.($chunk+1).' failed; i give up.'.N);
					$end=true;
					$fetchok=false;
				} else {
					eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$maxatts.' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
					sleep($opts['udirfailst']);
				}
				continue;
			}
			$xrlr=ckratelimit($buf['headers'],"«{$opts['hostname']}»");
			eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$maxatts.' (xrlr: '.$xrlr.') :-)'.N);
			$page=@json_decode($buf['cont'],true);
			if (!is_array($page)) {
				eecho(2,'«'.$opts['hostname'].'»: ... but the chunk was not good JSON :-('.N);
				if ($lastatt) {
					$end=true;
					$fetchok=false;
				}
				continue;
			}
			// a chunk shorter than the requested limit is the last one
			if (count($page)<$limit) $end=true;
			foreach ($page as $user) {
				if (!is_array($user) || !make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
					eecho(2,'«'.$opts['hostname'].'»: user record missed some required keys :-('.N);
					continue;
				}
				eecho(0,'«'.$opts['hostname'].'»: working on user «'.$user['username'].'»...'.N);
				// hashtags in the profile note override the «noindex» flag, the opt-in ones winning over the opt-out ones
				$snote=strip_tags((string)$user['note']);
				if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
				if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
				$user['tags']=null;// featured tags are not fetched (see the note at the top of this section)
				if (!is_null($user['created_at'])) {
					// an unparsable date is stored as NULL rather than as «false»
					$createdts=strtotime($user['created_at']);
					$user['created_at']=($createdts===false) ? null : $createdts;
				}
				if (!is_null($user['last_status_at'])) $user['last_status_at']=datetots($user['last_status_at']);
				$users[$user['id']]=$user;
			}
			break;
		}
		$chunk++;
	}
	$totusers=count($users);
	eecho(1,'«'.$opts['hostname'].'»: got '.$totusers.' users’ profiles.'.N);
	if ($totusers>0) {
		eecho(1,'«'.$opts['hostname'].'»: inserting/updating '.$totusers.' users’ profiles in the database.'.N);
		$exusers=[];// array of this instance's users already existing in the db
		$res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
		while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
		foreach ($users as $locid=>$user) {
			$uctx='«'.$opts['hostname'].'»: «'.$user['username'].'»';
			$cols=[
				'InstID='.$instid,
				'host='.myv($link,$opts['hostname']),
				'locid='.myv($link,$user['id']),
				'username='.myv($link,truncs($user['username'],'Users','username',$uctx)),
				'display_name='.myv($link,truncs($user['display_name'],'Users','display_name',$uctx)),
				'locked='.myv($link,$user['locked']),
				'bot='.myv($link,$user['bot']),
				'created_at='.myv($link,$user['created_at']),
				'note='.myv($link,truncs($user['note'],'Users','note',$uctx)),
				'url='.myv($link,truncs($user['url'],'Users','url',$uctx)),
				'avatar='.myv($link,truncs($user['avatar'],'Users','avatar',$uctx)),
				'header='.myv($link,truncs($user['header'],'Users','header',$uctx)),
				'statuses_count='.myv($link,$user['statuses_count']),
				'last_status_at='.myv($link,$user['last_status_at']),
				'tags='.myv($link,truncs($user['tags'],'Users','tags',$uctx))
			];
			$query='SET '.implode(', ',$cols);
			$uid=0;// stays 0 when there is no record to attach fields to
			if (!array_key_exists($user['id'],$exusers)) {
				if (!$user['noindex']) {
					eecho(0,'«'.$opts['hostname'].'»: inserting new user «'.$user['username'].'»...'.N);
					$query='INSERT INTO Users '.$query;
					if (!$opts['dryrun']) {
						myq($link,$query,__LINE__);
						$uid=mysqli_insert_id($link);
					}
				} else {
					eecho(0,'«'.$opts['hostname'].'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
				}
			} else {
				$uid=$exusers[$locid]['ID'];
				if (!$user['noindex']) {
					eecho(0,'«'.$opts['hostname'].'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
					$query='UPDATE Users '.$query.' WHERE ID='.$uid;
				} else {
					eecho(0,'«'.$opts['hostname'].'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
					$query='DELETE FROM Users WHERE ID='.$uid;
				}
				if (!$opts['dryrun']) {
					myq($link,$query,__LINE__);
					// fields are always wiped; they get re-inserted below when the user is still indexed
					myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
				}
			}
			if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
				eecho(0,'«'.$opts['hostname'].'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
				foreach ($user['fields'] as $field) {
					if (!is_array($field)) continue;
					// only whether the field was verified is stored, not when
					$verified=(isset($field['verified_at'])) ? 1 : 0;
					$fname=truncs(isset($field['name']) ? $field['name'] : null,'UsersFields','name',$uctx);
					$fvalue=truncs(isset($field['value']) ? $field['value'] : null,'UsersFields','value',$uctx);
					if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$fname).', value='.myv($link,$fvalue).', verified='.$verified,__LINE__);
				}
			}
		}
		if ($fetchok) {
			eecho(1,'«'.$opts['hostname'].'»: deleting possible users’ profiles which are in the database but no longer in the directory.'.N);
			foreach ($exusers as $locid=>$exuser) {
				if (!array_key_exists($locid,$users)) {
					eecho(0,'«'.$opts['hostname'].'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
					if (!$opts['dryrun']) {
						myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
						myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
					}
				}
			}
		} else {
			// with an incomplete directory, a missing user may simply not have been fetched
			eecho(2,'«'.$opts['hostname'].'»: the directory could not be fetched completely, so users missing from it are NOT deleted from the database.'.N);
		}
	}
}
|
||
|
||
mexit('«'.$opts['hostname'].'»: done in '.ght(time()-$now,null,0).' :-)'.N,0);
|
||
|
||
|
||
// functions
|
||
|
||
/**
 * Runs a query and shuts the script down (exit code 3) if it fails.
 *
 * @param mysqli $link Database connection
 * @param string $query SQL statement to run
 * @param int $line Line number of the caller, reported in the error message
 * @return mixed Whatever mysqli_query returned on success
 */
function myq(&$link,$query,$line) {
	$failprefix='query «'.$query.'» on line '.$line.' failed: ';
	try {
		$result=mysqli_query($link,$query);
	} catch (Exception $e) {
		mexit($failprefix.$e->getMessage().' ('.$e->getCode().').'.N,3);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	if ($result===false) {
		mexit($failprefix.mysqli_error($link).' ('.mysqli_errno($link).').'.N,3);
	}
	return $result;
}
|
||
|
||
/**
 * Prints a timestamped message, if its level reaches the configured minimum.
 *
 * Levels below 2 go to standard output, the others to standard error.
 *
 * @param int $lev Message level, used as index into the global $msglevs
 * @param string $msg Message text (no newline is appended)
 */
function eecho($lev,$msg) {
	global $opts, $msglevs;
	// microtime(false) gives "0.fraction seconds": the leading "0." of the fraction is dropped
	list($fraction,$seconds)=explode(' ',microtime(false));
	$stamp=date('Y-m-d H:i:s',$seconds).'.'.substr($fraction,2);
	$line=$stamp.' '.$msglevs[$lev].': '.$msg;
	if ($lev<$opts['minmsgimplev']) {
		return;
	}
	if ($lev>=2) {
		fwrite(STDERR,$line);
	} else {
		echo($line);
	}
}
|
||
|
||
/**
 * Closes the database connection, if any, prints a final message and terminates the script.
 *
 * @param string $msg Message to print: at level 3 when $code is not 0, at level 1 otherwise
 * @param int $code Exit status
 */
function mexit($msg,$code) {
	global $link;
	if (isset($link) && $link!==false) {
		mysqli_close($link);
	}
	$level=($code!=0) ? 3 : 1;
	eecho($level,$msg);
	exit($code);
}
|
||
|
||
/**
 * Converts to numbers, in place, the non-null values an array holds under the given keys.
 *
 * @param array $keys Keys whose values have to be converted
 * @param array $arr Array to work on (modified in place)
 */
function setint($keys,&$arr) {
	foreach ($keys as $key) {
		if (is_null($arr[$key])) {
			continue;
		}
		// adding 0 turns a numeric string into the matching int or float
		$arr[$key]+=0;
	}
}
|
||
|
||
/**
 * Tells whether a value exceeds the limits recorded for a column in the global $tables.
 *
 * @param mixed $val Value to check: strings are checked by length, ints by min/max range
 * @param string $tab Table name (lowercased on Windows)
 * @param string $col Column name
 * @return bool true if the value is out of limits, false otherwise (also for any other type)
 */
function willtrunc($val,$tab,$col) {
	global $tables, $iswin;
	if ($iswin) {
		$tab=strtolower($tab);
	}
	if (is_string($val)) {
		return (mb_strlen($val,'UTF-8')>$tables[$tab][$col]);
	}
	if (is_int($val)) {
		$range=$tables[$tab][$col];
		return ($val<$range['min'] || $val>$range['max']);
	}
	return false;
}
|
||
|
||
/**
 * Shrinks a string so that it fits the maximum length recorded for a column in the global $tables.
 *
 * A too long string first loses its tags; if that is not enough it is cut, with an ellipsis as
 * its last character. Whenever the string had to change, a level 2 notification is issued.
 *
 * @param string|null $str String to check
 * @param string $tab Table name (lowercased on Windows)
 * @param string $col Column name
 * @param string $ctx Context put at the beginning of the notification
 * @return string|null The possibly shrunk string, or null if $str was null
 */
function truncs($str,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($str===null) {
		return null;
	}
	if ($iswin) {
		$tab=strtolower($tab);
	}
	$maxlen=$tables[$tab][$col];
	$shrunk=$str;
	if (mb_strlen($shrunk,'UTF-8')>$maxlen) {
		$shrunk=strip_tags($shrunk);
		if (mb_strlen($shrunk,'UTF-8')>$maxlen) {
			$shrunk=mb_substr($shrunk,0,$maxlen-1,'UTF-8').'…';
		}
	}
	if ($shrunk!=$str) {
		notify($ctx.': had to shrink input string to '.$maxlen.' chars to be able to insert it into «'.$col.'» column in «'.$tab.'» table.',2);
	}
	return $shrunk;
}
|
||
|
||
/**
 * Clamps a number to the min/max range recorded for a column in the global $tables.
 *
 * A notification is issued whenever the value had to change (level 2) or was not a number (level 3).
 *
 * @param mixed $num Number to check
 * @param string $tab Table name (lowercased on Windows)
 * @param string $col Column name
 * @param string $ctx Context put at the beginning of the notification
 * @return mixed $num itself when in range, the violated bound when out of range, 0 when not numeric
 */
function truncn($num,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($iswin) {
		$tab=strtolower($tab);
	}
	if (!is_numeric($num)) {
		notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
		return 0;
	}
	$range=$tables[$tab][$col];
	if ($num>$range['max']) {
		notify($ctx.': had to ceil «'.$num.'» to «'.$range['max'].'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',2);
		return $range['max'];
	}
	if ($num<$range['min']) {
		notify($ctx.': had to floor «'.$num.'» to «'.$range['min'].'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'»).',2);
		return $range['min'];
	}
	return $num;
}
|
||
|
||
/**
 * Makes carriage returns and newlines visible by replacing them with the two-character
 * sequences «\r» and «\n».
 *
 * @param string $str Input string
 * @return string The string with its line breaks escaped
 */
function nocrnl($str) {
	$breaks=["\r","\n"];
	$escaped=['\\r','\\n'];
	return str_replace($breaks,$escaped,$str);
}
|
||
|
||
/**
 * Converts a truthy/falsy value to 1/0.
 *
 * @param mixed $bool Value to convert
 * @return int 1 if $bool is truthy, 0 otherwise
 */
function b2i($bool) {
	if ($bool) {
		return 1;
	}
	return 0;
}
|
||
|
||
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $str String to check
 * @return bool true if $str contains nothing but whitespace
 */
function isempty($str) {
	return (preg_match('/^\s*$/',$str)===1);
}
|
||
|
||
/**
 * Records a notification in the Notifications table, optionally echoing it first.
 *
 * Nothing is written to the database on dry runs.
 *
 * @param string $msg Notification text; tags are stripped only from the echoed copy
 * @param int $lev Severity, to be thought of as the "$lev" param of eecho(): 0=debug, 1=info, 2=warning, 3=error
 * @param bool $doecho Whether to also print the notification through eecho()
 */
function notify($msg,$lev,$doecho=true) {
	global $link, $tables, $iswin, $opts;
	if ($doecho) {
		eecho($lev,'*notification*: '.mb_lcfirst(strip_tags($msg)).N);
	}
	if ($opts['dryrun']) {
		return;
	}
	// the key used to look up the column length in $tables is lowercase on Windows
	$tab=($iswin) ? 'notifications' : 'Notifications';
	$text=myesc($link,mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8'));
	myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.$text.'\', '.$lev.', \''.microtime(true).'\', 0, 0)',__LINE__);
}
|
||
|
||
/**
 * Sorts, in place, an array of arrays by the value each sub-array holds under $key.
 *
 * The keys of the outer array are preserved. Sub-arrays holding the same value are all kept,
 * in their original relative order: indexing the rows by the value itself would make equal
 * values overwrite one another (and would mangle float or non-scalar values used as keys).
 *
 * @param array $arr Array of arrays to sort (modified in place)
 * @param string|int $key Key, inside every sub-array, of the value to sort by
 * @param bool $rev false for ascending order, true for descending order
 */
function mdasortbykey(&$arr,$key,$rev=false) {
	$rows=[];
	$pos=0;
	foreach ($arr as $akey=>$subarr)
		$rows[]=[$subarr[$key],$pos++,$akey,$subarr];
	usort($rows,function ($a,$b) use ($rev) {
		$cmp=($rev) ? ($b[0]<=>$a[0]) : ($a[0]<=>$b[0]);
		// the original position breaks ties, so the result is stable also where usort() is not
		return ($cmp!==0) ? $cmp : ($a[1]<=>$b[1]);
	});
	$arr=[];
	foreach ($rows as $row)
		$arr[$row[2]]=$row[3];
}
|
||
|
||
/**
 * "Multi array_key_exists": tells whether an array has all the given keys.
 *
 * @param array $keys Keys that must all exist
 * @param mixed $arr Array to check; anything that is not an array (as may come out of
 *                   decoded remote JSON) has none of the keys, so false is returned
 * @return bool true if every key exists in $arr, even with a null value
 */
function make($keys,&$arr) {
	if (!is_array($arr))
		return false;
	foreach ($keys as $key)
		if (!array_key_exists($key,$arr))
			return false;
	return true;
}
|
||
|
||
/**
 * Formats a value as a literal ready to be put into an SQL statement.
 *
 * @param mysqli $link Database connection, needed to escape strings
 * @param mixed $var Value to format
 * @return string «NULL» for null and for blank values, «1»/«0» for booleans,
 *                the escaped value between single quotes for anything else
 */
function myv(&$link,$var) {
	if (is_null($var)) {
		return 'NULL';
	}
	if (is_bool($var)) {
		return ($var) ? '1' : '0';
	}
	if (trim($var)=='') {
		return 'NULL';
	}
	return '\''.mysqli_real_escape_string($link,$var).'\'';
}
|
||
|
||
/**
 * Converts a «year-month-day» date to the timestamp of its midnight, in the script's timezone.
 *
 * Anything following the day (e.g. the time part of an ISO 8601 datetime) is ignored.
 *
 * @param mixed $date Date to convert, e.g. "2023-01-05"
 * @return int|false|null Timestamp as returned by mktime, or null if $date is not a
 *                        string starting with a «year-month-day» date
 */
function datetots($date) {
	// validate before calling mktime: it must never get undefined or non-numeric parts
	if (!is_string($date) || preg_match('/^(\d+)-(\d{1,2})-(\d{1,2})(?!\d)/',$date,$parts)!==1)
		return(null);
	return(mktime(0,0,0,(int)$parts[2],(int)$parts[3],(int)$parts[1]));
}
|
||
|
||
/**
 * Checks the rate limit headers of a response and, when no requests are left,
 * sleeps until the limit gets reset.
 *
 * @param string $headers Raw response headers, lines separated by CRLF, status line first
 * @param string $ctx Context put at the beginning of the messages
 * @return string|false Value of the «x-ratelimit-remaining» header, or false if any of
 *                      «date», «x-ratelimit-reset» and «x-ratelimit-remaining» is missing
 */
function ckratelimit($headers,$ctx) {
	$lines=explode("\r\n",(string)$headers);
	array_shift($lines);// the first line is not a «name: value» header
	$parsed=[];
	foreach ($lines as $line)
		if (preg_match('/^([^:]+):(.*)$/Uu',$line,$matches)===1)
			$parsed[strtolower($matches[1])]=trim($matches[2]);
	$missing=[];
	foreach (['date','x-ratelimit-reset','x-ratelimit-remaining'] as $name)
		if (!isset($parsed[$name])) $missing[]=$name;
	if (count($missing)>0) {
		eecho(2,$ctx.': ckratelimit: headers did not contain «'.implode('», «',$missing).'» item(s)!'.N);
		return(false);
	}
	$remaining=$parsed['x-ratelimit-remaining'];
	// only a real numeric zero means "no requests left"
	if (is_numeric($remaining) && (int)$remaining===0) {
		$reset=strtotime($parsed['x-ratelimit-reset']);
		$date=strtotime($parsed['date']);
		if ($reset===false || $date===false)
			$stosl=1;// unparsable dates: wait the minimum instead of computing garbage
		else
			$stosl=max(0,$reset-$date+1);// sleep() does not accept negative values
		eecho(2,$ctx.': reached rate limit, sleeping for '.ght($stosl,null,0).' ...'.N);
		sleep($stosl);
	}
	return($remaining);
}
|
||
|
||
/** <LANGUAGE MANAGEMENT> */
|
||
/**
 * Calls an endpoint of the Mastodon API and decodes its JSON response.
 *
 * @param string $host Host to be called (e.g.: "mastodon.bida.im")
 * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The decoded JSON, with objects as arrays, as returned by json_decode; NULL if the call fails
 */
function get_api($host, $path) {
	global $opts;
	$response = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
	if ($response['cont'] === false) {
		return NULL;
	}
	ckratelimit($response['headers'],"«{$host}»");
	return json_decode($response['cont'], true);
}
|
||
|
||
/**
 * Returns the languages recognized for the toot that got passed to it, with the related probability.
 *
 * @param mixed $toot The toot to be checked, as returned by the API
 * @return array Associative array with language and related probability
 */
function get_toot_languages($toot) {
	$declared = (is_array($toot) && array_key_exists('language',$toot)) ? $toot['language'] : NULL;

	if ($declared !== NULL) {
		// the language is explicitly set in the toot, so use that
		$candidates = [$declared => 1];
	} elseif (is_array($toot) && array_key_exists('content',$toot)) {
		// the language is not explicitly set in the toot, so try and recognize it
		$detector = new Language;
		$candidates = $detector->detect(strip_tags($toot['content']))->bestResults()->close();
	} else {
		$candidates = [];
	}

	// group derived languages under the part of their code preceding the first dash
	// (e.g.: "zh-CN" into "zh"), keeping the highest probability of each group
	$grouped = [];
	foreach ($candidates as $code => $probability) {
		$base = explode("-", $code)[0];
		$grouped[$base] = array_key_exists($base, $grouped)
			? max($grouped[$base], $probability)
			: $probability;
	}
	return $grouped;
}
|
||
|
||
/**
 * Given the probability of each language for every toot, calculates the average per language.
 *
 * The sum of the probabilities of a language is divided by the total number of toots,
 * including those in which the language does not appear.
 *
 * @param array $detected_langs Array of mappings between language and probability, one per toot
 * @return array Mapping between language and average probability
 */
function summary($detected_langs) {
	$averages = [];
	foreach ($detected_langs as $langs) {
		foreach ($langs as $code => $weight) {
			$averages[$code] = ($averages[$code] ?? 0) + $weight;
		}
	}
	$toots = count($detected_langs);
	foreach ($averages as $code => $total) {
		$averages[$code] = $total / $toots;
	}
	return $averages;
}
|
||
|
||
/**
 * Helper function for usort: orders two arrays by their first element, highest first
 *
 * @param array $entry1 First array to be compared
 * @param array $entry2 Second array to be compared
 * @return int 1, 0 or -1 depending on $entry1[0] being less than, equal to or greater than $entry2[0]
 */
function sort_weights($entry1, $entry2) {
	// operands are swapped on purpose: the entry with the greater weight must come first
	return $entry2[0] <=> $entry1[0];
}
|
||
|
||
/**
 * Given a language mapping, return a list of probable languages
 *
 * Languages are ranked with sort_weights() and taken in that order; the list stops
 * at the first language whose weight is less than two thirds of the weight of the
 * language ranked just before it.
 *
 * @param array $summary Map between language and probability
 * @return string[] List of probable languages
 */
function get_languages($summary) {
	// build [weight, code] pairs so that they can be ranked by weight
	$ranked = [];
	foreach ($summary as $code => $weight) {
		$ranked[] = [$weight, $code];
	}
	usort($ranked, 'sort_weights');
	$languages = [];
	$prevweight = 0;
	foreach ($ranked as [$weight, $code]) {
		// stop as soon as the weight drops below 2/3 of the previous one
		if ($weight < $prevweight * 2 / 3) {
			break;
		}
		$languages[] = $code;
		$prevweight = $weight;
	}
	return $languages;
}
|
||
|
||
/**
 * Returns a list of probable languages for the given instance
 *
 * Requests the instance's local public timeline (asking for $opts['ldtoots'] toots),
 * gets the languages of every returned toot, averages them and keeps the most
 * probable ones, up to a maximum of 5.
 *
 * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
 * @return string[] List of probable languages; empty if the timeline could not be fetched, is empty or is not an array
 */
function get_instance_langs($host) {
	global $opts;
	$data = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
	// get_api() returns whatever json_decode() produced: a scalar payload (e.g. «true»)
	// must not reach array_map(), which only accepts arrays
	if (!is_array($data) || count($data) === 0) {
		return [];
	}
	$detected_langs = array_map('get_toot_languages', $data);
	$languages = get_languages(summary($detected_langs));
	// keep no more than the 5 most probable languages
	return array_slice($languages, 0, 5);
}
|
||
|
||
/**
 * Returns the ID of the record of table «Languages» for the given language code, adding the record if it is missing
 *
 * The code is canonicalized with locale_canonicalize(); when $lang is blank or the
 * canonicalized code contains "__", a notification is sent and «en» is used instead.
 * When no record has that code, a new one is built with the language's name in its own
 * language («NameOrig») and in every locale that is a key of $supplangs («Name» followed
 * by the upper-cased key), and it is inserted unless $dryrun is set.
 *
 * @param mixed $link Database link, passed on to myq() and to the mysqli functions
 * @param string $lang Language code to look up
 * @param array $supplangs Array whose keys are the locales of the localized «Name…» columns
 * @param string $hostname Hostname quoted in notifications
 * @param bool $dryrun If true nothing gets inserted, and 0 is returned for a missing language
 * @param mixed $line Passed on to myq()
 * @return mixed ID of the language's record, or 0 for a missing language on a dry run
 */
function getlangid(&$link,$lang,&$supplangs,$hostname,$dryrun,$line) {
	$code=locale_canonicalize($lang);
	$blank=(preg_match('/^\s*$/',$lang)===1);
	if ($blank || preg_match('/__/',$code)===1) {
		notify('«'.$hostname.'»: «'.$lang.'» is not a valid language code, falling back to default «en».',2,true);
		$code='en';
	}
	$res=myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$code).'\'',$line);
	$nrows=mysqli_num_rows($res);

	// the language is already known: return the ID of the first matching record
	if ($nrows!=0) {
		if ($nrows>1) notify('In table Languages there are '.$nrows.' records with Code = «'.$code.'» :-(',2,true);
		$row=mysqli_fetch_assoc($res);
		return($row['ID']);
	}

	// unknown language: collect column names and quoted values for the new record
	$where='«'.$hostname.'»';
	$code=myesc($link,truncs($code,'Languages','Code',$where));
	$cols=['ID','Code','NameOrig'];
	$vals=['NULL','\''.$code.'\''];
	$vals[]='\''.myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$code)),'Languages','NameOrig',$where)).'\'';
	foreach ($supplangs as $key=>$unused) {
		$col='Name'.strtoupper($key);
		$cols[]=$col;
		$vals[]='\''.myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$key)),'Languages',$col,$where)).'\'';
	}
	$q='INSERT INTO Languages ('.implode(', ',$cols).') VALUES ('.implode(', ',$vals).')';

	// on a dry run the query is built but never executed
	if ($dryrun) return(0);
	myq($link,$q,$line);
	return(mysqli_insert_id($link));
}
|
||
|
||
/**
 * Returns the IDs, without duplicates, of the «Languages» records for a list of language codes
 *
 * Every code is resolved with getlangid(), which gets passed the remaining arguments as they are.
 *
 * @param array $langs List of language codes
 * @param array $supplangs Passed on to getlangid()
 * @param mixed $link Database link, passed on to getlangid()
 * @param string $hostname Passed on to getlangid()
 * @param bool $dryrun Passed on to getlangid()
 * @param mixed $line Passed on to getlangid()
 * @return array IDs of the languages; duplicates are removed by array_unique(), so keys may not be contiguous
 */
function getlangsidsarr(&$langs,&$supplangs,&$link,$hostname,$dryrun,$line) {
	$ids=[];
	foreach ($langs as $code)
		$ids[]=getlangid($link,$code,$supplangs,$hostname,$dryrun,$line);
	return(array_unique($ids));
}
|
||
|
||
/**
 * Blocks until a TCP connection to www.google.com on port 80 can be opened
 *
 * Every attempt has a timeout of 1 second; after a failed attempt a message is
 * printed with eecho() and the function sleeps before retrying. If at least one
 * attempt failed, a message is printed when the connection finally succeeds.
 *
 * @return void
 */
function waituntilonline() {
	$probehost='www.google.com';
	// single source for both the pause and the message announcing it
	// (the message used to say 10 seconds while the pause was 5)
	$retrysecs=5;
	$wasoffline=false;
	while (false===($sock=@fsockopen($probehost,80,$errno,$errstr,1))) {
		$wasoffline=true;
		eecho(2,'it seems we are offline ('.$errno.': '.$errstr.'), waiting for '.$retrysecs.' seconds before retrying...'.N);
		sleep($retrysecs);
	}
	// the socket was only needed as a probe
	fclose($sock);
	if ($wasoffline) eecho(1,'it seems we are back online! :-)'.N);
}
|
||
|
||
/**
 * Callback for preg_replace_callback: renders a matched HTML heading as a plain text title
 *
 * @param array $arr Matched groups; the element with index 4 is used as the text of the heading
 * @return string «§ » followed by the upper-cased text of the heading and two newlines
 */
function headcb($arr) {
	$title=mb_strtoupper($arr[4]);
	return "§ {$title}\n\n";
}
|
||
|
||
/**
 * Converts a fragment of HTML into plain text
 *
 * List items are prefixed with a bullet, headings are rendered by headcb(),
 * paragraphs, divs and lists are separated by blank lines, line breaks become
 * newlines, every remaining tag is stripped and HTML entities are decoded.
 * Finally whitespace is normalized: the text is trimmed and ends with exactly
 * one newline.
 *
 * @param string $html The HTML to be converted
 * @param bool $keeplinks If true, the opening tag of every link is replaced with « [link: URL] »
 * @return string The plain text version of $html
 */
function html2text($html,$keeplinks=false) {
	$html=str_replace("\r",'',$html);
	if ($keeplinks) $html=preg_replace('#<a\s.*href="(.*)".*>#iU',' [link: $1] ',$html);
	$html=preg_replace('#<li>#i',' • ',$html);
	$html=preg_replace_callback('#(<h)(\d)(>)(.*)(</h\d>)#iU','headcb',$html);
	$html=preg_replace('#<p>|<div>|<ul>|<ol>#i',"\n\n",$html);
	$html=preg_replace('#</p>|</div>|</ul>|</ol>|</li>#i',"\n\n",$html);
	// matches «<br>», «<br/>» and «<br />»: the self-closing form without a space used to
	// be missed, so strip_tags() below removed it and glued the two lines together
	$html=preg_replace('#<br\s*/?>#i',"\n",$html);
	$html=strip_tags($html);
	$html=html_entity_decode($html,ENT_QUOTES,'UTF-8');
	// drop whitespace preceding a newline, then squeeze runs of spaces
	$html=preg_replace('#\s+\n#',"\n",$html);
	$html=preg_replace('# +#',' ',$html);
	// every run of newlines becomes exactly one blank line
	$html=preg_replace('#\n+#',"\n\n",$html);
	$html=trim($html)."\n";
	return $html;
}
|
||
|
||
?>
|