MastodonHelp/web/clitools/getinstinfo.php

1232 lines
57 KiB
PHP
Raw Normal View History

2022-12-17 15:00:36 +01:00
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
define('N',"\n");
define('SNAME',basename(__FILE__));
define('LIBDP','/../site/mustard/include');
require(__DIR__.LIBDP.'/parsetime.php');
require(__DIR__.LIBDP.'/gurl.php');
require(__DIR__.LIBDP.'/tables.php');
require(__DIR__.LIBDP.'/mb_ucfirst.php');
require(__DIR__.LIBDP.'/mb_lcfirst.php');
2022-12-17 15:00:36 +01:00
require(__DIR__.LIBDP.'/ghs.php');
require(__DIR__.LIBDP.'/ght.php');
require(__DIR__.'/lib/vendor/autoload.php');
use LanguageDetection\Language;
use function mysqli_real_escape_string as myesc;
// Remember whether the script is running on a Windows host.
$iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');

// NOTE(review): ticks are presumably enabled so that the pcntl handlers
// registered below get a chance to run - confirm.
declare(ticks=1);

if (function_exists('pcntl_signal')) {
    /**
     * Prints a newline and shuts the script down through mexit() with exit
     * code 0, reporting which signal was received.
     *
     * @param int $signal number of the received signal
     */
    function signalHandler($signal) {
        echo(N);
        mexit('received signal «' . $signal . '», shutting down.' . N, 0);
    }

    // Termination ('kill' was called)
    pcntl_signal(SIGTERM, 'signalHandler');
    // Terminal log-out
    pcntl_signal(SIGHUP, 'signalHandler');
    // Interrupted (Ctrl-C is pressed)
    pcntl_signal(SIGINT, 'signalHandler');
}
// Default value of every command line option; the argument parsing loop
// further down overwrites the entries the user specifies.
$opts = [
    'hostname'     => null,
    'timeout'      => 10,
    // if an instance has not been responding for more than this value of seconds, declare it dead
    'deadline'     => 62 * 24 * 60 * 60,
    // number of toots to check with the automatic language detection function
    'ldtoots'      => 40,
    'dryrun'       => false,
    'fetchusers'   => false,
    'udiratts'     => 5,
    'udirfailst'   => 90,
    // indexes into $msglevs
    'logminmsglev' => 1,
    'tuiminmsglev' => 1
];

// Message "importance levels", in increasing order; «none» disables output.
$msglevs = ['debug', 'info', 'warning', 'error', 'none'];

// Singular/plural unit labels passed to ght() when formatting durations.
$ghtsa = [
    [' day', ' days'],
    [' hour', ' hours'],
    [' minute', ' minutes'],
    [' second', ' seconds']
];
// Help text printed by «-h»/«--help». Defaults are interpolated from $opts so
// the text cannot drift from the real default values.
// The examples in «TIME SPECIFICATION» had lost their closing «»» and the
// trailing unit letter; they are restored so each example is a complete,
// balanced time specification matching the sentence that introduces it.
$help='SYNOPSIS
'.SNAME.' <hostname> [options]
DESCRIPTION
This script tries to fetch info about the fediverse instance at the given
hostname and insert or update them in mastostarts database.
OPTIONS
-D, --deadline <time>
If an instance has not been responding for longer than this time, declare
it dead. See section «TIME SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['deadline'],$ghtsa).'
-l, --ldtoots <number>
This option defines the number of toots the script will try to fetch from
the local public timelines, to try and guess the most used languages of each
instance. Its minimum value is 10, its maximum value is 40.
DEFAULT: '.$opts['ldtoots'].'
-f, --fetchusers
If this option is set, the script will try to fetch users info from the
considered instances users directory, and store them in the database.
-r, --udiratts <number>
This option defines how many attempts the script will do at fetching a chunk
of users info from the profile directory, before giving up.
DEFAULT: '.$opts['udiratts'].'
-s, --udirfailst <time>
This option defines how long the script will wait after each failed attempt
at fetching a chunk of users info from the profile directory (see above)
before retrying.
DEFAULT: '.ght($opts['udirfailst'],$ghtsa).'
-t, --timeout <time>
Sets the timeout for every connection attempt. See section «TIME
SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['timeout'],$ghtsa).'
-d, --dryrun
If this option is set, the script wont write anything in the database.
-L, --logminmsglev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written into the
log file «run/[instance hostname].log». There are 4 “importance levels”, in
this order of importance: «debug», «info», «warning», «error».
Setting this option to any of these values will write into the logfile all
the messages with the specified or a greater level; setting it to the
special value «none» will completely disable logging to file.
DEFAULT: '.$msglevs[$opts['logminmsglev']].'
-T, --tuiminmsglev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written to the
terminal. See the option above to understand how this works.
DEFAULT: '.$msglevs[$opts['tuiminmsglev']].'
-h, --help
If this option is set, the script will show this help text and exit.
TIME SPECIFICATION
An example is better than ~5148 words :-)
To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
LICENSE
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
// Parse the command line. The first argument that does not start with «-» is
// taken as the hostname; every other argument must be one of the options
// described in $help. Invalid input is reported through mexit() with code 1.
for ($i = 1; $i < $argc; $i++) {
    if ($argv[$i] == '-f' || $argv[$i] == '--fetchusers') {
        $opts['fetchusers'] = true;
    } elseif ($argv[$i] == '-r' || $argv[$i] == '--udiratts') {
        // the check accepts 1, so the message says «>= 1» (it used to say «> 1»)
        if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 < 1)
            mexit('option «' . $argv[$i] . '» requires a number >= 1 as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['udiratts'] = $argv[$i] + 0;
    } elseif ($argv[$i] == '-s' || $argv[$i] == '--udirfailst') {
        if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false)
            mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['udirfailst'] = parsetime($argv[$i]);
    } elseif ($argv[$i] == '-t' || $argv[$i] == '--timeout') {
        if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false)
            mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['timeout'] = parsetime($argv[$i]);
    } elseif ($argv[$i] == '-D' || $argv[$i] == '--deadline') {
        if ($i + 1 >= $argc || parsetime($argv[$i + 1]) === false)
            mexit('option «' . $argv[$i] . '» requires a time specification as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['deadline'] = parsetime($argv[$i]);
    } elseif ($argv[$i] == '-l' || $argv[$i] == '--ldtoots') {
        if ($i + 1 >= $argc || preg_match('/^\d+$/', $argv[$i + 1]) !== 1 || $argv[$i + 1] + 0 > 40 || $argv[$i + 1] + 0 < 10)
            mexit('option «' . $argv[$i] . '» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['ldtoots'] = $argv[$i] + 0;
    } elseif ($argv[$i] == '-d' || $argv[$i] == '--dryrun') {
        $opts['dryrun'] = true;
    } elseif ($argv[$i] == '-L' || $argv[$i] == '--logminmsglev') {
        if ($i + 1 >= $argc || !in_array(strtolower($argv[$i + 1]), $msglevs, true))
            mexit('option «' . $argv[$i] . '» requires a “message importance level” value as an argument (use «-h» to read help).' . N, 1);
        $i++;
        // stored as the index of the level inside $msglevs
        $opts['logminmsglev'] = array_search(strtolower($argv[$i]), $msglevs, true);
    } elseif ($argv[$i] == '-T' || $argv[$i] == '--tuiminmsglev') {
        if ($i + 1 >= $argc || !in_array(strtolower($argv[$i + 1]), $msglevs, true))
            mexit('option «' . $argv[$i] . '» requires a “message importance level” value as an argument (use «-h» to read help).' . N, 1);
        $i++;
        $opts['tuiminmsglev'] = array_search(strtolower($argv[$i]), $msglevs, true);
    } elseif ($argv[$i] == '-h' || $argv[$i] == '--help') {
        echo($help);
        exit(0);
    } elseif (is_null($opts['hostname']) && $argv[$i] !== '' && $argv[$i][0] !== '-') {
        // an empty argument is rejected below instead of being indexed with [0]
        $opts['hostname'] = $argv[$i];
    } else {
        mexit('dont know how to interpret «' . $argv[$i] . '», please read the help text using «-h» or «--help».' . N, 1);
    }
}
// A hostname is mandatory.
if ($opts['hostname'] === null) {
    mexit('you didnt specify an hostname (you can read the help text using «-h» or «--help»).' . N, 1);
}

// Capitalize the level names in place.
foreach ($msglevs as $key => $val) {
    $msglevs[$key] = ucfirst($val);
}

// Load the configuration file; a falsy result is treated as a failure.
$inifp = __DIR__ . '/../conf/mustard.ini';
$iniarr = @parse_ini_file($inifp);
if (!$iniarr) {
    mexit('could not open config file «' . $inifp . '»' . N, 1);
}

// Connect to MySQL. Failure is handled twice: as an exception, and as a
// «false» return value for php versions < 8.
try {
    $link = @mysqli_connect(
        $iniarr['db_host'],
        $iniarr['db_admin_name'],
        $iniarr['db_admin_password'],
        $iniarr['db_name'],
        $iniarr['db_port'],
        $iniarr['db_socket']
    );
} catch (Exception $error) {
    mexit('could not connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);
}
if ($link === false) {
    mexit('could not connect to MySQL server: ' . mysqli_connect_error() . '.' . N, 1, true);
}

// Select the connection charset, with the same double failure handling.
try {
    $res = mysqli_set_charset($link, 'utf8mb4');
} catch (Exception $error) {
    mexit('could not set «utf8mb4» charset for MySQL: ' . mysqli_error($link) . '.' . N, 1, true);
}
if ($res === false) {
    mexit('could not set MySQL charset: ' . mysqli_errno($link) . ': ' . mysqli_error($link) . '.' . N, 1, true);
}
2022-12-17 15:00:36 +01:00
2022-12-21 22:06:10 +01:00
// Collect the names of the platforms flagged «Consider=1» and turn them into
// a regular expression alternation (each name is quoted for the «/» delimiter).
$mastodons = [];
$res = myq($link, 'SELECT Name FROM Platforms WHERE Consider=1', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $mastodons[] = preg_quote($row['Name'], '/');
}
if (count($mastodons) < 1) {
    mexit('in table «Platforms», there is no platform to be considered!' . N, 1);
}
$mastodons = implode('|', $mastodons);
2022-12-17 15:00:36 +01:00
$tables = tables($link);
//print_r($tables);

// Level 4 is «none»: the log file is opened only when file logging is enabled.
if ($opts['logminmsglev'] < 4) {
    $logfp = __DIR__ . '/run/' . $opts['hostname'] . '.log';
    $logf = @fopen($logfp, 'w');
    if ($logf === false) {
        mexit('could not open file «' . $logfp . '» in write mode.' . N, 1);
    }
}
2022-12-17 15:00:36 +01:00
// Columns of table «Instances» that hold integers.
$instints = [
    'ID', 'FirstSeen', 'IsMastodon', 'Dead', 'Priority', 'Visible', 'Noxious',
    'NoxLastModTS', 'LocalityID', 'OurLangsLock', 'UserCount', 'StatusCount',
    'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'RegOpen',
    'RegReqApproval', 'MaxTootChars', 'AdmCreatedAt', 'LastCheckOk', 'GuestID',
    'LastGuestEdit', 'InsertTS', 'RPos'
];

// Start from the default value of every column of «Instances».
$idata = [];
// myq() receives the calling line everywhere else in this script; this call
// used to pass __FILE__ instead.
$res = myq($link, 'SHOW COLUMNS FROM Instances', __LINE__);
while ($row = mysqli_fetch_assoc($res)) {
    $idata[$row['Field']] = $row['Default'];
}
// since we later need to determine if a value is an integer, and mysql returns integers as strings...
setint($instints, $idata);
$idata['URI'] = $opts['hostname'];

// Becomes true as soon as the instance gives a usable answer.
$instanswered = false;

$now = time();
/*
* Nodeinfo ('https://'.$opts['hostname'].'/nodeinfo/2.0.json') was added in v3.0.0
* Trends ('https://'.$opts['hostname'].'/api/v1/trends') was added in v3.0.0
* Activity ('https://'.$opts['hostname'].'/api/v1/instance/activity') was added in v2.1.2
*/
2022-12-17 15:00:36 +01:00
// Look the hostname up in table «Instances». $oidata ends up holding the
// existing record (with integer columns converted by setint()) or null when
// there is none; more than one matching record is reported as an error.
eecho(1, '[[[ Working on «' . $opts['hostname'] . '» ]]]' . N);
if (willtrunc($opts['hostname'], 'Instances', 'URI'))
    mexit('«' . $opts['hostname'] . '»: ignoring it because hostname is too long for the «URI» column of «Instances» table.' . N, 2);

eecho(0, '«' . $opts['hostname'] . '»: trying to fetch its info from the database...' . N);
$res = myq($link, 'SELECT * FROM Instances WHERE URI=\'' . myesc($link, $opts['hostname']) . '\'', __LINE__);
$count = mysqli_num_rows($res);
if ($count > 1) {
    $msg = '«' . $opts['hostname'] . '»: there are ' . $count . ' records with this URI in Instances table.';
    notify($msg, 3, false);
    mexit($msg . N, 3);
} elseif ($count == 1) {
    eecho(1, '«' . $opts['hostname'] . '»: found 1 record with this URI in Instances table.' . N);
    $oidata = mysqli_fetch_assoc($res);
    setint($instints, $oidata);
} else {
    eecho(1, '«' . $opts['hostname'] . '»: found no record with this URI in Instances table.' . N);
    $oidata = null;
}
// Nodeinfo discovery: fetch «/.well-known/nodeinfo» (https first, then http),
// pick one of the advertised documents and read software name/version and
// usage statistics from it. A decodable nodeinfo document counts as an answer
// from the instance ($instanswered).
eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo specs on https...' . N);
$buf = @gurl('https://' . $opts['hostname'] . '/.well-known/nodeinfo', $opts['timeout']);
if ($buf['cont'] === false) {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo specs on http...' . N);
    $buf = @gurl('http://' . $opts['hostname'] . '/.well-known/nodeinfo', $opts['timeout']);
}
if ($buf['cont'] !== false) {
    $buf = @json_decode($buf['cont'], true);
    if (is_array($buf)) {
        if (isset($buf['links']) && is_array($buf['links']) && count($buf['links']) > 0) {
            $ok = true;
            $nirefs = [];
            foreach ($buf['links'] as $key => $niref) {
                // «rel» is used as an array key and «href» as a URL, so both
                // must be strings; anything else is an unexpected format
                if (is_array($niref) && isset($niref['rel'], $niref['href']) && is_string($niref['rel']) && is_string($niref['href'])) {
                    $nirefs[$niref['rel']] = $niref['href'];
                } else {
                    eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs “links” entitity ' . $key . ' has unexpected format.' . N);
                    $ok = false;
                }
            }
            if ($ok) {
                // take the entry whose «rel» sorts last
                // NOTE(review): presumably the highest schema version - confirm
                krsort($nirefs);
                $niref = array_shift($nirefs);
                eecho(1, '«' . $opts['hostname'] . '»: got and successfully parsed nodeinfo specs :-)' . N);
                eecho(0, '«' . $opts['hostname'] . '»: trying to fetch nodeinfo data...' . N);
                $buf = @gurl($niref, $opts['timeout']);
                if ($buf['cont'] !== false) {
                    $buf = @json_decode($buf['cont'], true);
                    if (is_array($buf)) {
                        eecho(1, '«' . $opts['hostname'] . '»: got nodeinfo data :-)' . N);
                        $instanswered = true;
                        if (isset($buf['software']['name']) && is_string($buf['software']['name']) && !isempty($buf['software']['name'])) {
                            $idata['Software'] = trim($buf['software']['name']);
                            // the alternation must be grouped, otherwise «^» anchors
                            // only the first platform name and the others match
                            // anywhere inside the software name
                            $idata['IsMastodon'] = (preg_match('/^(?:' . $mastodons . ')/', $idata['Software']) === 1);
                            $res = myq($link, 'SELECT Name FROM Platforms WHERE Name=\'' . myesc($link, $idata['Software']) . '\'', __LINE__);
                            if (mysqli_num_rows($res) < 1) {
                                if (!$opts['dryrun'])
                                    myq($link, 'INSERT INTO Platforms (Name) VALUES (\'' . myesc($link, truncs($idata['Software'], 'Platforms', 'Name', '«' . $opts['hostname'] . '»')) . '\')', __LINE__);
                                notify('«' . $opts['hostname'] . '» runs on «' . $idata['Software'] . '», which was not present in the «Platforms» table, so it was added there. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances by setting the «Consider» field of its record to «1».', 2);
                            }
                        }
                        if (isset($buf['software']['version']) && is_string($buf['software']['version']) && !isempty($buf['software']['version']))
                            $idata['Version'] = trim($buf['software']['version']);
                        if (isset($buf['usage']['users']['total']) && is_int($buf['usage']['users']['total']))
                            $idata['UserCount'] = $buf['usage']['users']['total'];
                        if (isset($buf['usage']['users']['activeMonth']) && is_int($buf['usage']['users']['activeMonth']))
                            $idata['ActiveUsersMonth'] = $buf['usage']['users']['activeMonth'];
                        if (isset($buf['usage']['users']['activeHalfyear']) && is_int($buf['usage']['users']['activeHalfyear']))
                            $idata['ActiveUsersHalfYear'] = $buf['usage']['users']['activeHalfyear'];
                        if (isset($buf['usage']['localPosts']) && is_int($buf['usage']['localPosts']))
                            $idata['StatusCount'] = $buf['usage']['localPosts'];
                        if (isset($buf['openRegistrations']) && is_bool($buf['openRegistrations']))
                            $idata['RegOpen'] = b2i($buf['openRegistrations']);
                    } else {
                        eecho(2, '«' . $opts['hostname'] . '»: nodeinfo data was not good JSON.' . N);
                    }
                } else {
                    eecho(2, '«' . $opts['hostname'] . '»: could not fetch nodeinfo data: ' . $buf['emsg'] . '.' . N);
                }
            }
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs had unexpected format.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: nodeinfo specs where not good JSON.' . N);
    }
} else {
    eecho(2, '«' . $opts['hostname'] . '»: could not fetch nodeinfo specs: ' . $buf['emsg'] . '.' . N);
}
// Instance info. Mastodon-like instances with version >= 4.0.0 are queried
// through API v2 (plus the v1 extended description endpoint); everything else
// is queried through API v1.
// Versions are compared with version_compare(): a plain string comparison
// orders e.g. «10.0.0» before «4.0.0».

// String fields of the contact/admin account, mapped to their «Instances» columns.
$admstrfields = [
    'acct' => 'AdmAccount',
    'display_name' => 'AdmDisplayName',
    'note' => 'AdmNote',
    'url' => 'AdmURL',
    'avatar' => 'AdmAvatar',
    'header' => 'AdmHeader'
];
if ($idata['IsMastodon'] && is_string($idata['Version']) && version_compare($idata['Version'], '4.0.0', '>=')) {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance info from API v2...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v2/instance', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance info from API v2 :-)' . N);
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title']))
                $idata['Title'] = trim($buf['title']);
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description']))
                $idata['ShortDesc'] = trim($buf['description']);
            if (isset($buf['thumbnail']['url']) && is_string($buf['thumbnail']['url']) && !isempty($buf['thumbnail']['url']))
                $idata['Thumb'] = trim($buf['thumbnail']['url']);
            if (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters']))
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            if (isset($buf['registrations']['approval_required']) && is_bool($buf['registrations']['approval_required']))
                $idata['RegReqApproval'] = b2i($buf['registrations']['approval_required']);
            if (isset($buf['contact']['email']) && is_string($buf['contact']['email']))
                $idata['Email'] = trim($buf['contact']['email']);
            // the admin account data is stored only when the account explicitly has «noindex» set to false
            if (isset($buf['contact']['account']['noindex']) && is_bool($buf['contact']['account']['noindex']) && !$buf['contact']['account']['noindex']) {
                $admacct = $buf['contact']['account'];
                foreach ($admstrfields as $admsrc => $admdst)
                    if (isset($admacct[$admsrc]) && is_string($admacct[$admsrc]) && !isempty($admacct[$admsrc]))
                        $idata[$admdst] = trim($admacct[$admsrc]);
                if (isset($admacct['created_at']) && is_string($admacct['created_at']) && ($ts = strtotime($admacct['created_at'])) !== false)
                    $idata['AdmCreatedAt'] = $ts;
            }
            // domain_count is gone from api v2, and we won't resort to api v1 just to get it when ver. >= 4.0.0
            if (isset($buf['languages']) && is_array($buf['languages']))
                $idata['languages'] = $buf['languages'];
            if (isset($buf['rules']) && is_array($buf['rules']))
                foreach ($buf['rules'] as $rule)
                    if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text']))
                        $idata['rules'][$rule['id']] = $rule['text'];
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance info fetched from API v2 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance info from API v2: ' . $buf['emsg'] . '.' . N);
    }
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance extended description from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance/extended_description', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance extended description from API v1 :-)' . N);
            //print_r($buf);
            // isset() instead of !is_null(): the «content» key may be missing
            if (isset($buf['content']) && is_string($buf['content']) && !isempty($buf['content']))
                $idata['LongDesc'] = trim($buf['content']);
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance extended description fetched from API v1 was not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance extended description from API v1: ' . $buf['emsg'] . '.' . N);
    }
} else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 3.0.0, when nodeinfo was introduced
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance info from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance info from API v1 :-)' . N);
            //print_r($buf);
            $instanswered = true;
            if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title']))
                $idata['Title'] = trim($buf['title']);
            // the short description comes from «short_description»: it used to be
            // read from «description», the field checked for LongDesc just below
            if (isset($buf['short_description']) && is_string($buf['short_description']) && !isempty($buf['short_description']))
                $idata['ShortDesc'] = trim($buf['short_description']);
            if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description']))
                $idata['LongDesc'] = trim($buf['description']);
            if (isset($buf['email']) && is_string($buf['email']))
                $idata['Email'] = trim($buf['email']);
            // if nodeinfo did not respond, it could be mastodon < 3.0.0, and we would not have $idata['Version'] yet, so...
            if (!isset($idata['Version']) && isset($buf['version']) && is_string($buf['version']) && !isempty($buf['version']))
                $idata['Version'] = trim($buf['version']);
            // if nodeinfo responded we should already have these 2 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['stats']['user_count']) && is_int($buf['stats']['user_count']))
                $idata['UserCount'] = $buf['stats']['user_count'];
            if (isset($buf['stats']['status_count']) && is_int($buf['stats']['status_count']))
                $idata['StatusCount'] = $buf['stats']['status_count'];
            if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count']))
                $idata['DomainCount'] = $buf['stats']['domain_count'];
            if (isset($buf['thumbnail']) && is_string($buf['thumbnail']) && !isempty($buf['thumbnail']))
                $idata['Thumb'] = trim($buf['thumbnail']);
            if (isset($buf['max_toot_chars']) && is_int($buf['max_toot_chars']))
                $idata['MaxTootChars'] = $buf['max_toot_chars'];
            elseif (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters']))
                $idata['MaxTootChars'] = $buf['configuration']['statuses']['max_characters'];
            // if nodeinfo responded we should already have this 1 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
            if (isset($buf['registrations']) && is_bool($buf['registrations']))
                $idata['RegOpen'] = b2i($buf['registrations']);
            if (isset($buf['approval_required']) && is_bool($buf['approval_required']))
                $idata['RegReqApproval'] = b2i($buf['approval_required']);
            if (isset($buf['contact_account']) && is_array($buf['contact_account'])) {
                $admacct = $buf['contact_account'];
                foreach ($admstrfields as $admsrc => $admdst)
                    if (isset($admacct[$admsrc]) && is_string($admacct[$admsrc]) && !isempty($admacct[$admsrc]))
                        $idata[$admdst] = trim($admacct[$admsrc]);
                if (isset($admacct['created_at']) && is_string($admacct['created_at']) && ($ts = strtotime($admacct['created_at'])) !== false)
                    $idata['AdmCreatedAt'] = $ts;
            }
            if (isset($buf['languages']) && is_array($buf['languages']))
                $idata['languages'] = $buf['languages'];
            if (isset($buf['rules']) && is_array($buf['rules']))
                foreach ($buf['rules'] as $rule)
                    if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text']))
                        $idata['rules'][$rule['id']] = $rule['text'];
            // some falsing
            if (isset($buf['pleroma'])) $idata['IsMastodon'] = false;
            if (isset($buf['version']) && is_string($buf['version']) && preg_match('#(pleroma|pixelfed)#i', $buf['version']) === 1) $idata['IsMastodon'] = false;
        } else {
            eecho(2, '«' . $opts['hostname'] . '»: instance info fetched from API v1 were not good JSON.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance info from API v1: ' . $buf['emsg'] . '.' . N);
    }
}
2022-12-17 15:00:36 +01:00
// Weekly activity, fetched for Mastodon-like instances with version >= 2.1.2.
// The decoded array is stored in $idata['activity'].
if ($idata['IsMastodon'] && is_string($idata['Version']) && version_compare($idata['Version'], '2.1.2', '>=')) {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance activity info from API v1...' . N);
    $buf = @gurl('https://' . $opts['hostname'] . '/api/v1/instance/activity', $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance activity info from API v1 :-)' . N);
            $idata['activity'] = $buf;
        } else {
            // here $buf is the failed json_decode() result, so it has no
            // «emsg»: report the decoder's own error message instead
            eecho(2, '«' . $opts['hostname'] . '»: instance activity info from API v1 were not good JSON: ' . json_last_error_msg() . '.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance activity info from API v1: ' . $buf['emsg'] . '.' . N);
    }
}

// Tag trends, fetched for Mastodon-like instances with version >= 3.0.0;
// from 3.5.0 on the «/tags» sub-path is requested. The decoded array is
// stored in $idata['trends'].
if ($idata['IsMastodon'] && is_string($idata['Version']) && version_compare($idata['Version'], '3.0.0', '>=')) {
    eecho(0, '«' . $opts['hostname'] . '»: trying to fetch instance tags trends info from API v1...' . N);
    $url = 'https://' . $opts['hostname'] . '/api/v1/trends';
    if (version_compare($idata['Version'], '3.5.0', '>=')) $url .= '/tags';
    $buf = @gurl($url, $opts['timeout']);
    if ($buf['cont'] !== false) {
        ckratelimit($buf['headers']);
        $buf = @json_decode($buf['cont'], true);
        if (is_array($buf)) {
            eecho(1, '«' . $opts['hostname'] . '»: got instance tags trends info from API v1 :-)' . N);
            $idata['trends'] = $buf;
        } else {
            // same as above: $buf is not the gurl() result any more
            eecho(2, '«' . $opts['hostname'] . '»: instance tags trends from API v1 were not good JSON: ' . json_last_error_msg() . '.' . N);
        }
    } else {
        eecho(2, '«' . $opts['hostname'] . '»: could not fetch instance tags trends from API v1: ' . $buf['emsg'] . '.' . N);
    }
}
2022-12-17 15:00:36 +01:00
// finished fetching
// Decide between INSERT (no existing record) and UPDATE, and complete $idata
// with the values that must be carried over from the existing record.
if (!is_null($idata['IsMastodon'])) $idata['IsMastodon'] = b2i($idata['IsMastodon']);
$idata['LastCheckOk'] = $instanswered ? 1 : 0;
if (is_null($oidata)) {
    $query = 'INSERT INTO Instances SET ';
    if ($instanswered) {
        $idata['FirstSeen'] = $now;
        $idata['InsertTS'] = $now;
    }
} else {
    $query = 'UPDATE Instances SET ';
    $idata['FirstSeen'] = $oidata['FirstSeen'];
    if ($instanswered && is_null($oidata['FirstSeen'])) $idata['FirstSeen'] = $now;
    if (!$instanswered && $oidata['Dead'] == 0) {
        // we check the last time instance responded, if ever
        $res = myq($link, 'SELECT Time FROM InstChecks WHERE InstID=' . $oidata['ID'] . ' AND Status=1 ORDER BY Time DESC LIMIT 1', __LINE__);
        // if instance never responded we consider the time of first check
        if (mysqli_num_rows($res) == 0)
            $res = myq($link, 'SELECT Time FROM InstChecks WHERE InstID=' . $oidata['ID'] . ' ORDER BY Time ASC LIMIT 1', __LINE__);
        if (mysqli_num_rows($res) > 0) {
            $row = mysqli_fetch_assoc($res);
            if ($now - $row['Time'] > $opts['deadline']) {
                $idata['Dead'] = 1;
                // $instid is not assigned yet at this point: the record ID
                // comes from the existing record
                notify('«<a href="viewinst.php?id=' . $oidata['ID'] . '">' . $opts['hostname'] . '</a>» just died!', 2);
            }
        }
        // (a warning about instances present in «Instances» but without any
        // record in «InstChecks» used to be emitted here; it was disabled
        // since instances are now inserted from peerscrawl.php directly)
    } else {
        $idata['Dead'] = $oidata['Dead'];
    }
    // columns that this script never fetches: always kept from the existing record
    foreach (['Priority', 'Visible', 'Noxious', 'NoxReason', 'NoxLastModTS', 'OurDesc', 'OurDescEN', 'LocalityID', 'OurLangsLock', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos'] as $key)
        $idata[$key] = $oidata[$key];
    if (!$instanswered) {
        // the instance did not answer: keep everything we knew about it
        foreach (['IsMastodon', 'Title', 'ShortDesc', 'LongDesc', 'Email', 'Software', 'Version', 'UserCount', 'StatusCount', 'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'Thumb', 'RegOpen', 'RegReqApproval', 'MaxTootChars', 'AdmAccount', 'AdmDisplayName', 'AdmCreatedAt', 'AdmNote', 'AdmURL', 'AdmAvatar', 'AdmHeader'] as $key)
            $idata[$key] = $oidata[$key];
    }
}
// Build the «column=value» list from $idata and run the INSERT/UPDATE query.
// NULL, integer and string values are supported; integers and strings are
// checked with willtrunc() first, and any other type stops the script.
$set = [];
foreach ($idata as $key => $val) {
    // entries that do not go into the query (record ID and fetched arrays)
    if (in_array($key, ['ID', 'languages', 'rules', 'activity', 'trends']))
        continue;
    if (is_null($val)) {
        $set[] = $key . '=NULL';
    } elseif (is_int($val)) {
        if (willtrunc($val, 'Instances', $key)) {
            $msg = '«' . $opts['hostname'] . '»: value «' . $val . '» is less than min. admitted value or greater than max. admitted value for column «' . $key . '» of table «Instances». Shutting down.';
            notify($msg, 3, false);
            mexit($msg . N, 2);
        }
        $set[] = $key . '=' . $val;
    } elseif (is_string($val)) {
        if (willtrunc($val, 'Instances', $key)) {
            $msg = '«' . $opts['hostname'] . '»: value «' . nocrnl($val) . '» is too long for column «' . $key . '» of table «Instances». Shutting down.';
            notify($msg, 3, false);
            mexit($msg . N, 2);
        }
        $set[] = $key . '=\'' . myesc($link, $val) . '\'';
    } else {
        mexit('$idata[\'' . $key . '\'] value has unmanaged type, see code around line ' . __LINE__ . '.' . N, 3);
    }
}
$query .= implode(', ', $set);
// an UPDATE targets the existing record
if (!is_null($oidata)) $query .= ' WHERE ID=' . $oidata['ID'];
eecho(1, 'query: «' . $query . '».' . N);
if (!$opts['dryrun']) myq($link, $query, __LINE__);
// Store the instance languages (InstLangs) and "our" languages (InstOurLangs).
// For a new instance both lists are simply inserted; for an existing one
// InstLangs is rewritten only when it changed, and InstOurLangs only when it
// is not locked (OurLangsLock) and there is something to write.
// Nothing is written to the database in dry-run mode.
if (is_null($oidata)) {
    // in dry-run mode nothing was inserted, so there is no ID to read
    $instid = $opts['dryrun'] ? 0 : mysqli_insert_id($link);
    notify('«<a href="viewinst.php?id=' . $instid . '">' . $opts['hostname'] . '</a>» is a NEW instance! :-)', 1);
    $instlangs = langs($instid, $opts['hostname'], false, __LINE__);
    if (!$opts['dryrun'])
        foreach ($instlangs as $row)
            myq($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
    $instourlangs = langs($instid, $opts['hostname'], true, __LINE__);
    // if instourlangs is empty and instlangs is not, set instourlangs as instlangs
    if (count($instourlangs) == 0 && count($instlangs) > 0)
        $instourlangs = $instlangs;
    if (!$opts['dryrun'])
        foreach ($instourlangs as $row)
            myq($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
} else {
    $instid = $oidata['ID'];
    $res = myq($link, 'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID=' . $instid . ' ORDER BY Pos ASC', __LINE__);
    $oldinstlangs = [];
    while ($row = mysqli_fetch_assoc($res))
        $oldinstlangs[] = $row;
    $instlangs = langs($instid, $opts['hostname'], false, __LINE__);
    if ($instlangs != $oldinstlangs && !$opts['dryrun']) {
        myq($link, 'DELETE FROM InstLangs WHERE InstID=' . $instid, __LINE__);
        foreach ($instlangs as $row)
            myq($link, 'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
    }
    if ($idata['OurLangsLock'] == 0) {
        $instourlangs = langs($instid, $opts['hostname'], true, __LINE__);
        // if instourlangs is empty and instlangs is not, set instourlangs as instlangs
        if (count($instourlangs) == 0 && count($instlangs) > 0)
            $instourlangs = $instlangs;
        if (count($instourlangs) > 0 && !$opts['dryrun']) {
            myq($link, 'DELETE FROM InstOurLangs WHERE InstID=' . $instid, __LINE__);
            foreach ($instourlangs as $row)
                myq($link, 'INSERT INTO InstOurLangs (InstID, OurLangID, Pos) VALUES (' . $row['InstID'] . ', ' . $row['LangID'] . ', ' . $row['Pos'] . ')', __LINE__);
        }
    }
}
// from here we know for sure $instid
// log the outcome of this check
if (!$opts['dryrun']) {
	myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', '.$idata['LastCheckOk'].')',__LINE__);
}
// replace the stored weekly activity with the one just fetched (nothing is touched on a dry run)
if (!$opts['dryrun'] && isset($idata['activity']) && is_array($idata['activity'])) {
	myq($link,'DELETE FROM InstActivity WHERE InstID='.$instid,__LINE__);
	$actkeys=['week','statuses','logins','registrations'];
	$pos=0;
	foreach ($idata['activity'] as $buf) {
		// these should all be int, but mastodon represents them as strings: accept only strings of digits,
		// which is also what makes it safe to put them unquoted into the query
		$actok=true;
		foreach ($actkeys as $actkey) {
			if (!(isset($buf[$actkey]) && is_string($buf[$actkey]) && preg_match('/^\d+$/',$buf[$actkey])===1)) {
				$actok=false;
				break;
			}
		}
		if (!$actok) continue;
		$pos++;
		myq($link,'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES ('.$instid.', '.$buf['week'].', '.$buf['statuses'].', '.$buf['logins'].', '.$buf['registrations'].', '.$pos.')',__LINE__);
	}
}
// Rebuild this instance's rows in «InstTrends», ranked by the total number of accounts found in each
// trend's history. Every entry of $idata['trends'] must carry a string «name», a string «url» and a
// «history» list whose rows hold «day», «uses» and «accounts»; those counters should be integers, but
// they come as strings of digits, so both forms are accepted. Entries or rows that do not match are
// skipped instead of being stored with leftover values.
if (isset($idata['trends']) && is_array($idata['trends'])) {
	// true for an int or for a string made only of digits
	$isnum=static function ($val) {
		return(is_int($val) || (is_string($val) && preg_match('/^\d+$/D',$val)===1));
	};
	$trends=[];
	foreach ($idata['trends'] as $buf) {
		if (!is_array($buf) || !isset($buf['name'],$buf['url'],$buf['history']) || !is_string($buf['name']) || !is_string($buf['url']) || !is_array($buf['history'])) continue;
		// «LastDay» is taken from the first history row, so that row must have a usable «day»
		if (!isset($buf['history'][0]['day']) || !$isnum($buf['history'][0]['day'])) continue;
		$trend=0;
		foreach ($buf['history'] as $row) {
			if (is_array($row) && isset($row['day'],$row['uses'],$row['accounts']) && $isnum($row['day']) && $isnum($row['uses']) && $isnum($row['accounts']))
				$trend+=(int)$row['accounts'];
		}
		$trends[]=[
			'InstID'=>$instid,
			// cast to int: the value comes from the remote instance and ends up inside the query
			'LastDay'=>(int)$buf['history'][0]['day'],
			'Name'=>$buf['name'],
			'URL'=>$buf['url'],
			'Pos'=>null,
			'trend'=>$trend
		];
	}
	// highest score first; sorting the list itself (instead of indexing it by score) keeps trends having the same score
	usort($trends,static function ($a,$b) {
		return($b['trend']-$a['trend']);
	});
	if (!$opts['dryrun']) myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
	$pos=0;
	foreach ($trends as $trend) {
		$pos++;
		$query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.$trend['LastDay'].'\', \''.myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«'.$opts['hostname'].'»')).'\', \''.myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«'.$opts['hostname'].'»')).'\', '.$pos.')';
		if (!$opts['dryrun']) myq($link,$query,__LINE__);
	}
}
// Replace the stored rules of this instance with the ones just fetched, inserted in ascending key order.
if (isset($idata['rules']) && is_array($idata['rules'])) {
	ksort($idata['rules']);
	// on a dry run the rules are only sorted, nothing is deleted or inserted
	if (!$opts['dryrun']) {
		myq($link,'DELETE FROM InstRules WHERE InstID='.$instid,__LINE__);
		foreach ($idata['rules'] as $rule) {
			myq($link,'INSERT INTO InstRules SET InstID='.$instid.', Text=\''.myesc($link, truncs($rule, 'InstRules', 'Text', '«'.$opts['hostname'].'»')).'\'',__LINE__);
		}
	}
}
// Mirror the instance's public profiles directory into the «Users» and «UsersFields» tables.
// This is attempted only when users fetching is enabled and the instance is a mastodon one reporting
// version 4.0.0 or later: from that version on account entities report «noindex», while working it out
// by fetching each user's profile page takes too long on instances with many users; for the same reason
// users' featured tags are not fetched and «tags» is always stored as NULL.
// The version is compared numerically on its leading "digits and dots" part (so that eg «4.1.2+glitch»
// counts as «4.1.2»): compared as plain strings, «10.0.0» would come out lower than «4.0.0».
if ($opts['fetchusers'] && $idata['IsMastodon'] && !is_null($idata['Version']) && version_compare(preg_replace('/[^0-9.].*$/s','',$idata['Version']),'4.0.0','>=')) {
	$hpfx='«'.$opts['hostname'].'»';
	eecho(0,$hpfx.': trying to fetch users info from directory API...'.N);
	$users=[];// users found in this instance's directory, indexed by their id on the instance
	$dircomplete=true;// turns false as soon as part of the directory could not be retrieved or understood
	$chunk=0;
	$limit=40;
	$end=false;
	while (!$end) {
		$offset=$chunk*$limit;
		for ($att=0; $att<$opts['udiratts']; $att++) {
			eecho(0,$hpfx.': trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
			$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
			if ($buf['cont']!==false) {
				$xrlr=ckratelimit($buf['headers']);
				eecho(1,$hpfx.': got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N);
				$buf=@json_decode($buf['cont'],true);
				if (is_array($buf)) {
					// a chunk shorter than requested is the last one
					if (count($buf)<$limit) $end=true;
					foreach ($buf as $user) {
						if (!is_array($user)) {
							// eg the members of an «error» object: this is not a list of accounts, so the directory cannot be considered complete
							eecho(2,$hpfx.': directory API returned something that is not an account entity, ignoring it :-('.N);
							$dircomplete=false;
							continue;
						}
						if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
							eecho(0,$hpfx.': working on user «'.$user['username'].'»...'.N);
							// hashtags in the profile note override what the instance reports; an opt-in tag wins over an opt-out one
							$snote=strip_tags($user['note']);
							if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
							if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
							$user['tags']=null;
							if (!is_null($user['created_at'])) $user['created_at']=strtotime($user['created_at']);
							if (!is_null($user['last_status_at'])) $user['last_status_at']=datetots($user['last_status_at']);
							$users[$user['id']]=$user;
						} else {
							eecho(2,$hpfx.': user record missed some required keys :-('.N);
						}
					}
					// chunk done, no further attempts needed
					break;
				} else {
					eecho(2,$hpfx.': ... but the chunk was not good JSON :-('.N);
					if ($att==$opts['udiratts']-1) {
						$end=true;
						$dircomplete=false;
					}
				}
			} else {
				eecho(2,$hpfx.': could not fetch chunk '.($chunk+1).' of users info from directory API: '.$buf['emsg'].N);
				if ($att==$opts['udiratts']-1) {
					eecho(2,$hpfx.': last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
					$end=true;
					$dircomplete=false;
				} else {
					eecho(2,$hpfx.': attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],$ghtsa).' before retrying.'.N);
					sleep($opts['udirfailst']);
				}
			}
		}
		$chunk++;
	}
	$totusers=count($users);
	eecho(1,$hpfx.': got '.$totusers.' users profiles.'.N);
	if ($totusers>0) {
		eecho(1,$hpfx.': inserting/updating '.$totusers.' users profiles in the database.'.N);
		$exusers=[];// this instance's users already existing in the db, indexed by their id on the instance
		$res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
		while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
		foreach ($users as $locid=>$user) {
			$uctx=$hpfx.': «'.$user['username'].'»';
			// the assignments are built in any case, so that possible truncations get notified also for users that won't be stored
			$sets=[
				'InstID='.$instid,
				'host='.myv($link,$opts['hostname']),
				'locid='.myv($link,$user['id']),
				'username='.myv($link,truncs($user['username'], 'Users', 'username', $uctx)),
				'display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', $uctx)),
				'locked='.myv($link,$user['locked']),
				'bot='.myv($link,$user['bot']),
				'created_at='.myv($link,$user['created_at']),
				'note='.myv($link,truncs($user['note'], 'Users', 'note', $uctx)),
				'url='.myv($link,truncs($user['url'], 'Users', 'url', $uctx)),
				'avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', $uctx)),
				'header='.myv($link,truncs($user['header'], 'Users', 'header', $uctx)),
				'statuses_count='.myv($link,$user['statuses_count']),
				'last_status_at='.myv($link,$user['last_status_at']),
				'tags='.myv($link,truncs($user['tags'], 'Users', 'tags', $uctx))
			];
			$query='SET '.implode(', ',$sets);
			$uid=0;// stays 0 when there is no row to attach fields to
			if (!array_key_exists($user['id'],$exusers)) {
				if (!$user['noindex']) {
					eecho(0,$hpfx.': inserting new user «'.$user['username'].'»...'.N);
					$query='INSERT INTO Users '.$query;
					if (!$opts['dryrun']) {
						myq($link,$query,__LINE__);
						$uid=mysqli_insert_id($link);
					}
				} else {
					eecho(0,$hpfx.': NOT inserting user «'.$user['username'].'» because they dont want to be indexed...'.N);
				}
			} else {
				$uid=$exusers[$locid]['ID'];
				if (!$user['noindex']) {
					eecho(0,$hpfx.': updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
					$query='UPDATE Users '.$query.' WHERE ID='.$uid;
				} else {
					eecho(0,$hpfx.': deleting existing user «'.$user['username'].'» ('.$uid.') because they dont want to be indexed...'.N);
					$query='DELETE FROM Users WHERE ID='.$uid;
				}
				if (!$opts['dryrun']) {
					myq($link,$query,__LINE__);
					// stored fields are dropped in both cases; for a kept user they get rewritten right below
					myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
				}
			}
			if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
				eecho(0,$hpfx.': saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
				foreach ($user['fields'] as $field) {
					// only whether the field is verified gets stored, not when
					$field['verified_at']=(isset($field['verified_at'])) ? 1 : 0;
					$field['name']=truncs($field['name'],'UsersFields','name',$uctx);
					$field['value']=truncs($field['value'],'UsersFields','value',$uctx);
					if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at'],__LINE__);
				}
			}
		}
		if ($dircomplete) {
			eecho(1,$hpfx.': deleting possible users profiles which are in the database but no longer in the directory.'.N);
			foreach ($exusers as $locid=>$exuser) {
				if (!array_key_exists($locid,$users)) {
					eecho(0,$hpfx.': user «'.$exuser['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
					if (!$opts['dryrun']) {
						myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
						myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
					}
				}
			}
		} else {
			// with only part of the directory at hand, a stored user missing from $users may just not have been fetched
			eecho(2,$hpfx.': the directory could not be fetched completely, NOT deleting users profiles missing from it.'.N);
		}
	}
}
// all done: report how long the whole run took (counted from $now) and leave with exit code 0
$elapsed=time()-$now;
mexit('«'.$opts['hostname'].'»: done in '.ght($elapsed,null,0).' :-)'.N,0);
// functions
/**
 * Runs a query through mysqli_query() and hands back its result; on failure the program is
 * terminated through mexit() with exit code 3, reporting the query and the calling line.
 *
 * @param mixed $link Database connection, as accepted by mysqli_query()
 * @param string $query The query to run
 * @param int $line Line the call comes from, reported in the error message
 * @return mixed Whatever mysqli_query() returned (never false)
 */
function myq(&$link,$query,$line) {
	try {
		$res=mysqli_query($link,$query);
	} catch (Exception $error) {
		mexit('query «'.$query.'» on line '.$line.' failed: '.$error->getMessage().' ('.$error->getCode().').'.N,3);
	}
	if ($res!==false) {
		return($res);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	mexit('query «'.$query.'» on line '.$line.' failed: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,3);
}
/**
 * Emits a message prefixed with a timestamp (with sub-second part) and the label of its level.
 * The message goes to the terminal when its level reaches $opts['tuiminmsglev'] (standard output for
 * levels below 2, standard error from 2 up) and to the log file when it reaches $opts['logminmsglev']
 * and a log file is open.
 *
 * @param int $lev Message level, used as index into $msglevs
 * @param string $msg The message, written as is (no newline gets appended)
 */
function eecho($lev,$msg) {
	global $logf, $opts, $msglevs;
	// microtime(false) gives "0.NNNNNNNN SSSSSSSSSS": fraction of a second first, then the seconds
	list($frac,$secs)=explode(' ',microtime(false));
	$stamp=date('Y-m-d H:i:s',$secs).'.'.substr($frac,2);
	$line=$stamp.' '.$msglevs[$lev].': '.$msg;
	if ($lev>=$opts['tuiminmsglev']) {
		if ($lev>=2) {
			fwrite(STDERR,$line);
		} else {
			echo($line);
		}
	}
	if ($lev>=$opts['logminmsglev'] && isset($logf) && $logf!==false) {
		fwrite($logf,$line);
	}
}
/**
 * Terminates the program: closes the database connection if there is one, emits the closing message
 * (as level 3 when the exit code is not 0, as level 1 otherwise), closes the log file if it is open
 * and exits with the given code.
 *
 * @param string $msg Closing message
 * @param int $code Exit code
 */
function mexit($msg,$code) {
	global $link, $logf;
	if (isset($link) && $link!==false) {
		mysqli_close($link);
	}
	$lev=($code!=0) ? 3 : 1;
	eecho($lev,$msg);
	if (isset($logf) && $logf!==false) {
		fclose($logf);
	}
	exit($code);
}
/**
 * Turns into numbers, in place, the values found under the given keys of an array;
 * null values are left alone.
 *
 * @param array $keys Keys to work on
 * @param array $arr The array whose values get converted
 */
function setint($keys,&$arr) {
	foreach ($keys as $key) {
		if (is_null($arr[$key])) continue;
		// adding 0 lets php pick int or float depending on the value
		$arr[$key]+=0;
	}
}
/**
 * Tells whether a value would not fit into a column, according to the $tables description:
 * a string when it is longer (in characters) than $tables[$tab][$col], an int when it falls
 * outside the «min»/«max» given by $tables[$tab][$col]. Values of any other type always fit.
 *
 * @param mixed $val Value to check
 * @param string $tab Table name (lowercased on windows before the lookup)
 * @param string $col Column name
 * @return bool True when the value does not fit
 */
function willtrunc($val,$tab,$col) {
	global $tables, $iswin;
	if ($iswin) {
		$tab=strtolower($tab);
	}
	if (is_string($val)) {
		return(mb_strlen($val,'UTF-8')>$tables[$tab][$col]);
	}
	if (is_int($val)) {
		$range=$tables[$tab][$col];
		return($val<$range['min'] || $val>$range['max']);
	}
	return(false);
}
/**
 * Makes a string fit into a column: when it is longer (in characters) than $tables[$tab][$col]
 * it gets cut and closed with an ellipsis, so that it is exactly that long, and a level 3
 * notification is issued. Null is passed through untouched.
 *
 * @param string|null $str The string
 * @param string $tab Table name (lowercased on windows before the lookup)
 * @param string $col Column name
 * @param string $ctx Context put in front of the notification text
 * @return string|null The string, possibly truncated
 */
function truncs($str,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($str===null) {
		return(null);
	}
	if ($iswin) {
		$tab=strtolower($tab);
	}
	$size=$tables[$tab][$col];
	if (mb_strlen($str,'UTF-8')<=$size) {
		return($str);
	}
	// one character is given up to make room for the ellipsis
	$cut=mb_substr($str,0,$size-1,'UTF-8').'…';
	notify($ctx.': had to truncate string to '.$size.' chars to be able to insert it into «'.$col.'» column in «'.$tab.'» table.',3);
	return($cut);
}
/**
 * Makes a number fit into a column: values above $tables[$tab][$col]['max'] or below
 * $tables[$tab][$col]['min'] are replaced by that bound, and anything which is not numeric
 * is replaced by 0; each of these replacements is reported with a level 3 notification.
 *
 * @param mixed $num The number
 * @param string $tab Table name (lowercased on windows before the lookup)
 * @param string $col Column name
 * @param string $ctx Context put in front of the notification text
 * @return mixed The number, possibly replaced
 */
function truncn($num,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($iswin) {
		$tab=strtolower($tab);
	}
	if (!is_numeric($num)) {
		notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
		return(0);
	}
	if ($num>$tables[$tab][$col]['max']) {
		$max=$tables[$tab][$col]['max'];
		notify($ctx.': had to ceil «'.$num.'» to «'.$max.'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',3);
		return($max);
	}
	if ($num<$tables[$tab][$col]['min']) {
		$min=$tables[$tab][$col]['min'];
		notify($ctx.': had to floor «'.$num.'» to «'.$min.'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'»).',3);
		return($min);
	}
	return($num);
}
/**
 * Makes carriage returns and newlines visible, replacing them with the two-character
 * sequences «\r» and «\n».
 *
 * @param string $str The string
 * @return string The string with no actual line breaks in it
 */
function nocrnl($str) {
	$visible=["\r"=>'\\r',"\n"=>'\\n'];
	return(str_replace(array_keys($visible),array_values($visible),$str));
}
/**
 * Converts a value to 1 when it is truthy, to 0 otherwise.
 *
 * @param mixed $bool The value
 * @return int 1 or 0
 */
function b2i($bool) {
	return($bool ? 1 : 0);
}
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $str The string
 * @return bool True when there is nothing but whitespace in it
 */
function isempty($str) {
	return(preg_match('/^\s*$/',$str)===1);
}
/**
 * Stores a notification into the «Notifications» table, optionally echoing it too.
 * Nothing gets stored on a dry run.
 *
 * @param string $msg Notification text; it may hold html, which is stripped from the echoed copy only
 * @param int $lev Severity, to be thought of as the «$lev» param of function «eecho»: 0=debug, 1=info, 2=warning, 3=error
 * @param bool $doecho Whether the notification has to be echoed through eecho() as well
 */
function notify($msg,$lev,$doecho=true) {
	global $link, $tables, $iswin, $opts;
	if ($doecho) {
		eecho($lev,'*notification*: '.mb_lcfirst(strip_tags($msg)).N);
	}
	if ($opts['dryrun']) {
		return;
	}
	// $tables is indexed by lowercase table names on windows
	$tab=($iswin) ? 'notifications' : 'Notifications';
	// the text is cut to the size of its column
	$text=mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8');
	myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.myesc($link,$text).'\', '.$lev.', \''.microtime(true).'\', 0, 0)',__LINE__);
}
/**
 * Sorts in place an array of arrays by the value each of them holds under a given key,
 * preserving the keys of the outer array.
 * Entries holding the same value are all kept, in the order they had before sorting: indexing the
 * entries by that value in order to sort them would make them overwrite one another.
 *
 * @param array $arr The array of arrays to sort
 * @param mixed $key Key of the inner arrays holding the value to sort by
 * @param bool $rev False to sort from lowest to highest, true for the opposite
 */
function mdasortbykey(&$arr,$key,$rev=false) {
	// one [value, original position, outer key] item per entry
	$order=[];
	$seq=0;
	foreach ($arr as $akey=>$subarr)
		$order[]=[$subarr[$key],$seq++,$akey];
	usort($order,function ($a,$b) use ($rev) {
		if ($a[0]!=$b[0]) {
			$ord=($a[0]<$b[0]) ? -1 : 1;
			return($rev ? -$ord : $ord);
		}
		// same value: the original position decides, whatever the php version
		return($a[1]-$b[1]);
	});
	$sorted=[];
	foreach ($order as $item)
		$sorted[$item[2]]=$arr[$item[2]];
	$arr=$sorted;
}
/**
 * "Multi array_key_exists": tells whether every one of the given keys exists in an array
 * (a key holding null does count as existing).
 *
 * @param array $keys Keys to look for
 * @param array $arr Array to look into
 * @return bool True when no key is missing
 */
function make($keys,&$arr) {
	$allthere=true;
	foreach ($keys as $wanted) {
		if (array_key_exists($wanted,$arr)) continue;
		$allthere=false;
		break;
	}
	return($allthere);
}
/**
 * Renders a value as it has to be written inside a query: «NULL» for null and for anything that
 * is empty once trimmed, «1» or «0» for booleans, the escaped value between single quotes otherwise.
 *
 * @param mixed $link Database connection, as accepted by mysqli_real_escape_string()
 * @param mixed $var The value
 * @return string The value ready to be put into a query
 */
function myv(&$link,$var) {
	if (is_null($var)) {
		return('NULL');
	}
	if (is_bool($var)) {
		return($var ? '1' : '0');
	}
	if (trim($var)=='') {
		return('NULL');
	}
	return('\''.mysqli_real_escape_string($link,$var).'\'');
}
/**
 * Converts a «year-month-day» date into the timestamp of its midnight, as worked out by mktime().
 *
 * @param string $date The date, with its parts separated by «-»
 * @return int|false What mktime() returned
 */
function datetots($date) {
	list($year,$month,$day)=explode('-',$date);
	return(mktime(0,0,0,$month,$day,$year));
}
/**
 * Looks at the rate limit headers of an http response and, when no requests are left, sleeps until
 * the limit gets reset; how long is worked out as the distance between the «date» and the
 * «x-ratelimit-reset» headers, plus one second.
 * A reset that is not later than the response date results in no sleep at all (sleep() refuses
 * negative values), and so do headers that can't be read as times, since a time that can't be
 * read would otherwise turn into a meaningless, possibly huge, number of seconds.
 *
 * @param string $httpresphead The response's head: status line and headers, separated by CRLF
 * @return string|false Value of the «x-ratelimit-remaining» header, or false when that, «date» or «x-ratelimit-reset» is missing
 */
function ckratelimit($httpresphead) {
	$lines=explode("\r\n",$httpresphead);
	// the first line is the status line, not a header
	array_shift($lines);
	$headers=[];
	foreach ($lines as $line)
		if (preg_match('/^([^:]+):(.*)$/Uu',$line,$matches)===1)
			$headers[strtolower($matches[1])]=trim($matches[2]);
	$missing=[];
	foreach (['date','x-ratelimit-reset','x-ratelimit-remaining'] as $name)
		if (!isset($headers[$name])) $missing[]=$name;
	if (count($missing)>0) {
		eecho(2,'ckratelimit: $httpresphead did not contain «'.implode('», «',$missing).'» header(s)!'.N);
		return(false);
	}
	if ($headers['x-ratelimit-remaining']==0) {
		$reset=strtotime($headers['x-ratelimit-reset']);
		$date=strtotime($headers['date']);
		if ($reset===false || $date===false) {
			eecho(2,'reached rate limit, but could not work out for how long to sleep from «date» and «x-ratelimit-reset» headers.'.N);
		} else {
			$stosl=max(0,$reset-$date+1);
			eecho(2,'reached rate limit, sleeping for '.ght($stosl).' ...'.N);
			sleep($stosl);
		}
	}
	return($headers['x-ratelimit-remaining']);
}
/** <LANGUAGE MANAGEMENT> */
/**
 * Performs a GET call to the API of an instance, honouring its rate limit headers.
 *
 * @param string $host Host to call (e.g.: "mastodon.bida.im")
 * @param string $path Path of the API endpoint, query string included (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The decoded JSON as returned by json_decode (objects as associative arrays), or NULL if the call fails
 */
function get_api($host, $path) {
	global $opts;
	$resp = @gurl('https://'.$host.$path,$opts['timeout']);
	if ($resp['cont']===false) {
		return NULL;
	}
	ckratelimit($resp['headers']);
	return json_decode($resp['cont'], true);
}
/**
 * Returns the languages of a toot, each with its probability.
 * The language declared by the toot itself, if any, is taken as certain (probability 1); otherwise
 * the languages are detected from the text of the toot. Anything which is not a toot (e.g. a piece
 * of an error response), or a toot with neither a language nor a textual content, gives an empty list.
 *
 * @param mixed $toot The toot to be checked, as returned by the API
 * @return array Associative array mapping each language code to its probability
 */
function get_toot_languages($toot) {
	$langs = [];
	if (is_array($toot)) {
		if (isset($toot['language'])) {
			// the language is explicitly set in the toot, so use that
			$langs[$toot['language']] = 1;
		} elseif (isset($toot['content']) && is_string($toot['content'])) {
			// the language is not explicitly set in the toot, so try and recognize it
			$text = strip_tags($toot['content']);
			$ld = new Language;
			$langs = $ld->detect($text)->bestResults()->close();
		}
	}
	// group derived languages under their base code (e.g.: "zh-CN" under "zh"), keeping the highest probability
	$grouped_langs = [];
	foreach ($langs as $key => $value) {
		$l = explode("-", (string)$key)[0];
		if (array_key_exists($l, $grouped_langs)) {
			$grouped_langs[$l] = max($grouped_langs[$l], $value);
		} else {
			$grouped_langs[$l] = $value;
		}
	}
	return $grouped_langs;
}
/**
 * Given the language probabilities of each toot, works out the average probability of every language
 * over all the toots (a toot not listing a language counts as 0 for it).
 *
 * @param array $detected_langs One mapping between language and probability per toot
 * @return array Mapping between language and average probability
 */
function summary($detected_langs) {
	$totals = [];
	foreach ($detected_langs as $langs) {
		foreach ($langs as $l => $weight) {
			$sofar = array_key_exists($l, $totals) ? $totals[$l] : 0;
			$totals[$l] = $sofar + $weight;
		}
	}
	foreach ($totals as $l => $sumweight) {
		$totals[$l] = $sumweight / count($detected_langs);
	}
	return $totals;
}
/**
 * Helper function for usort: compares two arrays by their first element, so that
 * the array with the greater first element comes first.
 *
 * @param array $entry1 First array to be compared
 * @param array $entry2 Second array to be compared
 * @return int 1, 0 or -1 depending on $entry1[0] being less than, equal to or greater than $entry2[0]
 */
function sort_weights($entry1, $entry2) {
	// operands are swapped on purpose, to get a descending order
	return $entry2[0] <=> $entry1[0];
}
/**
 * Given a mapping between languages and probabilities, returns the list of the probable languages:
 * starting from the most probable one, languages are taken as long as each one's probability
 * is at least two thirds of the previous one's.
 *
 * @param array $summary Mapping between language and probability
 * @return string[] List of probable languages, most probable first
 */
function get_languages($summary) {
	// one [weight, code] pair per language
	$pairs = array_map(null, array_values($summary), array_keys($summary));
	usort($pairs, 'sort_weights');
	$languages = [];
	$lastweight = 0;
	foreach ($pairs as $pair) {
		list($weight, $code) = $pair;
		if ($weight < $lastweight * 2 / 3) {
			break;
		}
		$languages[] = $code;
		$lastweight = $weight;
	}
	return $languages;
}
/**
 * Returns the list of the probable languages of an instance, worked out from the
 * last $opts['ldtoots'] toots of its local public timeline.
 *
 * @param string $host Hostname of the instance (e.g.: "mastodon.bida.im")
 * @return string[] List of probable languages, empty when the timeline could not be fetched or has no toots
 */
function get_instance_langs($host) {
	global $opts;
	$toots = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
	if ($toots == NULL) {
		return [];
	}
	$pertoot = array_map('get_toot_languages', $toots);
	return get_languages(summary($pertoot));
}
/**
 * Works out the languages of an instance (five at most) and returns them as rows ready for the
 * «InstLangs»/«InstOurLangs» tables. Languages not yet present in the «Languages» table are
 * inserted into it, along with their names in the supported locales (on a dry run nothing is
 * inserted and such languages get 0 as id).
 *
 * @param int $instid ID of the instance
 * @param string $hostname Hostname of the instance
 * @param bool $auto True to detect the languages from the last toots, false to use the ones declared in $idata['languages']
 * @param int $line Line the call comes from, reported by myq() if a query fails
 * @return array List of rows with keys «InstID», «LangID», «Pos» (starting from 1) and «Code»
 */
function langs($instid, $hostname, $auto, $line) {
	global $idata, $link, $opts;
	// even if $auto is true, set it to false (don't do autodetection of languages based on last toots) if api/v1/instance returned a language different from the default "en": assume instead it is right, because it has been explicitly set
	if (isset($idata['languages'][0]) && $idata['languages'][0]!='en') {
		$auto=false;
	}
	$languages=[];
	if ($auto) {
		$languages=get_instance_langs($hostname);
	} elseif (isset($idata['languages']) && is_array($idata['languages'])) {
		$languages=$idata['languages'];
	}
	if (count($languages)==0) {
		return([]);
	}
	// keep the first five only, written with «_» in place of «-»
	$languages=array_slice($languages,0,5,true);
	$languages=array_map(function ($val) {
		return(str_replace('-','_',$val));
	},$languages);
	eecho(1,'«'.$hostname.'»: '.(($auto) ? 'detected' : 'declared').' languages: '.implode(', ',$languages).N);
	// columns of «Languages» holding the name of a language, each with the locale the name is written in (null: the language itself)
	$namecols=['NameOrig'=>null,'NamePT_BR'=>'pt_BR','NameDE'=>'de','NameUK'=>'uk','NameCA'=>'ca','NameEN'=>'en','NameES'=>'es','NameFR'=>'fr','NameGL'=>'gl','NameIT'=>'it'];
	$ctx='«'.$hostname.'»';
	$retlangs=[];
	$pos=0;
	foreach ($languages as $lang) {
		$res=myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$lang).'\'',$line);
		if (mysqli_num_rows($res)>=1) {
			$row=mysqli_fetch_assoc($res);
			$langid=$row['ID'];
		} else {
			// unknown language: values are collected in the same order the columns are listed in the query
			$vals=[myesc($link,truncs($lang,'Languages','Code',$ctx))];
			foreach ($namecols as $col=>$inloc) {
				$name=mb_ucfirst(locale_get_display_name($lang,(is_null($inloc)) ? $lang : $inloc));
				$vals[]=myesc($link,truncs($name,'Languages',$col,$ctx));
			}
			$q = 'INSERT INTO Languages (ID, Code, NameOrig, NamePT_BR, NameDE, NameUK, NameCA, NameEN, NameES, NameFR, NameGL, NameIT) VALUES (NULL, \''.implode('\', \'',$vals).'\')';
			if ($opts['dryrun']) {
				$langid=0;
			} else {
				myq($link,$q,$line);
				$langid=mysqli_insert_id($link);
			}
		}
		$pos++;
		$retlangs[]=['InstID'=>$instid,'LangID'=>$langid,'Pos'=>$pos,'Code'=>$lang];
	}
	return($retlangs);
}
?>