12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452 |
- #!/usr/bin/php
- <?php
- /*
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- // Line terminators: N is appended to the messages this script prints, RN is
- // the CR+LF variant.
- const N = "\n";
- const RN = "\r\n";
- // File name of this script, shown in the synopsis of the help text.
- define('SNAME', basename(__FILE__));
- // Work from the script's own directory: the «require» paths below are relative.
- chdir(__DIR__);
- require '../lib/parsetime.php';
- require '../lib/gurl.php';
- require '../lib/gethttpcode.php';
- require '../lib/tables.php';
- require '../lib/mb_ucfirst.php';
- require '../lib/mb_lcfirst.php';
- require '../lib/ghs.php';
- require '../lib/ght.php';
- require '../lib/fnum.php';
- require '../lib/supplangs.php';
- require '../lib/gettlds.php';
- require '../lib/mastodon_postlen.php';
- require '../lib/ckratelimit.php';
- require '../lib/vendor/autoload.php';
- use LanguageDetection\Language;
- use function mysqli_real_escape_string as myesc;
- // True when PHP_OS starts with «WIN» (compared case-insensitively).
- $iswin=(strtoupper(substr(PHP_OS,0,3))==='WIN');
- // Signal handlers registered through pcntl_signal() are dispatched on ticks.
- declare(ticks=1);
- if (function_exists('pcntl_signal')) {
-     /**
-      * Ends the script through mexit() when one of the signals registered
-      * below is received.
-      *
-      * @param int $signal Number of the received signal.
-      */
-     function signalHandler($signal) {
-         echo(N);
-         mexit('received signal «'.$signal.'», shutting down.'.N,0);
-     }
-     // SIGTERM: termination ('kill' was called); SIGHUP: terminal log-out;
-     // SIGINT: interrupted (Ctrl-C is pressed).
-     foreach ([SIGTERM,SIGHUP,SIGINT] as $signo)
-         pcntl_signal($signo,'signalHandler');
- }
- // Default settings. Command line options and the «getinstinfo.ini» file,
- // both handled further down, override them.
- $opts=[
-     'hostname'     => null,
-     'conntimeout'  => 10,
-     'functimeout'  => 20,
-     'ldtoots'      => 40,// number of toots to check with the automatic language detection function
-     'dryrun'       => false,
-     'fetchusers'   => false,
-     'udiratts'     => 5,
-     'udirfailst'   => 90,
-     'minmsgimplev' => 1,// index into $msglevs
-     'bothost'      => null,
-     'bottoken'     => null,
-     'botmaxchars'  => null,
-     '_sendtoot'    => false,// set when «bothost», «bottoken» and «botmaxchars» are all configured
-     'proxy'        => null,
-     'useproxyfor'  => null
- ];
- // Message importance levels, least important first; the position in this
- // list is the numeric level stored in $opts['minmsgimplev'].
- $msglevs=[
-     'Debug',
-     'Info',
-     'Warning',
-     'Error',
-     'None'
- ];
- /* Usage text echoed verbatim when «-h» or «--help» is found on the command
-  * line. The defaults it reports are interpolated from $opts and $msglevs, so
-  * they follow the values assigned above. The literal spans many lines:
-  * everything up to the closing quote is part of the output. */ $help='SYNOPSIS
- '.SNAME.' <hostname> [options]
- DESCRIPTION
- This script tries to fetch info about the fediverse instance at the given
- hostname and insert or update them in mastostart’s database.
- OPTIONS
- -l, --ldtoots <number>
- This option defines the number of toots the script will try to fetch from
- the local public timelines, to try and guess the most used languages of each
- instance. Its minimum value is 10, its maximum value is 40.
- DEFAULT: '.$opts['ldtoots'].'
- -f, --fetchusers
- If this option is set, the script will try to fetch users’ info from the
- considered instance’s users directory, and store them in the database.
- -r, --udiratts <number>
- This option defines how many attempts the script will do at fetching a chunk
- of users’ info from the profile directory, before giving up.
- DEFAULT: '.$opts['udiratts'].'
- -s, --udirfailst <time>
- This option defines how long the script will wait after each failed attempt
- at fetching a chunk of users’ info from the profile directory (see above)
- before retrying.
- DEFAULT: '.ght($opts['udirfailst'],null,0).'
- -t, --conntimeout <time>
- Sets the timeout for every connection attempt. See section «TIME
- SPECIFICATION» below to see how to specify time.
- DEFAULT: '.ght($opts['conntimeout'],null,0).'
- -T, --functimeout <time>
- Sets the timeout for every download. See section «TIME SPECIFICATION» below
- to see how to specify time.
- DEFAULT: '.ght($opts['functimeout'],null,0).'
- -d, --dryrun
- If this option is set, the script won’t write anything in the database.
- -m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
- Defines the minimum “importance level” of messages to be written to the
- text user interface. There are 4 “importance levels”, in this order of
- importance: «debug», «info», «warning», «error». Setting this option to any
- of these values will make the script write to the text user interface all
- the messages with the specified or a greater level; setting it to the
- special value «none» will completely disable messages.
- DEFAULT: '.lcfirst($msglevs[$opts['minmsgimplev']]).'
- -h, --help
- If this option is set, the script will show this help text and exit.
- TIME SPECIFICATION
- An example is better than ~5148 words :-)
- To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
- 7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
- also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
- NEW INSTANCE ANNOUNCEMENT TOOT
- This script can send an announcement toot when the instance it checks is new.
- It will try to do it if it finds a readable «getinstinfo.ini» file in the
- same directory it lives in, with a «bothost» parameter defining the instance
- to use to send the toot, a «bottoken» parameter defining the token to be used
- to post, and a «botmaxchars» parameter defining the maximum number of
- characters allowed for toots on the defined instance (must be >= 10).
- PROXY SUPPORT
- This script supports socks5 proxy to connect to an instance. It will try to
- use a proxy if it finds a readable «getinstinfo.ini» file in the same
- directory it lives in, with a «proxy» parameter defining the proxy to use and
- a «useproxyfor» parameter including the hostname it’s working on (see
- «SYNOPSIS» above).
- «proxy» syntax: [user:pass@]<host>[:port]
- «useproxyfor» syntax: host[,host][...]
- LICENSE
- This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
- This is free software, and you are welcome to redistribute it under certain
- conditions; see <http://www.gnu.org/licenses/> for details.'.N;
- // Help wins over everything else: as soon as «-h» or «--help» shows up
- // anywhere on the command line, print the help text and stop.
- foreach ($argv as $val) {
-     if (in_array($val,['-h','--help'],true)) {
-         echo($help);
-         exit(0);
-     }
- }
- // Optional settings for the announcement bot and for the proxy are read from
- // «getinstinfo.ini», in the same directory as this script. A missing or
- // unreadable file is not fatal: it is just reported.
- $inifp=__DIR__.'/getinstinfo.ini';
- $iniarr=@parse_ini_file($inifp);
- if (is_array($iniarr)) {
-     if (isset($iniarr['bothost']) && !isempty($iniarr['bothost']))
-         $opts['bothost']=$iniarr['bothost'];
-     if (isset($iniarr['bottoken']) && !isempty($iniarr['bottoken']))
-         $opts['bottoken']=$iniarr['bottoken'];
-     if (isset($iniarr['botmaxchars'])) {
-         // the is_string() guard keeps array values (ini «key[]» syntax) away from preg_match()
-         if (!is_string($iniarr['botmaxchars']) || preg_match('/^[0-9]+$/',$iniarr['botmaxchars'],$matches)!=1 || (int)$matches[0]<10)
-             mexit('config file «'.$inifp.'»: value for «botmaxchars» must be an integer >= 10.'.N,1);
-         $opts['botmaxchars']=(int)$iniarr['botmaxchars'];
-     }
-     // the announcement toot can be sent only when all three bot parameters are known
-     if (!is_null($opts['bothost']) && !is_null($opts['bottoken']) && !is_null($opts['botmaxchars']))
-         $opts['_sendtoot']=true;
-     if (isset($iniarr['proxy']) && !isempty($iniarr['proxy']))
-         $opts['proxy']=$iniarr['proxy'];
-     if (isset($iniarr['useproxyfor']) && !isempty($iniarr['useproxyfor'])) {
-         // Tolerate blanks around the commas («a.example, b.example») and stray
-         // commas: an untrimmed entry would never be equal to the hostname
-         // given on the command line, so the proxy would silently not be used.
-         $opts['useproxyfor']=array_values(array_filter(
-             array_map('trim',explode(',',$iniarr['useproxyfor'])),
-             function ($host) { return $host!==''; }
-         ));
-     }
- } else {
-     eecho(1,"Could not open «{$inifp}» (it does not exist or is not readable).\n");
- }
- // Main configuration file: the database connection below takes its
- // parameters from it, so the script gives up when it cannot be loaded
- // (or when it yields no settings at all).
- $inifp=__DIR__.'/../conf/mustard.ini';
- $iniarr=@parse_ini_file($inifp);
- if (!$iniarr)
-     mexit('could not open config file «'.$inifp.'».'.N,1);
- // Command line parsing. Options can come in any order; the first argument that
- // is neither an option nor the value of an option is taken as the hostname.
- // Anything that cannot be interpreted ends the script with an error.
- //
- // Options taking a time specification, mapped to the setting they fill in.
- $timeopts=[
-     '-s'=>'udirfailst',  '--udirfailst'=>'udirfailst',
-     '-t'=>'conntimeout', '--conntimeout'=>'conntimeout',
-     '-T'=>'functimeout', '--functimeout'=>'functimeout'
- ];
- for ($i=1; $i<$argc; $i++) {
-     // value following the current argument (null when it is the last one)
-     $optval=($i+1<$argc) ? $argv[$i+1] : null;
-     if ($argv[$i]==='-f' || $argv[$i]==='--fetchusers') {
-         $opts['fetchusers']=true;
-     } elseif ($argv[$i]==='-r' || $argv[$i]==='--udiratts') {
-         // 1 is a valid value (a single attempt): the message now says so
-         if (is_null($optval) || preg_match('/^\d+$/',$optval)!==1 || (int)$optval<1)
-             mexit('option «'.$argv[$i].'» requires a number >= 1 as an argument (use «-h» to read help).'.N,1);
-         $opts['udiratts']=(int)$optval;
-         $i++;
-     } elseif (isset($timeopts[$argv[$i]])) {
-         // the time specification is parsed once and the result reused
-         $parsedtime=is_null($optval) ? false : parsetime($optval);
-         if ($parsedtime===false)
-             mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
-         $opts[$timeopts[$argv[$i]]]=$parsedtime;
-         $i++;
-     } elseif ($argv[$i]==='-l' || $argv[$i]==='--ldtoots') {
-         if (is_null($optval) || preg_match('/^\d+$/',$optval)!==1 || (int)$optval>40 || (int)$optval<10)
-             mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
-         $opts['ldtoots']=(int)$optval;
-         $i++;
-     } elseif ($argv[$i]==='-d' || $argv[$i]==='--dryrun') {
-         $opts['dryrun']=true;
-     } elseif ($argv[$i]==='-m' || $argv[$i]==='--minmsgimplev') {
-         // level names are matched case-insensitively against $msglevs
-         $msglev=is_null($optval) ? false : array_search(ucfirst(strtolower($optval)),$msglevs,true);
-         if ($msglev===false)
-             mexit('option «'.$argv[$i].'» requires a “message importance level” value as an argument (use «-h» to read help).'.N,1);
-         $opts['minmsgimplev']=$msglev;
-         $i++;
-     } elseif (is_null($opts['hostname']) && $argv[$i]!=='' && $argv[$i][0]!=='-') {
-         // an empty argument is refused here: it used to be accepted as an
-         // (unusable) hostname, after reading offset 0 of an empty string
-         $opts['hostname']=$argv[$i];
-     } else {
-         mexit('don’t know how to interpret «'.$argv[$i].'», please read the help text using «-h» or «--help».'.N,1);
-     }
- }
- if (is_null($opts['hostname']))
-     mexit('you didn’t specify an hostname (you can read the help text using «-h» or «--help»).'.N,1);
- // Open the database connection and select the «utf8mb4» charset. A failure is
- // handled both when it comes as an exception and when it comes as a false
- // return value (see the «php versions < 8» notes).
- try {
-     $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']);
- } catch (Exception $error) {
-     mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
- }
- // for php versions < 8
- if (false===$link) {
-     mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
- }
- try {
-     $res=mysqli_set_charset($link,'utf8mb4');
- } catch (Exception $error) {
-     mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
- }
- // for php versions < 8
- if (false===$res) {
-     mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
- }
- // Collect the names of the platforms flagged with «Consider=1», quoted for use
- // inside a «/»-delimited regular expression, then join them into one
- // alternation. Having no such platform is a fatal error.
- $mastodons=[];
- $res=myq($link,'SELECT Name FROM Platforms WHERE Consider=1',__LINE__);
- while ($row=mysqli_fetch_assoc($res)) {
-     $mastodons[]=preg_quote($row['Name'],'/');
- }
- if (empty($mastodons)) {
-     mexit('in table «Platforms», there is no platform to be considered!'.N,1);
- }
- $mastodons=implode('|',$mastodons);
- $tables=tables($link);
- // Columns of «Instances» holding integers (handed to setint() below).
- $instints=[
-     'ID', 'FirstSeen', 'IsMastodon', 'Priority', 'Visible', 'Noxious',
-     'NoxLastModTS', 'LocalityID', 'OurLangsLock', 'UserCount', 'StatusCount',
-     'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'RegOpen',
-     'RegReqApproval', 'MaxTootChars', 'AdmCreatedAt', 'PublicBlocksList',
-     'TotChecks', 'OkChecks', 'WasLastCheckOk', 'LastOkCheckTS', 'GuestID',
-     'LastGuestEdit', 'InsertTS', 'RPos'
- ];
- // $idata starts as «column name => column default» for every column of the
- // «Instances» table; the data fetched from the instance is written over it.
- $idata=[];
- // myq() gets the current line as its last argument everywhere else in this
- // script: __FILE__ was passed here by mistake.
- $res=myq($link,'SHOW COLUMNS FROM Instances',__LINE__);
- while ($row=mysqli_fetch_assoc($res))
-     $idata[$row['Field']]=$row['Default'];
- // since we later need to determine if a value is an integer, and mysql returns integers as strings...
- setint($instints,$idata);
- $idata['URI']=$opts['hostname'];
- // set to true once one of the «instance» API endpoints returns usable data
- $instanswered=false;
- $now=time();
- /*
- * Nodeinfo ('https://'.$opts['hostname'].'/nodeinfo/2.0.json') was added in v3.0.0
- * Trends ('https://'.$opts['hostname'].'/api/v1/trends') was added in v3.0.0 and became /api/v1/trends/tags with v3.5.0
- * Activity ('https://'.$opts['hostname'].'/api/v1/instance/activity') was added in v2.1.2
- */
- waituntilonline();
- eecho(1,'[[[ Working on «'.$opts['hostname'].'» ]]]'.N);
- if (willtrunc($opts['hostname'],'Instances','URI')) {
-     mexit('«'.$opts['hostname'].'»: ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N,2);
- }
- // The configured proxy is kept only when a proxy is set and the hostname is
- // listed in «useproxyfor»; in any other case it is dropped, so that the
- // requests below go out directly.
- if (!is_array($opts['useproxyfor']) || is_null($opts['proxy']) || !in_array($opts['hostname'],$opts['useproxyfor'])) {
-     $opts['proxy']=null;
- } else {
-     eecho(1,"«{$opts['hostname']}»: will use configured proxy («{$opts['proxy']}») to fetch its data.\n");
- }
- // Look for a record of this instance already stored in the database: $oidata
- // gets that record, or null when there is none. More than one record with the
- // same URI is reported through notify() and ends the script.
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch its info from the database...'.N);
- $res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$opts['hostname']).'\'',__LINE__);
- $count=mysqli_num_rows($res);
- if ($count==1) {
-     eecho(1,'«'.$opts['hostname'].'»: found 1 record with this URI in Instances table.'.N);
-     $oidata=mysqli_fetch_assoc($res);
-     // same integer conversion applied to $idata
-     setint($instints,$oidata);
- } elseif ($count>1) {
-     $msg='«'.$opts['hostname'].'»: there are '.$count.' records with this URI in Instances table.';
-     notify($msg,3,false);
-     mexit($msg.N,3);
- } else {
-     eecho(1,'«'.$opts['hostname'].'»: found no record with this URI in Instances table.'.N);
-     $oidata=null;
- }
- // Nodeinfo: fetch the discovery document from «/.well-known/nodeinfo» (https
- // first, http as a fallback), pick one of the documents it links to, and read
- // software name and version, usage counters and registration status from it.
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']===false) {
-     eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
-     $buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- }
- if ($buf['cont']!==false) {
-     $buf=@json_decode($buf['cont'],true);
-     if (is_array($buf)) {
-         if (isset($buf['links']) && is_array($buf['links']) && count($buf['links'])>0) {
-             $ok=true;
-             $nirefs=[];
-             foreach ($buf['links'] as $key=>$niref) {
-                 // «rel» is used as an array key and «href» as a URL, so both
-                 // must be strings: this is remote data, and e.g. an array as
-                 // «rel» would otherwise abort the script with an error.
-                 if (isset($niref['rel'],$niref['href']) && is_string($niref['rel']) && is_string($niref['href'])) {
-                     $nirefs[$niref['rel']]=$niref['href'];
-                 } else {
-                     eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs “links” entitity '.$key.' has unexpected format.'.N);
-                     $ok=false;
-                 }
-             }
-             if ($ok) {
-                 // take the link whose «rel» sorts last (presumably the most
-                 // recent schema version — TODO confirm)
-                 krsort($nirefs);
-                 $niref=array_shift($nirefs);
-                 eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
-                 eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
-                 $buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
-                 if ($buf['cont']!==false) {
-                     $buf=@json_decode($buf['cont'],true);
-                     if (is_array($buf)) {
-                         eecho(1,'«'.$opts['hostname'].'»: got nodeinfo data :-)'.N);
-                         if (isset($buf['software']['name']) && is_string($buf['software']['name']) && !isempty($buf['software']['name'])) {
-                             $idata['Software']=trim($buf['software']['name']);
-                             // The alternation has to be grouped: without the
-                             // group, «^» applied to the first platform name
-                             // only, and every other name matched anywhere in
-                             // the software name.
-                             $idata['IsMastodon']=(preg_match('/^(?:'.$mastodons.')/',$idata['Software'])===1);
-                             // platforms seen for the first time are recorded and reported
-                             $res=myq($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$idata['Software']).'\'',__LINE__);
-                             if (mysqli_num_rows($res)<1) {
-                                 if (!$opts['dryrun'])
-                                     myq($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($idata['Software'], 'Platforms', 'Name', '«'.$opts['hostname'].'»')).'\')',__LINE__);
-                                 notify('«'.$opts['hostname'].'» runs on «'.$idata['Software'].'», which was not present in the «Platforms» table, so it was added there. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances by setting the «Consider» field of its record to «1».',2);
-                             }
-                         }
-                         if (isset($buf['software']['version']) && is_string($buf['software']['version']) && !isempty($buf['software']['version']))
-                             $idata['Version']=trim($buf['software']['version']);
-                         if (isset($buf['usage']['users']['total']) && is_int($buf['usage']['users']['total']))
-                             $idata['UserCount']=$buf['usage']['users']['total'];
-                         if (isset($buf['usage']['users']['activeMonth']) && is_int($buf['usage']['users']['activeMonth']))
-                             $idata['ActiveUsersMonth']=$buf['usage']['users']['activeMonth'];
-                         if (isset($buf['usage']['users']['activeHalfyear']) && is_int($buf['usage']['users']['activeHalfyear']))
-                             $idata['ActiveUsersHalfYear']=$buf['usage']['users']['activeHalfyear'];
-                         if (isset($buf['usage']['localPosts']) && is_int($buf['usage']['localPosts']))
-                             $idata['StatusCount']=$buf['usage']['localPosts'];
-                         if (isset($buf['openRegistrations']) && is_bool($buf['openRegistrations']))
-                             $idata['RegOpen']=b2i($buf['openRegistrations']);
-                     } else {
-                         eecho(2,'«'.$opts['hostname'].'»: nodeinfo data was not good JSON.'.N);
-                     }
-                 } else {
-                     eecho(2,'«'.$opts['hostname'].'»: could not fetch nodeinfo data: '.$buf['emsg'].'.'.N);
-                 }
-             }
-         } else {
-             eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs had unexpected format.'.N);
-         }
-     } else {
-         eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs where not good JSON.'.N);
-     }
- } else {
-     eecho(2,'«'.$opts['hostname'].'»: could not fetch nodeinfo specs: '.$buf['emsg'].'.'.N);
- }
- if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
- // Instance info from «/api/v2/instance»: title, description, thumbnail, toot
- // length limit, registration approval, contact data, languages and rules.
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']===false) {
-     eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
- } else {
-     ckrl($buf['headers'],"«{$opts['hostname']}»");
-     $buf=@json_decode($buf['cont'],true);
-     if (!is_array($buf)) {
-         eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v2 were not good JSON.'.N);
-     } elseif (!make(['domain', 'title', 'version', 'source_url', 'description', 'usage', 'thumbnail', 'languages', 'configuration', 'registrations', 'contact', 'rules'],$buf)) {
-         eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v2 had unexpected format.'.N);
-     } else {
-         eecho(1,'«'.$opts['hostname'].'»: got good instance info from API v2 :-)'.N);
-         $instanswered=true;
-         if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title'])) {
-             $idata['Title']=trim($buf['title']);
-         }
-         if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description'])) {
-             $idata['ShortDesc']=trim($buf['description']);
-         }
-         if (isset($buf['thumbnail']['url']) && is_string($buf['thumbnail']['url']) && !isempty($buf['thumbnail']['url'])) {
-             $idata['Thumb']=trim($buf['thumbnail']['url']);
-             // the image is marked as unavailable when its first bytes cannot be read
-             if (!@file_get_contents($idata['Thumb'],false,null,0,512)) {
-                 $idata['Thumb']='unavailable';
-             }
-         }
-         if (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters'])) {
-             $idata['MaxTootChars']=$buf['configuration']['statuses']['max_characters'];
-         }
-         if (isset($buf['registrations']['approval_required']) && is_bool($buf['registrations']['approval_required'])) {
-             $idata['RegReqApproval']=b2i($buf['registrations']['approval_required']);
-         }
-         if (isset($buf['contact']['email']) && is_string($buf['contact']['email'])) {
-             $idata['Email']=trim($buf['contact']['email']);
-         }
-         // The contact account's data is stored only when «noindex» is missing
-         // or is exactly false.
-         if (!isset($buf['contact']['account']['noindex']) || $buf['contact']['account']['noindex']===false) {
-             if (isset($buf['contact']['account']['acct']) && is_string($buf['contact']['account']['acct']) && !isempty($buf['contact']['account']['acct'])) {
-                 $idata['AdmAccount']=trim($buf['contact']['account']['acct']);
-             }
-             if (isset($buf['contact']['account']['display_name']) && is_string($buf['contact']['account']['display_name']) && !isempty($buf['contact']['account']['display_name'])) {
-                 $idata['AdmDisplayName']=trim($buf['contact']['account']['display_name']);
-             }
-             if (isset($buf['contact']['account']['created_at']) && is_string($buf['contact']['account']['created_at']) && ($ts=strtotime($buf['contact']['account']['created_at']))!==false) {
-                 $idata['AdmCreatedAt']=$ts;
-             }
-             if (isset($buf['contact']['account']['note']) && is_string($buf['contact']['account']['note']) && !isempty($buf['contact']['account']['note'])) {
-                 $idata['AdmNote']=trim($buf['contact']['account']['note']);
-             }
-             if (isset($buf['contact']['account']['url']) && is_string($buf['contact']['account']['url']) && !isempty($buf['contact']['account']['url'])) {
-                 $idata['AdmURL']=trim($buf['contact']['account']['url']);
-             }
-             if (isset($buf['contact']['account']['avatar']) && is_string($buf['contact']['account']['avatar']) && !isempty($buf['contact']['account']['avatar'])) {
-                 $idata['AdmAvatar']=trim($buf['contact']['account']['avatar']);
-                 if (!@file_get_contents($idata['AdmAvatar'],false,null,0,512)) {
-                     $idata['AdmAvatar']='unavailable';
-                 }
-             }
-             if (isset($buf['contact']['account']['header']) && is_string($buf['contact']['account']['header']) && !isempty($buf['contact']['account']['header'])) {
-                 $idata['AdmHeader']=trim($buf['contact']['account']['header']);
-             }
-         } else {
-             if (isset($buf['contact']['account']['noindex']) && $buf['contact']['account']['noindex']===true) {
-                 $idata['AdmAccount']='OPTED OUT';// here we rely on the fact that nobody could set "acct" to "OPTED OUT" since it doesn't allow spaces
-             }
-             // NOTE(review): the avatar is reset for every account reaching
-             // this branch, the account name only on an explicit opt-out —
-             // confirm this is intended.
-             $idata['AdmAvatar']='unavailable';
-         }
-         if (isset($buf['languages']) && is_array($buf['languages'])) {
-             $idata['languages']=$buf['languages'];
-         }
-         if (isset($buf['rules']) && is_array($buf['rules'])) {
-             // rules are kept as «rule id => rule text»
-             foreach ($buf['rules'] as $rule) {
-                 if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text'])) {
-                     $idata['rules'][$rule['id']]=$rule['text'];
-                 }
-             }
-         }
-     }
- }
- // "domain count" is only listed by /api/v1/instance
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch domain count from API v1...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']===false) {
-     eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v1: '.$buf['emsg'].'.'.N);
- } else {
-     ckrl($buf['headers'],"«{$opts['hostname']}»");
-     $buf=@json_decode($buf['cont'],true);
-     if (!is_array($buf)) {
-         eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 was not good JSON.'.N);
-     } else {
-         eecho(1,'«'.$opts['hostname'].'»: got instance info from API v1 :-)'.N);
-         // only the domain count is taken from this response
-         if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count'])) {
-             $idata['DomainCount']=$buf['stats']['domain_count'];
-         }
-     }
- }
- // The long description comes from its own endpoint.
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch extended description from API v1...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']===false) {
-     eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
- } else {
-     ckrl($buf['headers'],"«{$opts['hostname']}»");
-     $buf=@json_decode($buf['cont'],true);
-     if (!is_array($buf)) {
-         eecho(2,'«'.$opts['hostname'].'»: instance extended description fetched from API v1 was not good JSON.'.N);
-     } else {
-         eecho(1,'«'.$opts['hostname'].'»: got extended description from API v1 :-)'.N);
-         if (isset($buf['content']) && is_string($buf['content']) && !isempty($buf['content'])) {
-             $idata['LongDesc']=trim($buf['content']);
-         }
-     }
- }
- // Public list of blocked domains. When the list can be read, every block is
- // stored in $idata['blocks'] and $idata['Threads'] tells how the instance
- // treats threads.net: «accessible» unless a block on threads.net or on one of
- // its subdomains is found. When the list cannot be read, «Threads» is null.
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']!==false) {
-     ckrl($buf['headers'],"«{$opts['hostname']}»");
-     $buf=@json_decode($buf['cont'],true);
-     if (is_array($buf)) {
-         eecho(1,'«'.$opts['hostname'].'»: got instance domain blocks from API v1 :-)'.N);
-         $idata['blocks']=[];
-         $idata['PublicBlocksList']=1;
-         $idata['Threads']='accessible';
-         foreach ($buf as $key=>$block) {
-             if (is_array($block) && make(['domain', 'severity', 'comment'],$block) && is_string($block['domain']) && !isempty($block['domain']) && is_string($block['severity']) && in_array($block['severity'],['silence','suspend'],true) && (is_null($block['comment']) || is_string($block['comment']))) {
-                 // blank comments are stored as null
-                 if (is_string($block['comment'])) {
-                     $block['comment']=trim($block['comment']);
-                     if ($block['comment']=='')
-                         $block['comment']=null;
-                 } else {
-                     $block['comment']=null;
-                 }
-                 $idata['blocks'][]=['dom'=>$block['domain'], 'sev'=>$block['severity'], 'comm'=>$block['comment']];
-                 // threads.net itself or any of its subdomains; the dots are
-                 // escaped so that they match a literal dot only
-                 if (preg_match('#^(?:.*\.)?threads\.net$#i',$block['domain'])===1) {
-                     // the severity was validated above, so it is either
-                     // «suspend» or «silence»: no other case can get here
-                     $idata['Threads']=($block['severity']==='suspend') ? 'suspended' : 'limited';
-                 }
-             } else {
-                 // the first malformed entry stops the scan; what was
-                 // collected up to that point is kept
-                 eecho(2,'«'.$opts['hostname'].'»: domain blocks array has an unexpected format.'.N);
-                 break;
-             }
-         }
-     } else {
-         eecho(2,'«'.$opts['hostname'].'»: instance domain blocks fetched from API v1 were not good JSON.'.N);
-         $idata['Threads']=null;
-     }
- } else {
-     eecho(2,'«'.$opts['hostname'].'»: could not fetch instance domain blocks from API v1: '.$buf['emsg'].'.'.N);
-     $idata['Threads']=null;
- }
- } else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
- eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
- $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
- if ($buf['cont']!==false) {
- ckrl($buf['headers'],"«{$opts['hostname']}»");
- $buf=@json_decode($buf['cont'],true);
- if (is_array($buf)) {
- if (make(['uri', 'title', 'short_description', 'description', 'email', 'version', 'urls', 'stats', 'thumbnail', 'languages', 'registrations', 'approval_required', 'contact_account'],$buf)) {
- eecho(1,'«'.$opts['hostname'].'»: got instance info from API v1 :-)'.N);
- //print_r($buf);
- $instanswered=true;
- // Copy into $idata the fields of the API v1 «instance» response that are
- // present and have the expected type; everything else is left untouched.
- if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title']))
-     $idata['Title']=trim($buf['title']);
- // the short description must come from «short_description»: the value of
- // «description» (the long one) was stored here by mistake
- if (isset($buf['short_description']) && is_string($buf['short_description']) && !isempty($buf['short_description']))
-     $idata['ShortDesc']=trim($buf['short_description']);
- if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description']))
-     $idata['LongDesc']=trim($buf['description']);
- if (isset($buf['email']) && is_string($buf['email']))
-     $idata['Email']=trim($buf['email']);
- // if nodeinfo did not respond, it could be mastodon < 3.0.0, and we would not have $idata['Version'] yet, so...
- if (!isset($idata['Version']) && isset($buf['version']) && is_string($buf['version']) && !isempty($buf['version']))
-     $idata['Version']=trim($buf['version']);
- // if nodeinfo responded we should already have these 2 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
- if (isset($buf['stats']['user_count']) && is_int($buf['stats']['user_count']))
-     $idata['UserCount']=$buf['stats']['user_count'];
- if (isset($buf['stats']['status_count']) && is_int($buf['stats']['status_count']))
-     $idata['StatusCount']=$buf['stats']['status_count'];
- if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count']))
-     $idata['DomainCount']=$buf['stats']['domain_count'];
- if (isset($buf['thumbnail']) && is_string($buf['thumbnail']) && !isempty($buf['thumbnail'])) {
-     $idata['Thumb']=trim($buf['thumbnail']);
-     // the image is marked as unavailable when its first bytes cannot be read
-     if (!@file_get_contents($idata['Thumb'],false,null,0,512))
-         $idata['Thumb']='unavailable';
- }
- // the toot length limit is looked for in «max_toot_chars» first, then in «configuration.statuses.max_characters»
- if (isset($buf['max_toot_chars']) && is_int($buf['max_toot_chars']))
-     $idata['MaxTootChars']=$buf['max_toot_chars'];
- elseif (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters']))
-     $idata['MaxTootChars']=$buf['configuration']['statuses']['max_characters'];
- // if nodeinfo responded we should already have this 1 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
- if (isset($buf['registrations']) && is_bool($buf['registrations']))
-     $idata['RegOpen']=b2i($buf['registrations']);
- if (isset($buf['approval_required']) && is_bool($buf['approval_required']))
-     $idata['RegReqApproval']=b2i($buf['approval_required']);
- if (isset($buf['contact_account']['acct']) && is_string($buf['contact_account']['acct']) && !isempty($buf['contact_account']['acct']))
-     $idata['AdmAccount']=trim($buf['contact_account']['acct']);
- if (isset($buf['contact_account']['display_name']) && is_string($buf['contact_account']['display_name']) && !isempty($buf['contact_account']['display_name']))
-     $idata['AdmDisplayName']=trim($buf['contact_account']['display_name']);
- if (isset($buf['contact_account']['created_at']) && is_string($buf['contact_account']['created_at']) && ($ts=strtotime($buf['contact_account']['created_at']))!==false)
-     $idata['AdmCreatedAt']=$ts;
- if (isset($buf['contact_account']['note']) && is_string($buf['contact_account']['note']) && !isempty($buf['contact_account']['note']))
-     $idata['AdmNote']=trim($buf['contact_account']['note']);
- if (isset($buf['contact_account']['url']) && is_string($buf['contact_account']['url']) && !isempty($buf['contact_account']['url']))
-     $idata['AdmURL']=trim($buf['contact_account']['url']);
- if (isset($buf['contact_account']['avatar']) && is_string($buf['contact_account']['avatar']) && !isempty($buf['contact_account']['avatar'])) {
- $idata['AdmAvatar']=trim($buf['contact_account']['avatar']);
- if (!@file_get_contents($idata['AdmAvatar'],false,null,0,512)) $idata['AdmAvatar']='unavailable';
- }
- if (isset($buf['contact_account']['header']) && is_string($buf['contact_account']['header']) && !isempty($buf['contact_account']['header']))
- $idata['AdmHeader']=trim($buf['contact_account']['header']);
- // next line: isset and is_string because it can be pleroma, that has a different format for "languages"
- if (isset($buf['languages']) && is_array($buf['languages']) && isset($buf['languages'][0]) && is_string($buf['languages'][0]))
- $idata['languages']=$buf['languages'];
- if (isset($buf['rules']) && is_array($buf['rules']))
- foreach ($buf['rules'] as $rule)
- if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text']))
- $idata['rules'][$rule['id']]=$rule['text'];
- // some falsing
- if (isset($buf['pleroma'])) $idata['IsMastodon']=false;
- if (isset($buf['version']) && is_string($buf['version']) && preg_match('#(pleroma|pixelfed)#i',$buf['version'])===1) $idata['IsMastodon']=false;
- } else {
- eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 had unexpected format.'.N);
- }
- } else {
- eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 were not good JSON.'.N);
- }
- } else {
- eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v1: '.$buf['emsg'].'.'.N);
- }
- }
// Fetch the instance activity endpoint; only attempted on Mastodon >= 2.1.2.
// Versions are compared with version_compare(): a plain string comparison would rank e.g. "10.0.0" below "2.1.2".
if ($idata['IsMastodon'] && !is_null($idata['Version']) && version_compare($idata['Version'],'2.1.2','>=')) {
	eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
	$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
	if ($buf['cont']===false) {
		eecho(2,'«'.$opts['hostname'].'»: could not fetch instance activity info from API v1: '.$buf['emsg'].'.'.N);
	} else {
		ckrl($buf['headers'],"«{$opts['hostname']}»");
		$buf=@json_decode($buf['cont'],true);
		if (is_array($buf)) {
			eecho(1,'«'.$opts['hostname'].'»: got instance activity info from API v1 :-)'.N);
			$idata['activity']=$buf;
		} else {
			// $buf now holds the failed decode result, so the reason has to come from the JSON decoder, not from $buf['emsg']
			eecho(2,'«'.$opts['hostname'].'»: instance activity info from API v1 were not good JSON: '.json_last_error_msg().'.'.N);
		}
	}
}
// Fetch trending tags; only attempted on Mastodon >= 3.0.0. From 3.5.0 on the "/tags" sub-path is requested.
// Versions are compared with version_compare() rather than as plain strings.
if ($idata['IsMastodon'] && !is_null($idata['Version']) && version_compare($idata['Version'],'3.0.0','>=')) {
	eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
	$url='https://'.$opts['hostname'].'/api/v1/trends';
	if (version_compare($idata['Version'],'3.5.0','>=')) $url.='/tags';
	$buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
	if ($buf['cont']===false) {
		eecho(2,'«'.$opts['hostname'].'»: could not fetch instance tags trends from API v1: '.$buf['emsg'].'.'.N);
	} else {
		ckrl($buf['headers'],"«{$opts['hostname']}»");
		$buf=@json_decode($buf['cont'],true);
		if (is_array($buf)) {
			eecho(1,'«'.$opts['hostname'].'»: got instance tags trends info from API v1 :-)'.N);
			$idata['trends']=$buf;
		} else {
			// $buf now holds the failed decode result, so the reason has to come from the JSON decoder, not from $buf['emsg']
			eecho(2,'«'.$opts['hostname'].'»: instance tags trends from API v1 were not good JSON: '.json_last_error_msg().'.'.N);
		}
	}
}
// finished fetching: normalise flags and complete $idata with bookkeeping values before it is written to "Instances"
if (!is_null($idata['IsMastodon'])) $idata['IsMastodon']=b2i($idata['IsMastodon']);
$idata['WasLastCheckOk']=($instanswered) ? 1 : 0;
if (is_null($oidata)) {
	// instance not yet in the database
	$query='INSERT INTO Instances SET ';
	$idata['InsertTS']=$now;
	$idata['TotChecks']=1;
	if ($instanswered) {
		$idata['FirstSeen']=$now;
		$idata['LastOkCheckTS']=$now;
		$idata['OkChecks']=1;
	} else {
		$idata['Thumb']='unavailable';
		$idata['AdmAvatar']='unavailable';
		$idata['OkChecks']=0;
	}
} else {
	// instance already in the database: $oidata is its current row
	$query='UPDATE Instances SET ';
	$idata['FirstSeen']=($instanswered && is_null($oidata['FirstSeen'])) ? $now : $oidata['FirstSeen'];
	$idata['LastOkCheckTS']=($instanswered) ? $now : $oidata['LastOkCheckTS'];
	$idata['TotChecks']=$oidata['TotChecks']+1;
	$idata['OkChecks']=$oidata['OkChecks'];
	if ($instanswered) $idata['OkChecks']++;
	// columns that this check never changes are carried over from the stored row
	foreach (['Priority', 'Visible', 'Noxious', 'NoxReason', 'NoxLastModTS', 'OurDesc', 'OurDescEN', 'LocalityID', 'OurLangsLock', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos'] as $key)
		$idata[$key]=$oidata[$key];
	if (!$instanswered) {
		// no answer: keep what we knew, except the two image columns, which are marked as unavailable
		foreach (['IsMastodon', 'Title', 'ShortDesc', 'LongDesc', 'Email', 'Software', 'Version', 'UserCount', 'StatusCount', 'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'Thumb', 'RegOpen', 'RegReqApproval', 'MaxTootChars', 'AdmAccount', 'AdmDisplayName', 'AdmCreatedAt', 'AdmNote', 'AdmURL', 'AdmAvatar', 'AdmHeader', 'Threads'] as $key)
			$idata[$key]=($key==='Thumb' || $key==='AdmAvatar') ? 'unavailable' : $oidata[$key];
	}
}
// Turn $idata into the "column=value" list of the INSERT/UPDATE started above.
// The skipped keys are not written by this query (the array-valued ones are saved to their own tables further down).
$set=[];
foreach ($idata as $key=>$val) {
	if (in_array($key,['ID','languages','rules','activity','trends','blocks'],true))
		continue;
	if (is_null($val)) {
		$set[]=$key.'=NULL';
	} elseif (is_int($val)) {
		$set[]=$key.'='.truncn($val, 'Instances', $key, '«'.$opts['hostname'].'»');
	} elseif (is_string($val)) {
		$set[]=$key.'=\''.myesc($link,truncs($val, 'Instances', $key, '«'.$opts['hostname'].'»')).'\'';
	} else {
		mexit('$idata[\''.$key.'\'] value has unmanaged type, see code around line '.__LINE__.'.'.N,3);
	}
}
$query.=implode(', ',$set);
if (!is_null($oidata)) $query.=' WHERE ID='.$oidata['ID'];
eecho(1,'query: «'.$query.'».'.N);
if (!$opts['dryrun']) {
	// a new instance is stored only if it answered; a known one is always updated
	if (!is_null($oidata) || $instanswered) {
		myq($link,$query,__LINE__);
	} else {
		mexit('«'.$opts['hostname'].'»: not inserting unknown instance because it did not respond; shutting down after '.ght(time()-$now,null,0).' :-)'.N,0);
	}
}
// Work out the instance's database ID: the freshly inserted one for a new instance (0 on dry runs), else the stored one.
if (is_null($oidata)) {
	$instid=(!$opts['dryrun']) ? mysqli_insert_id($link) : 0;
	notify('«<a href="viewinst.php?id='.$instid.'">'.$opts['hostname'].'</a>» is a NEW instance! :-)',1);
} else {
	$instid=$oidata['ID'];
}
// from here we know for sure $instid
if (!$opts['dryrun'])
	myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', '.$idata['WasLastCheckOk'].')',__LINE__);
// Store the languages declared by the instance (InstLangs) and, unless locked, "our" languages (InstOurLangs).
if ($instanswered && isset($idata['languages']) && is_array($idata['languages']) && count($idata['languages'])>0) {
	eecho(1,'«'.$opts['hostname'].'»: declared languages: '.implode(', ',$idata['languages']).N);
	if (!$opts['dryrun'])
		myq($link,'DELETE FROM InstLangs WHERE InstID='.$instid,__LINE__);
	$langids=getlangsidsarr($idata['languages'],$supplangs,$link,$opts['hostname'],$opts['dryrun'],__LINE__);
	if (!$opts['dryrun']) {
		$pos=0;
		foreach ($langids as $langid) {
			$pos++;
			myq($link,'INSERT INTO InstLangs SET InstID='.$instid.', LangID='.$langid.', Pos='.$pos,__LINE__);
		}
	}
	if (!is_null($oidata) && $oidata['OurLangsLock']==1) {
		eecho(1,'«'.$opts['hostname'].'»: won’t touch “our languages” because they are locked.'.N);
	} else {
		// we try to detect languages only if first declared language (the only one currently definable by admins)
		// is equal to the default "en", otherwise we assume it's been set to the actual mostly used language on the instance
		if ($idata['languages'][0]=='en') {
			$idata['ourlanguages']=get_instance_langs($opts['hostname']);
			if (count($idata['ourlanguages'])>0) {
				eecho(1,'«'.$opts['hostname'].'»: detected languages: '.implode(', ',$idata['ourlanguages']).N);
			} else {
				$idata['ourlanguages']=$idata['languages'];
				eecho(1,'«'.$opts['hostname'].'»: detected languages: NONE; copied declared languages to detected languages.'.N);
			}
		} else {
			$idata['ourlanguages']=$idata['languages'];
			eecho(1,'«'.$opts['hostname'].'»: copied declared languages to detected languages.'.N);
		}
		if (!$opts['dryrun'])
			myq($link,'DELETE FROM InstOurLangs WHERE InstID='.$instid,__LINE__);
		$langids=getlangsidsarr($idata['ourlanguages'],$supplangs,$link,$opts['hostname'],$opts['dryrun'],__LINE__);
		if (!$opts['dryrun']) {
			$pos=0;
			foreach ($langids as $langid) {
				$pos++;
				myq($link,'INSERT INTO InstOurLangs SET InstID='.$instid.', OurLangID='.$langid.', Pos='.$pos,__LINE__);
			}
		}
	}
}
// Replace the stored activity rows of this instance with the ones just fetched (if any).
if ($instanswered && !$opts['dryrun'])
	myq($link,'DELETE FROM InstActivity WHERE InstID='.$instid,__LINE__);
if (isset($idata['activity']) && is_array($idata['activity'])) {
	$pos=0;
	foreach ($idata['activity'] as $buf) {
		// these should all be int, but mastodon represents them as strings;
		// only rows whose four values are digit-only strings are stored, since they go into the query unquoted
		if (
			isset($buf['week']) && is_string($buf['week']) && preg_match('/^\d+$/',$buf['week'])===1
			&& isset($buf['statuses']) && is_string($buf['statuses']) && preg_match('/^\d+$/',$buf['statuses'])===1
			&& isset($buf['logins']) && is_string($buf['logins']) && preg_match('/^\d+$/',$buf['logins'])===1
			&& isset($buf['registrations']) && is_string($buf['registrations']) && preg_match('/^\d+$/',$buf['registrations'])===1
		) {
			$pos++;
			if (!$opts['dryrun'])
				myq($link,'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES ('.$instid.', '.$buf['week'].', '.$buf['statuses'].', '.$buf['logins'].', '.$buf['registrations'].', '.$pos.')',__LINE__);
		}
	}
}
// Replace the stored trending tags of this instance with the ones just fetched (if any), ranked by accounts using them.
if ($instanswered && !$opts['dryrun'])
	myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
if (isset($idata['trends']) && is_array($idata['trends'])) {
	$trends=[];
	foreach ($idata['trends'] as $buf) {
		// malformed entries are skipped entirely, so they can no longer be stored with a stale or undefined score
		if (!(isset($buf['name']) && is_string($buf['name']) && isset($buf['url']) && is_string($buf['url']) && isset($buf['history']) && is_array($buf['history'])))
			continue;
		// "LastDay" is taken from the first history row, as before; it is remote data that ends up in the query,
		// so it is accepted only as an integer or a digit-only string and stored as an integer
		$lastday=isset($buf['history'][0]['day']) ? $buf['history'][0]['day'] : null;
		if (is_string($lastday) && preg_match('/^\d+$/D',$lastday)===1)
			$lastday=(int)$lastday;
		if (!is_int($lastday))
			continue;
		$trend=0;
		foreach ($buf['history'] as $row) {
			// below, we check for "stringness" because, they should be integers, but they are strings
			if (isset($row['day']) && is_string($row['day']) && preg_match('/^\d+$/',$row['day'])===1 && isset($row['uses']) && is_string($row['uses']) && preg_match('/^\d+$/',$row['uses'])===1 && isset($row['accounts']) && is_string($row['accounts']) && preg_match('/^\d+$/',$row['accounts'])===1)
				$trend+=$row['accounts']+0;
		}
		$trends[]=[
			'InstID'=>$instid,
			'LastDay'=>$lastday,
			'Name'=>$buf['name'],
			'URL'=>$buf['url'],
			'Pos'=>null,
			'trend'=>$trend
		];
	}
	// highest score first
	mdasortbykey($trends,'trend',true);
	$pos=0;
	foreach ($trends as $trend) {
		$pos++;
		$query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.$trend['LastDay'].'\', \''.myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«'.$opts['hostname'].'»')).'\', \''.myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«'.$opts['hostname'].'»')).'\', '.$pos.')';
		if (!$opts['dryrun'])
			myq($link,$query,__LINE__);
	}
}
// Replace the stored rules of this instance with the fetched ones, in ascending rule-id order.
if (isset($idata['rules']) && is_array($idata['rules'])) {
	ksort($idata['rules']);
	if (!$opts['dryrun']) {
		myq($link,'DELETE FROM InstRules WHERE InstID='.$instid,__LINE__);
		foreach ($idata['rules'] as $rule) {
			myq($link,'INSERT INTO InstRules SET InstID='.$instid.', Text=\''.myesc($link, truncs($rule, 'InstRules', 'Text', '«'.$opts['hostname'].'»')).'\'',__LINE__);
		}
	}
}
// Replace the stored domain blocks of this instance with the fetched ones (never executed on dry runs).
if ($instanswered && !$opts['dryrun'] && isset($idata['blocks'])) {
	myq($link,'DELETE FROM InstBlocks WHERE InstID='.$instid,__LINE__);
	foreach ($idata['blocks'] as $block) {
		// a missing comment is stored as SQL NULL, anything else as an escaped string literal
		if (is_null($block['comm']))
			$block['comm']='NULL';
		else
			$block['comm']="'".myesc($link, truncs($block['comm'], 'InstBlocks', 'Comment', '«'.$opts['hostname'].'»'))."'";
		myq($link,'INSERT INTO InstBlocks SET InstID='.$instid.', Domain=\''.myesc($link, truncs($block['dom'], 'InstBlocks', 'Domain', '«'.$opts['hostname'].'»')).'\', Severity=\''.myesc($link, truncs($block['sev'], 'InstBlocks', 'Severity', '«'.$opts['hostname'].'»')).'\', Comment='.$block['comm'],__LINE__);
	}
}
// Announce an instance seen for the first time during this run by posting a status from the bot account.
if ($opts['_sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) {
	$lc=['decimal_point'=>'.', 'thousands_sep'=>','];
	gettlds();
	eecho(0,'«'.$opts['hostname'].'»: this instance is new, trying to send an announcement toot about it...'.N);
	$endpoint='https://'.$opts['bothost'].'/api/v1/statuses';
	$toot='A new Mastodon instance, https://'.$opts['hostname'].', has been found by the crawler at https://mastodon.help'.N.N.'Name: ';
	$toot.=(is_null($idata['Title'])) ? 'unspecified' : $idata['Title'];
	$toot.=N.N.'Languages: ';
	if (array_key_exists('languages',$idata) && is_array($idata['languages']) && count($idata['languages'])>0 && $idata['languages'][0]!='en')
		$toot.=implode(', ',$idata['languages']);
	elseif (array_key_exists('ourlanguages',$idata) && is_array($idata['ourlanguages']) && count($idata['ourlanguages'])>0)
		$toot.=implode(', ',$idata['ourlanguages']);
	else
		$toot.='unspecified/undetectable';
	$toot.=N.N.'Users: ';
	$toot.=(is_null($idata['UserCount'])) ? 'unspecified' : fnum($idata['UserCount'],$lc);
	$toot.=N.N.'Max. characters per post: ';
	$toot.=(is_null($idata['MaxTootChars']) || $idata['MaxTootChars']==500) ? '500' : fnum($idata['MaxTootChars'],$lc);
	$toot.=N.N.'Registrations: ';
	if (is_null($idata['RegOpen'])) {
		$toot.='unspecified';
	} elseif ($idata['RegOpen']==1) {
		$toot.='open';
		// say nothing about approval when the instance did not report it (null would otherwise compare equal to 0)
		if ($idata['RegReqApproval']==1)
			$toot.=' (require admins approval)';
		elseif (!is_null($idata['RegReqApproval']) && $idata['RegReqApproval']==0)
			$toot.=' (don’t require admins approval)';
	} elseif ($idata['RegOpen']==0) {
		$toot.='closed';
	}
	$toot.=N.N.'Short description: ';
	$toot.=(is_null($idata['ShortDesc'])) ? 'unspecified' : $idata['ShortDesc'];
	$toot.=N.N.'Long description: ';
	$toot.=(is_null($idata['LongDesc'])) ? 'unspecified' : html2text($idata['LongDesc']);
	if (postlen($toot)>$opts['botmaxchars']) {
		// drop trailing words until there is room for the 13-character " [continues…]" suffix
		while (postlen($toot)+13>$opts['botmaxchars']) {
			$shorter=preg_replace('#\s+(\S|\n)+$#u','',$toot);
			// stop when nothing more can be removed (no whitespace left, or a PCRE error) instead of looping forever
			if (!is_string($shorter) || $shorter===$toot)
				break;
			$toot=$shorter;
		}
		$toot.=' [continues…]';
	}
	$context=[
		'http'=>[
			// each header is a "Name: value" line; the idempotency key header was missing its ": " separator
			'header'=>'Authorization: Bearer '.$opts['bottoken'].RN.
				'Idempotency-Key: '.md5($opts['bothost'].$now.rand(1000,9999)).RN.
				'Content-type: application/x-www-form-urlencoded'.RN,
			'method'=>'POST',
			'content'=>http_build_query([
				'status'=>$toot,
				'visibility'=>'public',
				'language'=>'en'
			]),
			'timeout'=>$opts['conntimeout']
		]
	];
	$context=stream_context_create($context);
	$res=@file_get_contents($endpoint,false,$context);
	if ($res===false) {
		eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$opts['bothost'].'».'.N);
	} else {
		$httprsc=gethttpcode($http_response_header);
		$res=@json_decode($res,true);
		if ($httprsc!=200)
			eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N);
		elseif ($res===null)
			// json_decode() signals invalid JSON with null, not false
			eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has not returned valid JSON data.'.N);
		else
			eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$opts['bothost'].'» :-)'.N);
	}
}
// Crawl the instance's profile directory and mirror it into the "Users"/"UsersFields" tables.
// Only done for Mastodon >= 4.0.0 because those versions report "noindex" in account entities;
// the version is compared with version_compare() rather than as a plain string.
if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($idata['Version']) && version_compare($idata['Version'],'4.0.0','>=')) {
	eecho(0,'«'.$opts['hostname'].'»: trying to fetch users info from directory API...'.N);
	$users=[];// array of users in this instance's directory
	$chunk=0;
	$limit=40;
	$end=false;
	$complete=true;// becomes false as soon as we give up on a chunk, ie when $users is only a partial listing
	while (!$end) {
		$offset=$chunk*$limit;
		for ($att=0; $att<$opts['udiratts']; $att++) {
			eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
			$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
			if ($buf['cont']===false) {
				eecho(2,'«'.$opts['hostname'].'»: could not fetch chunk '.($chunk+1).' of users info from directory API: '.$buf['emsg'].N);
				if ($att==$opts['udiratts']-1) {
					eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
					$end=true;
					$complete=false;
				} else {
					eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
					sleep($opts['udirfailst']);
				}
				continue;
			}
			ckrl($buf['headers'],"«{$opts['hostname']}»");
			eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' :-)'.N);
			$buf=@json_decode($buf['cont'],true);
			if (!is_array($buf)) {
				eecho(2,'«'.$opts['hostname'].'»: ... but the chunk was not good JSON :-('.N);
				if ($att==$opts['udiratts']-1) {
					$end=true;
					$complete=false;
				}
				continue;
			}
			// a chunk shorter than the page size is the last one
			if (count($buf)<$limit) $end=true;
			foreach ($buf as $user) {
				// is_array() first: a non-array entry (eg the members of an error object) must not reach make()
				if (is_array($user) && make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
					eecho(0,'«'.$opts['hostname'].'»: working on user «'.$user['username'].'»...'.N);
					// deriving a missing "noindex" from the user's profile page is disabled because it takes too long on
					// instances with many users; that's why the root "if" requires ver. >= 4.0.0 and "noindex" is among
					// the required keys above (ver. >= 4.0.0 do report "noindex" for account entities)
					$snote=strip_tags($user['note']);
					// hashtags in the profile note override the reported flag; opt-in tags are checked last, so they win
					if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
					if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
					// fetching each user's featured tags is disabled for the same reason; see previous comment
					$user['tags']=null;
					if (!is_null($user['created_at'])) $user['created_at']=strtotime($user['created_at']);
					if (!is_null($user['last_status_at'])) $user['last_status_at']=datetots($user['last_status_at']);
					$users[$user['id']]=$user;
				} else {
					eecho(2,'«'.$opts['hostname'].'»: user record missed some required keys :-('.N);
				}
			}
			break;
		}
		$chunk++;
	}
	$totusers=count($users);
	eecho(1,'«'.$opts['hostname'].'»: got '.$totusers.' users’ profiles.'.N);
	if ($totusers>0) {
		eecho(1,'«'.$opts['hostname'].'»: inserting/updating '.$totusers.' users’ profiles in the database.'.N);
		$exusers=[];// array of this instance's users already existing in the db
		$res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
		while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
		foreach ($users as $locid=>$user) {
			$uctx='«'.$opts['hostname'].'»: «'.$user['username'].'»';
			$query='SET '.implode(', ',[
				'InstID='.$instid,
				'host='.myv($link,$opts['hostname']),
				'locid='.myv($link,$user['id']),
				'username='.myv($link,truncs($user['username'], 'Users', 'username', $uctx)),
				'display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', $uctx)),
				'locked='.myv($link,$user['locked']),
				'bot='.myv($link,$user['bot']),
				'created_at='.myv($link,$user['created_at']),
				'note='.myv($link,truncs($user['note'], 'Users', 'note', $uctx)),
				'url='.myv($link,truncs($user['url'], 'Users', 'url', $uctx)),
				'avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', $uctx)),
				'header='.myv($link,truncs($user['header'], 'Users', 'header', $uctx)),
				'statuses_count='.myv($link,$user['statuses_count']),
				'last_status_at='.myv($link,$user['last_status_at']),
				'tags='.myv($link,truncs($user['tags'], 'Users', 'tags', $uctx))
			]);
			$uid=0;
			if (!array_key_exists($user['id'],$exusers)) {
				if (!$user['noindex']) {
					eecho(0,'«'.$opts['hostname'].'»: inserting new user «'.$user['username'].'»...'.N);
					$query='INSERT INTO Users '.$query;
					if (!$opts['dryrun']) {
						myq($link,$query,__LINE__);
						$uid=mysqli_insert_id($link);
					}
				} else {
					eecho(0,'«'.$opts['hostname'].'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
				}
			} else {
				$uid=$exusers[$locid]['ID'];
				if (!$user['noindex']) {
					eecho(0,'«'.$opts['hostname'].'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
					$query='UPDATE Users '.$query.' WHERE ID='.$uid;
				} else {
					eecho(0,'«'.$opts['hostname'].'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
					$query='DELETE FROM Users WHERE ID='.$uid;
				}
				if (!$opts['dryrun']) {
					myq($link,$query,__LINE__);
					// the fields of an existing user are always dropped; they are re-inserted below when the user is kept
					myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
				}
			}
			if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
				eecho(0,'«'.$opts['hostname'].'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
				foreach ($user['fields'] as $field) {
					// only whether the field is verified is stored, not when; missing keys are treated as null
					$field['verified_at']=isset($field['verified_at']) ? 1 : 0;
					$field['name']=truncs(isset($field['name']) ? $field['name'] : null,'UsersFields','name',$uctx);
					$field['value']=truncs(isset($field['value']) ? $field['value'] : null,'UsersFields','value',$uctx);
					if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at'],__LINE__);
				}
			}
		}
		if ($complete) {
			eecho(1,'«'.$opts['hostname'].'»: deleting possible users’ profiles which are in the database but no longer in the directory.'.N);
			foreach ($exusers as $locid=>$exuser) {
				if (!array_key_exists($locid,$users)) {
					eecho(0,'«'.$opts['hostname'].'»: user «'.$exuser['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
					if (!$opts['dryrun']) {
						myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
						myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
					}
				}
			}
		} else {
			// with a partial listing, users we simply failed to fetch would be mistaken for users who opted out
			eecho(2,'«'.$opts['hostname'].'»: the directory could not be fetched completely, so stored profiles missing from it are not deleted.'.N);
		}
	}
}
mexit('«'.$opts['hostname'].'»: done in '.ght(time()-$now,null,0).' :-)'.N,0);
- // functions
/**
 * Runs a query and terminates the script through mexit() (exit code 3) if it fails.
 *
 * @param mysqli $link Database connection
 * @param string $query SQL statement to execute
 * @param int $line Line number of the caller, reported in the error message
 * @return mixed Whatever mysqli_query() returned on success
 */
function myq(&$link,$query,$line) {
	$res=false;
	try {
		$res=mysqli_query($link,$query);
	} catch (Exception $error) {
		mexit('query «'.$query.'» on line '.$line.' failed: '.$error->getMessage().' ('.$error->getCode().').'.N,3);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	if ($res===false)
		mexit('query «'.$query.'» on line '.$line.' failed: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,3);
	return $res;
}
/**
 * Prints a timestamped log line if its level reaches the configured minimum ($opts['minmsgimplev']).
 * Levels below 2 go to standard output, the others to standard error.
 *
 * @param int $lev Message level, used as index into the global $msglevs labels
 * @param string $msg Message text (the caller supplies the trailing newline)
 */
function eecho($lev,$msg) {
	global $opts, $msglevs;
	// microtime(false) returns "<fraction> <seconds>", eg "0.12345600 1700000000"
	$parts=explode(' ',microtime(false));
	$stamp=date('Y-m-d H:i:s',(int)$parts[1]).'.'.substr($parts[0],2);
	$msg=$stamp.' '.$msglevs[$lev].': '.$msg;
	if ($lev<$opts['minmsgimplev'])
		return;
	if ($lev<2)
		echo($msg);
	else
		fwrite(STDERR,$msg);
}
/**
 * Closes the global database connection (if any), logs a final message and ends the script.
 * The message is logged at level 3 when $code is non-zero, at level 1 otherwise.
 *
 * @param string $msg Final message
 * @param int $code Process exit code
 */
function mexit($msg,$code) {
	global $link;
	if (isset($link) && $link!==false)
		mysqli_close($link);
	eecho(($code!=0) ? 3 : 1,$msg);
	exit($code);
}
/**
 * Converts the listed elements of an array to numbers by adding 0 to them; null elements are left alone.
 *
 * @param array $keys Keys of the elements to convert
 * @param array $arr Array to work on, modified in place
 */
function setint($keys,&$arr) {
	foreach ($keys as $key) {
		if (is_null($arr[$key]))
			continue;
		$arr[$key]+=0;
	}
}
/**
 * Tells whether a value exceeds what the global $tables map allows for a column.
 * For strings the map entry is read as a maximum length in characters, for integers as a ['min', 'max'] range.
 *
 * @param mixed $val Value to check; anything but a string or an int never "truncates"
 * @param string $tab Table name (lowercased when the global $iswin is set)
 * @param string $col Column name
 * @return bool True if the value would not fit
 */
function willtrunc($val,$tab,$col) {
	global $tables, $iswin;
	if ($iswin)
		$tab=strtolower($tab);
	if (is_string($val))
		return mb_strlen($val,'UTF-8')>$tables[$tab][$col];
	if (is_int($val))
		return ($val<$tables[$tab][$col]['min'] || $val>$tables[$tab][$col]['max']);
	return false;
}
/**
 * Makes a string fit the maximum length the global $tables map gives for a column.
 * A too long string first has its tags stripped; if still too long it is cut and terminated with "…".
 * Whenever the string had to be changed, a level 2 notification is issued.
 *
 * @param string|null $str String to fit; null is returned untouched
 * @param string $tab Table name (lowercased when the global $iswin is set)
 * @param string $col Column name
 * @param string $ctx Context prefix for the notification text
 * @return string|null The possibly shortened string
 */
function truncs($str,$tab,$col,$ctx) {
	global $tables, $iswin;
	if (is_null($str))
		return null;
	if ($iswin)
		$tab=strtolower($tab);
	$max=$tables[$tab][$col];
	$ostr=$str;
	if (mb_strlen($str,'UTF-8')>$max)
		$str=strip_tags($str);
	if (mb_strlen($str,'UTF-8')>$max)
		$str=mb_substr($str,0,$max-1,'UTF-8').'…';
	// strict comparison, so that two different numeric-looking strings are never considered equal
	if ($str!==$ostr)
		notify($ctx.': had to shrink input string to '.$max.' chars to be able to insert it into «'.$col.'» column in «'.$tab.'» table.',2);
	return $str;
}
/**
 * Clamps a number to the ['min', 'max'] range the global $tables map gives for a column.
 * Clamping issues a level 2 notification; a non-numeric input issues a level 3 one and yields 0.
 *
 * @param mixed $num Number to check
 * @param string $tab Table name (lowercased when the global $iswin is set)
 * @param string $col Column name
 * @param string $ctx Context prefix for the notification text
 * @return mixed The input if it fits, else the violated bound, or 0 for non-numeric input
 */
function truncn($num,$tab,$col,$ctx) {
	global $tables, $iswin;
	if ($iswin)
		$tab=strtolower($tab);
	if (!is_numeric($num)) {
		notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
		return 0;
	}
	if ($num>$tables[$tab][$col]['max']) {
		notify($ctx.': had to ceil «'.$num.'» to «'.$tables[$tab][$col]['max'].'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',2);
		$num=$tables[$tab][$col]['max'];
	} elseif ($num<$tables[$tab][$col]['min']) {
		notify($ctx.': had to floor «'.$num.'» to «'.$tables[$tab][$col]['min'].'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'».',2);
		$num=$tables[$tab][$col]['min'];
	}
	return $num;
}
/**
 * Replaces carriage returns and newlines with the two-character sequences "\r" and "\n".
 *
 * @param string $str Input text
 * @return string Text without raw line breaks
 */
function nocrnl($str) {
	$raw=["\r","\n"];
	$visible=['\\r','\\n'];
	return str_replace($raw,$visible,$str);
}
/**
 * Maps a truthy value to 1 and a falsy one to 0.
 *
 * @param mixed $bool Value to convert
 * @return int 1 or 0
 */
function b2i($bool) {
	return $bool ? 1 : 0;
}
/**
 * Tells whether a string is empty or made only of whitespace.
 *
 * @param string $str String to check
 * @return bool True if there is nothing but whitespace in it
 */
function isempty($str) {
	return preg_match('/^\s*$/',$str)===1;
}
/**
 * Records a notification in the "Notifications" table (skipped on dry runs) and optionally logs it.
 *
 * @param string $msg Notification text; tags are stripped only from the copy that gets logged
 * @param int $lev Severity, to be thought of as the "$lev" param of eecho(): 0=debug, 1=info, 2=warning, 3=error
 * @param bool $doecho Whether to also log the notification through eecho()
 */
function notify($msg,$lev,$doecho=true) {
	global $link, $tables, $iswin, $opts;
	if ($doecho)
		eecho($lev,'*notification*: '.mb_lcfirst(strip_tags($msg)).N);
	if ($opts['dryrun'])
		return;
	// the key used in the $tables map is lowercase when the global $iswin is set
	$tab=($iswin) ? 'notifications' : 'Notifications';
	// the stored text is cut to the column's maximum length
	myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.myesc($link,mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8')).'\', '.$lev.', \''.microtime(true).'\', 0, 0)',__LINE__);
}
/**
 * Sorts an array of arrays by the value each sub-array holds under $key, keeping the outer keys.
 * Sub-arrays with the same sort value are all kept (they are no longer collapsed into one).
 *
 * @param array $arr Array to sort, modified in place
 * @param mixed $key Sub-array key whose value drives the ordering
 * @param bool $rev When true sorts from highest to lowest, otherwise from lowest to highest
 */
function mdasortbykey(&$arr,$key,$rev=false) {
	uasort($arr,static function ($a,$b) use ($key,$rev) {
		$av=isset($a[$key]) ? $a[$key] : null;
		$bv=isset($b[$key]) ? $b[$key] : null;
		return $rev ? ($bv <=> $av) : ($av <=> $bv);
	});
}
- // "multi array_key_exists"
/**
 * "Multi array_key_exists": tells whether an array has every one of the given keys (null values count as present).
 *
 * @param array $keys Keys that must exist
 * @param array $arr Array to inspect
 * @return bool True if no key is missing
 */
function make($keys,&$arr) {
	foreach ($keys as $wanted) {
		if (array_key_exists($wanted,$arr))
			continue;
		return false;
	}
	return true;
}
/**
 * Renders a value as an SQL literal: NULL for null and blank strings, 1/0 for booleans,
 * a quoted and escaped string for everything else.
 *
 * @param mysqli $link Database connection, needed for escaping
 * @param mixed $var Value to render
 * @return string SQL literal
 */
function myv(&$link,$var) {
	if (is_null($var))
		return 'NULL';
	if (is_bool($var))
		return $var ? '1' : '0';
	if (trim($var)=='')
		return 'NULL';
	return '\''.mysqli_real_escape_string($link,$var).'\'';
}
/**
 * Converts a "year-month-day" date into the timestamp of that day's midnight (as computed by mktime()).
 * Anything following the day, eg the time part of a full datetime, is ignored.
 *
 * @param string $date Date starting with "Y-m-d"
 * @return int|false|null Timestamp as returned by mktime(), or null if $date does not start with a date
 */
function datetots($date) {
	if (!is_string($date) || preg_match('/^\s*(\d+)-(\d+)-(\d+)/',$date,$parts)!==1)
		return null;
	return mktime(0,0,0,(int)$parts[2],(int)$parts[3],(int)$parts[1]);
}
/**
 * Passes the response headers to ckratelimit() and acts on its verdict: logs a warning if the check
 * failed, or sleeps for the reported time when no requests are left.
 *
 * @param string $headers Raw response headers, one per CRLF-terminated line
 * @param string $ctx Context prefix for the log messages
 */
function ckrl($headers,$ctx) {
	$res=ckratelimit(explode("\r\n",$headers));
	if (!$res['ok']) {
		eecho(2,"{$ctx}: ckratelimit: {$res['error']}.\n");
		return;
	}
	if ($res['remaining']==0) {
		eecho(2,"{$ctx}: ckratelimit: x-ratelimit-remaining is 0, sleeping for ".ght($res['sleep']).' (until '.date('c',time()+$res['sleep']).") ...\n");
		sleep($res['sleep']);
	}
}
- /** <LANGUAGE MANAGEMENT> */
/**
 * Executes a call to the Mastodon API over HTTPS and decodes the JSON reply.
 *
 * Timeouts and proxy are taken from the global $opts; after a successful
 * fetch the response headers are handed to ckrl() for rate-limit handling.
 *
 * @param string $host Host to be called (e.g.: "mastodon.bida.im")
 * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
 * @return mixed The JSON reply decoded to associative arrays by json_decode, or NULL if the call fails
 */
function get_api($host, $path) {
    global $opts;
    $reply = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
    if ($reply['cont'] === false) {
        return NULL;
    }
    ckrl($reply['headers'], "«{$host}»");
    return json_decode($reply['cont'], true);
}
/**
 * Returns the languages recognized for the toot that gets passed to it,
 * each with its probability.
 *
 * When the toot declares its language, that language gets probability 1;
 * otherwise, if the toot has a content, the content (stripped of tags) is
 * handed to the Language detector. Regional variants are then grouped under
 * the part of the code preceding the first "-" (e.g.: "zh-CN" into "zh"),
 * keeping the highest probability of each group.
 *
 * @param mixed $toot The toot to be checked, as returned by the API
 * @return array Associative array mapping language code to probability (empty if nothing could be determined)
 */
function get_toot_languages($toot) {
    $istoot = is_array($toot);
    $declared = NULL;
    if ($istoot && array_key_exists('language', $toot)) {
        $declared = $toot['language'];
    }
    $weights = [];
    if ($declared !== NULL) {
        // the language is explicitly set in the toot, so use that
        $weights[$declared] = 1;
    } elseif ($istoot && array_key_exists('content', $toot)) {
        // the language is not explicitly set in the toot, so try and recognize it
        $plain = strip_tags($toot['content']);
        $detector = new Language;
        // NOTE(review): presumably a map of language code => score; confirm against the Language class
        $weights = $detector->detect($plain)->bestResults()->close();
    }
    // group derived languages under their base code, keeping the highest weight
    $bybase = [];
    foreach ($weights as $code => $weight) {
        $base = explode("-", $code)[0];
        $bybase[$base] = array_key_exists($base, $bybase) ? max($bybase[$base], $weight) : $weight;
    }
    return $bybase;
}
/**
 * Given the probability of each language for every toot, calculates the
 * average probability of each language over all the toots.
 *
 * The sum of the weights of a language is divided by the total number of
 * toots, including those in which that language does not appear.
 *
 * @param array $detected_langs Array of mappings between language and probability, one per toot
 * @return array Mapping between language and average probability
 */
function summary($detected_langs) {
    $totals = [];
    foreach ($detected_langs as $tootlangs) {
        foreach ($tootlangs as $code => $weight) {
            $sofar = array_key_exists($code, $totals) ? $totals[$code] : 0;
            $totals[$code] = $sofar + $weight;
        }
    }
    if ($totals === []) {
        return $totals;
    }
    $ntoots = count($detected_langs);
    $averages = [];
    foreach ($totals as $code => $total) {
        $averages[$code] = $total / $ntoots;
    }
    return $averages;
}
/**
 * Helper function for usort: compares two arrays on their first element so
 * that the resulting order is descending.
 *
 * @param array $entry1 First array to be compared
 * @param array $entry2 Second array to be compared
 * @return int 1 if $entry1[0] is less than $entry2[0], 0 if they are equal, -1 otherwise
 */
function sort_weights($entry1, $entry2) {
    if ($entry1[0] < $entry2[0]) {
        return 1;
    }
    if ($entry1[0] == $entry2[0]) {
        return 0;
    }
    return -1;
}
/**
 * Given a language mapping, returns the list of probable languages.
 *
 * Languages are ranked by decreasing weight (through sort_weights) and
 * taken in that order until one weighs less than two thirds of the one
 * taken just before it.
 *
 * @param array $summary Map between language and probability
 * @return string[] List of probable languages, most probable first
 */
function get_languages($summary) {
    $ranked = [];
    foreach ($summary as $code => $weight) {
        $ranked[] = [$weight, $code];
    }
    usort($ranked, 'sort_weights');
    $chosen = [];
    $previous = 0;
    foreach ($ranked as $pair) {
        // stop at the first language that drops below 2/3 of the preceding weight
        if ($pair[0] < $previous * 2 / 3) {
            break;
        }
        $chosen[] = $pair[1];
        $previous = $pair[0];
    }
    return $chosen;
}
/**
 * Returns a list of probable languages for the given instance.
 *
 * Fetches the local public timeline (as many toots as set in
 * $opts['ldtoots']), determines the languages of every toot, averages them
 * and keeps at most the five most probable ones.
 *
 * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
 * @return string[] List of probable languages (empty if the timeline could not be fetched or is empty)
 */
function get_instance_langs($host) {
    global $opts;
    $toots = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
    if ($toots == NULL) {
        return [];
    }
    $pertoot = array_map('get_toot_languages', $toots);
    $probable = get_languages(summary($pertoot));
    return array_slice($probable, 0, 5);
}
/**
 * Returns the ID of the record of table Languages describing the given
 * language code, creating the record when it does not exist yet.
 *
 * The code is canonicalized with locale_canonicalize(); a blank code, or
 * one whose canonical form contains "__", is reported through notify() and
 * replaced with «en». A new record gets the name of the language in the
 * language itself (NameOrig) and in every language listed in $supplangs
 * (columns "Name" + upper-cased key).
 *
 * @param mysqli $link      Database connection
 * @param string $lang      Language code to look up
 * @param array  $supplangs Supported languages; only the keys are used here
 * @param string $hostname  Hostname, used as context in notifications
 * @param bool   $dryrun    When true no record is inserted
 * @param int    $line      Line number handed over to myq()
 * @return mixed ID of the matching or newly inserted record; 0 when the record is missing and $dryrun is true
 */
function getlangid(&$link,$lang,&$supplangs,$hostname,$dryrun,$line) {
    $ctx = '«'.$hostname.'»';
    $code = locale_canonicalize($lang);
    if (preg_match('/^\s*$/',$lang)===1 || preg_match('/__/',$code)===1) {
        notify($ctx.': «'.$lang.'» is not a valid language code, falling back to default «en».',2,true);
        $code = 'en';
    }
    $found = myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$code).'\'',$line);
    $nrows = mysqli_num_rows($found);
    if ($nrows != 0) {
        // the language is already known: use the first matching record
        if ($nrows > 1) {
            notify('In table Languages there are '.$nrows.' records with Code = «'.$code.'» :-(',2,true);
        }
        $row = mysqli_fetch_assoc($found);
        return $row['ID'];
    }
    // unknown language: build the INSERT, one "Name…" column per supported language
    $code = myesc($link,truncs($code,'Languages','Code',$ctx));
    $nameorig = myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$code)),'Languages','NameOrig',$ctx));
    $columns = ['ID', 'Code', 'NameOrig'];
    $values = ['NULL', '\''.$code.'\'', '\''.$nameorig.'\''];
    foreach ($supplangs as $key => $unused) {
        $column = 'Name'.strtoupper($key);
        $name = myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$key)),'Languages',$column,$ctx));
        $columns[] = $column;
        $values[] = '\''.$name.'\'';
    }
    $q = 'INSERT INTO Languages ('.implode(', ',$columns).') VALUES ('.implode(', ',$values).')';
    if ($dryrun) {
        return 0;
    }
    myq($link,$q,$line);
    return mysqli_insert_id($link);
}
/**
 * Resolves a list of language codes to the IDs of their Languages records
 * (through getlangid) and drops the duplicate IDs.
 *
 * @param array  $langs     Language codes to resolve
 * @param array  $supplangs Supported languages, handed over to getlangid()
 * @param mysqli $link      Database connection
 * @param string $hostname  Hostname, handed over to getlangid()
 * @param bool   $dryrun    Dry-run flag, handed over to getlangid()
 * @param int    $line      Line number, handed over to getlangid()
 * @return array Distinct IDs; array_unique keeps the index of the first occurrence, so indexes may have gaps
 */
function getlangsidsarr(&$langs,&$supplangs,&$link,$hostname,$dryrun,$line) {
    $ids = [];
    foreach ($langs as $code) {
        $ids[] = getlangid($link,$code,$supplangs,$hostname,$dryrun,$line);
    }
    return array_unique($ids);
}
/**
 * Blocks until a TCP connection to www.google.com on port 80 can be opened,
 * i.e. until the machine appears to be online.
 *
 * Every failed attempt (1 second connection timeout) is reported as a
 * warning and followed by a pause; the pause announced in the message and
 * the one actually applied come from the same variable, so they cannot
 * disagree. When at least one attempt failed, the return to connectivity
 * is reported too.
 *
 * @return void
 */
function waituntilonline() {
    $host = 'www.google.com';
    $retrydelay = 5; // seconds between two attempts
    $gotoff = false;
    while (false === ($f = @fsockopen($host, 80, $errno, $errstr, 1))) {
        $gotoff = true;
        eecho(2,'it seems we are offline ('.$errno.': '.$errstr.'), waiting for '.$retrydelay.' seconds before retrying...'.N);
        sleep($retrydelay);
    }
    // the connection was only a probe: release it right away
    fclose($f);
    if ($gotoff) eecho(1,'it seems we are back online! :-)'.N);
}
/**
 * Callback for preg_replace_callback: renders a matched heading as plain
 * text, i.e. "§ " followed by the upper-cased heading and a blank line.
 *
 * @param array $arr Match groups; only $arr[4] (the heading text) is used
 * @return string The plain-text heading
 */
function headcb($arr) {
    $title = mb_strtoupper($arr[4]);
    return '§ '.$title."\n\n";
}
/**
 * Converts a fragment of HTML to plain text.
 *
 * List items get a bullet, headings are rendered through headcb(),
 * paragraphs, divs and lists are set apart, line breaks become newlines,
 * remaining tags are stripped and entities decoded. In the result every run
 * of newlines is a single blank line, runs of spaces are collapsed and the
 * text ends with exactly one newline.
 *
 * Line breaks are recognized in any of the forms <br>, <br/> and <br />
 * (whatever the spacing before the optional slash).
 *
 * @param string $html      HTML to convert
 * @param bool   $keeplinks When true every opening <a> tag is replaced by " [link: URL] " (default: false)
 * @return string The plain text
 */
function html2text($html,$keeplinks=false) {
    $html=preg_replace('#\r#','',$html);
    if ($keeplinks) $html=preg_replace('#<a\s.*href="(.*)".*>#iU',' [link: $1] ',$html);
    $html=preg_replace('#<li>#i',' • ',$html);
    $html=preg_replace_callback('#(<h)(\d)(>)(.*)(</h\d>)#iU','headcb',$html);
    $html=preg_replace('#<p>|<div>|<ul>|<ol>#i',"\n\n",$html);
    $html=preg_replace('#</p>|</div>|</ul>|</ol>|</li>#i',"\n\n",$html);
    // accept <br>, <br/>, <br /> alike: an unrecognized break would be stripped, gluing the adjacent text together
    $html=preg_replace('#<br\s*/?>#i',"\n",$html);
    $html=strip_tags($html);
    $html=html_entity_decode($html,ENT_QUOTES,'UTF-8');
    // drop the whitespace preceding a newline, collapse spaces, then turn every run of newlines into one blank line
    $html=preg_replace('#\s+\n#',"\n",$html);
    $html=preg_replace('# +#',' ',$html);
    $html=preg_replace('#\n+#',"\n\n",$html);
    $html=trim($html)."\n";
    return $html;
}
- ?>
|