getinstinfo.php 68 KB


  1. #!/usr/bin/php
  2. <?php
  3. /*
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  // Shorthand constants for the two line terminators.
  const N = "\n";
  const RN = "\r\n";

  // File name of this script; it is interpolated into the help text.
  define('SNAME', basename(__FILE__));

  // Work from the script's own directory so that the relative
  // '../lib/...' paths required below resolve.
  chdir(__DIR__);
  19. require '../lib/parsetime.php';
  20. require '../lib/gurl.php';
  21. require '../lib/gethttpcode.php';
  22. require '../lib/tables.php';
  23. require '../lib/mb_ucfirst.php';
  24. require '../lib/mb_lcfirst.php';
  25. require '../lib/ghs.php';
  26. require '../lib/ght.php';
  27. require '../lib/fnum.php';
  28. require '../lib/supplangs.php';
  29. require '../lib/gettlds.php';
  30. require '../lib/mastodon_postlen.php';
  31. require '../lib/ckratelimit.php';
  32. require '../lib/vendor/autoload.php';
  33. use LanguageDetection\Language;
  34. use function mysqli_real_escape_string as myesc;
  // True when PHP_OS starts with "WIN" (case-insensitively).
  $iswin = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN');

  declare(ticks=1);

  if (function_exists('pcntl_signal')) {
      /**
       * Signal callback: prints a newline, then hands a shutdown message to
       * mexit() with 0 as second argument (presumably the exit status).
       *
       * @param int $signal number of the signal that was received
       */
      function signalHandler($signal) {
          echo(N);
          mexit('received signal «'.$signal.'», shutting down.'.N,0);
      }

      // Termination ('kill' was called)
      pcntl_signal(SIGTERM, 'signalHandler');
      // Terminal log-out
      pcntl_signal(SIGHUP, 'signalHandler');
      // Interrupted (Ctrl-C is pressed)
      pcntl_signal(SIGINT, 'signalHandler');
  }
  // Run-time options with their defaults. Command-line switches and the
  // «getinstinfo.ini» file override these further down.
  $opts = [
      // instance to work on (positional argument)
      'hostname'     => null,
      // timeout for every connection attempt
      'conntimeout'  => 10,
      // timeout for every download
      'functimeout'  => 20,
      // number of toots to check with the automatic language detection function
      'ldtoots'      => 40,
      // when true, nothing is written to the database
      'dryrun'       => false,
      // when true, users' info is fetched from the instance's users directory
      'fetchusers'   => false,
      // attempts at fetching a chunk of the profile directory before giving up
      'udiratts'     => 5,
      // wait after each failed attempt at fetching such a chunk
      'udirfailst'   => 90,
      // index into $msglevs: minimum importance of the messages to show
      'minmsgimplev' => 1,
      // announcement bot settings, read from «getinstinfo.ini»
      'bothost'      => null,
      'bottoken'     => null,
      'botmaxchars'  => null,
      // set to true once all three bot settings above are available
      '_sendtoot'    => false,
      // proxy settings, read from «getinstinfo.ini»
      'proxy'        => null,
      'useproxyfor'  => null,
  ];

  // Message importance levels, least important first.
  $msglevs = [
      'Debug',
      'Info',
      'Warning',
      'Error',
      'None',
  ];
  // Default values quoted by the help text, rendered once, in the order
  // in which they appear in it.
  $helpdefs=[
      'ldtoots'=>$opts['ldtoots'],
      'udiratts'=>$opts['udiratts'],
      'udirfailst'=>ght($opts['udirfailst'],null,0),
      'conntimeout'=>ght($opts['conntimeout'],null,0),
      'functimeout'=>ght($opts['functimeout'],null,0),
      'minmsgimplev'=>lcfirst($msglevs[$opts['minmsgimplev']])
  ];
  // Full help text shown by «-h» / «--help».
  $help='SYNOPSIS
'.SNAME.' <hostname> [options]
DESCRIPTION
This script tries to fetch info about the fediverse instance at the given
hostname and insert or update them in mastostart’s database.
OPTIONS
-l, --ldtoots <number>
This option defines the number of toots the script will try to fetch from
the local public timelines, to try and guess the most used languages of each
instance. Its minimum value is 10, its maximum value is 40.
DEFAULT: '.$helpdefs['ldtoots'].'
-f, --fetchusers
If this option is set, the script will try to fetch users’ info from the
considered instance’s users directory, and store them in the database.
-r, --udiratts <number>
This option defines how many attempts the script will do at fetching a chunk
of users’ info from the profile directory, before giving up.
DEFAULT: '.$helpdefs['udiratts'].'
-s, --udirfailst <time>
This option defines how long the script will wait after each failed attempt
at fetching a chunk of users’ info from the profile directory (see above)
before retrying.
DEFAULT: '.$helpdefs['udirfailst'].'
-t, --conntimeout <time>
Sets the timeout for every connection attempt. See section «TIME
SPECIFICATION» below to see how to specify time.
DEFAULT: '.$helpdefs['conntimeout'].'
-T, --functimeout <time>
Sets the timeout for every download. See section «TIME SPECIFICATION» below
to see how to specify time.
DEFAULT: '.$helpdefs['functimeout'].'
-d, --dryrun
If this option is set, the script won’t write anything in the database.
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written to the
text user interface. There are 4 “importance levels”, in this order of
importance: «debug», «info», «warning», «error». Setting this option to any
of these values will make the script write to the text user interface all
the messages with the specified or a greater level; setting it to the
special value «none» will completely disable messages.
DEFAULT: '.$helpdefs['minmsgimplev'].'
-h, --help
If this option is set, the script will show this help text and exit.
TIME SPECIFICATION
An example is better than ~5148 words :-)
To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
NEW INSTANCE ANNOUNCEMENT TOOT
This script can send an announcement toot when the instance it checks is new.
It will try to do it if it finds a readable «getinstinfo.ini» file in the
same directory it lives in, with a «bothost» parameter defining the instance
to use to send the toot, a «bottoken» parameter defining the token to be used
to post, and a «botmaxchars» parameter defining the maximum number of
characters allowed for toots on the defined instance (must be >= 10).
PROXY SUPPORT
This script supports socks5 proxy to connect to an instance. It will try to
use a proxy if it finds a readable «getinstinfo.ini» file in the same
directory it lives in, with a «proxy» parameter defining the proxy to use and
a «useproxyfor» parameter including the hostname it’s working on (see
«SYNOPSIS» above).
«proxy» syntax: [user:pass@]<host>[:port]
«useproxyfor» syntax: host[,host][...]
LICENSE
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
  // A help request anywhere on the command line wins over everything else:
  // it is honoured before any configuration file is read.
  foreach ($argv as $val) {
      if (in_array($val,['-h','--help'],true)) {
          echo($help);
          exit(0);
      }
  }
  // Optional per-script configuration: announcement bot and proxy settings.
  // A missing or unreadable file is reported but is not fatal.
  $inifp=__DIR__.'/getinstinfo.ini';
  $iniarr=@parse_ini_file($inifp);
  if (is_array($iniarr)) {
      if (isset($iniarr['bothost']) && !isempty($iniarr['bothost']))
          $opts['bothost']=$iniarr['bothost'];
      if (isset($iniarr['bottoken']) && !isempty($iniarr['bottoken']))
          $opts['bottoken']=$iniarr['bottoken'];
      if (isset($iniarr['botmaxchars'])) {
          if (preg_match('/^[0-9]+$/',$iniarr['botmaxchars'],$matches)!=1 || $matches[0]+0<10)
              mexit('config file «'.$inifp.'»: value for «botmaxchars» must be an integer >= 10.'.N,1);
          $opts['botmaxchars']=$iniarr['botmaxchars']+0;
      }
      // The announcement toot is enabled only when all three bot parameters are configured.
      if (!is_null($opts['bothost']) && !is_null($opts['bottoken']) && !is_null($opts['botmaxchars']))
          $opts['_sendtoot']=true;
      if (isset($iniarr['proxy']) && !isempty($iniarr['proxy']))
          $opts['proxy']=$iniarr['proxy'];
      if (isset($iniarr['useproxyfor']) && !isempty($iniarr['useproxyfor'])) {
          // Host names are later compared verbatim with the hostname given on the
          // command line, so strip the blanks people naturally put after commas
          // («a.example, b.example») and drop empty entries.
          $proxyhosts=[];
          foreach (explode(',',$iniarr['useproxyfor']) as $proxyhost) {
              $proxyhost=trim($proxyhost);
              if ($proxyhost!=='')
                  $proxyhosts[]=$proxyhost;
          }
          if (count($proxyhosts)>0)
              $opts['useproxyfor']=$proxyhosts;
      }
  } else {
      eecho(1,"Could not open «{$inifp}» (it does not exist or is not readable).\n");
  }
  // Main configuration file: mandatory. The database connection parameters
  // are taken from it further down.
  $inifp=__DIR__.'/../conf/mustard.ini';
  $iniarr=@parse_ini_file($inifp)
      or mexit('could not open config file «'.$inifp.'».'.N,1);
  // Command-line parsing. Options that take a value consume the following
  // argument; the first argument not starting with «-» is the hostname.
  // Every malformed or unknown argument ends the script through mexit().
  $timeopts=[
      '-s'=>'udirfailst', '--udirfailst'=>'udirfailst',
      '-t'=>'conntimeout', '--conntimeout'=>'conntimeout',
      '-T'=>'functimeout', '--functimeout'=>'functimeout'
  ];
  for ($i=1; $i<$argc; $i++) {
      $arg=$argv[$i];
      $hasnext=($i+1<$argc);
      switch ($arg) {
          case '-f':
          case '--fetchusers':
              $opts['fetchusers']=true;
              break;
          case '-r':
          case '--udiratts':
              // the check accepts 1, so the message says «>= 1» (it used to say «> 1»)
              if (!$hasnext || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0<1)
                  mexit('option «'.$arg.'» requires a number >= 1 as an argument (use «-h» to read help).'.N,1);
              $i++;
              $opts['udiratts']=$argv[$i]+0;
              break;
          case '-s':
          case '--udirfailst':
          case '-t':
          case '--conntimeout':
          case '-T':
          case '--functimeout':
              if (!$hasnext || parsetime($argv[$i+1])===false)
                  mexit('option «'.$arg.'» requires a time specification as an argument (use «-h» to read help).'.N,1);
              $i++;
              $opts[$timeopts[$arg]]=parsetime($argv[$i]);
              break;
          case '-l':
          case '--ldtoots':
              if (!$hasnext || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0>40 || $argv[$i+1]+0<10)
                  mexit('option «'.$arg.'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
              $i++;
              $opts['ldtoots']=$argv[$i]+0;
              break;
          case '-d':
          case '--dryrun':
              $opts['dryrun']=true;
              break;
          case '-m':
          case '--minmsgimplev':
              if (!$hasnext || !in_array(ucfirst(strtolower($argv[$i+1])),$msglevs,true))
                  mexit('option «'.$arg.'» requires a “message importance level” value as an argument (use «-h» to read help).'.N,1);
              $i++;
              $opts['minmsgimplev']=array_search(ucfirst(strtolower($argv[$i])),$msglevs,true);
              break;
          default:
              // substr() instead of $arg[0]: an empty argument must not raise an
              // "uninitialized string offset" warning.
              if (is_null($opts['hostname']) && substr($arg,0,1)!=='-') {
                  $opts['hostname']=$arg;
              } else {
                  mexit('don’t know how to interpret «'.$arg.'», please read the help text using «-h» or «--help».'.N,1);
              }
      }
  }
  if (is_null($opts['hostname'])) mexit('you didn’t specify an hostname (you can read the help text using «-h» or «--help»).'.N,1);
  // Connect to the database described by the main configuration file.
  // mysqli may either throw or return false on failure, so both outcomes
  // are handled.
  try {
      $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']);
  } catch (Exception $error) {
      mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
  }
  // for php versions < 8
  if ($link===false) mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
  try {
      $res=mysqli_set_charset($link,'utf8mb4');
  } catch (Exception $error) {
      mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
  }
  // for php versions < 8
  if ($res===false) mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);

  // Build a regex alternation out of the names of the platforms flagged
  // «Consider=1»; it is used later to decide whether a software name counts
  // as Mastodon.
  $mastodons=[];
  $res=myq($link,'SELECT Name FROM Platforms WHERE Consider=1',__LINE__);
  while ($row=mysqli_fetch_assoc($res))
      $mastodons[]=preg_quote($row['Name'],'/');
  if (count($mastodons)<1) mexit('in table «Platforms», there is no platform to be considered!'.N,1);
  $mastodons=implode('|',$mastodons);

  $tables=tables($link);
  //print_r($tables);

  // Names of the «Instances» columns that are handed to setint().
  $instints=['ID', 'FirstSeen', 'IsMastodon', 'Priority', 'Visible', 'Noxious', 'NoxLastModTS', 'LocalityID', 'OurLangsLock', 'UserCount', 'StatusCount', 'DomainCount', 'ActiveUsersMonth', 'ActiveUsersHalfYear', 'RegOpen', 'RegReqApproval', 'MaxTootChars', 'AdmCreatedAt', 'PublicBlocksList', 'TotChecks', 'OkChecks', 'WasLastCheckOk', 'LastOkCheckTS', 'GuestID', 'LastGuestEdit', 'InsertTS', 'RPos'];

  // Start the new instance record from the column defaults of «Instances».
  $idata=[];
  // myq()'s third argument is the calling line everywhere else; this call
  // used to pass __FILE__ by mistake.
  $res=myq($link,'SHOW COLUMNS FROM Instances',__LINE__);
  while ($row=mysqli_fetch_assoc($res))
      $idata[$row['Field']]=$row['Default'];
  // since we later need to determine if a value is an integer, and mysql returns integers as strings...
  setint($instints,$idata);
  $idata['URI']=$opts['hostname'];
  $instanswered=false;
  $now=time();
  /*
  * Nodeinfo ('https://'.$opts['hostname'].'/nodeinfo/2.0.json') was added in v3.0.0
  * Trends ('https://'.$opts['hostname'].'/api/v1/trends') was added in v3.0.0 and became /api/v1/trends/tags with v3.5.0
  * Activity ('https://'.$opts['hostname'].'/api/v1/instance/activity') was added in v2.1.2
  */
  waituntilonline();

  // Prefix shared by the messages about this host.
  $hosttag='«'.$opts['hostname'].'»';
  eecho(1,'[[[ Working on '.$hosttag.' ]]]'.N);

  if (willtrunc($opts['hostname'],'Instances','URI'))
      mexit($hosttag.': ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N,2);

  // The configured proxy is kept only when this very hostname is listed in
  // «useproxyfor»; in every other case it is dropped for the whole run.
  $useproxy=is_array($opts['useproxyfor']) && !is_null($opts['proxy']) && in_array($opts['hostname'],$opts['useproxyfor']);
  if (!$useproxy) {
      $opts['proxy']=null;
  } else {
      eecho(1,"«{$opts['hostname']}»: will use configured proxy («{$opts['proxy']}») to fetch its data.\n");
  }

  // Load the record already stored for this host, if any: $oidata holds it,
  // or null when the host is not in the table yet.
  eecho(0,$hosttag.': trying to fetch its info from the database...'.N);
  $res=myq($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$opts['hostname']).'\'',__LINE__);
  $count=mysqli_num_rows($res);
  if ($count<1) {
      eecho(1,$hosttag.': found no record with this URI in Instances table.'.N);
      $oidata=null;
  } elseif ($count==1) {
      eecho(1,$hosttag.': found 1 record with this URI in Instances table.'.N);
      $oidata=mysqli_fetch_assoc($res);
      setint($instints,$oidata);
  } else {
      // more than one record for the same URI: notify and stop
      $msg=$hosttag.': there are '.$count.' records with this URI in Instances table.';
      notify($msg,3,false);
      mexit($msg.N,3);
  }
  // Nodeinfo discovery: fetch the «/.well-known/nodeinfo» index (https first,
  // then http), pick one of the documents it links to, and take software
  // name/version, usage counters and registration status from that document.
  eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
  $buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
  if ($buf['cont']===false) {
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
      $buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
  }
  if ($buf['cont']!==false) {
      $buf=@json_decode($buf['cont'],true);
      if (is_array($buf)) {
          if (isset($buf['links']) && is_array($buf['links']) && count($buf['links'])>0) {
              $ok=true;
              $nirefs=[];
              foreach ($buf['links'] as $key=>$niref) {
                  // This is remote, untrusted JSON: «rel» is used as an array key
                  // and «href» as a URL, so both must be strings (an array used
                  // as a key would be a fatal error).
                  if (is_array($niref) && isset($niref['rel'],$niref['href']) && is_string($niref['rel']) && is_string($niref['href'])) {
                      $nirefs[$niref['rel']]=$niref['href'];
                  } else {
                      eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs “links” entitity '.$key.' has unexpected format.'.N);
                      $ok=false;
                  }
              }
              if ($ok) {
                  // Take the link whose «rel» sorts last (presumably the highest
                  // schema version, since the «rel» URIs end with it).
                  krsort($nirefs);
                  $niref=array_shift($nirefs);
                  eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
                  eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
                  $buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
                  if ($buf['cont']!==false) {
                      $buf=@json_decode($buf['cont'],true);
                      if (is_array($buf)) {
                          eecho(1,'«'.$opts['hostname'].'»: got nodeinfo data :-)'.N);
                          if (isset($buf['software']['name']) && is_string($buf['software']['name']) && !isempty($buf['software']['name'])) {
                              $idata['Software']=trim($buf['software']['name']);
                              // The alternation is grouped so that «^» anchors every
                              // platform name, not just the first one.
                              $idata['IsMastodon']=(preg_match('/^(?:'.$mastodons.')/',$idata['Software'])===1);
                              // Make sure the platform is known, adding it when it is new.
                              $res=myq($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$idata['Software']).'\'',__LINE__);
                              if (mysqli_num_rows($res)<1) {
                                  if (!$opts['dryrun'])
                                      myq($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($idata['Software'], 'Platforms', 'Name', '«'.$opts['hostname'].'»')).'\')',__LINE__);
                                  notify('«'.$opts['hostname'].'» runs on «'.$idata['Software'].'», which was not present in the «Platforms» table, so it was added there. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances by setting the «Consider» field of its record to «1».',2);
                              }
                          }
                          if (isset($buf['software']['version']) && is_string($buf['software']['version']) && !isempty($buf['software']['version']))
                              $idata['Version']=trim($buf['software']['version']);
                          if (isset($buf['usage']['users']['total']) && is_int($buf['usage']['users']['total']))
                              $idata['UserCount']=$buf['usage']['users']['total'];
                          if (isset($buf['usage']['users']['activeMonth']) && is_int($buf['usage']['users']['activeMonth']))
                              $idata['ActiveUsersMonth']=$buf['usage']['users']['activeMonth'];
                          if (isset($buf['usage']['users']['activeHalfyear']) && is_int($buf['usage']['users']['activeHalfyear']))
                              $idata['ActiveUsersHalfYear']=$buf['usage']['users']['activeHalfyear'];
                          if (isset($buf['usage']['localPosts']) && is_int($buf['usage']['localPosts']))
                              $idata['StatusCount']=$buf['usage']['localPosts'];
                          if (isset($buf['openRegistrations']) && is_bool($buf['openRegistrations']))
                              $idata['RegOpen']=b2i($buf['openRegistrations']);
                      } else {
                          eecho(2,'«'.$opts['hostname'].'»: nodeinfo data was not good JSON.'.N);
                      }
                  } else {
                      eecho(2,'«'.$opts['hostname'].'»: could not fetch nodeinfo data: '.$buf['emsg'].'.'.N);
                  }
              }
          } else {
              eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs had unexpected format.'.N);
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: nodeinfo specs where not good JSON.'.N);
      }
  } else {
      eecho(2,'«'.$opts['hostname'].'»: could not fetch nodeinfo specs: '.$buf['emsg'].'.'.N);
  }
  /*
   * Tells whether $version denotes at least release $minimum.
   * Only the leading numeric part of $version is considered (an optional «v»
   * prefix and any suffix such as «rc1» or «+glitch» are ignored) and the
   * comparison is numeric, component by component: a plain string comparison
   * would rank «10.0.0» below «4.0.0».
   * Returns false when $version is not a string or has no numeric part.
   */
  $veratleast=static function ($version,$minimum) {
      if (!is_string($version) || preg_match('/^v?(\d+(?:\.\d+)*)/i',trim($version),$vm)!==1)
          return false;
      return version_compare($vm[1],$minimum,'>=');
  };
  if ($idata['IsMastodon'] && $veratleast($idata['Version'],'4.0.0')) {
      // ---- Mastodon >= 4.0.0: main data from API v2, the rest from API v1 ----
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
      $buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
      if ($buf['cont']!==false) {
          ckrl($buf['headers'],"«{$opts['hostname']}»");
          $buf=@json_decode($buf['cont'],true);
          if (is_array($buf)) {
              if (make(['domain', 'title', 'version', 'source_url', 'description', 'usage', 'thumbnail', 'languages', 'configuration', 'registrations', 'contact', 'rules'],$buf)) {
                  eecho(1,'«'.$opts['hostname'].'»: got good instance info from API v2 :-)'.N);
                  $instanswered=true;
                  if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title']))
                      $idata['Title']=trim($buf['title']);
                  if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description']))
                      $idata['ShortDesc']=trim($buf['description']);
                  if (isset($buf['thumbnail']['url']) && is_string($buf['thumbnail']['url']) && !isempty($buf['thumbnail']['url'])) {
                      $idata['Thumb']=trim($buf['thumbnail']['url']);
                      // probe the first bytes of the image: if nothing can be read, flag it
                      if (!@file_get_contents($idata['Thumb'],false,null,0,512)) $idata['Thumb']='unavailable';
                  }
                  if (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters']))
                      $idata['MaxTootChars']=$buf['configuration']['statuses']['max_characters'];
                  if (isset($buf['registrations']['approval_required']) && is_bool($buf['registrations']['approval_required']))
                      $idata['RegReqApproval']=b2i($buf['registrations']['approval_required']);
                  if (isset($buf['contact']['email']) && is_string($buf['contact']['email']))
                      $idata['Email']=trim($buf['contact']['email']);
                  // Admin account details are stored only when «noindex» is absent or false.
                  if (!isset($buf['contact']['account']['noindex']) || $buf['contact']['account']['noindex']===false) {
                      if (isset($buf['contact']['account']['acct']) && is_string($buf['contact']['account']['acct']) && !isempty($buf['contact']['account']['acct']))
                          $idata['AdmAccount']=trim($buf['contact']['account']['acct']);
                      if (isset($buf['contact']['account']['display_name']) && is_string($buf['contact']['account']['display_name']) && !isempty($buf['contact']['account']['display_name']))
                          $idata['AdmDisplayName']=trim($buf['contact']['account']['display_name']);
                      if (isset($buf['contact']['account']['created_at']) && is_string($buf['contact']['account']['created_at']) && ($ts=strtotime($buf['contact']['account']['created_at']))!==false)
                          $idata['AdmCreatedAt']=$ts;
                      if (isset($buf['contact']['account']['note']) && is_string($buf['contact']['account']['note']) && !isempty($buf['contact']['account']['note']))
                          $idata['AdmNote']=trim($buf['contact']['account']['note']);
                      if (isset($buf['contact']['account']['url']) && is_string($buf['contact']['account']['url']) && !isempty($buf['contact']['account']['url']))
                          $idata['AdmURL']=trim($buf['contact']['account']['url']);
                      if (isset($buf['contact']['account']['avatar']) && is_string($buf['contact']['account']['avatar']) && !isempty($buf['contact']['account']['avatar'])) {
                          $idata['AdmAvatar']=trim($buf['contact']['account']['avatar']);
                          if (!@file_get_contents($idata['AdmAvatar'],false,null,0,512)) $idata['AdmAvatar']='unavailable';
                      }
                      if (isset($buf['contact']['account']['header']) && is_string($buf['contact']['account']['header']) && !isempty($buf['contact']['account']['header']))
                          $idata['AdmHeader']=trim($buf['contact']['account']['header']);
                  } else {
                      if (isset($buf['contact']['account']['noindex']) && is_bool($buf['contact']['account']['noindex']) && $buf['contact']['account']['noindex']===true)
                          $idata['AdmAccount']='OPTED OUT';// here we rely on the fact that nobody could set "acct" to "OPTED OUT" since it doesn't allow spaces
                      // NOTE(review): this runs for every «noindex» value that reaches this
                      // branch, not only for «true» — confirm that is intended.
                      $idata['AdmAvatar']='unavailable';
                  }
                  if (isset($buf['languages']) && is_array($buf['languages']))
                      $idata['languages']=$buf['languages'];
                  if (isset($buf['rules']) && is_array($buf['rules'])) {
                      foreach ($buf['rules'] as $rule) {
                          if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text']))
                              $idata['rules'][$rule['id']]=$rule['text'];
                      }
                  }
              } else {
                  eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v2 had unexpected format.'.N);
              }
          } else {
              eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v2 were not good JSON.'.N);
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
      }
      // "domain count" is only listed by /api/v1/instance
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch domain count from API v1...'.N);
      $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
      if ($buf['cont']!==false) {
          ckrl($buf['headers'],"«{$opts['hostname']}»");
          $buf=@json_decode($buf['cont'],true);
          if (is_array($buf)) {
              eecho(1,'«'.$opts['hostname'].'»: got instance info from API v1 :-)'.N);
              if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count']))
                  $idata['DomainCount']=$buf['stats']['domain_count'];
          } else {
              eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 was not good JSON.'.N);
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v1: '.$buf['emsg'].'.'.N);
      }
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch extended description from API v1...'.N);
      $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
      if ($buf['cont']!==false) {
          ckrl($buf['headers'],"«{$opts['hostname']}»");
          $buf=@json_decode($buf['cont'],true);
          if (is_array($buf)) {
              eecho(1,'«'.$opts['hostname'].'»: got extended description from API v1 :-)'.N);
              if (isset($buf['content']) && is_string($buf['content']) && !isempty($buf['content']))
                  $idata['LongDesc']=trim($buf['content']);
          } else {
              eecho(2,'«'.$opts['hostname'].'»: instance extended description fetched from API v1 was not good JSON.'.N);
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
      }
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
      $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
      if ($buf['cont']!==false) {
          ckrl($buf['headers'],"«{$opts['hostname']}»");
          $buf=@json_decode($buf['cont'],true);
          if (is_array($buf)) {
              eecho(1,'«'.$opts['hostname'].'»: got instance domain blocks from API v1 :-)'.N);
              $idata['blocks']=[];
              $idata['PublicBlocksList']=1;
              // «accessible» unless a block on threads.net is found below
              $idata['Threads']='accessible';
              // «Threads» value recorded for each of the two accepted severities
              $threadsstatus=['suspend'=>'suspended', 'silence'=>'limited'];
              foreach ($buf as $key=>$block) {
                  if (is_array($block) && make(['domain', 'severity', 'comment'],$block) && is_string($block['domain']) && !isempty($block['domain']) && is_string($block['severity']) && in_array($block['severity'],['silence','suspend'],true) && (is_null($block['comment']) || is_string($block['comment']))) {
                      // normalise the comment: trimmed text, or null when missing/blank
                      $block['comment']=is_string($block['comment']) ? trim($block['comment']) : null;
                      if ($block['comment']==='')
                          $block['comment']=null;
                      $idata['blocks'][]=['dom'=>$block['domain'], 'sev'=>$block['severity'], 'comm'=>$block['comment']];
                      // threads.net itself or any of its subdomains; dots are escaped
                      // so that e.g. «threadsxnet» does not match
                      if (preg_match('#^(?:.*\.)?threads\.net$#i',$block['domain'])===1)
                          $idata['Threads']=$threadsstatus[$block['severity']];
                  } else {
                      eecho(2,'«'.$opts['hostname'].'»: domain blocks array has an unexpected format.'.N);
                      break;
                  }
              }
          } else {
              eecho(2,'«'.$opts['hostname'].'»: instance domain blocks fetched from API v1 were not good JSON.'.N);
              $idata['Threads']=null;
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch instance domain blocks from API v1: '.$buf['emsg'].'.'.N);
          $idata['Threads']=null;
      }
  } else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
      eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
      $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
      if ($buf['cont']!==false) {
          ckrl($buf['headers'],"«{$opts['hostname']}»");
          $buf=@json_decode($buf['cont'],true);
          if (is_array($buf)) {
              if (make(['uri', 'title', 'short_description', 'description', 'email', 'version', 'urls', 'stats', 'thumbnail', 'languages', 'registrations', 'approval_required', 'contact_account'],$buf)) {
                  eecho(1,'«'.$opts['hostname'].'»: got instance info from API v1 :-)'.N);
                  //print_r($buf);
                  $instanswered=true;
                  if (isset($buf['title']) && is_string($buf['title']) && !isempty($buf['title']))
                      $idata['Title']=trim($buf['title']);
                  // the short description comes from «short_description»: it used to be
                  // checked but then filled from «description» by mistake
                  if (isset($buf['short_description']) && is_string($buf['short_description']) && !isempty($buf['short_description']))
                      $idata['ShortDesc']=trim($buf['short_description']);
                  if (isset($buf['description']) && is_string($buf['description']) && !isempty($buf['description']))
                      $idata['LongDesc']=trim($buf['description']);
                  if (isset($buf['email']) && is_string($buf['email']))
                      $idata['Email']=trim($buf['email']);
                  // if nodeinfo did not respond, it could be mastodon < 3.0.0, and we would not have $idata['Version'] yet, so...
                  if (!isset($idata['Version']) && isset($buf['version']) && is_string($buf['version']) && !isempty($buf['version']))
                      $idata['Version']=trim($buf['version']);
                  // if nodeinfo responded we should already have these 2 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
                  if (isset($buf['stats']['user_count']) && is_int($buf['stats']['user_count']))
                      $idata['UserCount']=$buf['stats']['user_count'];
                  if (isset($buf['stats']['status_count']) && is_int($buf['stats']['status_count']))
                      $idata['StatusCount']=$buf['stats']['status_count'];
                  if (isset($buf['stats']['domain_count']) && is_int($buf['stats']['domain_count']))
                      $idata['DomainCount']=$buf['stats']['domain_count'];
                  if (isset($buf['thumbnail']) && is_string($buf['thumbnail']) && !isempty($buf['thumbnail'])) {
                      $idata['Thumb']=trim($buf['thumbnail']);
                      if (!@file_get_contents($idata['Thumb'],false,null,0,512)) $idata['Thumb']='unavailable';
                  }
                  if (isset($buf['max_toot_chars']) && is_int($buf['max_toot_chars']))
                      $idata['MaxTootChars']=$buf['max_toot_chars'];
                  elseif (isset($buf['configuration']['statuses']['max_characters']) && is_int($buf['configuration']['statuses']['max_characters']))
                      $idata['MaxTootChars']=$buf['configuration']['statuses']['max_characters'];
                  // if nodeinfo responded we should already have this 1 below, but nodeinfo could have not responded if instance ver. is < 3.0.0
                  if (isset($buf['registrations']) && is_bool($buf['registrations']))
                      $idata['RegOpen']=b2i($buf['registrations']);
                  if (isset($buf['approval_required']) && is_bool($buf['approval_required']))
                      $idata['RegReqApproval']=b2i($buf['approval_required']);
                  if (isset($buf['contact_account']['acct']) && is_string($buf['contact_account']['acct']) && !isempty($buf['contact_account']['acct']))
                      $idata['AdmAccount']=trim($buf['contact_account']['acct']);
                  if (isset($buf['contact_account']['display_name']) && is_string($buf['contact_account']['display_name']) && !isempty($buf['contact_account']['display_name']))
                      $idata['AdmDisplayName']=trim($buf['contact_account']['display_name']);
                  if (isset($buf['contact_account']['created_at']) && is_string($buf['contact_account']['created_at']) && ($ts=strtotime($buf['contact_account']['created_at']))!==false)
                      $idata['AdmCreatedAt']=$ts;
                  if (isset($buf['contact_account']['note']) && is_string($buf['contact_account']['note']) && !isempty($buf['contact_account']['note']))
                      $idata['AdmNote']=trim($buf['contact_account']['note']);
                  if (isset($buf['contact_account']['url']) && is_string($buf['contact_account']['url']) && !isempty($buf['contact_account']['url']))
                      $idata['AdmURL']=trim($buf['contact_account']['url']);
                  if (isset($buf['contact_account']['avatar']) && is_string($buf['contact_account']['avatar']) && !isempty($buf['contact_account']['avatar'])) {
                      $idata['AdmAvatar']=trim($buf['contact_account']['avatar']);
                      if (!@file_get_contents($idata['AdmAvatar'],false,null,0,512)) $idata['AdmAvatar']='unavailable';
                  }
                  if (isset($buf['contact_account']['header']) && is_string($buf['contact_account']['header']) && !isempty($buf['contact_account']['header']))
                      $idata['AdmHeader']=trim($buf['contact_account']['header']);
                  // next line: isset and is_string because it can be pleroma, that has a different format for "languages"
                  if (isset($buf['languages']) && is_array($buf['languages']) && isset($buf['languages'][0]) && is_string($buf['languages'][0]))
                      $idata['languages']=$buf['languages'];
                  if (isset($buf['rules']) && is_array($buf['rules'])) {
                      foreach ($buf['rules'] as $rule) {
                          if (isset($rule['id']) && is_string($rule['id']) && !isempty($rule['id']) && isset($rule['text']) && is_string($rule['text']) && !isempty($rule['text']))
                              $idata['rules'][$rule['id']]=$rule['text'];
                      }
                  }
                  // some falsing
                  if (isset($buf['pleroma'])) $idata['IsMastodon']=false;
                  if (isset($buf['version']) && is_string($buf['version']) && preg_match('#(pleroma|pixelfed)#i',$buf['version'])===1) $idata['IsMastodon']=false;
              } else {
                  eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 had unexpected format.'.N);
              }
          } else {
              eecho(2,'«'.$opts['hostname'].'»: instance info fetched from API v1 were not good JSON.'.N);
          }
      } else {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v1: '.$buf['emsg'].'.'.N);
      }
  }
  // Fetch the weekly activity series from API v1 ("instance/activity"). The request is only
  // made for instances flagged as Mastodon whose version string compares >= '2.1.2'.
  // NOTE(review): this is a plain string comparison, so a hypothetical "10.x" would sort
  // before "2.1.2" — confirm whether version_compare() should be used instead.
  if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') {
    eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
    $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
    if ($buf['cont']!==false) {
      ckrl($buf['headers'],"«{$opts['hostname']}»");
      // the decoded payload is kept apart from the response array, so that the
      // failure branch below never indexes into a non-array value
      $decoded=@json_decode($buf['cont'],true);
      if (is_array($decoded)) {
        eecho(1,'«'.$opts['hostname'].'»: got instance activity info from API v1 :-)'.N);
        $idata['activity']=$decoded;
      } else {
        // syntactically valid JSON that is not an object/array reports "no error",
        // hence the explicit fallback text
        $jsonerr=(json_last_error()===JSON_ERROR_NONE) ? 'unexpected format' : json_last_error_msg();
        eecho(2,'«'.$opts['hostname'].'»: instance activity info from API v1 were not good JSON: '.$jsonerr.'.'.N);
      }
    } else {
      eecho(2,'«'.$opts['hostname'].'»: could not fetch instance activity info from API v1: '.$buf['emsg'].'.'.N);
    }
  }
  // Fetch the trending tags from API v1. The request is only made for instances flagged as
  // Mastodon whose version string compares >= '3.0.0'; from '3.5.0' on the endpoint is
  // "/api/v1/trends/tags" instead of "/api/v1/trends".
  // NOTE(review): versions are compared as plain strings — confirm this is intended.
  if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3.0.0') {
    eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
    $url='https://'.$opts['hostname'].'/api/v1/trends';
    if ($idata['Version']>='3.5.0') $url.='/tags';
    $buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
    if ($buf['cont']!==false) {
      ckrl($buf['headers'],"«{$opts['hostname']}»");
      // the decoded payload is kept apart from the response array, so that the
      // failure branch below never indexes into a non-array value
      $decoded=@json_decode($buf['cont'],true);
      if (is_array($decoded)) {
        eecho(1,'«'.$opts['hostname'].'»: got instance tags trends info from API v1 :-)'.N);
        $idata['trends']=$decoded;
      } else {
        // syntactically valid JSON that is not an object/array reports "no error",
        // hence the explicit fallback text
        $jsonerr=(json_last_error()===JSON_ERROR_NONE) ? 'unexpected format' : json_last_error_msg();
        eecho(2,'«'.$opts['hostname'].'»: instance tags trends from API v1 were not good JSON: '.$jsonerr.'.'.N);
      }
    } else {
      eecho(2,'«'.$opts['hostname'].'»: could not fetch instance tags trends from API v1: '.$buf['emsg'].'.'.N);
    }
  }
  // finished fetching: normalise the flags, then fill in the bookkeeping values and pick
  // the statement verb (INSERT when there is no previous record, UPDATE otherwise)
  if (!is_null($idata['IsMastodon']))
    $idata['IsMastodon']=b2i($idata['IsMastodon']);
  $idata['WasLastCheckOk']=$instanswered ? 1 : 0;
  if (is_null($oidata)) {
    // no previous record for this host
    $query='INSERT INTO Instances SET ';
    $idata['InsertTS']=$now;
    $idata['TotChecks']=1;
    if (!$instanswered) {
      $idata['Thumb']='unavailable';
      $idata['AdmAvatar']='unavailable';
      $idata['OkChecks']=0;
    } else {
      $idata['FirstSeen']=$now;
      $idata['LastOkCheckTS']=$now;
      $idata['OkChecks']=1;
    }
  } else {
    $query='UPDATE Instances SET ';
    // "FirstSeen" is stamped on the first successful check only
    $idata['FirstSeen']=($instanswered && is_null($oidata['FirstSeen'])) ? $now : $oidata['FirstSeen'];
    $idata['LastOkCheckTS']=$instanswered ? $now : $oidata['LastOkCheckTS'];
    $idata['TotChecks']=$oidata['TotChecks']+1;
    $idata['OkChecks']=$oidata['OkChecks'];
    if ($instanswered)
      $idata['OkChecks']++;
    // values that are always carried over from the previous record
    foreach (['Priority','Visible','Noxious','NoxReason','NoxLastModTS','OurDesc','OurDescEN','LocalityID','OurLangsLock','GuestID','LastGuestEdit','InsertTS','RPos'] as $keptcol)
      $idata[$keptcol]=$oidata[$keptcol];
    if (!$instanswered) {
      // the instance did not answer: keep what was known about it, except for the
      // two image fields, which are marked as unavailable
      foreach (['IsMastodon','Title','ShortDesc','LongDesc','Email','Software','Version','UserCount','StatusCount','DomainCount','ActiveUsersMonth','ActiveUsersHalfYear','Thumb','RegOpen','RegReqApproval','MaxTootChars','AdmAccount','AdmDisplayName','AdmCreatedAt','AdmNote','AdmURL','AdmAvatar','AdmHeader','Threads'] as $keptcol) {
        if ($keptcol=='Thumb' || $keptcol=='AdmAvatar')
          $idata[$keptcol]='unavailable';
        else
          $idata[$keptcol]=$oidata[$keptcol];
      }
    }
  }
  // Turn $idata into the "col=value, ..." list of the statement started above, run it
  // (unless this is a dry run), work out the instance ID and record this check.
  $skipkeys=['ID','languages','rules','activity','trends','blocks'];// deliberately left out of the SET list
  $set=[];
  foreach ($idata as $key=>$val) {
    if (in_array($key,$skipkeys))
      continue;
    if (is_null($val)) {
      $set[]=$key.'=NULL';
      continue;
    }
    if (is_int($val)) {
      $set[]=$key.'='.truncn($val, 'Instances', $key, '«'.$opts['hostname'].'»');
      continue;
    }
    if (is_string($val)) {
      $set[]=$key.'=\''.myesc($link,truncs($val, 'Instances', $key, '«'.$opts['hostname'].'»')).'\'';
      continue;
    }
    mexit('$idata[\''.$key.'\'] value has unmanaged type, see code around line '.__LINE__.'.'.N,3);
  }
  $query.=implode(', ',$set);
  $isknown=!is_null($oidata);
  if ($isknown)
    $query.=' WHERE ID='.$oidata['ID'];
  eecho(1,'query: «'.$query.'».'.N);
  if (!$opts['dryrun']) {
    // an instance that is not in the database yet is stored only if it answered
    if (!$isknown && !$instanswered)
      mexit('«'.$opts['hostname'].'»: not inserting unknown instance because it did not respond; shutting down after '.ght(time()-$now,null,0).' :-)'.N,0);
    myq($link,$query,__LINE__);
  }
  if ($isknown) {
    $instid=$oidata['ID'];
  } else {
    $instid=$opts['dryrun'] ? 0 : mysqli_insert_id($link);
    notify('«<a href="viewinst.php?id='.$instid.'">'.$opts['hostname'].'</a>» is a NEW instance! :-)',1);
  }
  // from here we know for sure $instid
  if (!$opts['dryrun'])
    myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', '.$idata['WasLastCheckOk'].')',__LINE__);
  // Store the languages declared by the instance (InstLangs) and, unless they are locked
  // in the previous record, "our" languages (InstOurLangs), which are either detected or
  // copied from the declared ones.
  if ($instanswered && isset($idata['languages']) && is_array($idata['languages']) && count($idata['languages'])>0) {
    eecho(1,'«'.$opts['hostname'].'»: declared languages: '.implode(', ',$idata['languages']).N);
    if (!$opts['dryrun'])
      myq($link,'DELETE FROM InstLangs WHERE InstID='.$instid,__LINE__);
    $langids=getlangsidsarr($idata['languages'],$supplangs,$link,$opts['hostname'],$opts['dryrun'],__LINE__);
    if (!$opts['dryrun']) {
      $pos=0;
      foreach ($langids as $langid)
        myq($link,'INSERT INTO InstLangs SET InstID='.$instid.', LangID='.$langid.', Pos='.(++$pos),__LINE__);
    }
    $ourlocked=(!is_null($oidata) && $oidata['OurLangsLock']==1);
    if ($ourlocked) {
      eecho(1,'«'.$opts['hostname'].'»: won’t touch “our languages” because they are locked.'.N);
    } else {
      // we try to detect languages only if first declared language (the only one currently definable by admins)
      // is equal to the default "en", otherwise we assume it's been set to the actual mostly used language on the instance
      $dodetect=($idata['languages'][0]=='en');
      $idata['ourlanguages']=$dodetect ? get_instance_langs($opts['hostname']) : $idata['languages'];
      if (!$dodetect) {
        eecho(1,'«'.$opts['hostname'].'»: copied declared languages to detected languages.'.N);
      } elseif (count($idata['ourlanguages'])>0) {
        eecho(1,'«'.$opts['hostname'].'»: detected languages: '.implode(', ',$idata['ourlanguages']).N);
      } else {
        // nothing detected: fall back to the declared languages
        $idata['ourlanguages']=$idata['languages'];
        eecho(1,'«'.$opts['hostname'].'»: detected languages: NONE; copied declared languages to detected languages.'.N);
      }
      if (!$opts['dryrun'])
        myq($link,'DELETE FROM InstOurLangs WHERE InstID='.$instid,__LINE__);
      $langids=getlangsidsarr($idata['ourlanguages'],$supplangs,$link,$opts['hostname'],$opts['dryrun'],__LINE__);
      if (!$opts['dryrun']) {
        $pos=0;
        foreach ($langids as $langid)
          myq($link,'INSERT INTO InstOurLangs SET InstID='.$instid.', OurLangID='.$langid.', Pos='.(++$pos),__LINE__);
      }
    }
  }
  // Replace the stored activity rows of this instance with the freshly fetched ones.
  if ($instanswered && !$opts['dryrun'])
    myq($link,'DELETE FROM InstActivity WHERE InstID='.$instid,__LINE__);
  if (isset($idata['activity']) && is_array($idata['activity'])) {
    $actfields=['week','statuses','logins','registrations'];
    $pos=0;
    foreach ($idata['activity'] as $buf) {
      // these should all be int, but mastodon represents them as strings
      $wellformed=true;
      foreach ($actfields as $actfield) {
        if (isset($buf[$actfield]) && is_string($buf[$actfield]) && preg_match('/^\d+$/',$buf[$actfield])===1)
          continue;
        $wellformed=false;
        break;
      }
      if (!$wellformed)
        continue;
      $pos++;
      if (!$opts['dryrun'])
        myq($link,'INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES ('.$instid.', '.$buf['week'].', '.$buf['statuses'].', '.$buf['logins'].', '.$buf['registrations'].', '.$pos.')',__LINE__);
    }
  }
  // Replace the stored tag trends of this instance with the freshly fetched ones, ranked
  // by the number of accounts summed over each tag's history (highest first).
  if ($instanswered && !$opts['dryrun'])
    myq($link,'DELETE FROM InstTrends WHERE InstID='.$instid,__LINE__);
  if (isset($idata['trends']) && is_array($idata['trends'])) {
    $trends=[];
    foreach ($idata['trends'] as $buf) {
      // malformed entries are skipped as a whole: nothing is stored for them
      if (!is_array($buf) || !isset($buf['name'],$buf['url'],$buf['history']) || !is_string($buf['name']) || !is_string($buf['url']) || !is_array($buf['history']))
        continue;
      // "LastDay" is read from the first history row and ends up inside the SQL text,
      // so it has to be a plain string of digits
      if (!isset($buf['history'][0]['day']) || !is_string($buf['history'][0]['day']) || !ctype_digit($buf['history'][0]['day']))
        continue;
      $score=0;
      foreach ($buf['history'] as $row) {
        // below, we check for "stringness" because, they should be integers, but they are strings
        if (isset($row['day'],$row['uses'],$row['accounts']) && is_string($row['day']) && ctype_digit($row['day']) && is_string($row['uses']) && ctype_digit($row['uses']) && is_string($row['accounts']) && ctype_digit($row['accounts']))
          $score+=(int)$row['accounts'];
      }
      $trends[]=[
        'InstID'=>$instid,
        'LastDay'=>$buf['history'][0]['day'],
        'Name'=>$buf['name'],
        'URL'=>$buf['url'],
        'Pos'=>null,
        'trend'=>$score
      ];
    }
    // highest score first; a comparison sort keeps every entry, including those that
    // share the same score
    usort($trends,function ($a,$b) {
      if ($a['trend']==$b['trend']) return 0;
      return ($a['trend']>$b['trend']) ? -1 : 1;
    });
    $pos=0;
    foreach ($trends as $trend) {
      $pos++;
      $query='INSERT INTO InstTrends (InstID, LastDay, Name, URL, Pos) VALUES ('.$trend['InstID'].', \''.myesc($link,$trend['LastDay']).'\', \''.myesc($link, truncs($trend['Name'], 'InstTrends', 'Name', '«'.$opts['hostname'].'»')).'\', \''.myesc($link, truncs($trend['URL'], 'InstTrends', 'URL', '«'.$opts['hostname'].'»')).'\', '.$pos.')';
      if (!$opts['dryrun'])
        myq($link,$query,__LINE__);
    }
  }
  // Rules: when a rules list is available it is sorted by key and, unless this is a dry
  // run, it replaces the rows stored for this instance.
  $hasrules=(isset($idata['rules']) && is_array($idata['rules']));
  if ($hasrules)
    ksort($idata['rules']);
  if ($hasrules && !$opts['dryrun']) {
    myq($link,'DELETE FROM InstRules WHERE InstID='.$instid,__LINE__);
    foreach ($idata['rules'] as $rule) {
      $ruletext=myesc($link, truncs($rule, 'InstRules', 'Text', '«'.$opts['hostname'].'»'));
      myq($link,'INSERT INTO InstRules SET InstID='.$instid.', Text=\''.$ruletext.'\'',__LINE__);
    }
  }
  // Blocks: same replace-all approach, only when the instance answered and a blocks
  // list is set.
  if ($instanswered && !$opts['dryrun'] && isset($idata['blocks'])) {
    myq($link,'DELETE FROM InstBlocks WHERE InstID='.$instid,__LINE__);
    foreach ($idata['blocks'] as $block) {
      // a missing comment is stored as SQL NULL, anything else as a quoted string
      $blockcomm=is_null($block['comm']) ? 'NULL' : "'".myesc($link, truncs($block['comm'], 'InstBlocks', 'Comment', '«'.$opts['hostname'].'»'))."'";
      $blockdom=myesc($link, truncs($block['dom'], 'InstBlocks', 'Domain', '«'.$opts['hostname'].'»'));
      $blocksev=myesc($link, truncs($block['sev'], 'InstBlocks', 'Severity', '«'.$opts['hostname'].'»'));
      myq($link,'INSERT INTO InstBlocks SET InstID='.$instid.', Domain=\''.$blockdom.'\', Severity=\''.$blocksev.'\', Comment='.$blockcomm,__LINE__);
    }
  }
  // Announce a newly found Mastodon instance by posting a status through the bot account
  // ($opts['bothost'] / $opts['bottoken']). This happens only when tooting is enabled, the
  // instance answered, it is Mastodon and "FirstSeen" was stamped during this very run.
  if ($opts['_sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) {
    $lc=['decimal_point'=>'.', 'thousands_sep'=>','];
    gettlds();
    eecho(0,'«'.$opts['hostname'].'»: this instance is new, trying to send an announcement toot about it...'.N);
    $endpoint='https://'.$opts['bothost'].'/api/v1/statuses';
    $toot='A new Mastodon instance, https://'.$opts['hostname'].', has been found by the crawler at https://mastodon.help'.N.N.'Name: ';
    $toot.=is_null($idata['Title']) ? 'unspecified' : $idata['Title'];
    $toot.=N.N.'Languages: ';
    // declared languages win unless the first one is just the default "en"
    if (array_key_exists('languages',$idata) && is_array($idata['languages']) && count($idata['languages'])>0 && $idata['languages'][0]!='en')
      $toot.=implode(', ',$idata['languages']);
    elseif (array_key_exists('ourlanguages',$idata) && is_array($idata['ourlanguages']) && count($idata['ourlanguages'])>0)
      $toot.=implode(', ',$idata['ourlanguages']);
    else
      $toot.='unspecified/undetectable';
    $toot.=N.N.'Users: ';
    $toot.=is_null($idata['UserCount']) ? 'unspecified' : fnum($idata['UserCount'],$lc);
    $toot.=N.N.'Max. characters per post: ';
    $toot.=(is_null($idata['MaxTootChars']) || $idata['MaxTootChars']==500) ? '500' : fnum($idata['MaxTootChars'],$lc);
    $toot.=N.N.'Registrations: ';
    if (is_null($idata['RegOpen'])) {
      $toot.='unspecified';
    } elseif ($idata['RegOpen']==1) {
      $toot.='open';
      // an unknown (null) approval setting must not be announced as "don’t require"
      if (!is_null($idata['RegReqApproval'])) {
        if ($idata['RegReqApproval']==1)
          $toot.=' (require admins approval)';
        elseif ($idata['RegReqApproval']==0)
          $toot.=' (don’t require admins approval)';
      }
    } elseif ($idata['RegOpen']==0) {
      $toot.='closed';
    }
    $toot.=N.N.'Short description: ';
    $toot.=is_null($idata['ShortDesc']) ? 'unspecified' : $idata['ShortDesc'];
    $toot.=N.N.'Long description: ';
    $toot.=is_null($idata['LongDesc']) ? 'unspecified' : html2text($idata['LongDesc']);
    if (postlen($toot)>$opts['botmaxchars']) {
      // drop trailing words until the text plus the 13 characters of the marker fits
      while (postlen($toot)+13>$opts['botmaxchars']) {
        $shorter=preg_replace('#\s+(\S|\n)+$#u','',$toot);
        if (!is_string($shorter) || $shorter===$toot) {
          // nothing left to cut word-wise (or PCRE failed): cut by characters once
          // and leave the loop, which would otherwise never end
          $toot=mb_substr($toot,0,max(0,$opts['botmaxchars']-13),'UTF-8');
          break;
        }
        $toot=$shorter;
      }
      $toot.=' [continues…]';
    }
    $context=[
      'http'=>[
        'header'=>'Authorization: Bearer '.$opts['bottoken'].RN.
          'Idempotency-Key: '.md5($opts['bothost'].$now.rand(1000,9999)).RN.
          'Content-type: application/x-www-form-urlencoded'.RN,
        'method'=>'POST',
        'content'=>http_build_query([
          'status'=>$toot,
          'visibility'=>'public',
          'language'=>'en'
        ]),
        'timeout'=>$opts['conntimeout']
      ]
    ];
    $context=stream_context_create($context);
    $res=@file_get_contents($endpoint,false,$context);
    if ($res===false) {
      eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$opts['bothost'].'».'.N);
    } else {
      $httprsc=gethttpcode($http_response_header);
      $res=@json_decode($res,true);
      if ($httprsc!=200)
        eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N);
      elseif (!is_array($res))// json_decode() yields null (not false) on undecodable input
        eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has not returned valid JSON data.'.N);
      else
        eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$opts['bothost'].'» :-)'.N);
    }
  }
  // Mirror the instance's profile directory into the Users / UsersFields tables. This is
  // attempted only when the instance answered, user fetching is enabled, and the instance is
  // flagged as Mastodon with a version string that compares >= '4.0.0'.
  if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
    eecho(0,'«'.$opts['hostname'].'»: trying to fetch users info from directory API...'.N);
    $users=[];// array of users in this instance's directory
    $chunk=0;
    $limit=40;
    $end=false;
    $dircomplete=true;// turns false when a chunk had to be given up, i.e. $users may lack profiles
    $reqkeys=['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'];
    while (!$end) {
      $offset=$chunk*$limit;
      for ($att=0; $att<$opts['udiratts']; $att++) {
        $lastatt=($att==$opts['udiratts']-1);
        eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
        $buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
        if ($buf['cont']===false) {
          eecho(2,'«'.$opts['hostname'].'»: could not fetch chunk '.($chunk+1).' of users info from directory API: '.$buf['emsg'].N);
          if ($lastatt) {
            eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
            $end=true;
            $dircomplete=false;
          } else {
            eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
            sleep($opts['udirfailst']);
          }
          continue;
        }
        ckrl($buf['headers'],"«{$opts['hostname']}»");
        eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' :-)'.N);
        $buf=@json_decode($buf['cont'],true);
        if (!is_array($buf)) {
          eecho(2,'«'.$opts['hostname'].'»: ... but the chunk was not good JSON :-('.N);
          if ($lastatt) {
            $end=true;
            $dircomplete=false;
          }
          continue;
        }
        // a chunk shorter than the page size is the last one
        if (count($buf)<$limit) $end=true;
        foreach ($buf as $user) {
          if (!is_array($user) || !make($reqkeys,$user)) {
            eecho(2,'«'.$opts['hostname'].'»: user record missed some required keys :-('.N);
            continue;
          }
          eecho(0,'«'.$opts['hostname'].'»: working on user «'.$user['username'].'»...'.N);
          // Two per-user lookups were disabled because they take too long on instances with
          // many users: fetching the profile page to work out a missing "noindex", and
          // fetching the user's featured tags. That's why "$idata['Version']>='4.0.0'" is a
          // condition of the root "if" statement and "noindex" is among the required keys
          // (ver. >= 4.0.0 do report "noindex" for account entities).
          $snote=strip_tags((string)$user['note']);
          // hashtags in the profile note override the reported "noindex" flag
          if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
          if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
          $user['tags']=null;
          if (!is_null($user['created_at'])) $user['created_at']=strtotime($user['created_at']);
          if (!is_null($user['last_status_at'])) $user['last_status_at']=datetots($user['last_status_at']);
          $users[$user['id']]=$user;
        }
        break;
      }
      $chunk++;
    }
    $totusers=count($users);
    eecho(1,'«'.$opts['hostname'].'»: got '.$totusers.' users’ profiles.'.N);
    if ($totusers>0) {
      eecho(1,'«'.$opts['hostname'].'»: inserting/updating '.$totusers.' users’ profiles in the database.'.N);
      $exusers=[];// array of this instance's users already existing in the db
      $res=myq($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid,__LINE__);
      while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
      foreach ($users as $locid=>$user) {
        $uctx='«'.$opts['hostname'].'»: «'.$user['username'].'»';
        // the column list is built for every user, whatever happens to it afterwards
        $assign=[
          'InstID='.$instid,
          'host='.myv($link,$opts['hostname']),
          'locid='.myv($link,$user['id']),
          'username='.myv($link,truncs($user['username'], 'Users', 'username', $uctx)),
          'display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', $uctx)),
          'locked='.myv($link,$user['locked']),
          'bot='.myv($link,$user['bot']),
          'created_at='.myv($link,$user['created_at']),
          'note='.myv($link,truncs($user['note'], 'Users', 'note', $uctx)),
          'url='.myv($link,truncs($user['url'], 'Users', 'url', $uctx)),
          'avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', $uctx)),
          'header='.myv($link,truncs($user['header'], 'Users', 'header', $uctx)),
          'statuses_count='.myv($link,$user['statuses_count']),
          'last_status_at='.myv($link,$user['last_status_at']),
          'tags='.myv($link,truncs($user['tags'], 'Users', 'tags', $uctx))
        ];
        $query='SET '.implode(', ',$assign);
        $uid=0;
        if (!array_key_exists($user['id'],$exusers)) {
          if (!$user['noindex']) {
            eecho(0,'«'.$opts['hostname'].'»: inserting new user «'.$user['username'].'»...'.N);
            $query='INSERT INTO Users '.$query;
            if (!$opts['dryrun']) {
              myq($link,$query,__LINE__);
              $uid=mysqli_insert_id($link);
            }
          } else {
            eecho(0,'«'.$opts['hostname'].'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
          }
        } else {
          $uid=$exusers[$locid]['ID'];
          if (!$user['noindex']) {
            eecho(0,'«'.$opts['hostname'].'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
            $query='UPDATE Users '.$query.' WHERE ID='.$uid;
          } else {
            eecho(0,'«'.$opts['hostname'].'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
            $query='DELETE FROM Users WHERE ID='.$uid;
          }
          if (!$opts['dryrun']) {
            myq($link,$query,__LINE__);
            // the fields are always wiped; they get re-inserted below for indexable users
            myq($link,'DELETE FROM UsersFields WHERE UserID='.$uid,__LINE__);
          }
        }
        if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
          eecho(0,'«'.$opts['hostname'].'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
          foreach ($user['fields'] as $field) {
            if (!is_array($field)) continue;
            $verified=isset($field['verified_at']) ? 1 : 0;
            $fname=truncs(isset($field['name']) ? $field['name'] : null,'UsersFields','name',$uctx);
            $fvalue=truncs(isset($field['value']) ? $field['value'] : null,'UsersFields','value',$uctx);
            if (!$opts['dryrun']) myq($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$fname).', value='.myv($link,$fvalue).', verified='.$verified,__LINE__);
          }
        }
      }
      if ($dircomplete) {
        eecho(1,'«'.$opts['hostname'].'»: deleting possible users’ profiles which are in the database but no longer in the directory.'.N);
        foreach ($exusers as $locid=>$exuser) {
          if (!array_key_exists($locid,$users)) {
            eecho(0,'«'.$opts['hostname'].'»: user «'.$exuser['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
            if (!$opts['dryrun']) {
              myq($link,'DELETE FROM Users WHERE ID='.$exuser['ID'],__LINE__);
              myq($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID'],__LINE__);
            }
          }
        }
      } else {
        // a profile missing from a partial listing says nothing about opting out
        eecho(2,'«'.$opts['hostname'].'»: the directory could not be fetched completely, NOT deleting users’ profiles that are missing from it.'.N);
      }
    }
  }
  mexit('«'.$opts['hostname'].'»: done in '.ght(time()-$now,null,0).' :-)'.N,0);
  989. // functions
  /**
   * Runs a query; on failure the script is ended through mexit() with exit code 3.
   *
   * @param mixed $link connection passed on to mysqli_query()
   * @param string $query SQL text
   * @param int $line line number of the caller, reported in the failure message
   * @return mixed whatever mysqli_query() returned
   */
  function myq(&$link,$query,$line) {
    $failprefix='query «'.$query.'» on line '.$line.' failed: ';
    try {
      $result=mysqli_query($link,$query);
    }
    catch (Exception $e) {
      mexit($failprefix.$e->getMessage().' ('.$e->getCode().').'.N,3);
    }
    // for php versions < 8, which seem to not catch mysql exceptions
    if ($result!==false)
      return($result);
    mexit($failprefix.mysqli_error($link).' ('.mysqli_errno($link).').'.N,3);
  }
  /**
   * Prints a log line made of a timestamp (with the fractional part of the second), the
   * label of the level and the message. Lines below $opts['minmsgimplev'] are dropped;
   * levels below 2 go to standard output, the others to STDERR.
   *
   * @param int $lev message level, used as index into $msglevs
   * @param string $msg message text (printed as is, no newline is added)
   */
  function eecho($lev,$msg) {
    global $opts, $msglevs;
    list($fraction,$seconds)=explode(' ',microtime(false));
    $stamp=date('Y-m-d H:i:s',$seconds).'.'.substr($fraction,2);
    $line=$stamp.' '.$msglevs[$lev].': '.$msg;
    if ($lev<$opts['minmsgimplev'])
      return;
    if ($lev>=2)
      fwrite(STDERR,$line);
    else
      echo($line);
  }
  /**
   * Closes the global database link (when there is one), logs the message and ends the script.
   *
   * @param string $msg closing message; logged at level 3 when $code is not 0, at level 1 otherwise
   * @param int $code exit code
   */
  function mexit($msg,$code) {
    global $link;
    if (isset($link) && $link!==false)
      mysqli_close($link);
    eecho(($code!=0) ? 3 : 1,$msg);
    exit($code);
  }
  /**
   * Turns the listed elements of an array into numbers by adding 0 to them; null
   * elements are left alone.
   *
   * @param array $keys keys to convert
   * @param array $arr array to modify in place
   */
  function setint($keys,&$arr) {
    foreach ($keys as $k) {
      if ($arr[$k]===null)
        continue;
      $arr[$k]+=0;
    }
  }
  /**
   * Tells whether a value exceeds what $tables declares for a column: the length limit
   * for strings, the min/max range for integers. Any other type yields false.
   *
   * @param mixed $val value to check
   * @param string $tab table name (lowercased when $iswin is set)
   * @param string $col column name
   * @return bool
   */
  function willtrunc($val,$tab,$col) {
    global $tables, $iswin;
    if ($iswin) $tab=strtolower($tab);
    if (is_string($val))
      return(mb_strlen($val,'UTF-8')>$tables[$tab][$col]);
    if (is_int($val)) {
      $range=$tables[$tab][$col];
      return($val<$range['min'] || $val>$range['max']);
    }
    return(false);
  }
  /**
   * Makes a string fit the length that $tables declares for a column: when too long its
   * tags are stripped, and when still too long it is cut and closed with an ellipsis.
   * A notification (level 2) is issued whenever the string had to be changed.
   *
   * @param string|null $str input string; null is returned untouched
   * @param string $tab table name (lowercased when $iswin is set)
   * @param string $col column name
   * @param string $ctx prefix for the notification text
   * @return string|null
   */
  function truncs($str,$tab,$col,$ctx) {
    global $tables, $iswin;
    if (is_null($str)) return(null);
    if ($iswin)
      $tab=strtolower($tab);
    $maxlen=$tables[$tab][$col];
    $out=$str;
    if (mb_strlen($out,'UTF-8')>$maxlen)
      $out=strip_tags($out);
    if (mb_strlen($out,'UTF-8')>$maxlen)
      $out=mb_substr($out,0,$maxlen-1,'UTF-8').'…';
    if ($out!=$str)
      notify($ctx.': had to shrink input string to '.$maxlen.' chars to be able to insert it into «'.$col.'» column in «'.$tab.'» table.',2);
    return $out;
  }
  /**
   * Clamps a number to the min/max range that $tables declares for a column, issuing a
   * notification (level 2) when it had to be changed. Non-numeric input yields 0 and a
   * level 3 notification.
   *
   * @param mixed $num value to clamp
   * @param string $tab table name (lowercased when $iswin is set)
   * @param string $col column name
   * @param string $ctx prefix for the notification text
   * @return mixed the input when it is in range, otherwise the violated bound (or 0)
   */
  function truncn($num,$tab,$col,$ctx) {
    global $tables, $iswin;
    if ($iswin)
      $tab=strtolower($tab);
    if (!is_numeric($num)) {
      notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
      return 0;
    }
    $range=$tables[$tab][$col];
    if ($num>$range['max']) {
      notify($ctx.': had to ceil «'.$num.'» to «'.$range['max'].'», ie the maximum value it can have in column «'.$col.'» of table «'.$tab.'».',2);
      return $range['max'];
    }
    if ($num<$range['min']) {
      notify($ctx.': had to floor «'.$num.'» to «'.$range['min'].'», ie the minimum value it can have in column «'.$col.'» of table «'.$tab.'»).',2);
      return $range['min'];
    }
    return $num;
  }
  /**
   * Replaces carriage returns and line feeds with the two-character sequences \r and \n,
   * so that the text stays on a single line.
   *
   * @param string $str
   * @return string
   */
  function nocrnl($str) {
    $visible=["\r"=>'\\r', "\n"=>'\\n'];
    return(strtr($str,$visible));
  }
  /**
   * Maps a truthy value to 1 and a falsy one to 0.
   *
   * @param mixed $bool
   * @return int
   */
  function b2i($bool) {
    if ($bool)
      return 1;
    return 0;
  }
  /**
   * Tells whether a string is empty or made of whitespace only.
   *
   * @param string $str
   * @return bool
   */
  function isempty($str) {
    return (preg_match('/^\s*$/',$str)===1);
  }
  /**
   * Stores a notification in the Notifications table (skipped on dry runs) and, by
   * default, also logs it through eecho() with tags stripped.
   *
   * @param string $msg notification text; cut to the length $tables declares for the column
   * @param int $lev severity, to be thought of as the "$lev" param of eecho(): 0=debug, 1=info, 2=warning, 3=error
   * @param bool $doecho whether to log the notification as well
   */
  function notify($msg,$lev,$doecho=true) {
    global $link, $tables, $iswin, $opts;
    if ($doecho)
      eecho($lev,'*notification*: '.mb_lcfirst(strip_tags($msg)).N);
    if ($opts['dryrun'])
      return;
    // the key of the table in $tables is lowercase when $iswin is set
    $tab=$iswin ? 'notifications' : 'Notifications';
    $text=myesc($link,mb_substr($msg,0,$tables[$tab]['Notification'],'UTF-8'));
    myq($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime, Seen, Deleted) VALUES (NULL, \''.$text.'\', '.$lev.', \''.microtime(true).'\', 0, 0)',__LINE__);
  }
  /**
   * Sorts an array of arrays by the value each sub-array holds under $key, keeping the
   * outer keys. Rows sharing the same value are all kept, in their original relative order.
   *
   * @param array $arr array to sort in place
   * @param int|string $key key of the sub-arrays to sort by
   * @param bool $rev false for ascending order, true for descending
   */
  function mdasortbykey(&$arr,$key,$rev=false) {
    // decorate every row with its sort value and original position; the position breaks
    // ties, so the result does not depend on the stability of the sort
    $rows=[];
    $seq=0;
    foreach ($arr as $akey=>$subarr)
      $rows[]=[$subarr[$key],$seq++,$akey,$subarr];
    usort($rows,function ($a,$b) use ($rev) {
      if ($a[0]==$b[0])
        return $a[1]-$b[1];
      $order=($a[0]<$b[0]) ? -1 : 1;
      return $rev ? -$order : $order;
    });
    $arr=[];
    foreach ($rows as $row)
      $arr[$row[2]]=$row[3];
  }
  /**
   * "Multi array_key_exists": checks that every given key exists in an array.
   *
   * @param array $keys Keys to look for
   * @param array $arr Array to look into (taken by reference, not modified)
   * @return bool true if all the keys exist (also when $keys is empty), false as soon as one is missing
   */
  function make($keys,&$arr) {
    $allthere=true;
    foreach ($keys as $key) {
      if (array_key_exists($key,$arr)) {
        continue;
      }
      $allthere=false;
      break;
    }
    return $allthere;
  }
  /**
   * Renders a PHP value as a literal ready to be put into an SQL statement.
   *
   * @param mysqli $link Connection handed to mysqli_real_escape_string()
   * @param mixed $var Value to render
   * @return string «NULL» for null and for blank values, «1» / «0» for booleans, otherwise the escaped value between single quotes
   */
  function myv(&$link,$var) {
    if (is_null($var)) {
      return 'NULL';
    }
    if (is_bool($var)) {
      return $var ? '1' : '0';
    }
    // whitespace-only values are stored as NULL too
    if (trim($var)=='') {
      return 'NULL';
    }
    return '\''.mysqli_real_escape_string($link,$var).'\'';
  }
  /**
   * Converts a «year-month-day» date into the Unix timestamp of its
   * midnight, as computed by mktime().
   *
   * @param string $date Date with dash-separated year, month and day (e.g.: "2020-01-02")
   * @return int|false Whatever mktime() returns for 00:00:00 of that day
   */
  function datetots($date) {
    $parts=explode('-',$date);
    $year=$parts[0];
    $month=$parts[1];
    $day=$parts[2];
    return mktime(0,0,0,$month,$day,$year);
  }
  /**
   * Checks the rate-limit data of an HTTP response through ckratelimit()
   * and sleeps when no request is left.
   *
   * @param string $headers Raw response headers, one per CRLF-terminated line
   * @param string $ctx Prefix for the messages that get echoed
   */
  function ckrl($headers,$ctx) {
    $lines=explode("\r\n",$headers);
    $res=ckratelimit($lines);
    if (!$res['ok']) {
      eecho(2,$ctx.': ckratelimit: '.$res['error'].".\n");
      return;
    }
    if ($res['remaining']==0) {
      $secs=$res['sleep'];
      eecho(2,$ctx.': ckratelimit: x-ratelimit-remaining is 0, sleeping for '.ght($secs).' (until '.date('c',time()+$secs).") ...\n");
      sleep($secs);
    }
  }
  1134. /** <LANGUAGE MANAGEMENT> */
  /**
   * Performs a GET request against the Mastodon API of a host.
   *
   * The response headers are handed to ckrl() before the body is decoded.
   *
   * @param string $host Host to be called (e.g.: "mastodon.bida.im")
   * @param string $path API path (e.g.: "/api/v1/timelines/public?local=true")
   * @return mixed The JSON response as decoded by json_decode (objects as associative arrays), or NULL if the call fails
   */
  function get_api($host, $path) {
    global $opts;
    $reply = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
    if ($reply['cont']===false) {
      return NULL;
    }
    ckrl($reply['headers'],"«{$host}»");
    return json_decode($reply['cont'], true);
  }
  /**
   * Returns the languages recognized for the toot that got passed to it,
   * each with its probability.
   *
   * @param mixed $toot The toot to be checked, as returned by the API
   * @return array Associative array mapping language codes (without the part after the first "-") to probabilities
   */
  function get_toot_languages($toot) {
    $istoot = is_array($toot);
    $declared = ($istoot && array_key_exists('language',$toot)) ? $toot['language'] : NULL;
    $candidates = [];
    if ($declared !== NULL) {
      // the language is explicitly set in the toot, so use that
      $candidates[$declared] = 1;
    } elseif ($istoot && array_key_exists('content',$toot)) {
      // the language is not explicitly set in the toot, so try and recognize it
      $detector = new Language;
      $candidates = $detector->detect(strip_tags($toot['content']))->bestResults()->close();
    }
    // group derived languages under their base language code (e.g.: "zh-CN" under "zh"),
    // keeping the highest probability found for each group
    $merged = [];
    foreach ($candidates as $code => $prob) {
      $base = explode("-", $code)[0];
      $merged[$base] = array_key_exists($base, $merged) ? max($merged[$base], $prob) : $prob;
    }
    return $merged;
  }
  /**
   * Given the language probabilities of every toot, calculates the average
   * probability of each language.
   *
   * The sum of the weights of a language is divided by the total number of
   * toots, so toots in which a language is absent count as 0 for it.
   *
   * @param array $detected_langs Array of mappings between language and probability, one per toot
   * @return array Mapping between language and average probability
   */
  function summary($detected_langs) {
    $totals = [];
    foreach ($detected_langs as $langs) {
      foreach ($langs as $code => $weight) {
        $sofar = array_key_exists($code, $totals) ? $totals[$code] : 0;
        $totals[$code] = $sofar + $weight;
      }
    }
    $averages = [];
    foreach ($totals as $code => $total) {
      $averages[$code] = $total / count($detected_langs);
    }
    return $averages;
  }
  /**
   * Helper function for usort: compares two arrays by their first element,
   * so that the biggest first elements get sorted first (descending order).
   *
   * @param array $entry1 First array to be compared
   * @param array $entry2 Second array to be compared
   * @return int 1 if $entry1[0] is less than $entry2[0], 0 if they are equal, -1 otherwise
   */
  function sort_weights($entry1, $entry2) {
    $first = $entry1[0];
    $second = $entry2[0];
    if ($first < $second) {
      return 1;
    }
    return ($first == $second) ? 0 : -1;
  }
  /**
   * Given a language mapping, returns the list of probable languages.
   *
   * Languages are taken from the most to the least probable one; the list
   * stops at the first language whose probability is less than two thirds
   * of the probability of the language preceding it.
   *
   * @param array $summary Map between language and probability
   * @return string[] List of probable languages, most probable first
   */
  function get_languages($summary) {
    $ranked = [];
    foreach ($summary as $code => $weight) {
      array_push($ranked, [$weight, $code]);
    }
    usort($ranked, 'sort_weights');
    $picked = [];
    $previous = 0;
    foreach ($ranked as $pair) {
      $weight = $pair[0];
      if ($weight < $previous * 2 / 3) {
        break;
      }
      $picked[] = $pair[1];
      $previous = $weight;
    }
    return $picked;
  }
  /**
   * Returns a list of probable languages for the given instance, worked out
   * from the toots of its local public timeline.
   *
   * @param string $host Instance’s hostname (e.g.: "mastodon.bida.im")
   * @return string[] List of probable languages (5 at most), empty if the timeline could not be fetched or is empty
   */
  function get_instance_langs($host) {
    global $opts;
    $toots = get_api($host, '/api/v1/timelines/public?local=true&limit='.$opts['ldtoots']);
    if ($toots == NULL) {
      return [];
    }
    $pertoot = array_map('get_toot_languages', $toots);
    $languages = get_languages(summary($pertoot));
    // keep the first five languages only
    return array_slice($languages, 0, 5);
  }
  /**
   * Returns the ID of the record of table Languages matching a language
   * code, adding the record first when the table does not have it yet.
   *
   * The code is canonicalized with locale_canonicalize(); a blank $lang, or
   * a canonical code containing «__», is replaced by «en» after a notification.
   *
   * @param mysqli $link Database connection
   * @param string $lang Language code to look up
   * @param array $supplangs Supported languages; its keys are used as display locales and as suffixes of the «Name…» columns
   * @param string $hostname Hostname shown in notifications
   * @param bool $dryrun When true nothing gets inserted
   * @param int $line Line number handed to myq()
   * @return mixed ID of the existing or newly inserted record, 0 when a record should have been inserted but $dryrun is set
   */
  function getlangid(&$link,$lang,&$supplangs,$hostname,$dryrun,$line) {
    $code=locale_canonicalize($lang);
    $blank=(preg_match('/^\s*$/',$lang)===1);
    if ($blank || preg_match('/__/',$code)===1) {
      notify('«'.$hostname.'»: «'.$lang.'» is not a valid language code, falling back to default «en».',2,true);
      $code='en';
    }
    $res=myq($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$code).'\'',$line);
    $nrows=mysqli_num_rows($res);
    if ($nrows!=0) {
      // already known: use the first matching record, complaining about duplicates
      if ($nrows>1) {
        notify('In table Languages there are '.$nrows.' records with Code = «'.$code.'» :-(',2,true);
      }
      $row=mysqli_fetch_assoc($res);
      return($row['ID']);
    }
    // unknown language: build an INSERT with its name in its own language
    // and in every supported language
    $ctx='«'.$hostname.'»';
    $code=myesc($link,truncs($code,'Languages','Code',$ctx));
    $NameOrig=myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$code)),'Languages','NameOrig',$ctx));
    $cols=['ID','Code','NameOrig'];
    $vals=['NULL','\''.$code.'\'','\''.$NameOrig.'\''];
    foreach ($supplangs as $key=>$val) {
      $col='Name'.strtoupper($key);
      $name=myesc($link,truncs(mb_ucfirst(locale_get_display_name($code,$key)),'Languages',$col,$ctx));
      $cols[]=$col;
      $vals[]='\''.$name.'\'';
    }
    $q='INSERT INTO Languages ('.implode(', ',$cols).') VALUES ('.implode(', ',$vals).')';
    if ($dryrun) {
      return(0);
    }
    myq($link,$q,$line);
    return(mysqli_insert_id($link));
  }
  /**
   * Resolves a list of language codes into the IDs of their records in
   * table Languages, by calling getlangid() on each of them.
   *
   * @param array $langs Language codes
   * @param array $supplangs Supported languages, handed over to getlangid()
   * @param mysqli $link Database connection
   * @param string $hostname Hostname shown in notifications
   * @param bool $dryrun When true nothing gets inserted
   * @param int $line Line number handed to myq()
   * @return array The IDs without duplicates, as returned by array_unique()
   */
  function getlangsidsarr(&$langs,&$supplangs,&$link,$hostname,$dryrun,$line) {
    $ids=[];
    foreach ($langs as $lang) {
      $ids[]=getlangid($link,$lang,$supplangs,$hostname,$dryrun,$line);
    }
    return(array_unique($ids));
  }
  /**
   * Blocks until a TCP connection to www.google.com, port 80, can be opened.
   *
   * Every failed attempt (1 second timeout) is echoed as a warning and
   * followed by a pause; once the connection succeeds after at least one
   * failure, an informational message is echoed too.
   */
  function waituntilonline() {
    $url='www.google.com';
    // one value drives both the pause and the message announcing it: the
    // message used to claim 10 seconds while the pause was 5
    $wait=5;
    $gotoff=false;
    while (false===($f=@fsockopen($url,80,$errno,$errstr,1))) {
      $gotoff=true;
      eecho(2,'it seems we are offline ('.$errno.': '.$errstr.'), waiting for '.$wait.' seconds before retrying...'.N);
      sleep($wait);
    }
    fclose($f);
    if ($gotoff) eecho(1,'it seems we are back online! :-)'.N);
  }
  /**
   * Callback used by html2text() to render an HTML heading as text.
   *
   * @param array $arr Matches of the heading pattern; element 4 is the heading’s content
   * @return string «§ », the content in upper case and two newlines
   */
  function headcb($arr) {
    $title=mb_strtoupper($arr[4]);
    return '§ '.$title."\n\n";
  }
  /**
   * Converts a fragment of HTML into plain text.
   *
   * List items get a bullet, headings are rendered by headcb(), block
   * elements and line breaks become newlines, remaining tags are stripped,
   * entities are decoded and whitespace is normalized.
   *
   * @param string $html HTML to convert
   * @param bool $keeplinks When true every link is kept as « [link: URL] »
   * @return string The text, trimmed and terminated by a single newline
   */
  function html2text($html,$keeplinks=false) {
    $html=preg_replace('#\r#','',$html);
    if ($keeplinks) $html=preg_replace('#<a\s.*href="(.*)".*>#iU',' [link: $1] ',$html);
    $html=preg_replace('#<li>#i',' • ',$html);
    $html=preg_replace_callback('#(<h)(\d)(>)(.*)(</h\d>)#iU','headcb',$html);
    $html=preg_replace('#<p>|<div>|<ul>|<ol>#i',"\n\n",$html);
    $html=preg_replace('#</p>|</div>|</ul>|</ol>|</li>#i',"\n\n",$html);
    // match every spelling of the line break («<br>», «<br/>», «<br />»):
    // «<br/>» used to be missed, then removed by strip_tags(), gluing
    // together the text before and after it
    $html=preg_replace('#<br\s*/?>#i',"\n",$html);
    $html=strip_tags($html);
    $html=html_entity_decode($html,ENT_QUOTES,'UTF-8');
    // drop whitespace at the end of lines, squeeze runs of spaces and turn
    // every run of newlines into exactly one empty line
    $html=preg_replace('#\s+\n#',"\n",$html);
    $html=preg_replace('# +#',' ',$html);
    $html=preg_replace('#\n+#',"\n\n",$html);
    $html=trim($html)."\n";
    return $html;
  }
  1360. ?>