From 924663e865fcd8aee10b18a40df46473f3c5c4df Mon Sep 17 00:00:00 2001 From: pezcurrel Date: Wed, 21 Aug 2024 10:24:41 +0200 Subject: [PATCH] Added socks5 proxy support; minor changes --- web/clitools/getinstinfo.php | 126 +++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 56 deletions(-) diff --git a/web/clitools/getinstinfo.php b/web/clitools/getinstinfo.php index 150ba52..b8076c2 100755 --- a/web/clitools/getinstinfo.php +++ b/web/clitools/getinstinfo.php @@ -57,13 +57,15 @@ $opts=[ 'functimeout'=>20, 'ldtoots'=>40,// number of toots to check with the automatic language detection function 'dryrun'=>false, - 'sendtoot'=>true, 'fetchusers'=>false, 'udiratts'=>5, 'udirfailst'=>90, 'minmsgimplev'=>1, 'bothost'=>null, - 'bottoken'=>null + 'bottoken'=>null, + 'botmaxchars'=>null, + '_sendtoot'=>false, + 'proxy'=>null ]; $msglevs=['Debug', 'Info', 'Warning', 'Error', 'None']; @@ -106,10 +108,6 @@ OPTIONS DEFAULT: '.ght($opts['functimeout'],null,0).' -d, --dryrun If this option is set, the script won’t write anything in the database. - -S, --dontoot - If this option is set, the script won’t try to read «getinstinfo.ini» file - and won’t post an announcement toot about a new instance. - See NEW INSTANCE ANNOUNCEMENT TOOT below for more info. -m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»> Defines the minimum “importance level” of messages to be written to the text user interface. There are 4 “importance levels”, in this order of @@ -131,13 +129,18 @@ TIME SPECIFICATION NEW INSTANCE ANNOUNCEMENT TOOT This script can send an announcement toot when the instance it checks is new. - You can disable this function with the «-S» or «--dontoot» options (see - above). 
If new instance announcement toot is enabled, this script expects - to find a «getinstinfo.ini» file in the same directory it lives in, with - a «bothost» parameter defining the instance to use to send the toot, - a «bottoken» parameter defining the token to be used to post, and - a «botmaxchars» parameter defining the maximum number of characters allowed - for toots on the defined instance. + It will try to do it if it finds a readable «getinstinfo.ini» file in the + same directory it lives in, with a «bothost» parameter defining the instance + to use to send the toot, a «bottoken» parameter defining the token to be used + to post, and a «botmaxchars» parameter defining the maximum number of + characters allowed for toots on the defined instance (must be >= 10). + +PROXY SUPPORT + + This script supports socks5 proxy to connect to an instance. It will try to + use a proxy if it finds a readable «getinstinfo.ini» file in the same + directory it lives in, with a «proxy» parameter with this syntax: + [user:pass@]<host>[:port]. 
LICENSE @@ -145,6 +148,34 @@ LICENSE This is free software, and you are welcome to redistribute it under certain conditions; see for details.'.N; +foreach ($argv as $val) { + if ($val=='-h' || $val=='--help') { + echo($help); + exit(0); + } +} + +$inifp=__DIR__.'/getinstinfo.ini'; +$iniarr=@parse_ini_file($inifp); +if (is_array($iniarr)) { + if (isset($iniarr['bothost']) && !isempty($iniarr['bothost'])) $opts['bothost']=$iniarr['bothost']; + if (isset($iniarr['bottoken']) && !isempty($iniarr['bottoken'])) $opts['bottoken']=$iniarr['bottoken']; + if (isset($iniarr['botmaxchars'])) { + if (preg_match('/^[0-9]+$/',$iniarr['botmaxchars'],$matches)!=1 || $matches[0]+0<10) + mexit('config file «'.$inifp.'»: value for «botmaxchars» must be an integer >= 10.'.N,1); + $opts['botmaxchars']=$iniarr['botmaxchars']+0; + } + if (!is_null($opts['bothost']) && !is_null($opts['bottoken']) && !is_null($opts['botmaxchars'])) + $opts['_sendtoot']=true; + if (isset($iniarr['proxy'])) $opts['proxy']=$iniarr['proxy']; +} else { + eecho(1,"Could not open «{$inifp}» (it does not exist or is not readable).\n"); +} + +$inifp=__DIR__.'/../conf/mustard.ini'; +$iniarr=@parse_ini_file($inifp) + or mexit('could not open config file «'.$inifp.'».'.N,1); + for ($i=1; $i<$argc; $i++) { if ($argv[$i]=='-f' || $argv[$i]=='--fetchusers') { $opts['fetchusers']=true; @@ -175,16 +206,11 @@ for ($i=1; $i<$argc; $i++) { $opts['ldtoots']=$argv[$i]+0; } elseif ($argv[$i]=='-d' || $argv[$i]=='--dryrun') { $opts['dryrun']=true; - } elseif ($argv[$i]=='-S' || $argv[$i]=='--dontoot') { - $opts['sendtoot']=false; } elseif ($argv[$i]=='-m' || $argv[$i]=='--minmsgimplev') { if ($i+1>=$argc || !in_array(ucfirst(strtolower($argv[$i+1])),$msglevs)) mexit('option «'.$argv[$i].'» requires a “message importance level” value as an argument (use «-h» to read help).'.N,1); $i++; $opts['minmsgimplev']=array_search(ucfirst(strtolower($argv[$i])),$msglevs); - } elseif ($argv[$i]=='-h' || $argv[$i]=='--help') { - echo($help); - 
exit(0); } elseif (is_null($opts['hostname']) && $argv[$i][0]!=='-') { $opts['hostname']=$argv[$i]; } else { @@ -194,10 +220,6 @@ for ($i=1; $i<$argc; $i++) { if (is_null($opts['hostname'])) mexit('you didn’t specify an hostname (you can read the help text using «-h» or «--help»).'.N,1); -$inifp=__DIR__.'/../conf/mustard.ini'; -$iniarr=@parse_ini_file($inifp) - or mexit('could not open config file «'.$inifp.'»'.N,1); - try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); } catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); } // for php versions < 8 @@ -207,14 +229,6 @@ catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: ' // for php versions < 8 if ($res===false) mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true); -if ($opts['sendtoot']) { - $inifp=__DIR__.'/getinstinfo.ini'; - $iniarr=@parse_ini_file($inifp) - or mexit('could not open config file «'.$inifp.'»'.N,1); - if (!isset($iniarr['bothost']) || !isset($iniarr['bottoken']) || !isset($iniarr['botmaxchars']) || preg_match('/^[0-9]+$/',$iniarr['botmaxchars'])!=1) - mexit('config file «'.$inifp.'» is malformed, please check its format in the help text (you can read it using the «-h» or «--help» options).'.N,1); -} - $mastodons=[]; $res=myq($link,'SELECT Name FROM Platforms WHERE Consider=1',__LINE__); while ($row=mysqli_fetch_assoc($res)) @@ -267,10 +281,10 @@ if ($count>1) { $oidata=null; } eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N); -$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); +$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if 
($buf['cont']===false) { eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N); - $buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); } if ($buf['cont']!==false) { $buf=@json_decode($buf['cont'],true); @@ -291,7 +305,7 @@ if ($buf['cont']!==false) { $niref=array_shift($nirefs); eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N); - $buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { $buf=@json_decode($buf['cont'],true); if (is_array($buf)) { @@ -336,7 +350,7 @@ if ($buf['cont']!==false) { } if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') { eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N); - $buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -397,7 +411,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4 eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N); } eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N); - 
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -413,7 +427,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4 eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N); } eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N); - $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -454,7 +468,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4 } } else {// we still try to fetch instance info from api v1, if ver. 
< 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N); - $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -532,7 +546,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4 if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') { eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N); - $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -551,7 +565,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3 eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N); $url='https://'.$opts['hostname'].'/api/v1/trends'; if ($idata['Version']>='3.5.0') $url.='/tags'; - $buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$opts['hostname']}»"); $buf=@json_decode($buf['cont'],true); @@ -784,11 +798,11 @@ if ($instanswered && !$opts['dryrun'] && isset($idata['blocks'])) { } } -if 
($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) { +if ($opts['_sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) { $lc=['decimal_point'=>'.', 'thousands_sep'=>',']; gettlds(); eecho(0,'«'.$opts['hostname'].'»: this instance is new, trying to send an announcement toot about it...'.N); - $endpoint='https://'.$iniarr['bothost'].'/api/v1/statuses'; + $endpoint='https://'.$opts['bothost'].'/api/v1/statuses'; $toot='A new Mastodon instance, https://'.$opts['hostname'].', has been found by the crawler at https://mastodon.help'.N.N.'Name: '; (is_null($idata['Title'])) ? $toot.='unspecified' : $toot.=$idata['Title']; $toot.=N.N.'Languages: '; @@ -818,15 +832,15 @@ if ($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstS (is_null($idata['ShortDesc'])) ? $toot.='unspecified' : $toot.=$idata['ShortDesc']; $toot.=N.N.'Long description: '; (is_null($idata['LongDesc'])) ? $toot.='unspecified' : $toot.=html2text($idata['LongDesc']); - if (postlen($toot)>$iniarr['botmaxchars']) { - while (postlen($toot)+13>$iniarr['botmaxchars']) + if (postlen($toot)>$opts['botmaxchars']) { + while (postlen($toot)+13>$opts['botmaxchars']) $toot=preg_replace('#\s+(\S|\n)+$#u','',$toot); $toot.=' [continues…]'; } $context=[ 'http'=>[ - 'header'=>'Authorization: Bearer '.$iniarr['bottoken'].RN. - 'Idempotency-Key'.md5($iniarr['bothost'].$now.rand(1000,9999)).RN. + 'header'=>'Authorization: Bearer '.$opts['bottoken'].RN. + 'Idempotency-Key'.md5($opts['bothost'].$now.rand(1000,9999)).RN. 
'Content-type: application/x-www-form-urlencoded'.RN, 'method'=>'POST', 'content'=>http_build_query([ @@ -840,16 +854,16 @@ if ($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstS $context=stream_context_create($context); $res=@file_get_contents($endpoint,false,$context); if ($res===false) { - eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$iniarr['bothost'].'».'.N); + eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$opts['bothost'].'».'.N); } else { $httprsc=gethttpcode($http_response_header); $res=@json_decode($res,true); if ($httprsc!=200) - eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$iniarr['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N); + eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N); elseif ($res===false) - eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$iniarr['bothost'].'» has not returned valid JSON data.'.N); + eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has not returned valid JSON data.'.N); else - eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$iniarr['bothost'].'» :-)'.N); + eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$opts['bothost'].'» :-)'.N); } } @@ -863,7 +877,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id $offset=$chunk*$limit; for ($att=0; $att<$opts['udiratts']; $att++) { eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' 
of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N); - $buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { $xrlr=ckratelimit($buf['headers'],"«{$opts['hostname']}»"); eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N); @@ -885,7 +899,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id /*if (!isset($user['noindex'])) { $user['noindex']=true; eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N); - $page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); // here ckratelimit is not needed because it's a normal web page, not json from mastodon api if ($page['cont']!==false) { // @@ -906,7 +920,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id /*$user['tags']=[]; if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') { eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N); - $tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout'],['Accept: 
application/json'],$opts['proxy']); if ($tags['cont']!==false) { ckratelimit($tags['headers'],"«{$opts['hostname']}»"); $tags=@json_decode($tags['cont'],true); @@ -1093,7 +1107,7 @@ function truncn($num,$tab,$col,$ctx) { notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3); $num=0; } - return($num); + return $num; } function nocrnl($str) { @@ -1102,12 +1116,12 @@ function nocrnl($str) { function b2i($bool) { ($bool) ? $r=1 : $r=0; - return($r); + return $r; } function isempty($str) { (preg_match('/^\s*$/',$str)===1) ? $r=true : $r=false; - return($r); + return $r; } function notify($msg,$lev,$doecho=true) { @@ -1196,7 +1210,7 @@ function ckratelimit($headers,$ctx) { */ function get_api($host, $path) { global $opts; - $buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']); + $buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']); if ($buf['cont']!==false) { ckratelimit($buf['headers'],"«{$host}»"); $data = json_decode($buf['cont'], true);