Added socks5 proxy support; minor changes

This commit is contained in:
pezcurrel 2024-08-21 10:24:41 +02:00
parent 1cf437ea2c
commit 924663e865

View file

@ -57,13 +57,15 @@ $opts=[
'functimeout'=>20,
'ldtoots'=>40,// number of toots to check with the automatic language detection function
'dryrun'=>false,
'sendtoot'=>true,
'fetchusers'=>false,
'udiratts'=>5,
'udirfailst'=>90,
'minmsgimplev'=>1,
'bothost'=>null,
'bottoken'=>null
'bottoken'=>null,
'botmaxchars'=>null,
'_sendtoot'=>false,
'proxy'=>null
];
$msglevs=['Debug', 'Info', 'Warning', 'Error', 'None'];
@ -106,10 +108,6 @@ OPTIONS
DEFAULT: '.ght($opts['functimeout'],null,0).'
-d, --dryrun
If this option is set, the script wont write anything in the database.
-S, --dontoot
If this option is set, the script wont try to read «getinstinfo.ini» file
and wont post an announcement toot about a new instance.
See NEW INSTANCE ANNOUNCEMENT TOOT below for more info.
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written to the
text user interface. There are 4 “importance levels”, in this order of
@ -131,13 +129,18 @@ TIME SPECIFICATION
NEW INSTANCE ANNOUNCEMENT TOOT
This script can send an announcement toot when the instance it checks is new.
You can disable this function with the «- or «--dontoot» options (see
above). If new instance announcement toot is enabled, this script expects
to find a «getinstinfo.ini» file in the same directory it lives in, with
a «bothost» parameter defining the instance to use to send the toot,
a «bottoken» parameter defining the token to be used to post, and
a «botmaxchars» parameter defining the maximum number of characters allowed
for toots on the defined instance.
It will try to do it if it finds a readable «getinstinfo.ini» file in the
same directory it lives in, with a «bothost» parameter defining the instance
to use to send the toot, a «bottoken» parameter defining the token to be used
to post, and a «botmaxchars» parameter defining the maximum number of
characters allowed for toots on the defined instance (must be >= 10).
PROXY SUPPORT
This script supports socks5 proxy to connect to an instance. It will try to
use a proxy if it finds a readable «getinstinfo.ini» file in the same
directory it lives in, with a «proxy» parameter with this syntax:
[user:pass@]<host>[:port].
LICENSE
@ -145,6 +148,34 @@ LICENSE
This is free software, and you are welcome to redistribute it under certain
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
// Serve the help text before touching any configuration file, so that
// «-h»/«--help» keeps working even when the ini files are missing or broken.
foreach ($argv as $val) {
	if ($val==='-h' || $val==='--help') {
		echo($help);
		exit(0);
	}
}

// Optional settings file living next to this script. It may define the
// announcement bot parameters («bothost», «bottoken», «botmaxchars») and the
// socks5 «proxy»; every key is optional and blank values count as “not set”.
$inifp=__DIR__.'/getinstinfo.ini';
$iniarr=@parse_ini_file($inifp);
if (is_array($iniarr)) {
	if (isset($iniarr['bothost']) && !isempty($iniarr['bothost'])) $opts['bothost']=$iniarr['bothost'];
	if (isset($iniarr['bottoken']) && !isempty($iniarr['bottoken'])) $opts['bottoken']=$iniarr['bottoken'];
	if (isset($iniarr['botmaxchars'])) {
		// only plain decimal digits are accepted, and the value must be at least 10
		if (preg_match('/^[0-9]+$/',$iniarr['botmaxchars'],$matches)!=1 || $matches[0]+0<10)
			mexit('config file «'.$inifp.'»: value for «botmaxchars» must be an integer >= 10.'.N,1);
		$opts['botmaxchars']=$iniarr['botmaxchars']+0;
	}
	// the announcement toot is enabled only when all three bot parameters are available
	if (!is_null($opts['bothost']) && !is_null($opts['bottoken']) && !is_null($opts['botmaxchars']))
		$opts['_sendtoot']=true;
	// a blank «proxy=» line must not turn into an empty proxy string: treat it
	// like the other parameters and leave the default (null) in place
	if (isset($iniarr['proxy']) && !isempty($iniarr['proxy'])) $opts['proxy']=$iniarr['proxy'];
} else {
	eecho(1,"Could not open «{$inifp}» (it does not exist or is not readable).\n");
}

// Main configuration file: the script cannot run without it.
// From here on $iniarr holds the content of this file, not of «getinstinfo.ini».
$inifp=__DIR__.'/../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp)
	or mexit('could not open config file «'.$inifp.'».'.N,1);
for ($i=1; $i<$argc; $i++) {
if ($argv[$i]=='-f' || $argv[$i]=='--fetchusers') {
$opts['fetchusers']=true;
@ -175,16 +206,11 @@ for ($i=1; $i<$argc; $i++) {
$opts['ldtoots']=$argv[$i]+0;
} elseif ($argv[$i]=='-d' || $argv[$i]=='--dryrun') {
$opts['dryrun']=true;
} elseif ($argv[$i]=='-S' || $argv[$i]=='--dontoot') {
$opts['sendtoot']=false;
} elseif ($argv[$i]=='-m' || $argv[$i]=='--minmsgimplev') {
if ($i+1>=$argc || !in_array(ucfirst(strtolower($argv[$i+1])),$msglevs))
mexit('option «'.$argv[$i].'» requires a “message importance level” value as an argument (use «-h» to read help).'.N,1);
$i++;
$opts['minmsgimplev']=array_search(ucfirst(strtolower($argv[$i])),$msglevs);
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
echo($help);
exit(0);
} elseif (is_null($opts['hostname']) && $argv[$i][0]!=='-') {
$opts['hostname']=$argv[$i];
} else {
@ -194,10 +220,6 @@ for ($i=1; $i<$argc; $i++) {
if (is_null($opts['hostname'])) mexit('you didnt specify an hostname (you can read the help text using «-h» or «--help»).'.N,1);
$inifp=__DIR__.'/../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp)
or mexit('could not open config file «'.$inifp.'»'.N,1);
try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
// for php versions < 8
@ -207,14 +229,6 @@ catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: '
// for php versions < 8
if ($res===false) mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
if ($opts['sendtoot']) {
$inifp=__DIR__.'/getinstinfo.ini';
$iniarr=@parse_ini_file($inifp)
or mexit('could not open config file «'.$inifp.'»'.N,1);
if (!isset($iniarr['bothost']) || !isset($iniarr['bottoken']) || !isset($iniarr['botmaxchars']) || preg_match('/^[0-9]+$/',$iniarr['botmaxchars'])!=1)
mexit('config file «'.$inifp.'» is malformed, please check its format in the help text (you can read it using the «-h» or «--help» options).'.N,1);
}
$mastodons=[];
$res=myq($link,'SELECT Name FROM Platforms WHERE Consider=1',__LINE__);
while ($row=mysqli_fetch_assoc($res))
@ -267,10 +281,10 @@ if ($count>1) {
$oidata=null;
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']===false) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
}
if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true);
@ -291,7 +305,7 @@ if ($buf['cont']!==false) {
$niref=array_shift($nirefs);
eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
$buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true);
if (is_array($buf)) {
@ -336,7 +350,7 @@ if ($buf['cont']!==false) {
}
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -397,7 +411,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -413,7 +427,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -454,7 +468,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
}
} else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -532,7 +546,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -551,7 +565,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
$url='https://'.$opts['hostname'].'/api/v1/trends';
if ($idata['Version']>='3.5.0') $url.='/tags';
$buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl($url,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$opts['hostname']}»");
$buf=@json_decode($buf['cont'],true);
@ -784,11 +798,11 @@ if ($instanswered && !$opts['dryrun'] && isset($idata['blocks'])) {
}
}
if ($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) {
if ($opts['_sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstSeen']==$now) {
$lc=['decimal_point'=>'.', 'thousands_sep'=>','];
gettlds();
eecho(0,'«'.$opts['hostname'].'»: this instance is new, trying to send an announcement toot about it...'.N);
$endpoint='https://'.$iniarr['bothost'].'/api/v1/statuses';
$endpoint='https://'.$opts['bothost'].'/api/v1/statuses';
$toot='A new Mastodon instance, https://'.$opts['hostname'].', has been found by the crawler at https://mastodon.help'.N.N.'Name: ';
(is_null($idata['Title'])) ? $toot.='unspecified' : $toot.=$idata['Title'];
$toot.=N.N.'Languages: ';
@ -818,15 +832,15 @@ if ($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstS
(is_null($idata['ShortDesc'])) ? $toot.='unspecified' : $toot.=$idata['ShortDesc'];
$toot.=N.N.'Long description: ';
(is_null($idata['LongDesc'])) ? $toot.='unspecified' : $toot.=html2text($idata['LongDesc']);
if (postlen($toot)>$iniarr['botmaxchars']) {
while (postlen($toot)+13>$iniarr['botmaxchars'])
if (postlen($toot)>$opts['botmaxchars']) {
while (postlen($toot)+13>$opts['botmaxchars'])
$toot=preg_replace('#\s+(\S|\n)+$#u','',$toot);
$toot.=' [continues…]';
}
$context=[
'http'=>[
'header'=>'Authorization: Bearer '.$iniarr['bottoken'].RN.
'Idempotency-Key'.md5($iniarr['bothost'].$now.rand(1000,9999)).RN.
'header'=>'Authorization: Bearer '.$opts['bottoken'].RN.
'Idempotency-Key'.md5($opts['bothost'].$now.rand(1000,9999)).RN.
'Content-type: application/x-www-form-urlencoded'.RN,
'method'=>'POST',
'content'=>http_build_query([
@ -840,16 +854,16 @@ if ($opts['sendtoot'] && $instanswered && $idata['IsMastodon'] && $idata['FirstS
$context=stream_context_create($context);
$res=@file_get_contents($endpoint,false,$context);
// Report the outcome of the announcement POST made just above: $res is the
// body returned by file_get_contents(), or false when the request failed.
if ($res===false) {
	eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, could not connect to «'.$opts['bothost'].'».'.N);
} else {
	$httprsc=gethttpcode($http_response_header);
	// json_decode() reports undecodable input with null (never false), so the
	// check below tests for “not an array” to really catch a non-JSON body
	$res=@json_decode($res,true);
	if ($httprsc!=200)
		eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has returned HTTP code «'.$httprsc.'».'.N);
	elseif (!is_array($res))
		eecho(2,'«'.$opts['hostname'].'»: trying to post a toot about this new instance, «'.$opts['bothost'].'» has not returned valid JSON data.'.N);
	else
		eecho(1,'«'.$opts['hostname'].'»: correctly posted announcement about this new instance on «'.$opts['bothost'].'» :-)'.N);
}
}
@ -863,7 +877,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
$offset=$chunk*$limit;
for ($att=0; $att<$opts['udiratts']; $att++) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
$xrlr=ckratelimit($buf['headers'],"«{$opts['hostname']}»");
eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N);
@ -885,7 +899,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*if (!isset($user['noindex'])) {
$user['noindex']=true;
eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
@ -906,7 +920,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*$user['tags']=[];
if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($tags['cont']!==false) {
ckratelimit($tags['headers'],"«{$opts['hostname']}»");
$tags=@json_decode($tags['cont'],true);
@ -1093,7 +1107,7 @@ function truncn($num,$tab,$col,$ctx) {
notify($ctx.': function «truncn»: expecting a number, got something else; returning «0».',3);
$num=0;
}
return($num);
return $num;
}
function nocrnl($str) {
@ -1102,12 +1116,12 @@ function nocrnl($str) {
/**
 * Converts a truthy/falsy value into the integer 1 or 0.
 *
 * @param mixed $bool value evaluated with PHP's usual truthiness rules
 * @return int 1 when $bool is truthy, 0 otherwise
 */
function b2i($bool) {
	return $bool ? 1 : 0;
}
/**
 * Tells whether a string is empty or made of whitespace only.
 *
 * @param string $str the string to check
 * @return bool true when $str matches /^\s*$/, false otherwise (including
 *              when preg_match() itself fails and returns false)
 */
function isempty($str) {
	return preg_match('/^\s*$/',$str)===1;
}
function notify($msg,$lev,$doecho=true) {
@ -1196,7 +1210,7 @@ function ckratelimit($headers,$ctx) {
*/
function get_api($host, $path) {
global $opts;
$buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
$buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout'],['Accept: application/json'],$opts['proxy']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers'],"«{$host}»");
$data = json_decode($buf['cont'], true);