Added “functimeout” for “gurl”; updated “ght” calls

This commit is contained in:
pezcurrel 2023-12-31 20:29:31 +01:00
parent 954c351fcf
commit e0a3c8cfa7

View file

@ -48,7 +48,8 @@ if (function_exists('pcntl_signal')) {
$opts=[ $opts=[
'hostname'=>null, 'hostname'=>null,
'timeout'=>10, 'conntimeout'=>10,
'functimeout'=>20,
'ldtoots'=>40,// number of toots to check with the automatic language detection function 'ldtoots'=>40,// number of toots to check with the automatic language detection function
'dryrun'=>false, 'dryrun'=>false,
'fetchusers'=>false, 'fetchusers'=>false,
@ -59,8 +60,6 @@ $opts=[
$msglevs=['Debug', 'Info', 'Warning', 'Error', 'None']; $msglevs=['Debug', 'Info', 'Warning', 'Error', 'None'];
$ghtsa=[[' day',' days'],[' hour',' hours'],[' minute',' minutes'],[' second',' seconds']];
$help='SYNOPSIS $help='SYNOPSIS
'.SNAME.' <hostname> [options] '.SNAME.' <hostname> [options]
@ -88,11 +87,15 @@ OPTIONS
This option defines how long the script will wait after each failed attempt This option defines how long the script will wait after each failed attempt
at fetching a chunk of users info from the profile directory (see above) at fetching a chunk of users info from the profile directory (see above)
before retrying. before retrying.
DEFAULT: '.ght($opts['udirfailst'],$ghtsa).' DEFAULT: '.ght($opts['udirfailst'],null,0).'
-t, --timeout <time> -t, --conntimeout <time>
Sets the timeout for every connection attempt. See section «TIME Sets the timeout for every connection attempt. See section «TIME
SPECIFICATION» below to see how to specify time. SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['timeout'],$ghtsa).' DEFAULT: '.ght($opts['conntimeout'],null,0).'
-T, --functimeout <time>
Sets the timeout for every download. See section «TIME SPECIFICATION» below
to see how to specify time.
DEFAULT: '.ght($opts['functimeout'],null,0).'
-d, --dryrun -d, --dryrun
If this option is set, the script wont write anything in the database. If this option is set, the script wont write anything in the database.
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»> -m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
@ -132,11 +135,11 @@ for ($i=1; $i<$argc; $i++) {
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1); mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
$i++; $i++;
$opts['udirfailst']=parsetime($argv[$i]); $opts['udirfailst']=parsetime($argv[$i]);
} elseif ($argv[$i]=='-t' || $argv[$i]=='--timeout') { } elseif ($argv[$i]=='-t' || $argv[$i]=='--conntimeout') {
if ($i+1>=$argc || parsetime($argv[$i+1])===false) if ($i+1>=$argc || parsetime($argv[$i+1])===false)
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1); mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
$i++; $i++;
$opts['timeout']=parsetime($argv[$i]); $opts['conntimeout']=parsetime($argv[$i]);
} elseif ($argv[$i]=='-l' || $argv[$i]=='--ldtoots') { } elseif ($argv[$i]=='-l' || $argv[$i]=='--ldtoots') {
if ($i+1>=$argc || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0>40 || $argv[$i+1]+0<10) if ($i+1>=$argc || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0>40 || $argv[$i+1]+0<10)
mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1); mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
@ -226,10 +229,10 @@ if ($count>1) {
$oidata=null; $oidata=null;
} }
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']===false) { if ($buf['cont']===false) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']); $buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$conn['functimeout']);
} }
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -250,7 +253,7 @@ if ($buf['cont']!==false) {
$niref=array_shift($nirefs); $niref=array_shift($nirefs);
eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N); eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
$buf=@gurl($niref,$opts['timeout']); $buf=@gurl($niref,$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
if (is_array($buf)) { if (is_array($buf)) {
@ -295,7 +298,7 @@ if ($buf['cont']!==false) {
} }
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') { if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -356,7 +359,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N); eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
} }
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -372,7 +375,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N); eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
} }
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -406,7 +409,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
} }
} else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced } else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -484,7 +487,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') { if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -503,7 +506,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
$url='https://'.$opts['hostname'].'/api/v1/trends'; $url='https://'.$opts['hostname'].'/api/v1/trends';
if ($idata['Version']>='3.5.0') $url.='/tags'; if ($idata['Version']>='3.5.0') $url.='/tags';
$buf=@gurl($url,$opts['timeout']); $buf=@gurl($url,$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true); $buf=@json_decode($buf['cont'],true);
@ -758,7 +761,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
$offset=$chunk*$limit; $offset=$chunk*$limit;
for ($att=0; $att<$opts['udiratts']; $att++) { for ($att=0; $att<$opts['udiratts']; $att++) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']); $buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
$xrlr=ckratelimit($buf['headers']); $xrlr=ckratelimit($buf['headers']);
eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N); eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N);
@ -780,7 +783,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*if (!isset($user['noindex'])) { /*if (!isset($user['noindex'])) {
$user['noindex']=true; $user['noindex']=true;
eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N); eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=gurl($user['url'],$opts['timeout']); $page=gurl($user['url'],$opts['conntimeout'],$conn['functimeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api // here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) { if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'> //<meta content='noindex, noarchive' name='robots'>
@ -801,7 +804,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*$user['tags']=[]; /*$user['tags']=[];
if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') { if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N); eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']); $tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$conn['functimeout']);
if ($tags['cont']!==false) { if ($tags['cont']!==false) {
ckratelimit($tags['headers']); ckratelimit($tags['headers']);
$tags=@json_decode($tags['cont'],true); $tags=@json_decode($tags['cont'],true);
@ -835,7 +838,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N); eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
$end=true; $end=true;
} else { } else {
eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],$ghtsa).' before retrying.'.N); eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
sleep($opts['udirfailst']); sleep($opts['udirfailst']);
} }
} }
@ -1066,7 +1069,7 @@ function ckratelimit($httpresphead) {
if (isset($headers['date']) && isset($headers['x-ratelimit-reset']) && isset($headers['x-ratelimit-remaining'])) { if (isset($headers['date']) && isset($headers['x-ratelimit-reset']) && isset($headers['x-ratelimit-remaining'])) {
if ($headers['x-ratelimit-remaining']==0) { if ($headers['x-ratelimit-remaining']==0) {
$stosl=strtotime($headers['x-ratelimit-reset'])-strtotime($headers['date'])+1; $stosl=strtotime($headers['x-ratelimit-reset'])-strtotime($headers['date'])+1;
eecho(2,'reached rate limit, sleeping for '.ght($stosl).' ...'.N); eecho(2,'reached rate limit, sleeping for '.ght($stosl,null,0).' ...'.N);
sleep($stosl); sleep($stosl);
} }
return($headers['x-ratelimit-remaining']); return($headers['x-ratelimit-remaining']);
@ -1090,7 +1093,7 @@ function ckratelimit($httpresphead) {
*/ */
function get_api($host, $path) { function get_api($host, $path) {
global $opts; global $opts;
$buf = @gurl('https://'.$host.$path,$opts['timeout']); $buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$conn['functimeout']);
if ($buf['cont']!==false) { if ($buf['cont']!==false) {
ckratelimit($buf['headers']); ckratelimit($buf['headers']);
$data = json_decode($buf['cont'], true); $data = json_decode($buf['cont'], true);