Added “functimeout” for “gurl”; updated “ght” calls

This commit is contained in:
pezcurrel 2023-12-31 20:29:31 +01:00
parent 954c351fcf
commit e0a3c8cfa7

View file

@ -48,7 +48,8 @@ if (function_exists('pcntl_signal')) {
$opts=[
'hostname'=>null,
'timeout'=>10,
'conntimeout'=>10,
'functimeout'=>20,
'ldtoots'=>40,// number of toots to check with the automatic language detection function
'dryrun'=>false,
'fetchusers'=>false,
@ -59,8 +60,6 @@ $opts=[
$msglevs=['Debug', 'Info', 'Warning', 'Error', 'None'];
$ghtsa=[[' day',' days'],[' hour',' hours'],[' minute',' minutes'],[' second',' seconds']];
$help='SYNOPSIS
'.SNAME.' <hostname> [options]
@ -88,11 +87,15 @@ OPTIONS
This option defines how long the script will wait after each failed attempt
at fetching a chunk of users info from the profile directory (see above)
before retrying.
DEFAULT: '.ght($opts['udirfailst'],$ghtsa).'
-t, --timeout <time>
DEFAULT: '.ght($opts['udirfailst'],null,0).'
-t, --conntimeout <time>
Sets the timeout for every connection attempt. See section «TIME
SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['timeout'],$ghtsa).'
DEFAULT: '.ght($opts['conntimeout'],null,0).'
-T, --functimeout <time>
Sets the timeout for every download. See section «TIME SPECIFICATION» below
to see how to specify time.
DEFAULT: '.ght($opts['functimeout'],null,0).'
-d, --dryrun
If this option is set, the script will not write anything in the database.
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
@ -132,11 +135,11 @@ for ($i=1; $i<$argc; $i++) {
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
$i++;
$opts['udirfailst']=parsetime($argv[$i]);
} elseif ($argv[$i]=='-t' || $argv[$i]=='--timeout') {
} elseif ($argv[$i]=='-t' || $argv[$i]=='--conntimeout') {
if ($i+1>=$argc || parsetime($argv[$i+1])===false)
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
$i++;
$opts['timeout']=parsetime($argv[$i]);
$opts['conntimeout']=parsetime($argv[$i]);
} elseif ($argv[$i]=='-l' || $argv[$i]=='--ldtoots') {
if ($i+1>=$argc || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0>40 || $argv[$i+1]+0<10)
mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
@ -226,10 +229,10 @@ if ($count>1) {
$oidata=null;
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']===false) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']);
// plain-http fallback; both timeouts are read from $opts
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout']);
}
if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true);
@ -250,7 +253,7 @@ if ($buf['cont']!==false) {
$niref=array_shift($nirefs);
eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
$buf=@gurl($niref,$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true);
if (is_array($buf)) {
@ -295,7 +298,7 @@ if ($buf['cont']!==false) {
}
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -356,7 +359,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -372,7 +375,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
}
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -406,7 +409,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
}
} else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -484,7 +487,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -503,7 +506,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
$url='https://'.$opts['hostname'].'/api/v1/trends';
if ($idata['Version']>='3.5.0') $url.='/tags';
$buf=@gurl($url,$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl($url,$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$buf=@json_decode($buf['cont'],true);
@ -758,7 +761,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
$offset=$chunk*$limit;
for ($att=0; $att<$opts['udiratts']; $att++) {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
// both the connection timeout and the per-download timeout are read from $opts
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
$xrlr=ckratelimit($buf['headers']);
eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N);
@ -780,7 +783,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*if (!isset($user['noindex'])) {
$user['noindex']=true;
eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=gurl($user['url'],$opts['timeout']);
$page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
@ -801,7 +804,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
/*$user['tags']=[];
if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') {
eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout']);
if ($tags['cont']!==false) {
ckratelimit($tags['headers']);
$tags=@json_decode($tags['cont'],true);
@ -835,7 +838,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
$end=true;
} else {
eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],$ghtsa).' before retrying.'.N);
eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
sleep($opts['udirfailst']);
}
}
@ -1066,7 +1069,7 @@ function ckratelimit($httpresphead) {
if (isset($headers['date']) && isset($headers['x-ratelimit-reset']) && isset($headers['x-ratelimit-remaining'])) {
if ($headers['x-ratelimit-remaining']==0) {
$stosl=strtotime($headers['x-ratelimit-reset'])-strtotime($headers['date'])+1;
eecho(2,'reached rate limit, sleeping for '.ght($stosl).' ...'.N);
eecho(2,'reached rate limit, sleeping for '.ght($stosl,null,0).' ...'.N);
sleep($stosl);
}
return($headers['x-ratelimit-remaining']);
@ -1090,7 +1093,7 @@ function ckratelimit($httpresphead) {
*/
function get_api($host, $path) {
global $opts;
$buf = @gurl('https://'.$host.$path,$opts['timeout']);
// only $opts is imported into this function's scope, so both timeouts must be read from it
$buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$data = json_decode($buf['cont'], true);