Added “functimeout” for “gurl”; updated “ght” calls
This commit is contained in:
parent
954c351fcf
commit
e0a3c8cfa7
1 changed files with 26 additions and 23 deletions
|
@ -48,7 +48,8 @@ if (function_exists('pcntl_signal')) {
|
|||
|
||||
$opts=[
|
||||
'hostname'=>null,
|
||||
'timeout'=>10,
|
||||
'conntimeout'=>10,
|
||||
'functimeout'=>20,
|
||||
'ldtoots'=>40,// number of toots to check with the automatic language detection function
|
||||
'dryrun'=>false,
|
||||
'fetchusers'=>false,
|
||||
|
@ -59,8 +60,6 @@ $opts=[
|
|||
|
||||
$msglevs=['Debug', 'Info', 'Warning', 'Error', 'None'];
|
||||
|
||||
$ghtsa=[[' day',' days'],[' hour',' hours'],[' minute',' minutes'],[' second',' seconds']];
|
||||
|
||||
$help='SYNOPSIS
|
||||
|
||||
'.SNAME.' <hostname> [options]
|
||||
|
@ -88,11 +87,15 @@ OPTIONS
|
|||
This option defines how long the script will wait after each failed attempt
|
||||
at fetching a chunk of users’ info from the profile directory (see above)
|
||||
before retrying.
|
||||
DEFAULT: '.ght($opts['udirfailst'],$ghtsa).'
|
||||
-t, --timeout <time>
|
||||
DEFAULT: '.ght($opts['udirfailst'],null,0).'
|
||||
-t, --conntimeout <time>
|
||||
Sets the timeout for every connection attempt. See section «TIME
|
||||
SPECIFICATION» below to see how to specify time.
|
||||
DEFAULT: '.ght($opts['timeout'],$ghtsa).'
|
||||
DEFAULT: '.ght($opts['conntimeout'],null,0).'
|
||||
-T, --functimeout <time>
|
||||
Sets the timeout for every download. See section «TIME SPECIFICATION» below
|
||||
to see how to specify time.
|
||||
DEFAULT: '.ght($opts['functimeout'],null,0).'
|
||||
-d, --dryrun
|
||||
If this option is set, the script won’t write anything in the database.
|
||||
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
|
||||
|
@ -132,11 +135,11 @@ for ($i=1; $i<$argc; $i++) {
|
|||
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
|
||||
$i++;
|
||||
$opts['udirfailst']=parsetime($argv[$i]);
|
||||
} elseif ($argv[$i]=='-t' || $argv[$i]=='--timeout') {
|
||||
} elseif ($argv[$i]=='-t' || $argv[$i]=='--conntimeout') {
|
||||
if ($i+1>=$argc || parsetime($argv[$i+1])===false)
|
||||
mexit('option «'.$argv[$i].'» requires a time specification as an argument (use «-h» to read help).'.N,1);
|
||||
$i++;
|
||||
$opts['timeout']=parsetime($argv[$i]);
|
||||
$opts['conntimeout']=parsetime($argv[$i]);
|
||||
} elseif ($argv[$i]=='-l' || $argv[$i]=='--ldtoots') {
|
||||
if ($i+1>=$argc || preg_match('/^\d+$/',$argv[$i+1])!==1 || $argv[$i+1]+0>40 || $argv[$i+1]+0<10)
|
||||
mexit('option «'.$argv[$i].'» requires a number >= 10 and <= 40 as an argument (use «-h» to read help).'.N,1);
|
||||
|
@ -226,10 +229,10 @@ if ($count>1) {
|
|||
$oidata=null;
|
||||
}
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on https...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']);
|
||||
// 'functimeout' is defined in the $opts defaults array, so it is read from $opts rather than $conn.
$buf=@gurl('https://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']===false) {
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo specs on http...'.N);
|
||||
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['timeout']);
|
||||
// Plain-http fallback; the function timeout comes from $opts, where 'functimeout' is defined.
$buf=@gurl('http://'.$opts['hostname'].'/.well-known/nodeinfo',$opts['conntimeout'],$opts['functimeout']);
|
||||
}
|
||||
if ($buf['cont']!==false) {
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -250,7 +253,7 @@ if ($buf['cont']!==false) {
|
|||
$niref=array_shift($nirefs);
|
||||
eecho(1,'«'.$opts['hostname'].'»: got and successfully parsed nodeinfo specs :-)'.N);
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch nodeinfo data...'.N);
|
||||
$buf=@gurl($niref,$opts['timeout']);
|
||||
// Fetch the nodeinfo document at $niref; both timeouts are taken from $opts.
$buf=@gurl($niref,$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
if (is_array($buf)) {
|
||||
|
@ -295,7 +298,7 @@ if ($buf['cont']!==false) {
|
|||
}
|
||||
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4.0.0') {
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v2...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['timeout']);
|
||||
// The 'functimeout' option is a key of $opts, so it is read from $opts rather than $conn.
$buf=@gurl('https://'.$opts['hostname'].'/api/v2/instance',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -356,7 +359,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
|
|||
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance info from API v2: '.$buf['emsg'].'.'.N);
|
||||
}
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance extended description from API v1...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['timeout']);
|
||||
// Both timeouts are read from $opts, which is where 'functimeout' is defined.
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/extended_description',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -372,7 +375,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
|
|||
eecho(2,'«'.$opts['hostname'].'»: could not fetch instance extended description from API v1: '.$buf['emsg'].'.'.N);
|
||||
}
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance domain blocks from API v1...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['timeout']);
|
||||
// Use the configured function timeout from $opts instead of the $conn array.
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/domain_blocks',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -406,7 +409,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
|
|||
}
|
||||
} else {// we still try to fetch instance info from api v1, if ver. < 4.0.0, since it could be a mastodon instance older than 2.1.2, when nodeinfo was introduced
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance info from API v1...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['timeout']);
|
||||
// Connection and function timeouts both come from the $opts option array.
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -484,7 +487,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='4
|
|||
|
||||
if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='2.1.2') {
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance activity info from API v1...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['timeout']);
|
||||
// 'functimeout' is an $opts key, so the third argument is read from $opts rather than $conn.
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/instance/activity',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -503,7 +506,7 @@ if ($idata['IsMastodon'] && !is_null($idata['Version']) && $idata['Version']>='3
|
|||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch instance tags trends info from API v1...'.N);
|
||||
$url='https://'.$opts['hostname'].'/api/v1/trends';
|
||||
if ($idata['Version']>='3.5.0') $url.='/tags';
|
||||
$buf=@gurl($url,$opts['timeout']);
|
||||
// Fetch the trends URL built above; the function timeout is taken from $opts.
$buf=@gurl($url,$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
|
@ -758,7 +761,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
|
|||
$offset=$chunk*$limit;
|
||||
for ($att=0; $att<$opts['udiratts']; $att++) {
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch chunk '.($chunk+1).' of users info from directory API (attempt '.($att+1).'/'.$opts['udiratts'].')...'.N);
|
||||
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
|
||||
// Fetch one directory chunk; both timeouts are read from $opts, where 'functimeout' is defined.
$buf=@gurl('https://'.$opts['hostname'].'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
$xrlr=ckratelimit($buf['headers']);
|
||||
eecho(1,'«'.$opts['hostname'].'»: got chunk '.($chunk+1).' of users info from directory API on attempt '.($att+1).'/'.$opts['udiratts'].' (xrlr: '.$xrlr.') :-)'.N);
|
||||
|
@ -780,7 +783,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
|
|||
/*if (!isset($user['noindex'])) {
|
||||
$user['noindex']=true;
|
||||
eecho(0,'«'.$opts['hostname'].'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
|
||||
$page=gurl($user['url'],$opts['timeout']);
|
||||
$page=gurl($user['url'],$opts['conntimeout'],$opts['functimeout']);
|
||||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||||
if ($page['cont']!==false) {
|
||||
//<meta content='noindex, noarchive' name='robots'>
|
||||
|
@ -801,7 +804,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
|
|||
/*$user['tags']=[];
|
||||
if (!$user['noindex'] && !is_null($idata['Version']) && $idata['Version']>='3.3.0') {
|
||||
eecho(0,'«'.$opts['hostname'].'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
|
||||
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
|
||||
$tags=@gurl('https://'.$opts['hostname'].'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($tags['cont']!==false) {
|
||||
ckratelimit($tags['headers']);
|
||||
$tags=@json_decode($tags['cont'],true);
|
||||
|
@ -835,7 +838,7 @@ if ($instanswered && $opts['fetchusers'] && $idata['IsMastodon'] && !is_null($id
|
|||
eecho(2,'«'.$opts['hostname'].'»: last attempt ('.($att+1).'/'.$opts['udiratts'].') on chunk '.($chunk+1).' failed; i give up.'.N);
|
||||
$end=true;
|
||||
} else {
|
||||
eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],$ghtsa).' before retrying.'.N);
|
||||
eecho(2,'«'.$opts['hostname'].'»: attempt '.($att+1).'/'.$opts['udiratts'].' on chunk '.($chunk+1).' failed; sleeping for '.ght($opts['udirfailst'],null,0).' before retrying.'.N);
|
||||
sleep($opts['udirfailst']);
|
||||
}
|
||||
}
|
||||
|
@ -1066,7 +1069,7 @@ function ckratelimit($httpresphead) {
|
|||
if (isset($headers['date']) && isset($headers['x-ratelimit-reset']) && isset($headers['x-ratelimit-remaining'])) {
|
||||
if ($headers['x-ratelimit-remaining']==0) {
|
||||
$stosl=strtotime($headers['x-ratelimit-reset'])-strtotime($headers['date'])+1;
|
||||
eecho(2,'reached rate limit, sleeping for '.ght($stosl).' ...'.N);
|
||||
eecho(2,'reached rate limit, sleeping for '.ght($stosl,null,0).' ...'.N);
|
||||
sleep($stosl);
|
||||
}
|
||||
return($headers['x-ratelimit-remaining']);
|
||||
|
@ -1090,7 +1093,7 @@ function ckratelimit($httpresphead) {
|
|||
*/
|
||||
function get_api($host, $path) {
|
||||
global $opts;
|
||||
$buf = @gurl('https://'.$host.$path,$opts['timeout']);
|
||||
// Only $opts is imported via `global` in this function, so both timeouts must be read from it.
$buf = @gurl('https://'.$host.$path,$opts['conntimeout'],$opts['functimeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
$data = json_decode($buf['cont'], true);
|
||||
|
|
Loading…
Reference in a new issue