Added ckratelimit() where useful; made it more flexible by lowercasing every header key; more work on fetching users from user directories

This commit is contained in:
pezcurrel 2022-12-09 22:53:18 +01:00
parent 8341f0e209
commit 18ce06871b

View file

@ -400,6 +400,7 @@ function get_api($host, $path) {
global $opts;
$buf = @getfc('https://'.$host.$path,$opts['timeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$data = json_decode($buf['cont'], true);
return $data;
} else {
@ -646,18 +647,20 @@ while ($i<$cinsts) {
$tela=$now-$tini;
eecho(1,'working on «'.$host.'»; '.$i.'/'.$cinsts.'; '.$qok.' ok; '.$qgood.' good; '.round(100/$cinsts*$i).'%; elapsed time: '.ght($tela,null,0).'; estimated remaining time: '.ght($tela/$i*($cinsts-$beg)-$tela,null,0).'; mem.: '.ghs(memory_get_usage(true)).'; mem. peak: '.ghs(memory_get_peak_usage(true)).N);
if (willtrunc($host,'Instances','URI')) {
eecho(2,'ignoring «'.$host. because hostname is too long for the «URI» column of «Instances» table.'.N);
eecho(2,'«'.$host.: ignoring it because hostname is too long for the «URI» column of «Instances» table.'.N);
} else {
eecho(0,'trying to fetch instance info from API...'.N);
eecho(0,'«'.$host.'»: trying to fetch instance info from API...'.N);
$buf=@getfc('https://'.$host.'/api/v1/instance',$opts['timeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
$info=@json_decode($buf['cont'],true);
if (is_array($info)) {
eecho(1,'got instance info from API :-)'.N);
eecho(0,'trying to fetch instance info from nodeinfo...'.N);
eecho(1,'«'.$host.'»: got instance info from API :-)'.N);
eecho(0,'«'.$host.'»: trying to fetch instance info from nodeinfo...'.N);
$buf=@getfc('https://'.$host.'/nodeinfo/2.0.json',$opts['timeout']);
if ($buf['cont']!==false) {
eecho(1,'got instance info from nodeinfo :-)'.N);
//ckratelimit($buf['headers']);// no ckratelimit here because nodeinfo doesn't use it
eecho(1,'«'.$host.'»: got instance info from nodeinfo :-)'.N);
$info['x-nodeinfo']=json_decode($buf['cont'],true);
// we should keep an eye to new software names here, to decide if they are mastodon derivates...
if (isset($info['x-nodeinfo']['software']['name']) && !is_null($info['x-nodeinfo']['software']['name'])) {
@ -672,35 +675,36 @@ while ($i<$cinsts) {
}
}
} else {
eecho(2,'could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
eecho(2,'«'.$host.'»: could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
}
if (array_key_exists('version',$info)) {
if ($info['version']>='2.1.2') {
eecho(0,'trying to fetch instance activity info from API...'.N);
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
if ($buf['cont']!==false) {
eecho(1,'got instance activity info from API :-)'.N);
ckratelimit($buf['headers']);
eecho(1,'«'.$host.'»: got instance activity info from API :-)'.N);
$info['x-activity']=json_decode($buf['cont'],true);
} else {
eecho(2,'could not fetch instance activity from API: '.$buf['emsg'].N);
eecho(2,'«'.$host.'»: could not fetch instance activity from API: '.$buf['emsg'].N);
}
}
if ($info['version']>='3.0.0') {
eecho(0,'trying to fetch instance trends info from API...'.N);
eecho(0,'«'.$host.'»: trying to fetch instance trends info from API...'.N);
$buf=@getfc('https://'.$host.'/api/v1/trends',$opts['timeout']);
if ($buf['cont']!==false) {
eecho(1,'got instance trends info from API :-)'.N);
ckratelimit($buf['headers']);
eecho(1,'«'.$host.'»: got instance trends info from API :-)'.N);
$info['x-trends']=json_decode($buf['cont'],true);
} else {
eecho(2,'could not fetch instance trends from API: '.$buf['emsg'].N);
eecho(2,'«'.$host.'»: could not fetch instance trends from API: '.$buf['emsg'].N);
}
if ($opts['fetchusers']) {
$users=[];// array of users in this instance's directory
$res=mysqli_query($link,'SELECT ID, locid, username, OptedOut FROM Users WHERE host=\''.myesc($link,$host).'\'')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
$exusers=[];// array of this instance's users already existing in the db
$res=mysqli_query($link,'SELECT ID, locid, username FROM Users WHERE host=\''.myesc($link,$host).'\'') or mexit(__LINE__.': '.mysqli_error($link).N,3);
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
eecho(0,'trying to fetch users info from directory API...'.N);
$users=[];// array of users in this instance's directory
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
$chunk=0;
$limit=80;
$end=false;
@ -709,7 +713,7 @@ while ($i<$cinsts) {
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
eecho(1,'got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
$buf=@json_decode($buf['cont'],true);
if (is_array($buf)) {
//print_r($buf);
@ -718,67 +722,84 @@ while ($i<$cinsts) {
foreach ($buf as $user) {
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields'], $user)) {
if (!isset($user['noindex'])) $user['noindex']=false;
$user['tags']=[];
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
if ($tags['cont']!==false) {
ckratelimit($tags['headers']);
$tags=@json_decode($tags['cont'],true);
if (is_array($tags) && count($tags)>0) {
eecho(0,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
foreach($tags as $tag) $user['tags'][]=$tag['name'];
}
} else {
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
}
}
$user['tags']=implode(';',$user['tags']);
if ($user['tags']=='') $user['tags']=null;
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
$users[$user['id']]=$user;
} else {
eecho(2,'user record missed some required keys :-('.N);
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
//print_r($user);
}
}
} else {
eecho(2,'... but the chunk was not good JSON :-('.N);
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
$end=true;
}
$chunk++;
} else {
eecho(2,'could not fetch users info from directory API: '.$buf['emsg'].N);
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
$end=true;
}
}
foreach ($users as $locid=>$user) {
$query='host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,$user['username']).', display_name='.myv($link,$user['display_name']).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,$user['note']).', url='.myv($link,$user['url']).', avatar='.myv($link,$user['avatar']).', header='.myv($link,$user['header']).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', noindex='.myv($link,$user['noindex']).', OptedOut=NULL';
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,$user['username']).', display_name='.myv($link,$user['display_name']).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,$user['note']).', url='.myv($link,$user['url']).', avatar='.myv($link,$user['avatar']).', header='.myv($link,$user['header']).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,$user['tags']);
$uid=0;
if (!array_key_exists($user['id'],$exusers)) {
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
$query='INSERT INTO Users SET '.$query;
$query='INSERT INTO Users '.$query;
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
$uid=mysqli_insert_id($link);
}
} else {
eecho(0,'«'.$host.'»: NOT inserting new user «'.$user['username'].'» because they set noindex...'.N);
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they set noindex...'.N);
}
} else {
$uid=$exusers[$locid]['ID'];
if (!$user['noindex']) {
/*$msg='«'.$host.'»: updating existing user «'.$user['username'].'» ('.$exusers[$locid]['ID'].')';
if (!is_null($exusers[$locid]['OptedOut'])) $msg.=' (who opted back into the directory)';
$msg.='...';
eecho(0,$msg.N);*/
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$exusers[$locid]['ID'].')...');
$query='UPDATE Users SET '.$query.' WHERE ID='.$exusers[$locid]['ID'];
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
} else {
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$exusers[$locid]['ID'].') because they set noindex...');
$query='DELETE FROM Users WHERE ID='.$exusers[$locid]['ID'];
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they set noindex...'.N);
$query='DELETE FROM Users WHERE ID='.$uid;
}
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
if (!$opts['dryrun']) mysqli_query($link,$query)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
$uid=mysqli_insert_id($link);
if ($uid==0) $uid=$exusers[$locid]['ID'];
if (!$opts['dryrun']) mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (!$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
foreach ($user['fields'] as $field) {
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at'])
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
foreach ($exusers as $locid=>$exuser) {
if (!array_key_exists($locid,$users)) {
/*eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, updating their record ('.$exuser['ID'].')...'.N);
$query='UPDATE Users SET OptedOut='.$now.' WHERE ID='.$exuser['ID'];*/
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
$query='DELETE FROM Users WHERE ID='.$exuser['ID'];
if (!$opts['dryrun']) mysqli_query($link,$query)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (!$opts['dryrun']) {
mysqli_query($link,'DELETE FROM Users WHERE ID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
}
@ -786,11 +807,11 @@ while ($i<$cinsts) {
}
} else {
$instans=false;
eecho(2,'fetched data were not good JSON.'.N);
eecho(2,'«'.$host.'»: fetched data were not good JSON.'.N);
}
} else {
$instans=false;
eecho(2,'could not fetch instance info from API: '.$buf['emsg'].N);
eecho(2,'«'.$host.'»: could not fetch instance info from API: '.$buf['emsg'].N);
}
if (!isset($info['uri']) || preg_match('#^\s*$#',$info['uri'])===1)
$instans=false;
@ -805,7 +826,7 @@ while ($i<$cinsts) {
or mexit(__LINE__.': '.mysqli_error($link).N,3);
$nrows=mysqli_num_rows($res);
if ($nrows==1) {
eecho(1,'«'.$host. didnt respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New and Instances.Dead.'.N);
eecho(1,'«'.$host.: didnt respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New and Instances.Dead.'.N);
$row=mysqli_fetch_assoc($res);
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 0)')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
@ -831,12 +852,12 @@ while ($i<$cinsts) {
notify('Instance «<a href="viewinst.php?id='.$row['ID'].'">'.$row['URI'].'</a>» is dead!',2);
}
} else {
eecho(2,'«'.$host. exists in the database but theres no data about it in InstChecks! Ill remedy.'.N);
eecho(2,'«'.$host.: exists in the database but theres no data about it in InstChecks! Ill remedy.'.N);
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks SET InstID='.$row['ID'].', Time='.$now.', Status=0')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
} elseif ($nrows==0) {
eecho(1,'«'.$host. doesnt respond and is not in the database, adding it.'.N);
eecho(1,'«'.$host.: doesnt respond and is not in the database, adding it.'.N);
// "New=0" and "FirstSeen=NULL" because it's not new and not seen until it responds for the first time
if (!$opts['dryrun']) {
mysqli_query($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0') or mexit(__LINE__.': '.mysqli_error($link).N,3);
@ -950,10 +971,10 @@ while ($i<$cinsts) {
}
if (count($whynot)==0) {
$instrow['Good']=1;
eecho(1,'this is a suitable instance! :-)'.N);
eecho(1,'«'.$host.'»: this is a suitable instance! :-)'.N);
$qgood++;
} else {
eecho(1,'This is not a suitable instance: '.implode('; ',$whynot).' :-('.N);
eecho(1,'«'.$host.'»: this is not a suitable instance: '.implode('; ',$whynot).' :-('.N);
}
$res=mysqli_query($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'')
@ -961,7 +982,7 @@ while ($i<$cinsts) {
$nrows=mysqli_num_rows($res);
if ($nrows==1) {
eecho(1,'«'.$instrow['URI']. is already present in the database, updating it...'.N);
eecho(1,'«'.$instrow['URI'].: is already present in the database, updating it...'.N);
$oldinstrow=mysqli_fetch_assoc($res);
$instid=$oldinstrow['ID'];
$instrow['ID']=$oldinstrow['ID'];
@ -1009,7 +1030,7 @@ while ($i<$cinsts) {
$query.=$field.'=NULL, ';
}
$query=substr($query,0,-2).' WHERE Instances.ID='.$instrow['ID'];
eecho(1,'Update query: «'.$query.'».'.N);
eecho(1,'«'.$host.'»: update query: «'.$query.'».'.N);
if (!$opts['dryrun']) mysqli_query($link,$query)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
@ -1064,7 +1085,7 @@ while ($i<$cinsts) {
}
$values=substr($values,0,-2);
$query='INSERT INTO Instances ('.implode(', ',$fields).') VALUES ('.$values.')';
eecho(1,'Insert query: «'.$query.'»'.N);
eecho(1,'«'.$host.'»: insert query: «'.$query.'»'.N);
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
$instid=mysqli_insert_id($link);
@ -1196,9 +1217,8 @@ function ckratelimit($httpresphead,$verbose=false) {
array_shift($headers);
foreach ($headers as $header)
if (preg_match('/^([^:]+):(.*)$/Uu',$header,$matches)===1)
$buff[$matches[1]]=trim($matches[2]);
$buff[strtolower($matches[1])]=trim($matches[2]);
$headers=$buff;
//print_r($headers);
if (array_key_exists('x-ratelimit-reset',$headers)) {
//Wed, 30 Mar 2022 21:27:22 GMT
$srvnow=strtotime($headers['date']);