Lots of changes :-))

This commit is contained in:
pezcurrel 2022-12-11 23:29:51 +01:00
parent 882222bdb9
commit b16515f4e8

View file

@ -25,9 +25,6 @@ use LanguageDetection\Language;
(strtoupper(substr(PHP_OS,0,3))==='WIN') ? $iswin=true : $iswin=false;
$link=false;
$jsonf=false;
function eecho($lev,$msg) {
$time=microtime(false);
$time=explode(' ',$time);
@ -42,9 +39,9 @@ function eecho($lev,$msg) {
function mexit($msg,$code) {
global $link, $jsonf, $lockfp;
if ($link) mysqli_close($link);
if ($jsonf) fclose($jsonf);
if (isset($lockfp) && file_exists($lockfp)) unlink($lockfp);
if (isset($link)) mysqli_close($link);
if (isset($jsonf)) fclose($jsonf);
if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
if ($code!=0)
eecho(3,$msg);
else
@ -319,6 +316,7 @@ if (!$recover) {
}
}
unset($deadinsts);
sort($insts);
// shuffle($insts);
eecho(1,count($insts).' instances to be checked.'.N);
@ -626,6 +624,7 @@ if ($opts['jsonwrite']) {
if ($mode[0]=='w')
fwrite($jsonf,'{'.N);
}
$tini=time();
$cinsts=count($insts);
$i=0;
@ -658,29 +657,48 @@ while ($i<$cinsts) {
$info=@json_decode($buf['cont'],true);
if (is_array($info)) {
eecho(1,'«'.$host.'»: got instance info from API :-)'.N);
eecho(0,'«'.$host.'»: trying to fetch instance info from nodeinfo...'.N);
$buf=@getfc('https://'.$host.'/nodeinfo/2.0.json',$opts['timeout']);
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on https...'.N);
$buf=@getfc('https://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
if ($buf['cont']===false) {
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on http...'.N);
$buf=@getfc('http://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
}
if ($buf['cont']!==false) {
//ckratelimit($buf['headers']);// no ckratelimit here because nodeinfo doesn't use it
eecho(1,'«'.$host.'»: got instance info from nodeinfo :-)'.N);
$info['x-nodeinfo']=json_decode($buf['cont'],true);
// we should keep an eye to new software names here, to decide if they are mastodon derivates...
if (isset($info['x-nodeinfo']['software']['name']) && !is_null($info['x-nodeinfo']['software']['name'])) {
if (preg_match('/^mastodon|fedibird|ecko|hometown/',$info['x-nodeinfo']['software']['name'])===1)
$ismast=true;
$res=mysqli_query($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$info['x-nodeinfo']['software']['name']).'\'')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (mysqli_num_rows($res)<1) {
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($info['x-nodeinfo']['software']['name'],'Platforms','Name','«'.$info['uri'].'»')).'\')')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
notify('New software found: «'.$info['uri'].'» runs on «'.$info['x-nodeinfo']['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
// Decode the body fetched just above (the log messages call it the "nodeinfo specs").
// To be usable it must decode to an array holding a non-empty 'links' array.
$buf=@json_decode($buf['cont'],true);
if (is_array($buf) && array_key_exists('links',$buf) && is_array($buf['links']) && count($buf['links'])>0) {
// Build a rel => href map from the links that carry both keys; log the ones that do not.
$nirefs=[];
foreach ($buf['links'] as $key=>$niref)
if (isset($niref['rel']) && isset($niref['href']))
$nirefs[$niref['rel']]=$niref['href'];
else
eecho(2,'«'.$host.'»: nodeinfo specs link '.$key.' has unexpected format.'.N);
// krsort() orders the map by rel in descending key order; array_shift() then takes the first href.
// NOTE(review): presumably this is meant to pick the newest schema version (rel values usually end
// in a version number) — confirm; the ordering is a plain string sort, not a version-aware one.
// NOTE(review): if no link had both keys, $nirefs is empty, array_shift() returns null and
// getfc() below is called with null as its URL.
krsort($nirefs);
$niref=array_shift($nirefs);
eecho(0,'«'.$host.'»: got nodeinfo specs; trying to fetch nodeinfo...'.N);
$buf=@getfc($niref,$opts['timeout']);
// NOTE(review): this if has no else, so a failed fetch of the nodeinfo document is not logged here.
if ($buf['cont']!==false) {
$buf=@json_decode($buf['cont'],true);
// Only accept the document when it names both the software and its version.
if (is_array($buf) && isset($buf['software']['name']) && isset($buf['software']['version'])) {
$ninfo=$buf;
// NOTE(review): '^' binds only to the first alternative, so this matches names that START with
// "mastodon" or that CONTAIN "fedibird", "ecko" or "hometown" anywhere — confirm whether
// /^(mastodon|fedibird|ecko|hometown)/ was intended.
if (preg_match('/^mastodon|fedibird|ecko|hometown/',$ninfo['software']['name'])===1)
$ismast=true;
// Look the software name up in Platforms; when it is missing, add it and send a notification.
// The INSERT is skipped on dry runs, the notification is sent either way.
$res=mysqli_query($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$ninfo['software']['name']).'\'')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (mysqli_num_rows($res)<1) {
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($ninfo['software']['name'],'Platforms','Name','«'.$host.'»')).'\')')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
notify('New software found: «'.$host.'» runs on «'.$ninfo['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
}
} else {
eecho(2,'«'.$host.'»: nodeinfo was not good json or json had unexpected format.'.N);
}
}
} else {
eecho(2,'«'.$host.'»: nodeinfo specs where not good json or json had unexpected format.'.N);
}
} else {
eecho(2,'«'.$host.'»: could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
}
if (array_key_exists('version',$info)) {
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».');
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».'.N);
if ($info['version']>='2.1.2') {
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
@ -702,140 +720,6 @@ while ($i<$cinsts) {
} else {
eecho(2,'«'.$host.'»: could not fetch instance trends from API: '.$buf['emsg'].N);
}
if ($opts['fetchusers']) {
$exusers=[];// array of this instance's users already existing in the db
$res=mysqli_query($link,'SELECT ID, locid, username FROM Users WHERE host=\''.myesc($link,$host).'\'') or mexit(__LINE__.': '.mysqli_error($link).N,3);
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
$users=[];// array of users in this instance's directory
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
$chunk=0;
$limit=80;
$end=false;
while (!$end) {
$offset=$chunk*$limit;
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
$buf=@json_decode($buf['cont'],true);
if (is_array($buf)) {
//print_r($buf);
if (count($buf)<$limit) $end=true;
if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint dont have a “noindex” attribute; skipping directory fetching.'.N);
break;
} else {
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
}
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
foreach ($buf as $user) {
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields'], $user)) {
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
if (!isset($user['noindex'])) {
$user['noindex']=true;
if (0==1) { // disabled; takes too long on instances with many users; see also the if statement before this foreach
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=getfc($user['url'],$opts['timeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
$user['noindex']=false;
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
} else {
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
}
} else {
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
}
}
}
$snote=strip_tags($user['note']);
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
$user['tags']=[];
if (0==1 && !$user['noindex'] && $info['version']>='3.3.0') {// disabled; takes too long on instances with many users
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
if ($tags['cont']!==false) {
ckratelimit($tags['headers']);
$tags=@json_decode($tags['cont'],true);
if (is_array($tags) && count($tags)>0) {
eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
foreach($tags as $tag) $user['tags'][]=$tag['name'];
}
} else {
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
}
}
$user['tags']=implode(';',$user['tags']);
if ($user['tags']=='') $user['tags']=null;
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
$users[$user['id']]=$user;
} else {
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
//print_r($user);
}
}
} else {
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
$end=true;
}
$chunk++;
} else {
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
$end=true;
}
}
foreach ($users as $locid=>$user) {
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
$uid=0;
if (!array_key_exists($user['id'],$exusers)) {
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
$query='INSERT INTO Users '.$query;
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
$uid=mysqli_insert_id($link);
}
} else {
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they set noindex...'.N);
}
} else {
$uid=$exusers[$locid]['ID'];
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
} else {
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they set noindex...'.N);
$query='DELETE FROM Users WHERE ID='.$uid;
}
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
foreach ($user['fields'] as $field) {
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
foreach ($exusers as $locid=>$exuser) {
if (!array_key_exists($locid,$users)) {
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
if (!$opts['dryrun']) {
mysqli_query($link,'DELETE FROM Users WHERE ID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
}
}
}
} else {
@ -859,49 +743,48 @@ while ($i<$cinsts) {
or mexit(__LINE__.': '.mysqli_error($link).N,3);
$nrows=mysqli_num_rows($res);
if ($nrows==1) {
eecho(1,'«'.$host.'»: didnt respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New and Instances.Dead.'.N);
eecho(1,'«'.$host.'»: didnt respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New=0 and Instances.Dead=1.'.N);
$row=mysqli_fetch_assoc($res);
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 0)')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$row['ID'])
$instid=$row['ID'];
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$instid)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
if ($row['New']==1 && !is_null($row['FirstSeen']) && $now-$row['FirstSeen']>$opts['oldline']) {
notify('Instance «<a href="viewinst.php?id='.$row['ID'].'">'.$row['URI'].'</a>» is no longer new.',2);
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET New=0 WHERE ID='.$row['ID'])
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is no longer new.',2);
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET New=0 WHERE ID='.$instid)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
// we check the last time instance responded, if ever
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' AND Status=1 ORDER BY Time DESC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=1 ORDER BY Time DESC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
// if instance never responded we consider the time of first check
if (mysqli_num_rows($rres)==0) {
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' AND Status=0 ORDER BY Time ASC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=0 ORDER BY Time ASC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
if (mysqli_num_rows($rres)>0) {
$rrow=mysqli_fetch_assoc($rres);
if ($now-$rrow['Time']>$opts['deadline']) {
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET Dead=1 WHERE ID='.$row['ID'])
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET Dead=1 WHERE ID='.$instid)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
notify('Instance «<a href="viewinst.php?id='.$row['ID'].'">'.$row['URI'].'</a>» is dead!',2);
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is dead!',2);
}
} else {
eecho(2,'«'.$host.'»: exists in the database but theres no data about it in InstChecks! Ill remedy.'.N);
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks SET InstID='.$row['ID'].', Time='.$now.', Status=0')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
eecho(2,'«'.$host.'»: exists in the database but theres no data about it in InstChecks!'.N);
}
} elseif ($nrows==0) {
eecho(1,'«'.$host.'»: doesnt respond and is not in the database, adding it.'.N);
// "New=0" and "FirstSeen=NULL" because it's not new and not seen until it responds for the first time
if (!$opts['dryrun']) {
mysqli_query($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0') or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0, InsertTS='.$now) or mexit(__LINE__.': '.mysqli_error($link).N,3);
$instid=mysqli_insert_id($link);
mysqli_query($link,'INSERT INTO InstChecks SET InstID='.$instid.', Time='.$now.', Status=0') or mexit(__LINE__.': '.mysqli_error($link).N,3);
} else {
$instid=0;
}
} else {
notify('Instance «'.$row['URI'].'» has «'.$nrows. entries in «Instances» table!',3);
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
}
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 0)')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
} else {
// instance responded
@ -924,7 +807,7 @@ while ($i<$cinsts) {
if (!is_null($ismast))
($ismast) ? $ismast=1 : $ismast=0;
$instrow=array('ID'=>null, 'FirstSeen'=>null, 'IsMastodon'=>$ismast, 'Dead'=>0, 'New'=>0, 'Good'=>0, 'Chosen'=>0, 'Priority'=>null, 'Visible'=>0, 'Noxious'=>0, 'NoxReason'=>null, 'NoxLastModTS'=>null, 'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null, 'OurDescEN'=> null, 'LocalityID'=>null, 'OurLangsLock'=>0, 'Email'=>null, 'Software'=>null, 'Version'=>null, 'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null, 'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null, 'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null, 'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null, 'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null, 'LastCheckOk'=>1, 'GuestID'=>null, 'LastGuestEdit'=>null);
$instrow['URI']=$info['uri'];
$instrow['URI']=$host;
if (akeavinn('title',$info))
$instrow['Title']=nempty(truncs($info['title'],'Instances','Title','«'.$instrow['URI'].'»'));
if (akeavinn('short_description',$info))
@ -1103,7 +986,7 @@ while ($i<$cinsts) {
}
} elseif ($nrows==0) {
eecho(1,'«'.$info['uri'].'» is not present in the database, adding it...'.N);
eecho(1,'«'.$host.'» is not present in the database, adding it...'.N);
$instrow['FirstSeen']=$now;
if ($opts['setnew'])
$instrow['New']=1;
@ -1117,7 +1000,7 @@ while ($i<$cinsts) {
$values.='NULL, ';
}
$values=substr($values,0,-2);
$query='INSERT INTO Instances ('.implode(', ',$fields).') VALUES ('.$values.')';
$query='INSERT INTO Instances ('.implode(', ',$fields).', InsertTS) VALUES ('.$values.', '.$now.')';
eecho(1,'«'.$host.'»: insert query: «'.$query.'»'.N);
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
@ -1125,13 +1008,15 @@ while ($i<$cinsts) {
} else {
$instid=0;
}
if ($opts['setnew'])
if ($opts['setnew'] && !$opts['dryrun'])
notify('New instance found: «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>».',1);
$instlangs=langs($instid, $instrow['URI'], false);
foreach ($instlangs as $row) {
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
// Store the rows returned by langs() in InstLangs; nothing is written on a dry run.
// NOTE(review): InstID, LangID and Pos are concatenated into the SQL unescaped — presumably
// langs() only returns integers for them; confirm against its definition.
if (!$opts['dryrun']) {
foreach ($instlangs as $row) {
mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
$instourlangs=langs($instid, $instrow['URI'], true);
@ -1149,18 +1034,19 @@ while ($i<$cinsts) {
notify('New instance «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>» is suitable!',1);
} else {
notify('Instance «'.$row['URI'].'» has «'.$nrows. entries in «Instances» table!',3);
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
}
if (array_key_exists('x-activity',$info) && is_array($info['x-activity'])) {
if (!$opts['dryrun']) mysqli_query($link,'DELETE FROM InstActivity WHERE InstID='.$instid);
$pos=0;
foreach ($info['x-activity'] as $buf) {
if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
$pos++;
$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
if (!$opts['dryrun']) mysqli_query($link,$query)
or mexit(__LINE__.': '.mysqli_error($link).N,3);
// Replace this instance's InstActivity rows: delete what is stored, then insert one row per
// usable entry of $info['x-activity']. The whole refresh is skipped on a dry run.
if (!$opts['dryrun']) {
// NOTE(review): unlike the other queries here, this DELETE has no "or mexit(...)", so a failure
// goes unnoticed and the INSERTs below would add to the old rows.
mysqli_query($link,'DELETE FROM InstActivity WHERE InstID='.$instid);
// $pos numbers the inserted rows from 1, in the order the entries arrive; rejected entries
// do not consume a number.
$pos=0;
foreach ($info['x-activity'] as $buf) {
// akeavinn() is defined elsewhere; presumably "array key exists and value is not null" — confirm.
if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
$pos++;
$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
}
@ -1197,11 +1083,151 @@ while ($i<$cinsts) {
}
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 1)')
or mexit(__LINE__.': '.mysqli_error($link).N,3);
// Synchronise the Users / UsersFields tables with this instance's public directory.
// Runs only when the fetchusers option is on, $ismast is truthy and the reported version
// compares >= '4.0.0'.
// NOTE(review): that is a plain string comparison, so a version such as '10.0.0' compares
// as lower than '4.0.0'; version_compare() is the usual tool — confirm intent.
if ($opts['fetchusers'] && $ismast && array_key_exists('version',$info) && $info['version']>='4.0.0') {
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
$exusers=[];// array of this instance's users already existing in the db, keyed by locid
$res=mysqli_query($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
$users=[];// array of users in this instance's directory, keyed by the account id reported by the API
// Page through the directory, $limit accounts per request. The loop ends on a short page,
// on a page that is not a JSON array, or on a fetch error.
$chunk=0;
$limit=80;
$end=false;
while (!$end) {
$offset=$chunk*$limit;
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
if ($buf['cont']!==false) {
ckratelimit($buf['headers']);
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
$buf=@json_decode($buf['cont'],true);
if (is_array($buf)) {
//print_r($buf);
// Fewer entries than requested means this was the last page.
if (count($buf)<$limit) $end=true;
/*if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint dont have a “noindex” attribute; skipping directory fetching.'.N);
break;
} else {
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
}*/
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
foreach ($buf as $user) {
// make() is defined elsewhere; presumably it verifies that all the listed keys are present
// in $user — confirm. 'noindex' is one of the listed keys.
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
// disabled because it takes too long on instances with many users
/*if (!isset($user['noindex'])) {
$user['noindex']=true;
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=getfc($user['url'],$opts['timeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
$user['noindex']=false;
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
} else {
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
}
} else {
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
}
}*/
// Hashtags in the tag-stripped bio override the noindex value from the API:
// #nobot, #nobots or #noindex switch it on; #okindex, #yesindex, #doindex or #okmhindex
// switch it off. The "off" check runs last, so it wins when both kinds are present.
$snote=strip_tags($user['note']);
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
// disabled; takes too long on instances with many users
/*$user['tags']=[];
if (!$user['noindex'] && $info['version']>='3.3.0') {
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
if ($tags['cont']!==false) {
ckratelimit($tags['headers']);
$tags=@json_decode($tags['cont'],true);
if (is_array($tags) && count($tags)>0) {
eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
foreach($tags as $tag) $user['tags'][]=$tag['name'];
}
} else {
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
}
}
$user['tags']=implode(';',$user['tags']);
if ($user['tags']=='') $user['tags']=null;*/
// With tag fetching disabled, tags is always null.
$user['tags']=null;
// pgdatetomy() and datetomy() are defined elsewhere; presumably they convert the API's date
// strings to the form stored in the database — confirm.
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
$users[$user['id']]=$user;
} else {
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
//print_r($user);
}
}
} else {
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
$end=true;
}
$chunk++;
} else {
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
$end=true;
}
}
// Write every fetched user to the database.
foreach ($users as $locid=>$user) {
// SET clause shared by the INSERT and the UPDATE below. myv() and truncs() are defined
// elsewhere; presumably myv() renders a value as an SQL literal and truncs() cuts a string
// to the width of the named column — confirm.
$query='SET InstID='.$instid.', host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
// $uid stays 0 for users that are not inserted (noindex) and for new users on a dry run.
$uid=0;
if (!array_key_exists($user['id'],$exusers)) {
// Not in the database yet: insert unless the user is noindex.
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
$query='INSERT INTO Users '.$query;
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
$uid=mysqli_insert_id($link);
}
} else {
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they dont want to be indexed...'.N);
}
} else {
// Already in the database: update the row, or delete it when the user is noindex.
$uid=$exusers[$locid]['ID'];
if (!$user['noindex']) {
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
} else {
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they dont want to be indexed...'.N);
$query='DELETE FROM Users WHERE ID='.$uid;
}
if (!$opts['dryrun']) {
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
// The user's stored fields are removed in both cases; for an updated user they are
// written again just below.
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
foreach ($user['fields'] as $field) {
// verified_at is reduced to a flag: 0 when null, 1 otherwise.
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
// Delete database users that are absent from the fetched directory.
// NOTE(review): this also runs when the paging loop above stopped early on a fetch or JSON
// error. $users is then incomplete, and users from the pages that were never fetched are
// deleted as if they had opted out — confirm this is acceptable.
foreach ($exusers as $locid=>$exuser) {
if (!array_key_exists($locid,$users)) {
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
if (!$opts['dryrun']) {
mysqli_query($link,'DELETE FROM Users WHERE ID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}
}
}
}
}
}
mysqli_close($link);
unset($link);
if ($opts['jsonwrite']) {
fwrite($jsonf,'"The end?": true'.N.'}'.N);
@ -1212,7 +1238,7 @@ unlink($instsjfp);
unlink($currinstjfp);
unlink($lockfp);
eecho(1,'Done :-)'.N);
eecho(1,'Done (in '.ght(time()-$tini,null,0).') :-)'.N);
exit(0);
@ -1244,7 +1270,7 @@ function datetomy($date) {
return(mktime(0,0,0,$date[1],$date[2],$date[0]));
}
function ckratelimit($httpresphead,$verbose=false) {
function ckratelimit($httpresphead) {
$headers=explode("\r\n",$httpresphead);
$buff=[];
array_shift($headers);
@ -1253,17 +1279,21 @@ function ckratelimit($httpresphead,$verbose=false) {
$buff[strtolower($matches[1])]=trim($matches[2]);
$headers=$buff;
if (array_key_exists('x-ratelimit-reset',$headers)) {
//Wed, 30 Mar 2022 21:27:22 GMT
$srvnow=strtotime($headers['date']);
//2022-03-31T04:05:00.058705Z
$srvrlr=strtotime($headers['x-ratelimit-reset']);
$stosl=$srvrlr-$srvnow+1;
if ($verbose) eecho(0,'ckratelimit: x-ratelimit-remaining: '.$headers['x-ratelimit-remaining'].'; $srvnow: '.gmdate('c',$srvnow).'; $srvrlr: '.gmdate('c',$srvrlr).'; current time to sleep: '.$stosl.'.'.N);
if ($headers['x-ratelimit-remaining']<3) {
eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
sleep($stosl);
// The wait is computed from the server's own clock (its «date» header), not the local one,
// so that header is required here.
if (array_key_exists('date',$headers)) {
// example «date» value: Wed, 30 Mar 2022 21:27:22 GMT
$srvnow=strtotime($headers['date']);
// example «x-ratelimit-reset» value: 2022-03-31T04:05:00.058705Z
$srvrlr=strtotime($headers['x-ratelimit-reset']);
// Seconds from the server's "now" until the limit resets, plus one.
// NOTE(review): strtotime() returns false on input it cannot parse, and the result is not checked.
$stosl=$srvrlr-$srvnow+1;
//echo('ckratelimit: x-ratelimit-remaining: '.$headers['x-ratelimit-remaining'].'; $srvnow: '.gmdate('c',$srvnow).'; $srvrlr: '.gmdate('c',$srvrlr).'; current time to sleep: '.$stosl.'.'.N);
// Sleep only when fewer than 3 requests remain.
// NOTE(review): «x-ratelimit-remaining» is read without checking that the header exists.
// NOTE(review): $stosl is negative when the reset time lies more than a second before the
// «date» value; sleep() rejects negative values (ValueError on PHP 8) — consider clamping to 0.
if ($headers['x-ratelimit-remaining']<3) {
eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
sleep($stosl);
}
} else {
eecho(2,'ckratelimit: $httpresphead did not contain a «date» header!'.N);
}
} elseif ($verbose) {
} else {
eecho(2,'ckratelimit: $httpresphead did not contain an «x-ratelimit-reset» header!'.N);
}
}