Lots of changes :-))
This commit is contained in:
parent
882222bdb9
commit
b16515f4e8
1 changed files with 231 additions and 201 deletions
|
@ -25,9 +25,6 @@ use LanguageDetection\Language;
|
|||
|
||||
(strtoupper(substr(PHP_OS,0,3))==='WIN') ? $iswin=true : $iswin=false;
|
||||
|
||||
$link=false;
|
||||
$jsonf=false;
|
||||
|
||||
function eecho($lev,$msg) {
|
||||
$time=microtime(false);
|
||||
$time=explode(' ',$time);
|
||||
|
@ -42,9 +39,9 @@ function eecho($lev,$msg) {
|
|||
|
||||
function mexit($msg,$code) {
|
||||
global $link, $jsonf, $lockfp;
|
||||
if ($link) mysqli_close($link);
|
||||
if ($jsonf) fclose($jsonf);
|
||||
if (isset($lockfp) && file_exists($lockfp)) unlink($lockfp);
|
||||
if (isset($link)) mysqli_close($link);
|
||||
if (isset($jsonf)) fclose($jsonf);
|
||||
if (isset($lockfp) && is_file($lockfp)) unlink($lockfp);
|
||||
if ($code!=0)
|
||||
eecho(3,$msg);
|
||||
else
|
||||
|
@ -319,6 +316,7 @@ if (!$recover) {
|
|||
}
|
||||
}
|
||||
|
||||
unset($deadinsts);
|
||||
sort($insts);
|
||||
// shuffle($insts);
|
||||
eecho(1,count($insts).' instances to be checked.'.N);
|
||||
|
@ -626,6 +624,7 @@ if ($opts['jsonwrite']) {
|
|||
if ($mode[0]=='w')
|
||||
fwrite($jsonf,'{'.N);
|
||||
}
|
||||
|
||||
$tini=time();
|
||||
$cinsts=count($insts);
|
||||
$i=0;
|
||||
|
@ -658,29 +657,48 @@ while ($i<$cinsts) {
|
|||
$info=@json_decode($buf['cont'],true);
|
||||
if (is_array($info)) {
|
||||
eecho(1,'«'.$host.'»: got instance info from API :-)'.N);
|
||||
eecho(0,'«'.$host.'»: trying to fetch instance info from nodeinfo...'.N);
|
||||
$buf=@getfc('https://'.$host.'/nodeinfo/2.0.json',$opts['timeout']);
|
||||
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on https...'.N);
|
||||
$buf=@getfc('https://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
|
||||
if ($buf['cont']===false) {
|
||||
eecho(0,'«'.$host.'»: trying to fetch nodeinfo specs on http...'.N);
|
||||
$buf=@getfc('http://'.$host.'/.well-known/nodeinfo',$opts['timeout']);
|
||||
}
|
||||
if ($buf['cont']!==false) {
|
||||
//ckratelimit($buf['headers']);// no ckratelimit here because nodeinfo doesn't use it
|
||||
eecho(1,'«'.$host.'»: got instance info from nodeinfo :-)'.N);
|
||||
$info['x-nodeinfo']=json_decode($buf['cont'],true);
|
||||
// we should keep an eye to new software names here, to decide if they are mastodon derivates...
|
||||
if (isset($info['x-nodeinfo']['software']['name']) && !is_null($info['x-nodeinfo']['software']['name'])) {
|
||||
if (preg_match('/^mastodon|fedibird|ecko|hometown/',$info['x-nodeinfo']['software']['name'])===1)
|
||||
$ismast=true;
|
||||
$res=mysqli_query($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$info['x-nodeinfo']['software']['name']).'\'')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if (mysqli_num_rows($res)<1) {
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($info['x-nodeinfo']['software']['name'],'Platforms','Name','«'.$info['uri'].'»')).'\')')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
notify('New software found: «'.$info['uri'].'» runs on «'.$info['x-nodeinfo']['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
if (is_array($buf) && array_key_exists('links',$buf) && is_array($buf['links']) && count($buf['links'])>0) {
|
||||
$nirefs=[];
|
||||
foreach ($buf['links'] as $key=>$niref)
|
||||
if (isset($niref['rel']) && isset($niref['href']))
|
||||
$nirefs[$niref['rel']]=$niref['href'];
|
||||
else
|
||||
eecho(2,'«'.$host.'»: nodeinfo specs link '.$key.' has unexpected format.'.N);
|
||||
krsort($nirefs);
|
||||
$niref=array_shift($nirefs);
|
||||
eecho(0,'«'.$host.'»: got nodeinfo specs; trying to fetch nodeinfo...'.N);
|
||||
$buf=@getfc($niref,$opts['timeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
if (is_array($buf) && isset($buf['software']['name']) && isset($buf['software']['version'])) {
|
||||
$ninfo=$buf;
|
||||
if (preg_match('/^mastodon|fedibird|ecko|hometown/',$ninfo['software']['name'])===1)
|
||||
$ismast=true;
|
||||
$res=mysqli_query($link,'SELECT Name FROM Platforms WHERE Name=\''.myesc($link,$ninfo['software']['name']).'\'')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if (mysqli_num_rows($res)<1) {
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO Platforms (Name) VALUES (\''.myesc($link,truncs($ninfo['software']['name'],'Platforms','Name','«'.$host.'»')).'\')')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
notify('New software found: «'.$host.'» runs on «'.$ninfo['software']['name'].'»; i added it to the table of known softwares. It would be good to check whether it is a Mastodon derivate and how compatible it is, to decide whether to consider instances using it as Mastodon instances.',2);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: nodeinfo was not good json or json had unexpected format.'.N);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: nodeinfo specs where not good json or json had unexpected format.'.N);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
|
||||
}
|
||||
if (array_key_exists('version',$info)) {
|
||||
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».');
|
||||
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».'.N);
|
||||
if ($info['version']>='2.1.2') {
|
||||
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
|
||||
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
|
||||
|
@ -702,140 +720,6 @@ while ($i<$cinsts) {
|
|||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch instance trends from API: '.$buf['emsg'].N);
|
||||
}
|
||||
if ($opts['fetchusers']) {
|
||||
$exusers=[];// array of this instance's users already existing in the db
|
||||
$res=mysqli_query($link,'SELECT ID, locid, username FROM Users WHERE host=\''.myesc($link,$host).'\'') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
|
||||
$users=[];// array of users in this instance's directory
|
||||
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
|
||||
$chunk=0;
|
||||
$limit=80;
|
||||
$end=false;
|
||||
while (!$end) {
|
||||
$offset=$chunk*$limit;
|
||||
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
if (is_array($buf)) {
|
||||
//print_r($buf);
|
||||
if (count($buf)<$limit) $end=true;
|
||||
if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
|
||||
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint don’t have a “noindex” attribute; skipping directory fetching.'.N);
|
||||
break;
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
|
||||
}
|
||||
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
|
||||
foreach ($buf as $user) {
|
||||
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields'], $user)) {
|
||||
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
|
||||
if (!isset($user['noindex'])) {
|
||||
$user['noindex']=true;
|
||||
if (0==1) { // disabled; takes too long on instances with many users; see also the if statement before this foreach
|
||||
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
|
||||
$page=getfc($user['url'],$opts['timeout']);
|
||||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||||
if ($page['cont']!==false) {
|
||||
//<meta content='noindex, noarchive' name='robots'>
|
||||
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
|
||||
$user['noindex']=false;
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
|
||||
} else {
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
|
||||
}
|
||||
}
|
||||
}
|
||||
$snote=strip_tags($user['note']);
|
||||
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
|
||||
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
|
||||
$user['tags']=[];
|
||||
if (0==1 && !$user['noindex'] && $info['version']>='3.3.0') {// disabled; takes too long on instances with many users
|
||||
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
|
||||
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
|
||||
if ($tags['cont']!==false) {
|
||||
ckratelimit($tags['headers']);
|
||||
$tags=@json_decode($tags['cont'],true);
|
||||
if (is_array($tags) && count($tags)>0) {
|
||||
eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
|
||||
foreach($tags as $tag) $user['tags'][]=$tag['name'];
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
|
||||
}
|
||||
}
|
||||
$user['tags']=implode(';',$user['tags']);
|
||||
if ($user['tags']=='') $user['tags']=null;
|
||||
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
|
||||
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
|
||||
$users[$user['id']]=$user;
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
|
||||
//print_r($user);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
|
||||
$end=true;
|
||||
}
|
||||
$chunk++;
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
|
||||
$end=true;
|
||||
}
|
||||
}
|
||||
foreach ($users as $locid=>$user) {
|
||||
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
|
||||
$uid=0;
|
||||
if (!array_key_exists($user['id'],$exusers)) {
|
||||
if (!$user['noindex']) {
|
||||
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
|
||||
$query='INSERT INTO Users '.$query;
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$uid=mysqli_insert_id($link);
|
||||
}
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they set noindex...'.N);
|
||||
}
|
||||
} else {
|
||||
$uid=$exusers[$locid]['ID'];
|
||||
if (!$user['noindex']) {
|
||||
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
|
||||
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they set noindex...'.N);
|
||||
$query='DELETE FROM Users WHERE ID='.$uid;
|
||||
}
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
|
||||
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
|
||||
foreach ($user['fields'] as $field) {
|
||||
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
|
||||
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
|
||||
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach ($exusers as $locid=>$exuser) {
|
||||
if (!array_key_exists($locid,$users)) {
|
||||
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,'DELETE FROM Users WHERE ID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -859,49 +743,48 @@ while ($i<$cinsts) {
|
|||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$nrows=mysqli_num_rows($res);
|
||||
if ($nrows==1) {
|
||||
eecho(1,'«'.$host.'»: didn’t respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New and Instances.Dead.'.N);
|
||||
eecho(1,'«'.$host.'»: didn’t respond, but it is present in the database; updating InstChecks, Instances.LastCheckOk and possibly Instances.New=0 and Instances.Dead=1.'.N);
|
||||
$row=mysqli_fetch_assoc($res);
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 0)')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$row['ID'])
|
||||
$instid=$row['ID'];
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET LastCheckOk=0 WHERE ID='.$instid)
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if ($row['New']==1 && !is_null($row['FirstSeen']) && $now-$row['FirstSeen']>$opts['oldline']) {
|
||||
notify('Instance «<a href="viewinst.php?id='.$row['ID'].'">'.$row['URI'].'</a>» is no longer new.',2);
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET New=0 WHERE ID='.$row['ID'])
|
||||
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is no longer new.',2);
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET New=0 WHERE ID='.$instid)
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
|
||||
// we check the last time instance responded, if ever
|
||||
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' AND Status=1 ORDER BY Time DESC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=1 ORDER BY Time DESC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
// if instance never responded we consider the time of first check
|
||||
if (mysqli_num_rows($rres)==0) {
|
||||
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' AND Status=0 ORDER BY Time ASC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$rres=mysqli_query($link,'SELECT Time FROM InstChecks WHERE InstID='.$instid.' AND Status=0 ORDER BY Time ASC LIMIT 1') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
if (mysqli_num_rows($rres)>0) {
|
||||
$rrow=mysqli_fetch_assoc($rres);
|
||||
if ($now-$rrow['Time']>$opts['deadline']) {
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET Dead=1 WHERE ID='.$row['ID'])
|
||||
if (!$opts['dryrun']) mysqli_query($link,'UPDATE Instances SET Dead=1 WHERE ID='.$instid)
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
notify('Instance «<a href="viewinst.php?id='.$row['ID'].'">'.$row['URI'].'</a>» is dead!',2);
|
||||
notify('Instance «<a href="viewinst.php?id='.$instid.'">'.$row['URI'].'</a>» is dead!',2);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: exists in the database but there’s no data about it in InstChecks! I’ll remedy.'.N);
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks SET InstID='.$row['ID'].', Time='.$now.', Status=0')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
eecho(2,'«'.$host.'»: exists in the database but there’s no data about it in InstChecks!'.N);
|
||||
}
|
||||
} elseif ($nrows==0) {
|
||||
eecho(1,'«'.$host.'»: doesn’t respond and is not in the database, adding it.'.N);
|
||||
// "New=0" and "FirstSeen=NULL" because it's not new and not seen until it responds for the first time
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
mysqli_query($link,'INSERT INTO Instances SET FirstSeen=NULL, New=0, Good=0, Chosen=0, Visible=0, Noxious=0, URI=\''.myesc($link,$host).'\', LastCheckOk=0, InsertTS='.$now) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$instid=mysqli_insert_id($link);
|
||||
mysqli_query($link,'INSERT INTO InstChecks SET InstID='.$instid.', Time='.$now.', Status=0') or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
} else {
|
||||
$instid=0;
|
||||
}
|
||||
} else {
|
||||
notify('Instance «'.$row['URI'].'» has «'.$nrows.'» entries in «Instances» table!',3);
|
||||
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
|
||||
}
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 0)')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
} else {
|
||||
|
||||
// instance responded
|
||||
|
@ -924,7 +807,7 @@ while ($i<$cinsts) {
|
|||
if (!is_null($ismast))
|
||||
($ismast) ? $ismast=1 : $ismast=0;
|
||||
$instrow=array('ID'=>null, 'FirstSeen'=>null, 'IsMastodon'=>$ismast, 'Dead'=>0, 'New'=>0, 'Good'=>0, 'Chosen'=>0, 'Priority'=>null, 'Visible'=>0, 'Noxious'=>0, 'NoxReason'=>null, 'NoxLastModTS'=>null, 'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null, 'OurDescEN'=> null, 'LocalityID'=>null, 'OurLangsLock'=>0, 'Email'=>null, 'Software'=>null, 'Version'=>null, 'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null, 'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null, 'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null, 'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null, 'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null, 'LastCheckOk'=>1, 'GuestID'=>null, 'LastGuestEdit'=>null);
|
||||
$instrow['URI']=$info['uri'];
|
||||
$instrow['URI']=$host;
|
||||
if (akeavinn('title',$info))
|
||||
$instrow['Title']=nempty(truncs($info['title'],'Instances','Title','«'.$instrow['URI'].'»'));
|
||||
if (akeavinn('short_description',$info))
|
||||
|
@ -1103,7 +986,7 @@ while ($i<$cinsts) {
|
|||
}
|
||||
|
||||
} elseif ($nrows==0) {
|
||||
eecho(1,'«'.$info['uri'].'» is not present in the database, adding it...'.N);
|
||||
eecho(1,'«'.$host.'» is not present in the database, adding it...'.N);
|
||||
$instrow['FirstSeen']=$now;
|
||||
if ($opts['setnew'])
|
||||
$instrow['New']=1;
|
||||
|
@ -1117,7 +1000,7 @@ while ($i<$cinsts) {
|
|||
$values.='NULL, ';
|
||||
}
|
||||
$values=substr($values,0,-2);
|
||||
$query='INSERT INTO Instances ('.implode(', ',$fields).') VALUES ('.$values.')';
|
||||
$query='INSERT INTO Instances ('.implode(', ',$fields).', InsertTS) VALUES ('.$values.', '.$now.')';
|
||||
eecho(1,'«'.$host.'»: insert query: «'.$query.'»'.N);
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
|
@ -1125,13 +1008,15 @@ while ($i<$cinsts) {
|
|||
} else {
|
||||
$instid=0;
|
||||
}
|
||||
if ($opts['setnew'])
|
||||
if ($opts['setnew'] && !$opts['dryrun'])
|
||||
notify('New instance found: «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>».',1);
|
||||
|
||||
$instlangs=langs($instid, $instrow['URI'], false);
|
||||
foreach ($instlangs as $row) {
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if (!$opts['dryrun']) {
|
||||
foreach ($instlangs as $row) {
|
||||
mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
|
||||
$instourlangs=langs($instid, $instrow['URI'], true);
|
||||
|
@ -1149,18 +1034,19 @@ while ($i<$cinsts) {
|
|||
notify('New instance «<a href="viewinst.php?id='.$instid.'">'.$instrow['URI'].'</a>» is suitable!',1);
|
||||
|
||||
} else {
|
||||
notify('Instance «'.$row['URI'].'» has «'.$nrows.'» entries in «Instances» table!',3);
|
||||
notify('Instance «'.$host.'» has '.$nrows.' entries in «Instances» table!',3);
|
||||
}
|
||||
|
||||
if (array_key_exists('x-activity',$info) && is_array($info['x-activity'])) {
|
||||
if (!$opts['dryrun']) mysqli_query($link,'DELETE FROM InstActivity WHERE InstID='.$instid);
|
||||
$pos=0;
|
||||
foreach ($info['x-activity'] as $buf) {
|
||||
if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
|
||||
$pos++;
|
||||
$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
|
||||
if (!$opts['dryrun']) mysqli_query($link,$query)
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,'DELETE FROM InstActivity WHERE InstID='.$instid);
|
||||
$pos=0;
|
||||
foreach ($info['x-activity'] as $buf) {
|
||||
if (akeavinn('week',$buf) && akeavinn('statuses',$buf) && akeavinn('logins',$buf) && akeavinn('registrations',$buf)) {
|
||||
$pos++;
|
||||
$query='INSERT INTO InstActivity (InstID, Week, Statuses, Logins, Registrations, Pos) VALUES (\''.$instid.'\', \''.myesc($link,$buf['week']).'\', \''.myesc($link,$buf['statuses']).'\', \''.myesc($link,$buf['logins']).'\', \''.myesc($link,$buf['registrations']).'\', '.$pos.')';
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1197,11 +1083,151 @@ while ($i<$cinsts) {
|
|||
}
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$instid.', '.$now.', 1)')
|
||||
or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
|
||||
if ($opts['fetchusers'] && $ismast && array_key_exists('version',$info) && $info['version']>='4.0.0') {
|
||||
eecho(0,'«'.$host.'»: trying to fetch users info from directory API...'.N);
|
||||
$exusers=[];// array of this instance's users already existing in the db
|
||||
$res=mysqli_query($link,'SELECT ID, locid, username FROM Users WHERE InstID='.$instid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
while ($row=mysqli_fetch_assoc($res)) $exusers[$row['locid']]=$row;
|
||||
$users=[];// array of users in this instance's directory
|
||||
$chunk=0;
|
||||
$limit=80;
|
||||
$end=false;
|
||||
while (!$end) {
|
||||
$offset=$chunk*$limit;
|
||||
$buf=@getfc('https://'.$host.'/api/v1/directory?local=1&order=new&limit='.$limit.'&offset='.$offset,$opts['timeout']);
|
||||
if ($buf['cont']!==false) {
|
||||
ckratelimit($buf['headers']);
|
||||
eecho(1,'«'.$host.'»: got '.($chunk+1).' chunk(s) of users info from directory API :-)'.N);
|
||||
$buf=@json_decode($buf['cont'],true);
|
||||
if (is_array($buf)) {
|
||||
//print_r($buf);
|
||||
if (count($buf)<$limit) $end=true;
|
||||
/*if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
|
||||
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint don’t have a “noindex” attribute; skipping directory fetching.'.N);
|
||||
break;
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
|
||||
}*/
|
||||
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
|
||||
foreach ($buf as $user) {
|
||||
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields', 'noindex'], $user)) {
|
||||
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
|
||||
// disabled because it takes too long on instances with many users
|
||||
/*if (!isset($user['noindex'])) {
|
||||
$user['noindex']=true;
|
||||
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
|
||||
$page=getfc($user['url'],$opts['timeout']);
|
||||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||||
if ($page['cont']!==false) {
|
||||
//<meta content='noindex, noarchive' name='robots'>
|
||||
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
|
||||
$user['noindex']=false;
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
|
||||
} else {
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
|
||||
}
|
||||
}*/
|
||||
$snote=strip_tags($user['note']);
|
||||
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
|
||||
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex|okmhindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
|
||||
// disabled; takes too long on instances with many users
|
||||
/*$user['tags']=[];
|
||||
if (!$user['noindex'] && $info['version']>='3.3.0') {
|
||||
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
|
||||
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
|
||||
if ($tags['cont']!==false) {
|
||||
ckratelimit($tags['headers']);
|
||||
$tags=@json_decode($tags['cont'],true);
|
||||
if (is_array($tags) && count($tags)>0) {
|
||||
eecho(1,'«'.$host.'»: got '.count($tags).' tag(s) for user «'.$user['username'].'» :-)'.N);
|
||||
foreach($tags as $tag) $user['tags'][]=$tag['name'];
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch tags for user «'.$user['username'].'» :-( ('.$tags['emsg'].').'.N);
|
||||
}
|
||||
}
|
||||
$user['tags']=implode(';',$user['tags']);
|
||||
if ($user['tags']=='') $user['tags']=null;*/
|
||||
$user['tags']=null;
|
||||
if (!is_null($user['created_at'])) $user['created_at']=pgdatetomy($user['created_at']);
|
||||
if (!is_null($user['last_status_at'])) $user['last_status_at']=datetomy($user['last_status_at']);
|
||||
$users[$user['id']]=$user;
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: user record missed some required keys :-('.N);
|
||||
//print_r($user);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: ... but the chunk was not good JSON :-('.N);
|
||||
$end=true;
|
||||
}
|
||||
$chunk++;
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch users info from directory API: '.$buf['emsg'].N);
|
||||
$end=true;
|
||||
}
|
||||
}
|
||||
foreach ($users as $locid=>$user) {
|
||||
$query='SET InstID='.$instid.', host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
|
||||
$uid=0;
|
||||
if (!array_key_exists($user['id'],$exusers)) {
|
||||
if (!$user['noindex']) {
|
||||
eecho(0,'«'.$host.'»: inserting new user «'.$user['username'].'»...'.N);
|
||||
$query='INSERT INTO Users '.$query;
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
$uid=mysqli_insert_id($link);
|
||||
}
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: NOT inserting user «'.$user['username'].'» because they don’t want to be indexed...'.N);
|
||||
}
|
||||
} else {
|
||||
$uid=$exusers[$locid]['ID'];
|
||||
if (!$user['noindex']) {
|
||||
eecho(0,'«'.$host.'»: updating existing user «'.$user['username'].'» ('.$uid.')...'.N);
|
||||
$query='UPDATE Users '.$query.' WHERE ID='.$uid;
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: deleting existing user «'.$user['username'].'» ('.$uid.') because they don’t want to be indexed...'.N);
|
||||
$query='DELETE FROM Users WHERE ID='.$uid;
|
||||
}
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,$query) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$uid) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
if ($uid!=0 && !$user['noindex'] && is_array($user['fields']) && count($user['fields'])>0) {
|
||||
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
|
||||
foreach ($user['fields'] as $field) {
|
||||
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
|
||||
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
|
||||
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach ($exusers as $locid=>$exuser) {
|
||||
if (!array_key_exists($locid,$users)) {
|
||||
eecho(0,'«'.$host.'»: user «'.$exusers[$locid]['username'].'» opted out of the directory, deleting their record ('.$exuser['ID'].')...'.N);
|
||||
if (!$opts['dryrun']) {
|
||||
mysqli_query($link,'DELETE FROM Users WHERE ID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
mysqli_query($link,'DELETE FROM UsersFields WHERE UserID='.$exuser['ID']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mysqli_close($link);
|
||||
unset($link);
|
||||
|
||||
if ($opts['jsonwrite']) {
|
||||
fwrite($jsonf,'"The end?": true'.N.'}'.N);
|
||||
|
@ -1212,7 +1238,7 @@ unlink($instsjfp);
|
|||
unlink($currinstjfp);
|
||||
unlink($lockfp);
|
||||
|
||||
eecho(1,'Done :-)'.N);
|
||||
eecho(1,'Done (in '.ght(time()-$tini,null,0).') :-)'.N);
|
||||
|
||||
exit(0);
|
||||
|
||||
|
@ -1244,7 +1270,7 @@ function datetomy($date) {
|
|||
return(mktime(0,0,0,$date[1],$date[2],$date[0]));
|
||||
}
|
||||
|
||||
function ckratelimit($httpresphead,$verbose=false) {
|
||||
function ckratelimit($httpresphead) {
|
||||
$headers=explode("\r\n",$httpresphead);
|
||||
$buff=[];
|
||||
array_shift($headers);
|
||||
|
@ -1253,17 +1279,21 @@ function ckratelimit($httpresphead,$verbose=false) {
|
|||
$buff[strtolower($matches[1])]=trim($matches[2]);
|
||||
$headers=$buff;
|
||||
if (array_key_exists('x-ratelimit-reset',$headers)) {
|
||||
//Wed, 30 Mar 2022 21:27:22 GMT
|
||||
$srvnow=strtotime($headers['date']);
|
||||
//2022-03-31T04:05:00.058705Z
|
||||
$srvrlr=strtotime($headers['x-ratelimit-reset']);
|
||||
$stosl=$srvrlr-$srvnow+1;
|
||||
if ($verbose) eecho(0,'ckratelimit: x-ratelimit-remaining: '.$headers['x-ratelimit-remaining'].'; $srvnow: '.gmdate('c',$srvnow).'; $srvrlr: '.gmdate('c',$srvrlr).'; current time to sleep: '.$stosl.'.'.N);
|
||||
if ($headers['x-ratelimit-remaining']<3) {
|
||||
eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
|
||||
sleep($stosl);
|
||||
if (array_key_exists('date',$headers)) {
|
||||
//Wed, 30 Mar 2022 21:27:22 GMT
|
||||
$srvnow=strtotime($headers['date']);
|
||||
//2022-03-31T04:05:00.058705Z
|
||||
$srvrlr=strtotime($headers['x-ratelimit-reset']);
|
||||
$stosl=$srvrlr-$srvnow+1;
|
||||
//echo('ckratelimit: x-ratelimit-remaining: '.$headers['x-ratelimit-remaining'].'; $srvnow: '.gmdate('c',$srvnow).'; $srvrlr: '.gmdate('c',$srvrlr).'; current time to sleep: '.$stosl.'.'.N);
|
||||
if ($headers['x-ratelimit-remaining']<3) {
|
||||
eecho(2,'reached rate limit, sleeping for '.$stosl.' seconds ...'.N);
|
||||
sleep($stosl);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'ckratelimit: $httpresphead did not contain a «date» header!'.N);
|
||||
}
|
||||
} elseif ($verbose) {
|
||||
} else {
|
||||
eecho(2,'ckratelimit: $httpresphead did not contain an «x-ratelimit-reset» header!'.N);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue