Disabled fetching the profile’s page when “noindex” is not set in the account, because it takes too long; disabled featured tags fetching for the same reason; other minor changes
This commit is contained in:
parent
f343cb702e
commit
61ad655a62
1 changed files with 27 additions and 18 deletions
|
@ -680,6 +680,7 @@ while ($i<$cinsts) {
|
|||
eecho(2,'«'.$host.'»: could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
|
||||
}
|
||||
if (array_key_exists('version',$info)) {
|
||||
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».');
|
||||
if ($info['version']>='2.1.2') {
|
||||
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
|
||||
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
|
||||
|
@ -720,32 +721,40 @@ while ($i<$cinsts) {
|
|||
if (is_array($buf)) {
|
||||
//print_r($buf);
|
||||
if (count($buf)<$limit) $end=true;
|
||||
if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
|
||||
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint don’t have a “noindex” attribute; skipping directory fetching.'.N);
|
||||
break;
|
||||
} else {
|
||||
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
|
||||
}
|
||||
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
|
||||
foreach ($buf as $user) {
|
||||
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields'], $user)) {
|
||||
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
|
||||
if (!isset($user['noindex'])) {
|
||||
$user['noindex']=true;
|
||||
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it fetching user’s profile page...'.N);
|
||||
$page=getfc($user['url'],$opts['timeout']);
|
||||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||||
//var_dump($page);
|
||||
if ($page['cont']!==false) {
|
||||
//<meta content='noindex, noarchive' name='robots'>
|
||||
if (preg_match('/<meta\s+content=[\'"]noindex/ui',$page['cont'])!==1) {
|
||||
$user['noindex']=false;
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
|
||||
if (0==1) { // disabled; takes too long on instances with many users; see also the if statement before this foreach
|
||||
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching user’s profile page...'.N);
|
||||
$page=getfc($user['url'],$opts['timeout']);
|
||||
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
|
||||
if ($page['cont']!==false) {
|
||||
//<meta content='noindex, noarchive' name='robots'>
|
||||
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
|
||||
$user['noindex']=false;
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
|
||||
} else {
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
|
||||
}
|
||||
} else {
|
||||
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
|
||||
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
|
||||
}
|
||||
} else {
|
||||
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
|
||||
}
|
||||
}
|
||||
$snote=strip_tags($user['note']);
|
||||
if (preg_match('/(^|\W)#nobots?($|\W)/iu',$snote)===1) $user['noindex']=true;
|
||||
if (preg_match('/(^|\W)#globdir($|\W)/iu',$snote)===1) $user['noindex']=false;
|
||||
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
|
||||
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
|
||||
$user['tags']=[];
|
||||
if (!$user['noindex'] && $info['version']>='3.3.0') {
|
||||
if (0==1 && !$user['noindex'] && $info['version']>='3.3.0') {// disabled; takes too long on instances with many users
|
||||
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
|
||||
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
|
||||
if ($tags['cont']!==false) {
|
||||
|
@ -780,7 +789,7 @@ while ($i<$cinsts) {
|
|||
}
|
||||
}
|
||||
foreach ($users as $locid=>$user) {
|
||||
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»'));
|
||||
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.'»: «'.$user['username'].'»')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.'»: «'.$user['username'].'»')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.'»: «'.$user['username'].'»')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.'»: «'.$user['username'].'»')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.'»: «'.$user['username'].'»')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.'»: «'.$user['username'].'»')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'»: «'.$user['username'].'»'));
|
||||
$uid=0;
|
||||
if (!array_key_exists($user['id'],$exusers)) {
|
||||
if (!$user['noindex']) {
|
||||
|
@ -811,8 +820,8 @@ while ($i<$cinsts) {
|
|||
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
|
||||
foreach ($user['fields'] as $field) {
|
||||
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
|
||||
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»');
|
||||
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»');
|
||||
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.'»: «'.$user['username'].'»');
|
||||
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.'»: «'.$user['username'].'»');
|
||||
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue