Disabled fetching profile’s page when “noindex” is not set in account because it takes too long; disabled featured tags fetching for the same reason; other minor changes

This commit is contained in:
pezcurrel 2022-12-10 23:32:58 +01:00
parent f343cb702e
commit 61ad655a62

View file

@ -680,6 +680,7 @@ while ($i<$cinsts) {
eecho(2,'«'.$host.'»: could not fetch instance info from nodeinfo: '.$buf['emsg'].N);
}
if (array_key_exists('version',$info)) {
eecho(1,'«'.$host.'» software version is «'.$info['version'].'».');
if ($info['version']>='2.1.2') {
eecho(0,'«'.$host.'»: trying to fetch instance activity info from API...'.N);
$buf=@getfc('https://'.$host.'/api/v1/instance/activity',$opts['timeout']);
@ -720,32 +721,40 @@ while ($i<$cinsts) {
if (is_array($buf)) {
//print_r($buf);
if (count($buf)<$limit) $end=true;
if (count($buf)>0 && !array_key_exists('noindex',$buf[0])) {
eecho(2,'«'.$host.'»: account entities reported by directory api endpoint dont have a “noindex” attribute; skipping directory fetching.'.N);
break;
} else {
eecho(0,'«'.$host.'»: account entities reported by directory api endpoint do have a “noindex” attribute; continuing with directory fetching.'.N);
}
//foreach ($buf as $user) echo($user['username'].' '); echo(N.N);
foreach ($buf as $user) {
if (make(['id', 'username', 'display_name', 'locked', 'bot', 'discoverable', 'created_at', 'note', 'url', 'avatar', 'header', 'statuses_count', 'last_status_at', 'fields'], $user)) {
eecho(0,'«'.$host.'» ('.$i.'/'.$cinsts.'): working on user «'.$user['username'].'»...'.N);
if (!isset($user['noindex'])) {
$user['noindex']=true;
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it fetching users profile page...'.N);
$page=getfc($user['url'],$opts['timeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
//var_dump($page);
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
if (preg_match('/<meta\s+content=[\'"]noindex/ui',$page['cont'])!==1) {
$user['noindex']=false;
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
if (0==1) { // disabled; takes too long on instances with many users; see also the if statement before this foreach
eecho(0,'«'.$host.'»: «'.$user['username'].'»: «noindex» is undefined, trying to define it by fetching users profile page...'.N);
$page=getfc($user['url'],$opts['timeout']);
// here ckratelimit is not needed because it's a normal web page, not json from mastodon api
if ($page['cont']!==false) {
//<meta content='noindex, noarchive' name='robots'>
if (preg_match('/<meta\s+content=[\'"](noindex|noarchive)/ui',$page['cont'])!==1) {
$user['noindex']=false;
eecho(0,'«'.$user['url'].'»: «noindex» is not set.'.N);
} else {
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
}
} else {
eecho(0,'«'.$user['url'].'»: «noindex» is set.'.N);
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
}
} else {
eecho(2,'«'.$host.'»: could not fetch «'.$user['url'].'»: '.$page['emsg'].N);
}
}
$snote=strip_tags($user['note']);
if (preg_match('/(^|\W)#nobots?($|\W)/iu',$snote)===1) $user['noindex']=true;
if (preg_match('/(^|\W)#globdir($|\W)/iu',$snote)===1) $user['noindex']=false;
if (preg_match('/(?<!\w)#(nobots?|noindex)(?!\w)/iu',$snote)===1) $user['noindex']=true;
if (preg_match('/(?<!\w)#(okindex|yesindex|doindex)(?!\w)/iu',$snote)===1) $user['noindex']=false;
$user['tags']=[];
if (!$user['noindex'] && $info['version']>='3.3.0') {
if (0==1 && !$user['noindex'] && $info['version']>='3.3.0') {// disabled; takes too long on instances with many users
eecho(0,'«'.$host.'»: trying to fetch tags for user «'.$user['username'].'»...'.N);
$tags=@getfc('https://'.$host.'/api/v1/accounts/'.$user['id'].'/featured_tags',$opts['timeout']);
if ($tags['cont']!==false) {
@ -780,7 +789,7 @@ while ($i<$cinsts) {
}
}
foreach ($users as $locid=>$user) {
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.'));
$query='SET host='.myv($link,$host).', locid='.myv($link,$user['id']).', username='.myv($link,truncs($user['username'], 'Users', 'username', '«'.$host.: «'.$user['username'].')).', display_name='.myv($link,truncs($user['display_name'], 'Users', 'display_name', '«'.$host.: «'.$user['username'].')).', locked='.myv($link,$user['locked']).', bot='.myv($link,$user['bot']).', discoverable='.myv($link,$user['discoverable']).', created_at='.myv($link,$user['created_at']).', note='.myv($link,truncs($user['note'], 'Users', 'note', '«'.$host.: «'.$user['username'].')).', url='.myv($link,truncs($user['url'], 'Users', 'url', '«'.$host.: «'.$user['username'].')).', avatar='.myv($link,truncs($user['avatar'], 'Users', 'avatar', '«'.$host.: «'.$user['username'].')).', header='.myv($link,truncs($user['header'], 'Users', 'header', '«'.$host.: «'.$user['username'].')).', statuses_count='.myv($link,$user['statuses_count']).', last_status_at='.myv($link,$user['last_status_at']).', tags='.myv($link,truncs($user['tags'], 'Users', 'tags', '«'.$host.: «'.$user['username'].'));
$uid=0;
if (!array_key_exists($user['id'],$exusers)) {
if (!$user['noindex']) {
@ -811,8 +820,8 @@ while ($i<$cinsts) {
eecho(0,'«'.$host.'»: saving user fields for user «'.$user['username'].'» ('.$uid.')...'.N);
foreach ($user['fields'] as $field) {
(is_null($field['verified_at'])) ? $field['verified_at']=0 : $field['verified_at']=1;
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.');
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.');
$field['name']=truncs($field['name'],'UsersFields','name','«'.$host.: «'.$user['username'].');
$field['value']=truncs($field['value'],'UsersFields','value','«'.$host.: «'.$user['username'].');
if (!$opts['dryrun']) mysqli_query($link,'INSERT INTO UsersFields SET UserID='.$uid.', name='.myv($link,$field['name']).', value='.myv($link,$field['value']).', verified='.$field['verified_at']) or mexit(__LINE__.': '.mysqli_error($link).N,3);
}
}