verbose/lib/mastodon.php
2023-11-20 12:42:57 +01:00

282 lines
10 KiB
PHP

<?php
// Warning: the postlength function requires the $retlds global variable to be
// defined. It has to be a reverse-ordered list of valid TLDs separated by "|".
// You can require gettlds.php in the calling script and use it to set the
// variable, like this:
// $retlds=gettlds(); $retlds=implode('|',$retlds);
// Tell whether $token has the expected token shape: exactly 43 characters
// taken from A-Z, a-z, 0-9, "_" and "-".
// Returns true when it does, false otherwise (non-string input included).
function validtoken($token) {
	if (!is_string($token))
		return false;
	// "\A" and "\z" anchor at the very start and end of the subject: a plain
	// "$" also matches right before a trailing "\n", so a token followed by
	// a newline used to be accepted (and the mast* helpers interpolate the
	// token into an HTTP header line)
	return (preg_match('#\A[A-Za-z0-9_-]{43}\z#',$token)===1);
}
// Send one HTTPS request to "https://{$host}{$endpoint}" and decode the JSON
// reply.
// - $context: array of stream context options (passed to stream_context_create)
// - $host, $endpoint: concatenated as they are to build the URL
// Returns an array that always has the keys "ok" and "headers":
// - on success: ['ok'=>true,'data'=>decoded JSON,'headers'=>response headers]
// - on failure: ['ok'=>false,'error'=>message,'headers'=>response headers,
//   or null when the request itself failed]
// The HTTP status line is not inspected: a reply counts as an error only if
// it cannot be decoded or if the decoded data carries an "error" entry.
function mastreq($context,$host,$endpoint) {
	$url="https://{$host}{$endpoint}";
	$body=@file_get_contents($url,false,stream_context_create($context));
	if ($body===false) {
		return ['ok'=>false,'error'=>"could not connect to «{$host}»",'headers'=>null];
	}
	// $http_response_header is filled in this scope by file_get_contents
	$headers=$http_response_header;
	$data=@json_decode($body,true);
	if ($data===null) {
		return ['ok'=>false,'error'=>"could not decode JSON data from «{$url}» (".json_last_error().': '.json_last_error_msg().")",'headers'=>$headers];
	}
	if (isset($data['error'])) {
		return ['ok'=>false,'error'=>lcfirst($data['error']),'headers'=>$headers];
	}
	return ['ok'=>true,'data'=>$data,'headers'=>$headers];
}
// Issue a GET request through mastreq.
// - $token: sent as "Authorization: Bearer ..." unless it is null
// - $timeout: passed as the "timeout" option of the http stream context
// Returns whatever mastreq returns.
function mastget($host,$token,$endpoint,$timeout) {
	$header="Content-type: application/x-www-form-urlencoded\r\nAccept: application/json\r\n";
	if ($token!==null) {
		$header.="Authorization: Bearer {$token}\r\n";
	}
	$http=[
		'header'=>$header,
		'method'=>'GET',
		'ignore_errors'=>true,// so that the body of 4xx/5xx replies is still returned
		'timeout'=>$timeout
	];
	return mastreq(['http'=>$http],$host,$endpoint);
}
// Issue a POST request with an urlencoded body through mastreq.
// - $content: data for http_build_query, sent as the request body
// - $token: sent as "Authorization: Bearer ..." unless it is null
// - $timeout: passed as the "timeout" option of the http stream context
// Returns whatever mastreq returns.
function mastpost($host,$token,$endpoint,$content,$timeout) {
	$header="Content-type: application/x-www-form-urlencoded\r\nAccept: application/json\r\n";
	if ($token!==null) {
		$header.="Authorization: Bearer {$token}\r\n";
	}
	$http=[
		'header'=>$header,
		'method'=>'POST',
		'ignore_errors'=>true,// so that the body of 4xx/5xx replies is still returned
		'content'=>http_build_query($content),
		'timeout'=>$timeout
	];
	return mastreq(['http'=>$http],$host,$endpoint);
}
// Helper of mastpostfile: build the parts of a multipart/form-data body for
// $fields, using $boundary as delimiter (the closing delimiter is NOT added).
// $prefix is only used by the recursion, to name nested fields "a[b]" the
// same way http_build_query does.
function mastmultipart($fields,$boundary,$prefix=null) {
	// inside a quoted name/filename, double quotes and line breaks have to be
	// percent-encoded
	$quote=function($str) {
		return str_replace(['"',"\r","\n"],['%22','%0D','%0A'],(string)$str);
	};
	$body='';
	foreach ($fields as $key=>$value) {
		$name=(is_null($prefix) ? (string)$key : "{$prefix}[{$key}]");
		if (is_null($value))
			continue;// http_build_query skips null values too
		$isfile=(is_array($value) && isset($value['content']) && is_string($value['content']));
		if ((is_array($value) || is_object($value)) && !$isfile) {
			$body.=mastmultipart($value,$boundary,$name);
			continue;
		}
		$body.="--{$boundary}\r\nContent-Disposition: form-data; name=\"".$quote($name).'"';
		if ($isfile) {
			$filename=(isset($value['filename']) ? $value['filename'] : 'file');
			$type=(isset($value['type']) ? $value['type'] : 'application/octet-stream');
			$body.='; filename="'.$quote($filename)."\"\r\nContent-Type: {$type}\r\n\r\n{$value['content']}\r\n";
		} else {
			if (is_bool($value))
				$value=($value ? '1' : '0');// same representation http_build_query uses
			$body.="\r\n\r\n{$value}\r\n";
		}
	}
	return $body;
}
// Issue a POST request with a multipart/form-data body through mastreq.
// - $content: array of fields; scalar values are sent as plain form fields,
//   nested arrays as "name[key]" fields, and a value shaped like
//   ['content'=>raw bytes,'filename'=>name,'type'=>mime type] is sent as a
//   file part ("filename" and "type" are optional)
// - $token: sent as "Authorization: Bearer ..." unless it is null
// - $timeout: passed as the "timeout" option of the http stream context
// Returns whatever mastreq returns.
// The request used to announce a multipart body (with the fixed boundary
// "boundary") while actually sending an urlencoded one, so the body held no
// multipart parts at all; now header and body agree.
function mastpostfile($host,$token,$endpoint,$content,$timeout) {
	// a per-request boundary makes a clash with the payload practically impossible
	$boundary='boundary'.md5(uniqid((string)mt_rand(),true));
	$content=mastmultipart($content,$boundary)."--{$boundary}--\r\n";
	$context=[
		'http'=>[
			'header'=>"Content-type: multipart/form-data; boundary={$boundary}\r\nAccept: application/json\r\n",
			'method'=>'POST',
			'ignore_errors'=>true,
			'content'=>$content,
			'timeout'=>$timeout
		]
	];
	if (!is_null($token))
		$context['http']['header'].="Authorization: Bearer {$token}\r\n";
	$res=mastreq($context,$host,$endpoint);
	return $res;
}
// Issue a DELETE request through mastreq.
// - $token: sent as "Authorization: Bearer ..." unless it is null
// - $timeout: passed as the "timeout" option of the http stream context
// Returns whatever mastreq returns.
function mastdel($host,$token,$endpoint,$timeout) {
	$header="Content-type: application/x-www-form-urlencoded\r\nAccept: application/json\r\n";
	if ($token!==null) {
		$header.="Authorization: Bearer {$token}\r\n";
	}
	$http=[
		'header'=>$header,
		'method'=>'DELETE',
		'ignore_errors'=>true,// so that the body of 4xx/5xx replies is still returned
		'timeout'=>$timeout
	];
	return mastreq(['http'=>$http],$host,$endpoint);
}
/*
some endpoints
get
auth required
verify app creds and get app info: /api/v1/apps/verify_credentials
verify user creds and get account info: /api/v1/accounts/verify_credentials
get a post: /api/v1/statuses/[id]
post
auth required
post a status: /api/v1/statuses
send follow request to an account: /api/v1/accounts/[id]/follow
unfollow an account: /api/v1/accounts/[id]/unfollow
*/
// Split $post into a numbered series of posts, each one fitting into
// $avchars characters as counted by postlength.
// - $post: the text to split; it is only cut at whitespace, except for
//   words that would not fit into a post of their own
// - $avchars: characters available for each post; to stay safe it should be
//   at least 30 (not tested with less)
// - $cw: content warning copied into every post; its length is counted
// - $pre: text prepended to every post; it can be used to list recipients
//   (in this case it has to end with a "\n" or " "), or for anything else
// - $cntup: if true the "[n/total]" counter goes on top of each post,
//   otherwise at its bottom
// Returns a list of ['cw'=>...,'post'=>...,'mastlen'=>...] arrays, where
// "mastlen" is the counted length of the post plus the length of $cw.
// If everything fits into a single post, that post gets no counter.
function splitpost($post,$avchars,$cw,$pre,$cntup) {
	// drop the blanks at the end of every line and at the end of the text
	$post=preg_replace('#[ \t\f\r]+\n#',"\n",$post);
	$post=rtrim($post);
	$postrlen=strlen($post);// in bytes, because it is compared with the byte offset $off
	$postlen=postlength($post);
	$cwlen=mb_strlen($cw,'UTF-8');
	$prelen=postlength($pre);
	if ($postlen+$prelen+$cwlen<=$avchars)
		return [['cw'=>$cw,'post'=>$pre.$post,'mastlen'=>$postlen+$prelen+$cwlen]];
	// there is no way to know the total number of posts before splitting,
	// and the length of that number changes the result, so we roughly and
	// cautiously underestimate it, just to spare cycles; $tot is a
	// placeholder made of as many "x" as the digits of the total
	$room=$avchars-7-2-$prelen-$cwlen;// "7" is the min length of the counter ("\n\n[x/x]"); 2 counts for start and end "…"
	$gtot=($room>0 ? ceil($postlen/$room) : 1);// no room at all: start from one digit instead of dividing by zero
	$tot=str_repeat('x',strlen((string)$gtot));
	while (true) {
		$totlen=strlen($tot);
		$spost=[];
		$buf='';
		$off=0;
		$i=1;
		while (true) {
			// more posts than the placeholder has digits for: do another
			// cycle with a longer placeholder
			if (strlen((string)$i)>$totlen) break;
			// the counter is measured with its real "\n\n" separator: a
			// non-blank stand-in would be taken as part of a trailing link
			// by postlength, that would then return a too short length
			$cnt="\n\n[{$i}/{$tot}]";
			if (preg_match('#(\S+)(\s+|$)#',$post,$found,PREG_OFFSET_CAPTURE,$off)!==1) {// done, last post
				$spost[]=['cw'=>$cw,'post'=>rtrim($buf)];
				break 2;
			}
			// whitespace skipped before the word (eg at the very start of
			// the text) is copied over and $off is moved onto the word, or
			// $off would lag behind and text would be read twice
			if ($found[0][1]>$off) {
				$buf.=substr($post,$off,$found[0][1]-$off);
				$off=$found[0][1];
			}
			// [0] is the word with its trailing whitespace, [1] the word alone
			$matches=[$found[0][0],$found[1][0]];
			$offadd=strlen($matches[0]);
			$dotsaddlen=($off+$offadd>=$postrlen ? 0 : 2);// if we are on the last word, we don't add "…"
			if ($prelen+$cwlen+postlength($buf.$matches[1].$cnt)+$dotsaddlen>$avchars) {// if current match would make buf+overhead overcome avchars
				$nxcntlen=$totlen+strlen((string)($i+1))+5;// next cnt may be different, so we precalc its length
				$nxdotsaddlen=(($i==1 || $dotsaddlen==0) ? 2 : 4);// if we are on first or last post, we add 1 "…"; otherwise we add 2
				if ($prelen+$cwlen+postlength($matches[1])+$nxcntlen+$nxdotsaddlen>$avchars) {// if next match+overhead is by itself longer than avchars
					// the word has to be broken: fill what is left of the current post with its beginning
					$len=$avchars-postlength($buf.$cnt)-$prelen-$cwlen-$dotsaddlen;
					// NOTE(review): when $len<=0 the word is carried as a
					// whole into the next post, that can then exceed $avchars
					if ($len>0) {
						// deactivate possible links because they will be broken
						$matches[0]=preg_replace('#^http(s)?://#','zttp$1://',$matches[0]);
						$matches[0]=preg_replace('#^@([a-zA-Z0-9_]+@[a-z0-9-]+)#','+$1',$matches[0]);
						$matches[0]=mb_substr($matches[0],0,$len,'UTF-8');
						// only the consumed part is skipped: the rest of the word is read again on the next round
						$offadd=strlen($matches[0]);
						$buf.=$matches[0];
						$matches[0]='';
					}
				}
				$spost[]=['cw'=>$cw,'post'=>rtrim($buf).' …'];
				$buf='… ';
				$i++;
			}
			$buf.=$matches[0];
			$off+=$offadd;
		}
		$tot.='x';
	}
	// here $i is the total number of posts
	if ($cntup)
		foreach ($spost as $key=>$post) {
			$spost[$key]['post']="{$pre}[".($key+1)."/{$i}]\n\n{$post['post']}";
			$spost[$key]['mastlen']=postlength($spost[$key]['post'])+$cwlen;
		}
	else
		foreach ($spost as $key=>$post) {
			$spost[$key]['post']="{$pre}{$post['post']}\n\n[".($key+1)."/{$i}]";
			$spost[$key]['mastlen']=postlength($spost[$key]['post'])+$cwlen;
		}
	return $spost;
}
// Return the length of $post, in UTF-8 characters, after these rewrites:
// - in "@user@domain" mentions only the "@user" part is kept
// - every http(s) link whose host ends with one of the tlds listed in the
//   $retlds global is replaced by a fixed-length placeholder
// $retlds has to be a "|" separated list of tlds, since it is inserted as it
// is into a regular expression.
function postlength($post) {
	global $retlds;
	$rules=[
		// for some reason, mastodon seems to check tld existence only on http(s) links - see next rule
		['#(^|\W)(@[a-zA-Z0-9_]+)@(([a-z0-9]([a-z0-9-]+[a-z0-9])?){1,63}\.)+([a-z0-9]([a-z0-9-]+[a-z0-9])?){1,63}\b#u','$1$2'],
		// on http(s) links mastodon checks if tld exists...
		['#(^|\W)https?://(([a-z0-9]([a-z0-9-]+[a-z0-9])?){1,63}\.)+('.$retlds.')(/\S*[\w/=_\-])?#u','$1UUUUUUUUUUUUUUUUUUUUUUU']
	];
	foreach ($rules as $rule) {
		$replaced=preg_replace($rule[0],$rule[1],$post);
		// preg_replace gives null on error: in that case the text is left as it is
		if ($replaced!==null) {
			$post=$replaced;
		}
	}
	return mb_strlen($post,'UTF-8');
}
// this function requires these to be defined:
// - an "evhandle" function to handle events
// - an "eecho" function to handle output
// - a "$doshut" global variable and a "shutdown" function: since "shutdown" is called at safe points only, it can be used e.g. to safely shut down the program when "$doshut" is set to true, for instance by a function bound to a signal with pcntl_signal(SIGTERM,'sighandler')
// see ocrbot for an example
// Connect to "tls://{$host}:{$port}", request $endpoint and pass every event
// received on that connection to evhandle, reconnecting forever.
// - $token: sent as "Authorization: Bearer ..."
// - $timeout: connection timeout handed to fsockopen
// Never returns by itself: the only way out is through shutdown(), which is
// called with the $doshut global before each connection attempt and before
// each read.
// Each event is passed to evhandle as ['type'=>...,'data'=>decoded JSON].
function evlisten($host,$port,$endpoint,$token,$timeout) {
	global $doshut;
	while (true) {
		shutdown($doshut);
		$dispurl="tls://{$host}:{$port}";
		eecho(1,"trying to connect to «{$dispurl}».");
		$sh=@fsockopen("tls://{$host}",$port,$errno,$errstr,$timeout);
		if ($sh===false) {
			eecho(3,"could not connect to «{$dispurl}»: {$errstr} ({$errno}); will try again in 1 second.");
			sleep(1);
			continue;
		}
		// reads give up after 1 second, so that shutdown() keeps being called
		// even when nothing is coming in
		stream_set_timeout($sh,1,0);
		eecho(1,"succesfully connected to «{$dispurl}».");
		$req="GET {$endpoint} HTTP/1.1\r\nHost: {$host}\r\nUser-Agent: a_bot\r\nAuthorization: Bearer {$token}\r\n\r\n";
		if (fwrite($sh,$req)===false) {
			eecho(3,"could not subscribe to user notifications on «{$dispurl}»; will try again in 1 second.");
			fclose($sh);
			unset($sh);// this is because shutdown can check if $sh is set and if it is, try to fclose it
			sleep(1);
			continue;
		}
		eecho(1,"listening for user notifications on «{$dispurl}».");
		while (!feof($sh)) {
			shutdown($doshut);
			$line=rtrim(fgets($sh),"\r\n");
			// anything that is not the start of an event is skipped
			if (preg_match('#^event: #',$line)!==1)
				continue;
			$event=['type'=>preg_replace('#^event: #','',$line),'data'=>''];
			$line=rtrim(fgets($sh),"\r\n");
			// an event not followed by a "data: " line is dropped
			if (preg_match('#^data: #',$line)!==1)
				continue;
			$event['data'].=preg_replace('#^data: #','',$line);
			// the data may go on over further pairs of lines, up to an empty
			// line: the first line of each pair is thrown away (presumably
			// the chunk size of a chunked reply - TODO confirm), the second
			// one is appended to the data
			while ($line!='') {
				$line=rtrim(fgets($sh),"\r\n");
				if ($line=='') break;
				$line=rtrim(fgets($sh),"\r\n");
				$event['data'].=$line;
			}
			// json_decode signals a failure with null, never with false, so
			// the error state is checked too (a literal JSON "null" is valid)
			$decoded=json_decode($event['data'],true);
			if (is_null($decoded) && json_last_error()!==JSON_ERROR_NONE) {
				eecho(2,"could not decode data for event of type «{$event['type']}».");
			} else {
				$event['data']=$decoded;
				evhandle($event);
			}
		}
		fclose($sh);
		unset($sh);// this is because shutdown can check if $sh is set and if it is, try to fclose it
		eecho(3,"lost connection to «{$dispurl}»; will try reconnecting in 1 second.");
		sleep(1);
	}
}
?>