Now, by default, the script adds a link to the original post only if its text doesn’t fit into the post to be published; added an “always_link_gancio_post” option to make it always add the link; added a “test mode”; fixed some flaws; updated the help text

This commit is contained in:
pezcurrel 2024-10-25 17:58:30 +02:00
parent 8929654cdf
commit 70c09b4e7d

123
gancioff
View file

@ -105,22 +105,48 @@ posts_visibility = unlisted
# «fedi_hostname»; it can be used for testing purposes or just to keep the
# posts shorter than they would be otherwise; for example:
max_post_length = 840
# «always_link_gancio_post» is optional and if unspecified it defaults to
# «false», which means that {$SNAME} adds to the Mastodon post a link to the
# original Gancio post only if the latter is too long to fit into the first
# (i.e. into the Mastodon instance “max post length”, or into the
# «max_post_length» specified in this configuration file - see above); this
# way, {$SNAME} reduces the burden on the Gancio instance that is due to the
# requests that it gets from every Mastodon instance where a Mastodon post
# with a link to the original Gancio post will end up, in order for each of
# them to generate a “link preview”; such burden gets reduced in different
# measures depending on the average length of a post on the Gancio instance
# and on the “max post length” on the Mastodon instance that {$SNAME} is using
# to post (or on the «max_post_length» explicitly specified in this file).
# If set to «true», {$SNAME} will instead always add a link to the original
# Gancio post.
always_link_gancio_post = true
--- End of example configuration file ---
[[[ OPTIONS ]]]
-h / --help
Show this help text and exit.
-t / --test
Do a test: {$SNAME} will try as always to read the configuration file, fetch
the defined Mastodon instances info, load the state file and fetch the feed,
but it will post only the first event it may find there, with a visibility of
«direct», even if according to the state file it has already been posted, and
won’t update the state file.
-p / --do-post <y|n>
Setting this option to «n» («no») will make {$SNAME} skip posting. Note that
even in this case it will save into the state file the GUIDs of new events
it may find in the feed, so it wont post them even on subsequent runs.
Also note that, in order to prevent you from flooding your instances local
timeline on {$SNAME} first run on a given feed (i.e. when the state file
specified in the configuration file doesnt exist yet), if events in the feed
are more than 5 and «posts_visibility» is set to «public» {$SNAME} refuses to
post them unless you explicitly set this option to «n» («no») or «y» («yes»).
When the state file exists, this option defaults to «y» («yes»).
When a state file already exists, this option defaults to «y» («yes»), which
means that {$SNAME} will try to post all the new events it may find in the
feed; if set to «n» («no»), {$SNAME} will not try to post them, but it will
save their GUIDs into the state file nonetheless, so they won’t be posted
again on subsequent runs.
This is mainly useful on {$SNAME}’s first run on a given feed, i.e. when
the state file specified in the configuration file doesn’t exist yet and thus
all the events in the feed will be considered “new”: in this case, {$SNAME}
refuses to run unless you explicitly set this option to «y» or «n»: this is a
way to prevent you from unintentionally flooding your Mastodon instance with
all the events in the feed.
When “test mode” is active (see the previous option description), setting
this option has no effect.
-v / --verbose
Show some more messages about what the script is doing.
--
@ -148,10 +174,12 @@ $conf=[
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
'posts_language'=>['required'=>true, 'default'=>null],
'posts_visibility'=>['required'=>false, 'default'=>'public'],
'max_post_length'=>['required'=>false, 'default'=>null]
'max_post_length'=>['required'=>false, 'default'=>null],
'always_link_gancio_post'=>['required'=>false, 'default'=>false]
];
$opts=[
'test'=>false,
'do-post'=>null,
'verbose'=>false,
'update-language-codes'=>false
@ -165,6 +193,8 @@ for ($i=1; $i<$argc; $i++) {
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
echo $help;
exit(0);
} elseif ($argv[$i]=='-t' || $argv[$i]=='--test') {
$opts['test']=true;
} elseif ($argv[$i]=='-p' || $argv[$i]=='--do-post') {
if ($i+1>=$argc) dieYoung("Error: option «{$argv[$i]}» requires an argument; use «-h» or «--help» to display help.\n",1);
if ($argv[$i+1]=='y')
@ -231,6 +261,12 @@ if (!is_null($conf['max_post_length'])) {
$conf['max_post_length']+=0;
echo "Info: got «{$conf['max_post_length']}» as «max_post_length» from configuration file.\n";
}
// Normalize «always_link_gancio_post» to a real boolean.
// The default declared for this option is the boolean false, not a string: if
// the configuration loader applies that default as is, running it through
// preg_match() would cast false to '' and wrongly reject an option that was
// simply left unspecified. A value that is already a boolean is therefore
// accepted untouched.
if (!is_bool($conf['always_link_gancio_post'])) {
	// Anything other than null must be exactly the string «true» or «false».
	if (!is_null($conf['always_link_gancio_post']) && preg_match('#^(true|false)$#',$conf['always_link_gancio_post'])!==1)
		dieYoung("Error: configuration file: «always_link_gancio_post» must be «true» or «false».\n",1);
	// Only the literal string «true» enables the option; null falls back to false.
	$conf['always_link_gancio_post']=($conf['always_link_gancio_post']==='true');
}
// Test mode always posts with «direct» visibility, whatever the configuration says.
if ($opts['test']) $conf['posts_visibility']='direct';
echo "Info: got good configuration from configuration file.\n";
$tldsregex=gettlds(__DIR__.'/storage/tlds.txt',true);
@ -255,22 +291,8 @@ if (is_null($conf['max_post_length'])) {
}
//print_r($conf);
// Download the feed; curl() is a helper defined elsewhere in this script.
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
// Report the HTTP code of this very request ($feed), not the one held by an
// unrelated $res variable, and close the «» quoting around the URL.
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1);
// Parse the body as XML; «@» silences libxml warnings, failure is checked right below.
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
//print_r($feed);
// The rest of the script iterates over and counts channel->item, so make sure it is there.
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
$itemsCount=$feed->channel->item->count();
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
echo "Info: got good feed from «{$conf['feed_url']}».\n";
$guids=[];
echo "Info: trying to load GUIDs of already posted events from state file «{$conf['state_file_absolute_path']}».\n";
if (!file_exists($conf['state_file_absolute_path']) && is_null($opts['do-post']) && $conf['posts_visibility']=='public' && $itemsCount>5) dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesnt exist yet, so this is probably a first run on feed «{$conf['feed_url']}», and since statuses visibility is set to «public» and the feed contains more than 5 events you have to decide whether it would be ok to post {$itemsCount} statuses that would show up, probably all in a row, on «{$conf['fedi_hostname']}»s local timeline, and explicitly declare whether you want {$SNAME} to post them or not by setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); use «-h» or «--help» to display help.\n",1);
if (is_null($opts['do-post'])) $opts['do-post']=true;
if (file_exists($conf['state_file_absolute_path'])) {
if (!is_file($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but its not a file.\n",1);
if (!is_readable($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but its not readable.\n",1);
@ -295,12 +317,31 @@ if (file_exists($conf['state_file_absolute_path'])) {
}
fclose($fh);
echo 'Info: got '.count($guids)." GUID(s) for already posted event(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} line(s) older than one year.\n";
} elseif (is_null($opts['do-post']) && !$opts['test']) {
dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesnt exist yet, so this is probably a first run on feed «{$conf['feed_url']}»; thus, all the events {$SNAME} may find in the feed will be considered new and, as a precaution against flooding your local timeline, you have to explicitly declare whether you want it to post them all, or not, by explicitly setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); mind that in both cases they will be recorded as posted in the state file, and wont be posted again on subsequent runs (you can use «-h» or «--help» to display help).\n",1);
} else {
echo "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n";
}
if (is_null($opts['do-post']) || $opts['test']) $opts['do-post']=true;
if (($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
$newItemsCount=0;
// Download the feed; curl() is a helper defined elsewhere in this script.
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
// Report the HTTP code of this very request ($feed), not the one held by an
// unrelated $res variable, and close the «» quoting around the URL.
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1);
// Parse the body as XML; «@» silences libxml warnings, failure is checked right below.
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
//print_r($feed);
// The rest of the script iterates over and counts channel->item, so make sure it is there.
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
$itemsCount=$feed->channel->item->count();
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
// Count as "new" the events that carry a non-empty GUID not yet present in $guids.
foreach ($feed->channel->item as $item) {
	if (isset($item->guid) && $item->guid->__toString()!='' && !array_key_exists($item->guid->__toString(),$guids)) {
		$newItemsCount++;
	}
}
echo "Info: got good feed with {$itemsCount} events ({$newItemsCount} new) from «{$conf['feed_url']}».\n";
// Outside test mode there is nothing left to do when no event is new.
if (!$opts['test'] && $newItemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» had no new events, bye.\n");
// Test mode never writes to the state file, so it is opened only for regular runs.
if (!$opts['test'] && ($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
$goodPostsCount=0;
$index=0;
foreach ($feed->channel->item as $item) {
@ -308,8 +349,7 @@ foreach ($feed->channel->item as $item) {
// print_r($item);
if (!isset($item->guid) || ($guid=$item->guid->__toString())=='') {
fwrite(STDERR,"Warning: event #{$index} has no GUID, skipping.\n");
} elseif (!array_key_exists($guid,$guids)) {
$newItemsCount++;
} elseif (!array_key_exists($guid,$guids) || $opts['test']) {
$file=null;
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length']))
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
@ -339,12 +379,17 @@ foreach ($feed->channel->item as $item) {
$pcats='';
}
// Assemble the candidate post from its three parts; $ptext, $plink and $pcats
// are built earlier in the loop (presumably event text, link to the original
// Gancio post and categories - confirm against the code above).
$post="{$ptext}{$plink}{$pcats}";
// If the whole post fits and the link is not forced by configuration, drop the
// link; in every other case keep the link and drop $pcats instead.
// NOTE(review): with «always_link_gancio_post» set to true this also drops
// $pcats when the post would have fit - confirm this is intended.
if (postLength($post,$tldsregex['tlds'])<=$conf['max_post_length'] && !$conf['always_link_gancio_post']) {
$plink='';
} else {
$pcats='';
}
// Rebuild the post with whichever part was blanked above.
$post="{$ptext}{$plink}{$pcats}";
// While the post is still too long, strip the last word (plus any trailing
// non-word characters) from the text and mark the cut with «[…]».
while (postLength($post,$tldsregex['tlds'])>$conf['max_post_length'] && $ptext!='') {
$ptext=preg_replace('#\S+\W*$#','',$ptext);
// echo "[[[{$ptext}]]]\n";
$post="{$ptext}[…]{$plink}{$pcats}";
}
// Last resort: if even the empty text does not make it fit, post the link alone.
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) $post=$plink;
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) {
fwrite(STDERR,"Warning: could not shorten post for event «{$guid}» to make it fit into {$conf['max_post_length']} characters; wont post.\n");
} else {
@ -428,7 +473,7 @@ foreach ($feed->channel->item as $item) {
// print_r($res['content']);
$now=time();
$guids[$guid]=$now;
fwrite($fh,"{$now}\t{$guid}\n");
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
$goodPostsCount++;
}
}
@ -436,20 +481,26 @@ foreach ($feed->channel->item as $item) {
echo "Info: would have posted status for event «{$guid}».\n";
$now=time();
$guids[$guid]=$now;
fwrite($fh,"{$now}\t{$guid}\n");
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
$goodPostsCount++;
}
}
} else {
if ($opts['verbose']) echo "Info: event «{$guid}» has already been posted on ".date('c',$guids[$guid]).", skipping.\n";
}
// fclose($fh); exit(0);// to test a single post
if ($opts['test']) break;// to test a single post
}
// Final report. The state file handle exists only outside test mode, so it is
// closed only there; test mode reports on the single event it tried to post.
if (!$opts['test']) {
	fclose($fh);
	if ($opts['do-post'])
		echo "Info: successfully posted {$goodPostsCount} of {$newItemsCount} new event(s) (of {$itemsCount} total events in the feed).\n";
	else
		echo "Info: would have tried to post {$newItemsCount} new event(s) of {$itemsCount} total events in the feed.\n";
} elseif ($goodPostsCount==1) {
	echo "Info: successfully posted the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
} else {
	echo "Info: failed to post the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
}
fclose($fh);
// Final report: how many events the feed held and how many of the new ones
// were posted (or just the counts, when posting was disabled).
if ($opts['do-post'])
	echo "Info: feed got {$itemsCount} events; successfully posted {$goodPostsCount} of {$newItemsCount} new event(s).\n";
else
	echo "Info: feed got {$itemsCount} event(s), {$newItemsCount} new.\n";
exit(0);