From 70c09b4e7dd7018dc833eff1ff342d5d96fd2eab Mon Sep 17 00:00:00 2001 From: pezcurrel Date: Fri, 25 Oct 2024 17:58:30 +0200 Subject: [PATCH] =?UTF-8?q?Now,=20by=20default,=20the=20script=20adds=20a?= =?UTF-8?q?=20link=20to=20the=20original=20post=20only=20if=20its=20text?= =?UTF-8?q?=20doesn=E2=80=99t=20fit=20into=20the=20post=20to=20be=20posted?= =?UTF-8?q?;=20added=20a=20=E2=80=9Calways=5Flink=5Fgancio=5Fpost=E2=80=9D?= =?UTF-8?q?=20to=20make=20it=20do=20it=20always;=20added=20a=20=E2=80=9Cte?= =?UTF-8?q?st=20mode=E2=80=9D;=20fixed=20some=20flaws;=20updated=20help=20?= =?UTF-8?q?text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gancioff | 123 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/gancioff b/gancioff index c3f8618..99c4eec 100755 --- a/gancioff +++ b/gancioff @@ -105,22 +105,48 @@ post_visibility = unlisted # «fedi_hostname»; it can be used for testing purposes or just to keep the # posts shorter than they would be otherwise; for example: max_post_length = 840 + +# «always_link_gancio_post» is optional and if unspecified it defaults to +# «false», which means that {$SNAME} adds to the Mastodon post a link to the +# original Gancio post only if the latter is too long to fit into the first +# (i.e. 
into the Mastodon instance “max post length”, or into the +# «max_post_length» specified in this configuration file - see above); this +# way, {$SNAME} reduces the burden on the Gancio instance that is due to the +# requests that it gets from every Mastodon instance where a Mastodon post +# with a link to the original Gancio post will end up, in order for each of +# them to generate a “link preview”; such burden gets reduced in different +# measures depending on the average length of a post on the Gancio instance +# and on the “max post length” on the Mastodon instance that {$SNAME} is using +# to post (or on the «max_post_length» explicitly specified in this file). +# If set to «true», {$SNAME} will instead always add a link to the original +# Gancio post. +always_link_gancio_post = true --- End of example configuration file --- [[[ OPTIONS ]]] -h / --help Show this help text and exit. +-t / --test + Do a test: {$SNAME} will try as always to read the configuration file, fetch + the defined Mastodon instance’s info, load the state file and fetch the feed, + but it will post only the first event it may find there, with a visibility of + «direct», even if according to the state file it has already been posted, and + won’t update the state file. -p / --do-post - Setting this option to «n» («no») will make {$SNAME} skip posting. Note that - even in this case it will save into the state file the GUIDs of new events - it may find in the feed, so it won’t post them even on subsequent runs. - Also note that, in order to prevent you from flooding your instance’s local - timeline on {$SNAME} first run on a given feed (i.e. when the state file - specified in the configuration file doesn’t exist yet), if events in the feed - are more than 5 and «posts_visibility» is set to «public» {$SNAME} refuses to - post them unless you explicitly set this option to «n» («no») or «y» («yes»). - When the state file exists, this option defaults to «y» («yes»). 
+ When a state file already exists, this option defaults to «y» («yes»), which + means that {$SNAME} will try to post all the new events it may find in the + feed; if set to «n» («no»), {$SNAME} will not try to post them, but it will + save their GUIDs into the state file nonetheless, so they won’t be posted + again on subsequent runs. + This is mainly useful on {$SNAME}’s first run on a given feed, i.e. when + the state file specified in the configuration file doesn’t exist yet and thus + all the events in the feed will be considered “new”: in this case, {$SNAME} + refuses to run unless you explicitly set this option to «y» or «n»: this is a + way to prevent you from unintentionally flooding your Mastodon instance with + all the events in the feed. + When “test mode” is active (see the previous option description), setting + this option has no effect. -v / --verbose Show some more messages about what the script is doing. -- @@ -148,10 +174,12 @@ $conf=[ 'state_file_absolute_path'=>['required'=>true, 'default'=>null], 'posts_language'=>['required'=>true, 'default'=>null], 'posts_visibility'=>['required'=>false, 'default'=>'public'], - 'max_post_length'=>['required'=>false, 'default'=>null] + 'max_post_length'=>['required'=>false, 'default'=>null], + 'always_link_gancio_post'=>['required'=>false, 'default'=>false] ]; $opts=[ + 'test'=>false, 'do-post'=>null, 'verbose'=>false, 'update-language-codes'=>false @@ -165,6 +193,8 @@ for ($i=1; $i<$argc; $i++) { } elseif ($argv[$i]=='-h' || $argv[$i]=='--help') { echo $help; exit(0); + } elseif ($argv[$i]=='-t' || $argv[$i]=='--test') { + $opts['test']=true; } elseif ($argv[$i]=='-p' || $argv[$i]=='--do-post') { if ($i+1>=$argc) dieYoung("Error: option «{$argv[$i]}» requires an argument; use «-h» or «--help» to display help.\n",1); if ($argv[$i+1]=='y') @@ -231,6 +261,12 @@ if (!is_null($conf['max_post_length'])) { $conf['max_post_length']+=0; echo "Info: got «{$conf['max_post_length']}» as «max_post_length» from 
configuration file.\n"; } +if (is_string($conf['always_link_gancio_post']) && preg_match('#^(true|false)$#',$conf['always_link_gancio_post'])!==1) { + dieYoung("Error: configuration file: «always_link_gancio_post» must be «true» or «false».\n",1); +} else { + ($conf['always_link_gancio_post']=='true') ? $conf['always_link_gancio_post']=true : $conf['always_link_gancio_post']=false; +} +if ($opts['test']) $conf['posts_visibility']='direct'; echo "Info: got good configuration from configuration file.\n"; $tldsregex=gettlds(__DIR__.'/storage/tlds.txt',true); @@ -255,22 +291,8 @@ if (is_null($conf['max_post_length'])) { } //print_r($conf); -echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n"; -$feed=curl($conf['feed_url'],null,['Accept: application/xml']); -if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1); -if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']} returned http code «{$res['httpcode']}».\n",1); -$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA); -if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1); -//print_r($feed); -if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1); -$itemsCount=$feed->channel->item->count(); -if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n"); -echo "Info: got good feed from «{$conf['feed_url']}».\n"; - $guids=[]; echo "Info: trying to load GUIDs of already posted events from state file «{$conf['state_file_absolute_path']}».\n"; -if (!file_exists($conf['state_file_absolute_path']) && is_null($opts['do-post']) && $conf['posts_visibility']=='public' && $itemsCount>5) dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}», 
and since statuses visibility is set to «public» and the feed contains more than 5 events you have to decide whether it would be ok to post {$itemsCount} statuses that would show up, probably all in a row, on «{$conf['fedi_hostname']}»’s local timeline, and explicitly declare whether you want {$SNAME} to post them or not by setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); use «-h» or «--help» to display help.\n",1); -if (is_null($opts['do-post'])) $opts['do-post']=true; if (file_exists($conf['state_file_absolute_path'])) { if (!is_file($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not a file.\n",1); if (!is_readable($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not readable.\n",1); @@ -295,12 +317,31 @@ if (file_exists($conf['state_file_absolute_path'])) { } fclose($fh); echo 'Info: got '.count($guids)." GUID(s) for already posted event(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} line(s) older than one year.\n"; +} elseif (is_null($opts['do-post']) && !$opts['test']) { + dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}»; thus, all the events {$SNAME} may find in the feed will be considered new and, as a precaution against flooding your local timeline, you have to explicitly declare whether you want it to post them all, or not, by explicitly setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); mind that in both cases they will be recorded as posted in the state file, and won’t be posted again on subsequent runs (you can use «-h» or «--help» to display help).\n",1); } else { echo "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n"; } +if (is_null($opts['do-post']) || $opts['test']) $opts['do-post']=true; -if (($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) 
dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1); $newItemsCount=0; +echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n"; +$feed=curl($conf['feed_url'],null,['Accept: application/xml']); +if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1); +if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1); +$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA); +if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1); +//print_r($feed); +if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1); +$itemsCount=$feed->channel->item->count(); +if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n"); +foreach ($feed->channel->item as $item) + if (isset($item->guid) && $item->guid->__toString()!='' && !array_key_exists($item->guid->__toString(),$guids)) + $newItemsCount++; +echo "Info: got good feed with {$itemsCount} events ({$newItemsCount} new) from «{$conf['feed_url']}».\n"; +if (!$opts['test'] && $newItemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» had no new events, bye.\n"); + +if (!$opts['test'] && ($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1); $goodPostsCount=0; $index=0; foreach ($feed->channel->item as $item) { @@ -308,8 +349,7 @@ foreach ($feed->channel->item as $item) { // print_r($item); if (!isset($item->guid) || ($guid=$item->guid->__toString())=='') { fwrite(STDERR,"Warning: event #{$index} has no GUID, skipping.\n"); - } elseif (!array_key_exists($guid,$guids)) { - $newItemsCount++; + } elseif (!array_key_exists($guid,$guids) || $opts['test']) { 
$file=null; if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length'])) $file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()]; @@ -339,12 +379,17 @@ foreach ($feed->channel->item as $item) { $pcats=''; } $post="{$ptext}{$plink}{$pcats}"; + if (postLength($post,$tldsregex['tlds'])<=$conf['max_post_length'] && !$conf['always_link_gancio_post']) { + $plink=''; + } else { + $pcats=''; + } + $post="{$ptext}{$plink}{$pcats}"; while (postLength($post,$tldsregex['tlds'])>$conf['max_post_length'] && $ptext!='') { $ptext=preg_replace('#\S+\W*$#','',$ptext); // echo "[[[{$ptext}]]]\n"; $post="{$ptext}[…]{$plink}{$pcats}"; } - if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) $post=$plink; if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) { fwrite(STDERR,"Warning: could not shorten post for event «{$guid}» to make it fit into {$conf['max_post_length']} characters; won’t post.\n"); } else { @@ -428,7 +473,7 @@ foreach ($feed->channel->item as $item) { // print_r($res['content']); $now=time(); $guids[$guid]=$now; - fwrite($fh,"{$now}\t{$guid}\n"); + if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n"); $goodPostsCount++; } } @@ -436,20 +481,26 @@ foreach ($feed->channel->item as $item) { echo "Info: would have posted status for event «{$guid}».\n"; $now=time(); $guids[$guid]=$now; - fwrite($fh,"{$now}\t{$guid}\n"); + if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n"); $goodPostsCount++; } } } else { if ($opts['verbose']) echo "Info: event «{$guid}» has already been posted on ".date('c',$guids[$guid]).", skipping.\n"; } -// fclose($fh); exit(0);// to test a single post + if ($opts['test']) break;// to test a single post +} +if (!$opts['test']) fclose($fh); +if (!$opts['test']) { + if ($opts['do-post']) + echo "Info: successfully posted {$goodPostsCount} of {$newItemsCount} new event(s) 
(of {$itemsCount} total events in the feed).\n"; + else + echo "Info: would have tried to post {$newItemsCount} new event(s) of {$itemsCount} total events in the feed.\n"; +} elseif ($goodPostsCount==1) { + echo "Info: successfully posted the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n"; +} else { + echo "Info: failed to post the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n"; } -fclose($fh); -if ($opts['do-post']) - echo "Info: feed got {$itemsCount} events; succesfully posted {$goodPostsCount} of {$newItemsCount} new event(s).\n"; -else - echo "Info: feed got {$itemsCount} event(s), {$newItemsCount} new.\n"; exit(0);