Now, by default, the script adds a link to the original Gancio post only if its text doesn’t fit into the Mastodon post; added an “always_link_gancio_post” option to make it always add the link; added a “test mode”; fixed some flaws; updated help text
This commit is contained in:
parent
8929654cdf
commit
70c09b4e7d
1 changed files with 87 additions and 36 deletions
123
gancioff
123
gancioff
|
@ -105,22 +105,48 @@ post_visibility = unlisted
|
|||
# «fedi_hostname»; it can be used for testing purposes or just to keep the
|
||||
# posts shorter than they would be otherwise; for example:
|
||||
max_post_length = 840
|
||||
|
||||
# «always_link_gancio_post» is optional and if unspecified it defaults to
|
||||
# «false», which means that {$SNAME} adds to the Mastodon post a link to the
|
||||
# original Gancio post only if the latter is too long to fit into the former
|
||||
# (i.e. into the Mastodon instance “max post length”, or into the
|
||||
# «max_post_length» specified in this configuration file - see above); this
|
||||
# way, {$SNAME} reduces the burden on the Gancio instance that is due to the
|
||||
# requests that it gets from every Mastodon instance where a Mastodon post
|
||||
# with a link to the original Gancio post will end up, in order for each of
|
||||
# them to generate a “link preview”; this burden is reduced to different
|
||||
# degrees depending on the average length of a post on the Gancio instance
|
||||
# and on the “max post length” on the Mastodon instance that {$SNAME} is using
|
||||
# to post (or on the «max_post_length» explicitly specified in this file).
|
||||
# If set to «true», {$SNAME} will instead always add a link to the original
|
||||
# Gancio post.
|
||||
always_link_gancio_post = true
|
||||
--- End of example configuration file ---
|
||||
|
||||
[[[ OPTIONS ]]]
|
||||
|
||||
-h / --help
|
||||
Show this help text and exit.
|
||||
-t / --test
|
||||
Do a test: {$SNAME} will try as always to read the configuration file, fetch
|
||||
the defined Mastodon instance’s info, load the state file and fetch the feed,
|
||||
but it will post only the first event it may find there, with a visibility of
|
||||
«direct», even if according to the state file it has already been posted, and
|
||||
won’t update the state file.
|
||||
-p / --do-post <y|n>
|
||||
Setting this option to «n» («no») will make {$SNAME} skip posting. Note that
|
||||
even in this case it will save into the state file the GUIDs of new events
|
||||
it may find in the feed, so it won’t post them even on subsequent runs.
|
||||
Also note that, in order to prevent you from flooding your instance’s local
|
||||
timeline on {$SNAME} first run on a given feed (i.e. when the state file
|
||||
specified in the configuration file doesn’t exist yet), if events in the feed
|
||||
are more than 5 and «posts_visibility» is set to «public» {$SNAME} refuses to
|
||||
post them unless you explicitly set this option to «n» («no») or «y» («yes»).
|
||||
When the state file exists, this option defaults to «y» («yes»).
|
||||
When a state file already exists, this option defaults to «y» («yes»), which
|
||||
means that {$SNAME} will try to post all the new events it may find in the
|
||||
feed; if set to «n» («no»), {$SNAME} will not try to post them, but it will
|
||||
save their GUIDs into the state file nonetheless, so they won’t be posted
|
||||
again on subsequent runs.
|
||||
This is mainly useful on {$SNAME}’s first run on a given feed, i.e. when
|
||||
the state file specified in the configuration file doesn’t exist yet and thus
|
||||
all the events in the feed will be considered “new”: in this case, {$SNAME}
|
||||
refuses to run unless you explicitly set this option to «y» or «n»: this is a
|
||||
way to prevent you from unintentionally flooding your Mastodon instance with
|
||||
all the events in the feed.
|
||||
When “test mode” is active (see the previous option description), setting
|
||||
this option has no effect.
|
||||
-v / --verbose
|
||||
Show some more messages about what the script is doing.
|
||||
--
|
||||
|
@ -148,10 +174,12 @@ $conf=[
|
|||
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
|
||||
'posts_language'=>['required'=>true, 'default'=>null],
|
||||
'posts_visibility'=>['required'=>false, 'default'=>'public'],
|
||||
'max_post_length'=>['required'=>false, 'default'=>null]
|
||||
'max_post_length'=>['required'=>false, 'default'=>null],
|
||||
'always_link_gancio_post'=>['required'=>false, 'default'=>false]
|
||||
];
|
||||
|
||||
$opts=[
|
||||
'test'=>false,
|
||||
'do-post'=>null,
|
||||
'verbose'=>false,
|
||||
'update-language-codes'=>false
|
||||
|
@ -165,6 +193,8 @@ for ($i=1; $i<$argc; $i++) {
|
|||
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
|
||||
echo $help;
|
||||
exit(0);
|
||||
} elseif ($argv[$i]=='-t' || $argv[$i]=='--test') {
|
||||
$opts['test']=true;
|
||||
} elseif ($argv[$i]=='-p' || $argv[$i]=='--do-post') {
|
||||
if ($i+1>=$argc) dieYoung("Error: option «{$argv[$i]}» requires an argument; use «-h» or «--help» to display help.\n",1);
|
||||
if ($argv[$i+1]=='y')
|
||||
|
@ -231,6 +261,12 @@ if (!is_null($conf['max_post_length'])) {
|
|||
$conf['max_post_length']+=0;
|
||||
echo "Info: got «{$conf['max_post_length']}» as «max_post_length» from configuration file.\n";
|
||||
}
|
||||
// Validate «always_link_gancio_post» and normalise it to a real boolean.
// The pattern check is applied only to string values: the default for this
// option in $conf is boolean false, which preg_match() would receive as ''
// and reject, making the script die whenever the option is left unspecified.
if (is_string($conf['always_link_gancio_post']) && preg_match('#^(true|false)$#',$conf['always_link_gancio_post'])!==1) {
	dieYoung("Error: configuration file: «always_link_gancio_post» must be «true» or «false».\n",1);
} else {
	// Only the literal string «true» (or an already-boolean true) enables the option;
	// «false» and the unset default (false/null) all normalise to false.
	$conf['always_link_gancio_post']=($conf['always_link_gancio_post']===true || $conf['always_link_gancio_post']==='true');
}
|
||||
if ($opts['test']) $conf['posts_visibility']='direct';
|
||||
echo "Info: got good configuration from configuration file.\n";
|
||||
|
||||
$tldsregex=gettlds(__DIR__.'/storage/tlds.txt',true);
|
||||
|
@ -255,22 +291,8 @@ if (is_null($conf['max_post_length'])) {
|
|||
}
|
||||
//print_r($conf);
|
||||
|
||||
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
|
||||
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
|
||||
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
|
||||
// Abort on any non-200 answer; report the status code of the feed response itself ($feed), not of another request's result.
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1);
|
||||
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
|
||||
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
|
||||
//print_r($feed);
|
||||
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
|
||||
$itemsCount=$feed->channel->item->count();
|
||||
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
|
||||
echo "Info: got good feed from «{$conf['feed_url']}».\n";
|
||||
|
||||
$guids=[];
|
||||
echo "Info: trying to load GUIDs of already posted events from state file «{$conf['state_file_absolute_path']}».\n";
|
||||
if (!file_exists($conf['state_file_absolute_path']) && is_null($opts['do-post']) && $conf['posts_visibility']=='public' && $itemsCount>5) dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}», and since statuses visibility is set to «public» and the feed contains more than 5 events you have to decide whether it would be ok to post {$itemsCount} statuses that would show up, probably all in a row, on «{$conf['fedi_hostname']}»’s local timeline, and explicitly declare whether you want {$SNAME} to post them or not by setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); use «-h» or «--help» to display help.\n",1);
|
||||
if (is_null($opts['do-post'])) $opts['do-post']=true;
|
||||
if (file_exists($conf['state_file_absolute_path'])) {
|
||||
if (!is_file($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not a file.\n",1);
|
||||
if (!is_readable($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not readable.\n",1);
|
||||
|
@ -295,12 +317,31 @@ if (file_exists($conf['state_file_absolute_path'])) {
|
|||
}
|
||||
fclose($fh);
|
||||
echo 'Info: got '.count($guids)." GUID(s) for already posted event(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} line(s) older than one year.\n";
|
||||
} elseif (is_null($opts['do-post']) && !$opts['test']) {
|
||||
dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}»; thus, all the events {$SNAME} may find in the feed will be considered new and, as a precaution against flooding your local timeline, you have to explicitly declare whether you want it to post them all, or not, by explicitly setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); mind that in both cases they will be recorded as posted in the state file, and won’t be posted again on subsequent runs (you can use «-h» or «--help» to display help).\n",1);
|
||||
} else {
|
||||
echo "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n";
|
||||
}
|
||||
if (is_null($opts['do-post']) || $opts['test']) $opts['do-post']=true;
|
||||
|
||||
if (($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
|
||||
$newItemsCount=0;
|
||||
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
|
||||
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
|
||||
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
|
||||
// Abort on any non-200 answer; report the status code of the feed response itself ($feed), not of another request's result.
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1);
|
||||
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
|
||||
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
|
||||
//print_r($feed);
|
||||
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
|
||||
$itemsCount=$feed->channel->item->count();
|
||||
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
|
||||
foreach ($feed->channel->item as $item)
|
||||
if (isset($item->guid) && $item->guid->__toString()!='' && !array_key_exists($item->guid->__toString(),$guids))
|
||||
$newItemsCount++;
|
||||
echo "Info: got good feed with {$itemsCount} events ({$newItemsCount} new) from «{$conf['feed_url']}».\n";
|
||||
if (!$opts['test'] && $newItemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» had no new events, bye.\n");
|
||||
|
||||
if (!$opts['test'] && ($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
|
||||
$goodPostsCount=0;
|
||||
$index=0;
|
||||
foreach ($feed->channel->item as $item) {
|
||||
|
@ -308,8 +349,7 @@ foreach ($feed->channel->item as $item) {
|
|||
// print_r($item);
|
||||
if (!isset($item->guid) || ($guid=$item->guid->__toString())=='') {
|
||||
fwrite(STDERR,"Warning: event #{$index} has no GUID, skipping.\n");
|
||||
} elseif (!array_key_exists($guid,$guids)) {
|
||||
$newItemsCount++;
|
||||
} elseif (!array_key_exists($guid,$guids) || $opts['test']) {
|
||||
$file=null;
|
||||
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length']))
|
||||
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
|
||||
|
@ -339,12 +379,17 @@ foreach ($feed->channel->item as $item) {
|
|||
$pcats='';
|
||||
}
|
||||
$post="{$ptext}{$plink}{$pcats}";
|
||||
if (postLength($post,$tldsregex['tlds'])<=$conf['max_post_length'] && !$conf['always_link_gancio_post']) {
|
||||
$plink='';
|
||||
} else {
|
||||
$pcats='';
|
||||
}
|
||||
$post="{$ptext}{$plink}{$pcats}";
|
||||
while (postLength($post,$tldsregex['tlds'])>$conf['max_post_length'] && $ptext!='') {
|
||||
$ptext=preg_replace('#\S+\W*$#','',$ptext);
|
||||
// echo "[[[{$ptext}]]]\n";
|
||||
$post="{$ptext}[…]{$plink}{$pcats}";
|
||||
}
|
||||
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) $post=$plink;
|
||||
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) {
|
||||
fwrite(STDERR,"Warning: could not shorten post for event «{$guid}» to make it fit into {$conf['max_post_length']} characters; won’t post.\n");
|
||||
} else {
|
||||
|
@ -428,7 +473,7 @@ foreach ($feed->channel->item as $item) {
|
|||
// print_r($res['content']);
|
||||
$now=time();
|
||||
$guids[$guid]=$now;
|
||||
fwrite($fh,"{$now}\t{$guid}\n");
|
||||
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
|
||||
$goodPostsCount++;
|
||||
}
|
||||
}
|
||||
|
@ -436,20 +481,26 @@ foreach ($feed->channel->item as $item) {
|
|||
echo "Info: would have posted status for event «{$guid}».\n";
|
||||
$now=time();
|
||||
$guids[$guid]=$now;
|
||||
fwrite($fh,"{$now}\t{$guid}\n");
|
||||
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
|
||||
$goodPostsCount++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($opts['verbose']) echo "Info: event «{$guid}» has already been posted on ".date('c',$guids[$guid]).", skipping.\n";
|
||||
}
|
||||
// fclose($fh); exit(0);// to test a single post
|
||||
if ($opts['test']) break;// to test a single post
|
||||
}
|
||||
if (!$opts['test']) fclose($fh);
|
||||
// Final summary.
// Normal mode: report how many of the new events were posted, or, when posting
// is disabled via «--do-post n», how many would have been attempted.
// Test mode: the item loop stops after the first event, so $goodPostsCount
// is 1 on success and anything else means that single test post failed.
if (!$opts['test']) {
	if ($opts['do-post'])
		echo "Info: successfully posted {$goodPostsCount} of {$newItemsCount} new event(s) (of {$itemsCount} total events in the feed).\n";
	else
		echo "Info: would have tried to post {$newItemsCount} new event(s) of {$itemsCount} total events in the feed.\n";
} elseif ($goodPostsCount==1) {
	echo "Info: successfully posted the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
} else {
	echo "Info: failed to post the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
}
|
||||
fclose($fh);
|
||||
if ($opts['do-post'])
|
||||
echo "Info: feed got {$itemsCount} events; succesfully posted {$goodPostsCount} of {$newItemsCount} new event(s).\n";
|
||||
else
|
||||
echo "Info: feed got {$itemsCount} event(s), {$newItemsCount} new.\n";
|
||||
|
||||
exit(0);
|
||||
|
||||
|
|
Loading…
Reference in a new issue