Now, by default, the script adds a link to the original post only if its text doesn’t fit into the post to be posted; added an “always_link_gancio_post” option to make it always add the link; added a “test mode”; fixed some flaws; updated help text
This commit is contained in:
parent
8929654cdf
commit
70c09b4e7d
1 changed files with 87 additions and 36 deletions
123
gancioff
123
gancioff
|
@ -105,22 +105,48 @@ post_visibility = unlisted
|
||||||
# «fedi_hostname»; it can be used for testing purposes or just to keep the
|
# «fedi_hostname»; it can be used for testing purposes or just to keep the
|
||||||
# posts shorter than they would be otherwise; for example:
|
# posts shorter than they would be otherwise; for example:
|
||||||
max_post_length = 840
|
max_post_length = 840
|
||||||
|
|
||||||
|
# «always_link_gancio_post» is optional and if unspecified it defaults to
|
||||||
|
# «false», which means that {$SNAME} adds to the Mastodon post a link to the
|
||||||
|
# original Gancio post only if the latter is too long to fit into the former
|
||||||
|
# (i.e. into the Mastodon instance “max post length”, or into the
|
||||||
|
# «max_post_length» specified in this configuration file - see above); this
|
||||||
|
# way, {$SNAME} reduces the burden on the Gancio instance that is due to the
|
||||||
|
# requests that it gets from every Mastodon instance where a Mastodon post
|
||||||
|
# with a link to the original Gancio post will end up, in order for each of
|
||||||
|
# them to generate a “link preview”; such burden gets reduced in different
|
||||||
|
# measures depending on the average length of a post on the Gancio instance
|
||||||
|
# and on the “max post length” on the Mastodon instance that {$SNAME} is using
|
||||||
|
# to post (or on the «max_post_length» explicitly specified in this file).
|
||||||
|
# If set to «true», {$SNAME} will instead always add a link to the original
|
||||||
|
# Gancio post.
|
||||||
|
always_link_gancio_post = true
|
||||||
--- End of example configuration file ---
|
--- End of example configuration file ---
|
||||||
|
|
||||||
[[[ OPTIONS ]]]
|
[[[ OPTIONS ]]]
|
||||||
|
|
||||||
-h / --help
|
-h / --help
|
||||||
Show this help text and exit.
|
Show this help text and exit.
|
||||||
|
-t / --test
|
||||||
|
Do a test: {$SNAME} will try as always to read the configuration file, fetch
|
||||||
|
the defined Mastodon instance’s info, load the state file and fetch the feed,
|
||||||
|
but it will post only the first event it may find there, with a visibility of
|
||||||
|
«direct», even if according to the state file it has already been posted, and
|
||||||
|
won’t update the state file.
|
||||||
-p / --do-post <y|n>
|
-p / --do-post <y|n>
|
||||||
Setting this option to «n» («no») will make {$SNAME} skip posting. Note that
|
When a state file already exists, this option defaults to «y» («yes»), which
|
||||||
even in this case it will save into the state file the GUIDs of new events
|
means that {$SNAME} will try to post all the new events it may find in the
|
||||||
it may find in the feed, so it won’t post them even on subsequent runs.
|
feed; if set to «n» («no»), {$SNAME} will not try to post them, but it will
|
||||||
Also note that, in order to prevent you from flooding your instance’s local
|
save their GUIDs into the state file nonetheless, so they won’t be posted
|
||||||
timeline on {$SNAME} first run on a given feed (i.e. when the state file
|
again on subsequent runs.
|
||||||
specified in the configuration file doesn’t exist yet), if events in the feed
|
This is mainly useful on {$SNAME}’s first run on a given feed, i.e. when
|
||||||
are more than 5 and «posts_visibility» is set to «public» {$SNAME} refuses to
|
the state file specified in the configuration file doesn’t exist yet and thus
|
||||||
post them unless you explicitly set this option to «n» («no») or «y» («yes»).
|
all the events in the feed will be considered “new”: in this case, {$SNAME}
|
||||||
When the state file exists, this option defaults to «y» («yes»).
|
refuses to run unless you explicitly set this option to «y» or «n»: this is a
|
||||||
|
way to prevent you from unintentionally flooding your Mastodon instance with
|
||||||
|
all the events in the feed.
|
||||||
|
When “test mode” is active (see the previous option description), setting
|
||||||
|
this option has no effect.
|
||||||
-v / --verbose
|
-v / --verbose
|
||||||
Show some more messages about what the script is doing.
|
Show some more messages about what the script is doing.
|
||||||
--
|
--
|
||||||
|
@ -148,10 +174,12 @@ $conf=[
|
||||||
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
|
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
|
||||||
'posts_language'=>['required'=>true, 'default'=>null],
|
'posts_language'=>['required'=>true, 'default'=>null],
|
||||||
'posts_visibility'=>['required'=>false, 'default'=>'public'],
|
'posts_visibility'=>['required'=>false, 'default'=>'public'],
|
||||||
'max_post_length'=>['required'=>false, 'default'=>null]
|
'max_post_length'=>['required'=>false, 'default'=>null],
|
||||||
|
'always_link_gancio_post'=>['required'=>false, 'default'=>false]
|
||||||
];
|
];
|
||||||
|
|
||||||
$opts=[
|
$opts=[
|
||||||
|
'test'=>false,
|
||||||
'do-post'=>null,
|
'do-post'=>null,
|
||||||
'verbose'=>false,
|
'verbose'=>false,
|
||||||
'update-language-codes'=>false
|
'update-language-codes'=>false
|
||||||
|
@ -165,6 +193,8 @@ for ($i=1; $i<$argc; $i++) {
|
||||||
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
|
} elseif ($argv[$i]=='-h' || $argv[$i]=='--help') {
|
||||||
echo $help;
|
echo $help;
|
||||||
exit(0);
|
exit(0);
|
||||||
|
} elseif ($argv[$i]=='-t' || $argv[$i]=='--test') {
|
||||||
|
$opts['test']=true;
|
||||||
} elseif ($argv[$i]=='-p' || $argv[$i]=='--do-post') {
|
} elseif ($argv[$i]=='-p' || $argv[$i]=='--do-post') {
|
||||||
if ($i+1>=$argc) dieYoung("Error: option «{$argv[$i]}» requires an argument; use «-h» or «--help» to display help.\n",1);
|
if ($i+1>=$argc) dieYoung("Error: option «{$argv[$i]}» requires an argument; use «-h» or «--help» to display help.\n",1);
|
||||||
if ($argv[$i+1]=='y')
|
if ($argv[$i+1]=='y')
|
||||||
|
@ -231,6 +261,12 @@ if (!is_null($conf['max_post_length'])) {
|
||||||
$conf['max_post_length']+=0;
|
$conf['max_post_length']+=0;
|
||||||
echo "Info: got «{$conf['max_post_length']}» as «max_post_length» from configuration file.\n";
|
echo "Info: got «{$conf['max_post_length']}» as «max_post_length» from configuration file.\n";
|
||||||
}
|
}
|
||||||
|
if (!is_null($conf['always_link_gancio_post']) && preg_match('#^(true|false)$#',$conf['always_link_gancio_post'])!==1) {
|
||||||
|
dieYoung("Error: configuration file: «always_link_gancio_post» must be «true» or «false».\n",1);
|
||||||
|
} else {
|
||||||
|
($conf['always_link_gancio_post']=='true') ? $conf['always_link_gancio_post']=true : $conf['always_link_gancio_post']=false;
|
||||||
|
}
|
||||||
|
if ($opts['test']) $conf['posts_visibility']='direct';
|
||||||
echo "Info: got good configuration from configuration file.\n";
|
echo "Info: got good configuration from configuration file.\n";
|
||||||
|
|
||||||
$tldsregex=gettlds(__DIR__.'/storage/tlds.txt',true);
|
$tldsregex=gettlds(__DIR__.'/storage/tlds.txt',true);
|
||||||
|
@ -255,22 +291,8 @@ if (is_null($conf['max_post_length'])) {
|
||||||
}
|
}
|
||||||
//print_r($conf);
|
//print_r($conf);
|
||||||
|
|
||||||
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
|
|
||||||
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
|
|
||||||
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
|
|
||||||
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']} returned http code «{$res['httpcode']}».\n",1);
|
|
||||||
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
|
|
||||||
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
|
|
||||||
//print_r($feed);
|
|
||||||
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
|
|
||||||
$itemsCount=$feed->channel->item->count();
|
|
||||||
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
|
|
||||||
echo "Info: got good feed from «{$conf['feed_url']}».\n";
|
|
||||||
|
|
||||||
$guids=[];
|
$guids=[];
|
||||||
echo "Info: trying to load GUIDs of already posted events from state file «{$conf['state_file_absolute_path']}».\n";
|
echo "Info: trying to load GUIDs of already posted events from state file «{$conf['state_file_absolute_path']}».\n";
|
||||||
if (!file_exists($conf['state_file_absolute_path']) && is_null($opts['do-post']) && $conf['posts_visibility']=='public' && $itemsCount>5) dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}», and since statuses visibility is set to «public» and the feed contains more than 5 events you have to decide whether it would be ok to post {$itemsCount} statuses that would show up, probably all in a row, on «{$conf['fedi_hostname']}»’s local timeline, and explicitly declare whether you want {$SNAME} to post them or not by setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); use «-h» or «--help» to display help.\n",1);
|
|
||||||
if (is_null($opts['do-post'])) $opts['do-post']=true;
|
|
||||||
if (file_exists($conf['state_file_absolute_path'])) {
|
if (file_exists($conf['state_file_absolute_path'])) {
|
||||||
if (!is_file($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not a file.\n",1);
|
if (!is_file($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not a file.\n",1);
|
||||||
if (!is_readable($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not readable.\n",1);
|
if (!is_readable($conf['state_file_absolute_path'])) dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’s not readable.\n",1);
|
||||||
|
@ -295,12 +317,31 @@ if (file_exists($conf['state_file_absolute_path'])) {
|
||||||
}
|
}
|
||||||
fclose($fh);
|
fclose($fh);
|
||||||
echo 'Info: got '.count($guids)." GUID(s) for already posted event(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} line(s) older than one year.\n";
|
echo 'Info: got '.count($guids)." GUID(s) for already posted event(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} line(s) older than one year.\n";
|
||||||
|
} elseif (is_null($opts['do-post']) && !$opts['test']) {
|
||||||
|
dieYoung("Error: state file «{$conf['state_file_absolute_path']}» doesn’t exist yet, so this is probably a first run on feed «{$conf['feed_url']}»; thus, all the events {$SNAME} may find in the feed will be considered new and, as a precaution against flooding your local timeline, you have to explicitly declare whether you want it to post them all, or not, by explicitly setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); mind that in both cases they will be recorded as posted in the state file, and won’t be posted again on subsequent runs (you can use «-h» or «--help» to display help).\n",1);
|
||||||
} else {
|
} else {
|
||||||
echo "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n";
|
echo "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n";
|
||||||
}
|
}
|
||||||
|
if (is_null($opts['do-post']) || $opts['test']) $opts['do-post']=true;
|
||||||
|
|
||||||
if (($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
|
|
||||||
$newItemsCount=0;
|
$newItemsCount=0;
|
||||||
|
echo "Info: trying to fetch feed from «{$conf['feed_url']}».\n";
|
||||||
|
$feed=curl($conf['feed_url'],null,['Accept: application/xml']);
|
||||||
|
if ($feed['content']===false) dieYoung("Error: could not connect to «{$conf['feed_url']}» (error: «{$feed['error']}»).\n",1);
|
||||||
|
// Abort unless the feed request returned HTTP 200. The status code reported in the message is read from $feed (the response being tested here), not from $res, and the «…» quote around the feed URL is now closed.
if ($feed['httpcode']!='200') dieYoung("Error: «{$conf['feed_url']}» returned http code «{$feed['httpcode']}».\n",1);
|
||||||
|
$feed=@simplexml_load_string($feed['content'],null,LIBXML_NOCDATA);
|
||||||
|
if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}».\n",1);
|
||||||
|
//print_r($feed);
|
||||||
|
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
|
||||||
|
$itemsCount=$feed->channel->item->count();
|
||||||
|
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
|
||||||
|
foreach ($feed->channel->item as $item)
|
||||||
|
if (isset($item->guid) && $item->guid->__toString()!='' && !array_key_exists($item->guid->__toString(),$guids))
|
||||||
|
$newItemsCount++;
|
||||||
|
echo "Info: got good feed with {$itemsCount} events ({$newItemsCount} new) from «{$conf['feed_url']}».\n";
|
||||||
|
if (!$opts['test'] && $newItemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» had no new events, bye.\n");
|
||||||
|
|
||||||
|
if (!$opts['test'] && ($fh=@fopen($conf['state_file_absolute_path'],'a'))===false) dieYoung("Error: could not open «{$conf['state_file_absolute_path']}» in «append» mode.\n",1);
|
||||||
$goodPostsCount=0;
|
$goodPostsCount=0;
|
||||||
$index=0;
|
$index=0;
|
||||||
foreach ($feed->channel->item as $item) {
|
foreach ($feed->channel->item as $item) {
|
||||||
|
@ -308,8 +349,7 @@ foreach ($feed->channel->item as $item) {
|
||||||
// print_r($item);
|
// print_r($item);
|
||||||
if (!isset($item->guid) || ($guid=$item->guid->__toString())=='') {
|
if (!isset($item->guid) || ($guid=$item->guid->__toString())=='') {
|
||||||
fwrite(STDERR,"Warning: event #{$index} has no GUID, skipping.\n");
|
fwrite(STDERR,"Warning: event #{$index} has no GUID, skipping.\n");
|
||||||
} elseif (!array_key_exists($guid,$guids)) {
|
} elseif (!array_key_exists($guid,$guids) || $opts['test']) {
|
||||||
$newItemsCount++;
|
|
||||||
$file=null;
|
$file=null;
|
||||||
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length']))
|
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length']))
|
||||||
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
|
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
|
||||||
|
@ -339,12 +379,17 @@ foreach ($feed->channel->item as $item) {
|
||||||
$pcats='';
|
$pcats='';
|
||||||
}
|
}
|
||||||
$post="{$ptext}{$plink}{$pcats}";
|
$post="{$ptext}{$plink}{$pcats}";
|
||||||
|
if (postLength($post,$tldsregex['tlds'])<=$conf['max_post_length'] && !$conf['always_link_gancio_post']) {
|
||||||
|
$plink='';
|
||||||
|
} else {
|
||||||
|
$pcats='';
|
||||||
|
}
|
||||||
|
$post="{$ptext}{$plink}{$pcats}";
|
||||||
while (postLength($post,$tldsregex['tlds'])>$conf['max_post_length'] && $ptext!='') {
|
while (postLength($post,$tldsregex['tlds'])>$conf['max_post_length'] && $ptext!='') {
|
||||||
$ptext=preg_replace('#\S+\W*$#','',$ptext);
|
$ptext=preg_replace('#\S+\W*$#','',$ptext);
|
||||||
// echo "[[[{$ptext}]]]\n";
|
// echo "[[[{$ptext}]]]\n";
|
||||||
$post="{$ptext}[…]{$plink}{$pcats}";
|
$post="{$ptext}[…]{$plink}{$pcats}";
|
||||||
}
|
}
|
||||||
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) $post=$plink;
|
|
||||||
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) {
|
if (postLength($post,$tldsregex['tlds'])>$conf['max_post_length']) {
|
||||||
fwrite(STDERR,"Warning: could not shorten post for event «{$guid}» to make it fit into {$conf['max_post_length']} characters; won’t post.\n");
|
fwrite(STDERR,"Warning: could not shorten post for event «{$guid}» to make it fit into {$conf['max_post_length']} characters; won’t post.\n");
|
||||||
} else {
|
} else {
|
||||||
|
@ -428,7 +473,7 @@ foreach ($feed->channel->item as $item) {
|
||||||
// print_r($res['content']);
|
// print_r($res['content']);
|
||||||
$now=time();
|
$now=time();
|
||||||
$guids[$guid]=$now;
|
$guids[$guid]=$now;
|
||||||
fwrite($fh,"{$now}\t{$guid}\n");
|
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
|
||||||
$goodPostsCount++;
|
$goodPostsCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -436,20 +481,26 @@ foreach ($feed->channel->item as $item) {
|
||||||
echo "Info: would have posted status for event «{$guid}».\n";
|
echo "Info: would have posted status for event «{$guid}».\n";
|
||||||
$now=time();
|
$now=time();
|
||||||
$guids[$guid]=$now;
|
$guids[$guid]=$now;
|
||||||
fwrite($fh,"{$now}\t{$guid}\n");
|
if (!$opts['test']) fwrite($fh,"{$now}\t{$guid}\n");
|
||||||
$goodPostsCount++;
|
$goodPostsCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ($opts['verbose']) echo "Info: event «{$guid}» has already been posted on ".date('c',$guids[$guid]).", skipping.\n";
|
if ($opts['verbose']) echo "Info: event «{$guid}» has already been posted on ".date('c',$guids[$guid]).", skipping.\n";
|
||||||
}
|
}
|
||||||
// fclose($fh); exit(0);// to test a single post
|
if ($opts['test']) break;// to test a single post
|
||||||
|
}
|
||||||
|
if (!$opts['test']) fclose($fh);
|
||||||
|
if (!$opts['test']) {
|
||||||
|
if ($opts['do-post'])
|
||||||
|
// Summary for a normal (non-test) run that actually posted: posts that succeeded, out of the new events, out of all events in the feed.
echo "Info: successfully posted {$goodPostsCount} of {$newItemsCount} new event(s) (of {$itemsCount} total events in the feed).\n";
|
||||||
|
else
|
||||||
|
echo "Info: would have tried to post {$newItemsCount} new event(s) of {$itemsCount} total events in the feed.\n";
|
||||||
|
} elseif ($goodPostsCount==1) {
|
||||||
|
echo "Info: successfully posted the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
|
||||||
|
} else {
|
||||||
|
echo "Info: failed to post the first of {$itemsCount} total events in the feed ({$newItemsCount} are new).\n";
|
||||||
}
|
}
|
||||||
fclose($fh);
|
|
||||||
if ($opts['do-post'])
|
|
||||||
echo "Info: feed got {$itemsCount} events; succesfully posted {$goodPostsCount} of {$newItemsCount} new event(s).\n";
|
|
||||||
else
|
|
||||||
echo "Info: feed got {$itemsCount} event(s), {$newItemsCount} new.\n";
|
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue