2024-10-24 06:53:20 +02:00
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2024-10-29 13:54:18 +01:00
// Program identity. $SNAME is the display name, $ENAME the lowercase name used
// for the executable and for the sample systemd units, $SVERS the version.
// All three are interpolated into the help text.
$SNAME = 'GancioF2F';
$SVERS = '0.5.1';
$ENAME = strtolower($SNAME);
2024-10-24 06:53:20 +02:00
2024-10-29 13:54:18 +01:00
require __DIR__.'/lib/ckmkeys.php';
2024-10-24 06:53:20 +02:00
require __DIR__.'/lib/gettlds.php';
require __DIR__.'/lib/mastodon-postLength.php';
2024-10-29 13:54:18 +01:00
require __DIR__.'/lib/mb_ucfirst.php';
2024-10-24 06:53:20 +02:00
require __DIR__.'/lib/hashtag.php';
require __DIR__.'/lib/html2text.php';
require __DIR__.'/lib/curl.php';
$help=
"[[[ SYNOPSIS ]]]
2024-10-24 12:41:22 +02:00
{$ENAME} [options] <configuration file path>
2024-10-24 06:53:20 +02:00
[[[ DESCRIPTION ]]]
2024-10-29 13:54:18 +01:00
This is {$SNAME} v{$SVERS} («GancioFeed2Fedi»), a CLI PHP script that can
be used to periodically fetch the JSON feed from an instance of Gancio
(https://gancio.org) and post its new or changed events announcements on the
Fediverse through a Mastodon account, recording into a state file a reference
to each already posted announcement in order to post only new or changed ones
on each run.
2024-10-24 06:53:20 +02:00
It can be useful, for example, when the admins of a Gancio instance chose not
2024-10-27 06:52:53 +01:00
to use its federation feature because it would be too heavy on its server:
in fact, {$SNAME} is a light alternative to federating the Gancio instance,
moving from its server to the one running Mastodon the burden of posting each
2024-10-29 13:54:18 +01:00
announcement to each Fediverse instance hosting at least one follower, and of
2024-10-27 06:52:53 +01:00
sending them the image a Gancio user can attach to each announcement, because
{$SNAME} will fetch it only once and attach it to the Mastodon post; moreover,
by default, if an announcement on the Gancio instance fits into a Mastodon
post {$SNAME} doesn’t place a link to the original announcement into the post,
thus further reducing the burden due to the requests the Gancio instance gets
from every Mastodon instance trying to generate a “link preview”.
{$SNAME} is meant to be run periodically, every half an hour or so, by a cron
2024-10-24 06:53:20 +02:00
job, or systemd timer, or the likes (you can find a sample «{$ENAME}.timer»
and a commented sample «{$ENAME}.service» in the «systemd» directory).
2024-10-24 12:41:22 +02:00
In order to work, {$SNAME} needs a configuration file path to be passed to it
as an argument on the command line.
2024-10-24 06:53:20 +02:00
[[[ CONFIGURATION FILE ]]]
2024-10-27 06:52:53 +01:00
The configuration file needs to be like this:
2024-10-24 06:53:20 +02:00
--- Example configuration file ---
# Lines beginning with a «#» and empty lines will be ignored
2024-10-29 13:54:18 +01:00
# «feed_hostname» is required to specify the hostname of the Gancio instance.
2024-10-27 06:52:53 +01:00
# For example:
2024-10-29 13:54:18 +01:00
feed_hostname = gancio.some.domain
2024-10-24 06:53:20 +02:00
# «fedi_hostname» is required to specify the hostname of the Mastodon instance
2024-10-27 06:52:53 +01:00
# you want to post to. For example:
2024-10-24 06:53:20 +02:00
fedi_hostname = mastodon.another.domain
# «fedi_token» is required to specify an «app token» to access the account
# that you want to use on the instance defined by «fedi_hostname». On Mastodon
# default web frontend you can get such a token under «Preferences» ->
# «Development», by clicking on the «New application» button; the new
2024-10-24 22:50:41 +02:00
# application should have at least the «write:media» and «write:statuses»
# privileges; when you’ll be done setting it up, it will be listed under
# «Your applications», and by clicking on its name you’ll be able to copy
# «Your access token» and paste it here. For example:
2024-10-24 06:53:20 +02:00
fedi_token = w6oQ_Ot2LSAm_Q31hrvp0asfl22ip3O4ipYq1kV1ceY
# «state_file_absolute_path» is required to specify the absolute path of the
2024-10-27 06:52:53 +01:00
# state file where {$SNAME} will store the references to already posted
# announcements (on every run, {$SNAME} will check this file for entries older
# than one year and discard them, to keep the state file from growing too much).
# For example:
2024-10-29 13:54:18 +01:00
state_file_absolute_path = /var/local/cache/gancioff/gancio.some.domain.state
2024-10-24 06:53:20 +02:00
2024-10-28 10:14:10 +01:00
# «timezone» is required to specify the timezone of the Gancio instance, in
# order for {$SNAME} to calculate the correct datetimes. You can list the
# supported timezones using option «-T» or «--timezones» (see the related
2024-10-29 13:54:18 +01:00
# entry in the «OPTIONS» section). For example:
2024-10-28 10:14:10 +01:00
timezone = Europe/Rome
2024-10-24 06:53:20 +02:00
# «posts_language» is required to specify the ISO 639-1 code for the language
# of posts (see https://www.loc.gov/standards/iso639-2/php/code_list.php for
2024-10-27 06:52:53 +01:00
# a complete list). For example:
2024-10-24 06:53:20 +02:00
posts_language = it
# «posts_visibility» is optional and lets you override the default “public”
# visibility of posts; it can be set to «public» (posts will be visible in the
# «Local» and «Federated» timelines, and any user will be able to boost them),
# «unlisted» (posts will be visible only in the «Home» timeline of followers
# and on the profile of the Mastodon account in use, not in the «Local» or
# «Federated» timelines, but any user will still be able to boost them),
# «private» (AKA «followers only»: posts will be visible only by followers and
# won’t be boostable by anyone), and «direct» (since {$SNAME} posts won’t ever
# explicitly mention any account, posts with this visibility will be visible
2024-10-27 06:52:53 +01:00
# only from the Mastodon account in use, which may be good for testing).
# For example:
2024-10-24 06:53:20 +02:00
posts_visibility = unlisted
# «max_post_length» is optional and lets you override the automatically
# detected maximum length that a post can have on the instance specified with
# «fedi_hostname»; it can be used for testing purposes or just to keep the
2024-10-27 06:52:53 +01:00
# posts shorter than they would be otherwise. For example:
2024-10-24 06:53:20 +02:00
max_post_length = 840
2024-10-25 17:58:30 +02:00
# «always_link_gancio_post» is optional and if unspecified it defaults to
# «false», which means that {$SNAME} adds to the Mastodon post a link to the
# original Gancio post only if the latter is too long to fit into the first
2024-10-27 06:52:53 +01:00
# (i.e. into the maximum post length allowed by the Mastodon instance, or into
# the «max_post_length» specified in this configuration file - see above);
# this further reduces the burden on the Gancio instance (see the second
# paragraph of the «Description» section).
2024-10-25 17:58:30 +02:00
# If set to «true», {$SNAME} will instead always add a link to the original
2024-10-27 06:52:53 +01:00
# Gancio announcement.
2024-10-25 17:58:30 +02:00
always_link_gancio_post = true
2024-10-24 06:53:20 +02:00
--- End of example configuration file ---
[[[ OPTIONS ]]]
-h / --help
Show this help text and exit.
-p / --do-post <y|n>
2024-10-25 17:58:30 +02:00
When a state file already exists, this option defaults to «y» («yes»), which
2024-10-27 06:52:53 +01:00
means that {$SNAME} will try to post all the new or changed announcements it
may find in the feed; if set to «n» («no»), {$SNAME} will not try to post
them, but it will still record a reference to each of them into the state
file, so they won’t be posted again on subsequent runs (unless they were
changed in the meantime).
This option is mainly useful on {$SNAME}’s first run on a given feed, i.e.
when the state file specified in the configuration file doesn’t exist yet and
thus all the announcements in the feed will be considered “new”: in this
case, {$SNAME} refuses to run unless you explicitly set this option to «y» or
«n»: this is a way to prevent you from unintentionally flooding your Mastodon
instance’s «Local» timeline, and possibly your followers’ «Home» timelines,
with all the announcements in the feed.
2024-10-26 23:11:05 +02:00
When “test mode” is active (see the next option description), setting this
option has no effect.
-t / --test
Do a test: {$SNAME} will try as always to read the configuration file, fetch
the defined Mastodon instance’s info, load the state file and fetch the feed,
2024-10-27 06:52:53 +01:00
but it will post only the first of the announcements it may find there, with
a visibility of «direct», even if according to the state file it has already
been posted, and won’t update the state file.
2024-10-26 23:11:05 +02:00
This option also activates “verbose mode” (see below).
2024-10-24 06:53:20 +02:00
-v / --verbose
2024-10-26 23:11:05 +02:00
When this option is not set {$SNAME} prints only warning and error messages;
when it is set it also prints informational messages about what it’s doing.
2024-10-28 10:14:10 +01:00
-T / --timezones
List all the supported timezones.
2024-10-24 06:53:20 +02:00
--
2024-10-27 06:52:53 +01:00
Treat every possible subsequent argument as non-options. Useful only in the
2024-10-24 06:53:20 +02:00
very improbable case your config file is named «--help» or as another option.
[[[ EXIT VALUES ]]]
0: regular run
1: some error occurred
2024-10-24 12:04:37 +02:00
99: killed with signal (ctrl+c, etc.)
2024-10-24 06:53:20 +02:00
[[[ DISCLAIMER AND LICENSE ]]]
2024-10-27 06:52:53 +01:00
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
2024-10-24 06:53:20 +02:00
conditions; see <http://www.gnu.org/licenses/> for details.\n";
// Path of the configuration file; stays null until a non-option argument is
// found on the command line.
$confFP = null;

// Table of the configuration keys the script knows about. Each entry carries
// a «required» flag and a «default» value; the table is handed to getConf()
// together with the configuration file path.
$conf = [
    'feed_hostname'            => ['required' => true,  'default' => null],
    'fedi_hostname'            => ['required' => true,  'default' => null],
    'fedi_token'               => ['required' => true,  'default' => null],
    'state_file_absolute_path' => ['required' => true,  'default' => null],
    'timezone'                 => ['required' => true,  'default' => null],
    'posts_language'           => ['required' => true,  'default' => null],
    'posts_visibility'         => ['required' => false, 'default' => 'public'],
    'max_post_length'          => ['required' => false, 'default' => null],
    'always_link_gancio_post'  => ['required' => false, 'default' => false],
];

// Command line switches and their initial values. «do-post» is tri-state:
// null means the user did not pass «-p» / «--do-post» at all.
$opts = [
    'test'                  => false,
    'do-post'               => null,
    'verbose'               => false,
    'update-language-codes' => false,
];
2024-10-28 10:14:10 +01:00
// Every timezone identifier known to PHP; printed by «-T» / «--timezones»
// and used later to validate the «timezone» configuration key.
$timezones = timezone_identifiers_list();

// Parse the command line. Arguments starting with «-» are options until a
// bare «--» is met; the first non-option argument is the configuration file
// path and any further non-option argument is an error.
$canBeOpt = true;
for ($i = 1; $i < $argc; $i++) {
    $arg = $argv[$i];
    // strncmp() rather than $arg[0]: an empty argument ('') has no offset 0
    // and indexing it raises an "Uninitialized string offset" diagnostic.
    if ($canBeOpt && strncmp($arg, '-', 1) === 0) {
        switch ($arg) {
            case '--':
                // Everything after this is a plain argument.
                $canBeOpt = false;
                break;
            case '-h':
            case '--help':
                echo $help;
                exit(0);
            case '-t':
            case '--test':
                // Test mode implies verbose mode.
                $opts['test'] = true;
                $opts['verbose'] = true;
                break;
            case '-p':
            case '--do-post':
                if ($i + 1 >= $argc) {
                    dieYoung("Error: option «{$arg}» requires an argument; use «-h» or «--help» to display help.\n", 1);
                }
                if ($argv[$i + 1] == 'y') {
                    $opts['do-post'] = true;
                } elseif ($argv[$i + 1] == 'n') {
                    $opts['do-post'] = false;
                } else {
                    dieYoung("Error: option «{$arg}» requires an argument of «y» for «yes» or «n» for «no»; use «-h» or «--help» to display help.\n", 1);
                }
                // Skip the value we have just consumed.
                $i++;
                break;
            case '-v':
            case '--verbose':
                $opts['verbose'] = true;
                break;
            case '-T':
            case '--timezones':
                foreach ($timezones as $val) {
                    echo "$val\n";
                }
                exit(0);
            case '-u':
            case '--update-language-codes':
                $opts['update-language-codes'] = true;
                break;
            case '--make-readme':
                // Regenerate README.md from the help text; report a failed
                // write instead of exiting 0 as if it had worked.
                $readmeFP = __DIR__ . '/README.md';
                if (@file_put_contents($readmeFP, "```text\n{$help}```\n") === false) {
                    dieYoung("Error: could not write «{$readmeFP}».\n", 1);
                }
                exit(0);
            default:
                dieYoung("Error: «{$arg}» is not a known option; use «-h» or «--help» to display help.\n", 1);
        }
    } elseif (is_null($confFP)) {
        $confFP = $arg;
    } else {
        dieYoung("Error: could not interpret «{$arg}» (configuration file has already been set to «{$confFP}»); use «-h» or «--help» to display help.\n", 1);
    }
}
2024-10-24 12:04:37 +02:00
// Route the usual termination signals to sighandler(). Asynchronous dispatch
// is enabled, so no declare(ticks=1) is needed.
pcntl_async_signals(true);
$handledSignals = [
    SIGTERM, // termination ('kill' was called)
    SIGHUP,  // terminal log-out
    SIGINT,  // interrupted (ctrl-c was pressed)
];
foreach ($handledSignals as $signal) {
    pcntl_signal($signal, 'sighandler');
}
2024-10-24 06:53:20 +02:00
// Local list of ISO 639-1 (two letter) language codes, one per line; it is
// used to validate the «posts_language» configuration key.
$langsFP = __DIR__ . '/lib/iso-639-1-langcodes.txt';

// «-u» / «--update-language-codes»: rebuild that list from the Library of
// Congress ISO 639-2 table, then exit.
if ($opts['update-language-codes']) {
    $url = 'https://www.loc.gov/standards/iso639-2/ISO-639-2_8859-1.txt';
    $res = curl($url);
    if ($res['content'] === false) {
        dieYoung("Error: could not connect to «{$url}».\n", 1);
    }
    if ($res['httpcode'] != '200') {
        dieYoung("Error: got http code «{$res['httpcode']}» from «{$url}».\n", 1);
    }
    // One record per line, «|» separated; only records whose third field is
    // a two letter code are kept:
    //   alb|sqi|sq|Albanian|albanais        -> «sq»
    //   tup|||Tupi languages|tupi, langues  -> skipped
    $langCodes = [];
    // Accept any line ending: splitting on "\r\n" only would yield a single
    // huge "line" (and therefore zero codes) if the server ever sent "\n".
    foreach (preg_split('#\r\n|\r|\n#', $res['content']) as $val) {
        if (preg_match('#^[a-z]{3}\|([a-z]{3})?\|([a-z]{2})\|.+\|.+$#', $val, $matches) === 1) {
            $langCodes[] = $matches[2];
        }
    }
    $count = count($langCodes);
    // Never replace a working list with an empty one: every later run would
    // reject any «posts_language» value.
    if ($count === 0) {
        dieYoung("Error: found no ISO 639-1 language codes in the content i got from «{$url}»; «{$langsFP}» was left untouched.\n", 1);
    }
    if (@file_put_contents($langsFP, implode("\n", $langCodes) . "\n") === false) {
        dieYoung("Error: could not save the {$count} ISO 639-1 language code(s) i got from «{$url}» into «{$langsFP}».\n", 1);
    }
    vecho($opts['verbose'], "Info: successfully saved the {$count} ISO 639-1 language code(s) i got from «{$url}» into «{$langsFP}».\n");
    exit(0);
}
// Load the known language codes, then read and validate the configuration
// file. Every failed check ends the run through dieYoung() with code 1.
$langs = @file($langsFP, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($langs === false) {
    dieYoung("Error: could not load ISO 639-1 language codes from «{$langsFP}».\n", 1);
}
if (is_null($confFP)) {
    dieYoung("Error: you have not specified a configuration file; use «-h» or «--help» to display help.\n", 1);
}
$cwd = getcwd();
vecho($opts['verbose'], "Info: trying to load configuration file «{$confFP}» from directory «{$cwd}».\n");
if (!file_exists($confFP)) {
    dieYoung("Error: «{$confFP}» does not exist.\n", 1);
}
if (!is_file($confFP)) {
    dieYoung("Error: «{$confFP}» is not a file.\n", 1);
}
if (!is_readable($confFP)) {
    dieYoung("Error: «{$confFP}» is not readable.\n", 1);
}
getConf($conf, $confFP);

// From here on $conf is read as a flat «key => value» map.
if (preg_match('#^/.*$#', $conf['state_file_absolute_path']) !== 1) {
    dieYoung("Error: in configuration file: «state_file_absolute_path» must be an absolute path.\n", 1);
}
if (!in_array($conf['timezone'], $timezones)) {
    dieYoung("Error: in configuration file: «timezone»: «{$conf['timezone']}» is not one of the supported timezones (use «-T» or «--timezones» to list them).\n", 1);
}
if (!in_array($conf['posts_language'], $langs)) {
    dieYoung("Error: in configuration file: «posts_language»: «{$conf['posts_language']}» is not a known language code.\n", 1);
}
$allowedVisibilities = ['public', 'unlisted', 'private', 'direct'];
if (!in_array($conf['posts_visibility'], $allowedVisibilities)) {
    dieYoung("Error: in configuration file: «posts_visibility» must be one of «public», «unlisted», «private» or «direct».\n", 1);
}

// Optional override of the maximum post length: digits only, at least 10.
if (!is_null($conf['max_post_length'])) {
    $isDigitsOnly = (preg_match('#^\d+$#', $conf['max_post_length']) === 1);
    if (!$isDigitsOnly || $conf['max_post_length'] + 0 < 10) {
        dieYoung("Error: configuration file: «max_post_length» must be an integer greater than or equal to 10.\n", 1);
    }
    // Turn the numeric string into a number.
    $conf['max_post_length'] = $conf['max_post_length'] + 0;
    vecho($opts['verbose'], "Info: got «{$conf['max_post_length']}» as «max_post_length» from configuration file.\n");
}

// «always_link_gancio_post» is either the boolean default or the literal
// text «true» / «false»; normalise it to a real boolean.
if (!is_bool($conf['always_link_gancio_post']) && preg_match('#^(true|false)$#', $conf['always_link_gancio_post']) !== 1) {
    dieYoung("Error: configuration file: «always_link_gancio_post» must be «true» or «false».\n", 1);
} else {
    $conf['always_link_gancio_post'] = ($conf['always_link_gancio_post'] == 'true');
}

// Test mode always posts with «direct» visibility.
if ($opts['test']) {
    $conf['posts_visibility'] = 'direct';
}
vecho($opts['verbose'], "Info: got good configuration from configuration file.\n");
2024-10-24 06:53:20 +02:00
2024-10-24 12:23:54 +02:00
// Data returned by gettlds(); its «tlds» entry is later passed to
// postLength() when measuring posts.
$tldsregex = gettlds(__DIR__ . '/storage/tlds.txt', true);

// Ask the Mastodon instance for its limits: maximum image size and maximum
// number of characters in a status.
$url = "https://{$conf['fedi_hostname']}/api/v2/instance";
vecho($opts['verbose'], "Info: trying to fetch instance info from «{$url}».\n");
$authHeaders = ["Authorization: Bearer {$conf['fedi_token']}", 'Accept: application/json'];
$res = curl($url, null, $authHeaders);
if ($res['content'] === false) {
    dieYoung("Error: could not connect to «{$url}» (error: «{$res['error']}»).\n", 1);
}
$res['content'] = @json_decode($res['content'], true);
if (is_null($res['content'])) {
    dieYoung("Error: content from «{$url}» was not good JSON.\n", 1);
}
// When the server sent an «error» field, append it to the http code message.
$buff = '';
if (isset($res['content']['error'])) {
    $buff = " («{$res['content']['error']}»)";
}
if ($res['httpcode'] != '200') {
    dieYoung("Error: got http code «{$res['httpcode']}»{$buff} from «{$url}».\n", 1);
}

// Maximum size of an image attachment.
$imageSizeLimit = $res['content']['configuration']['media_attachments']['image_size_limit'] ?? null;
if (is_null($imageSizeLimit)) {
    dieYoung("Error: JSON from «{$url}» doesn’ t declare «image_size_limit».\n", 1);
}
if (!is_int($imageSizeLimit)) {
    dieYoung("Error: JSON from «{$url}» declares «image_size_limit» with an unexpected format.\n", 1);
}
$conf['max_image_size'] = $imageSizeLimit + 0;
vecho($opts['verbose'], "Info: got «{$conf['max_image_size']}» as «max_image_size» from «{$url}».\n");

// Maximum status length; it is validated in any case, but used only when the
// configuration file did not set «max_post_length».
$maxCharacters = $res['content']['configuration']['statuses']['max_characters'] ?? null;
if (is_null($maxCharacters)) {
    dieYoung("Error: JSON from «{$url}» doesn’ t declare «max_characters».\n", 1);
}
if (!is_int($maxCharacters)) {
    dieYoung("Error: JSON from «{$url}» declares «max_characters» with an unexpected format.\n", 1);
}
if (is_null($conf['max_post_length'])) {
    $conf['max_post_length'] = $maxCharacters + 0;
    vecho($opts['verbose'], "Info: got «{$conf['max_post_length']}» as «max_post_length» from «{$url}».\n");
}
//print_r($conf);
2024-10-27 06:52:53 +01:00
vecho($opts['verbose'], "Info: trying to load the references to already posted announcements from state file «{$conf['state_file_absolute_path']}».\n");

// $refs maps an announcement slug to ['updatedAt' => ..., 'postedAt' => ...].
// Each state file line is «slug <TAB> updatedAt <TAB> postedAt», where
// postedAt is a Unix timestamp.
$refs = [];
if (file_exists($conf['state_file_absolute_path'])) {
    if (!is_file($conf['state_file_absolute_path'])) {
        dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’ s not a file.\n", 1);
    }
    if (!is_readable($conf['state_file_absolute_path'])) {
        dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’ s not readable.\n", 1);
    }
    if (!is_writeable($conf['state_file_absolute_path'])) {
        dieYoung("Error: «{$conf['state_file_absolute_path']}» exists but it’ s not writable.\n", 1);
    }

    // References posted more than one year ago are dropped.
    $graceTime = 365 * 24 * 60 * 60;
    $graceLine = time() - $graceTime;
    $expiredCount = 0;

    // file() can still fail after the checks above (e.g. a race or an I/O
    // error); do not iterate over «false».
    $stateLines = @file($conf['state_file_absolute_path'], FILE_IGNORE_NEW_LINES);
    if ($stateLines === false) {
        dieYoung("Error: could not read state file «{$conf['state_file_absolute_path']}».\n", 1);
    }
    foreach ($stateLines as $key => $val) {
        if (preg_match('#^(\S+)\t(\S+)\t(\d+)$#', $val, $matches) === 1) {// todo: refine the pattern
            if ($matches[3] + 0 >= $graceLine) {
                $refs[$matches[1]] = ['updatedAt' => $matches[2], 'postedAt' => $matches[3]];
            } else {
                $expiredCount++;
            }
        } else {
            dieYoung("Error: in state file «{$conf['state_file_absolute_path']}», line " . ($key + 1) . " has unexpected format.\n", 1);
        }
    }
    unset($stateLines);

    // Write back only the references that survived the pruning. A failed
    // fopen() must stop the run: fwrite() on «false» would throw or warn.
    $fh = @fopen($conf['state_file_absolute_path'], 'w');
    if ($fh === false) {
        dieYoung("Error: could not open state file «{$conf['state_file_absolute_path']}» in «write» mode.\n", 1);
    }
    foreach ($refs as $key => $val) {
        fwrite($fh, "{$key}\t{$val['updatedAt']}\t{$val['postedAt']}\n");
    }
    fclose($fh);
    vecho($opts['verbose'], 'Info: got ' . count($refs) . " reference(s) to already posted announcement(s) from state file «{$conf['state_file_absolute_path']}»; removed {$expiredCount} reference(s) older than one year.\n");
} elseif (is_null($opts['do-post']) && !$opts['test']) {
    // First run without an explicit «-p»: refuse to go on, so the user
    // cannot flood timelines by accident.
    dieYoung("Warning: state file «{$conf['state_file_absolute_path']}» doesn’ t exist yet, so this is probably a first run on Gancio instance «{$conf['feed_hostname']}»; thus, all the announcements {$SNAME} may find in the feed will be considered new and, as a precaution against unintentionally flooding your Mastodon instance’ s «Local» timeline, and possibly your followers’ «Home» timelines, you have to explicitly declare whether you want it to post them all, or not, by explicitly setting option «-p» or «--do-post» to «y» («yes») or «n» («no»); mind that in both cases the references to the announcements will be recorded in the state file, so the announcements won’ t be posted again on subsequent runs (unless they were changed in the meantime).\n", 1);
} else {
    vecho($opts['verbose'], "Info: state file «{$conf['state_file_absolute_path']}» was not found.\n");
}

// «do-post» defaults to «yes» when unset, and test mode always posts.
if (is_null($opts['do-post']) || $opts['test']) {
    $opts['do-post'] = true;
}
2024-10-24 06:53:20 +02:00
2024-10-29 13:54:18 +01:00
// Fetch the JSON feed of the Gancio instance (recurrent events included) and
// make sure it has the shape the rest of the script relies on.
$url = "https://{$conf['feed_hostname']}/feed/json?show_recurrent=true";
vecho($opts['verbose'], "Info: trying to fetch JSON feed from «{$url}».\n");
$feed = curl($url, null, ['Accept: application/json']);
if ($feed['content'] === false) {
    dieYoung("Error: could not connect to «{$url}» (error: «{$feed['error']}»).\n", 1);
}
$feedHttpCode = $feed['httpcode'];
$feed = @json_decode($feed['content'], true);
// When the server sent an «error» field, append it to the http code message.
$buff = (is_array($feed) && isset($feed['error'])) ? " ({$feed['error']})" : '';
// Report the http code of THIS request: the message used to print the code
// of the earlier instance-info request ($res) and lacked the closing «»».
if ($feedHttpCode != '200') {
    dieYoung("Error: «{$url}» returned http code «{$feedHttpCode}»{$buff}.\n", 1);
}
if (is_null($feed)) {
    dieYoung("Error: got no valid JSON from «{$url}».\n", 1);
}
// A JSON scalar decodes fine but cannot be iterated as a list of items.
if (!is_array($feed)) {
    dieYoung("Error: feed from «{$url}» had unexpected format.\n", 1);
}

// Keys every feed item is checked for through ckmkeys().
$requiredKeys = ['id', 'title', 'slug', 'description', 'multidate', 'start_datetime', 'end_datetime', 'media', 'online_locations', 'updatedAt', 'tags', 'place'];
foreach ($feed as $item) {
    if (!is_array($item) || !ckmkeys($requiredKeys, $item)) {
        dieYoung("Error: feed from «{$url}» had unexpected format.\n", 1);
    }
}
$itemsCount = count($feed);
if ($itemsCount == 0) {
    exitYoung("Info: feed from «{$url}» was empty, bye.\n");
}
vecho($opts['verbose'], "Info: got feed with {$itemsCount} announcement(s) from «{$url}».\n");
2024-10-28 10:14:10 +01:00
//file_put_contents(__DIR__.'/storage/dump-'.time(),print_r($feed,true));
2024-10-25 17:58:30 +02:00
2024-10-29 13:54:18 +01:00
//date_default_timezone_set($conf['timezone']);
//$dfmt=datefmt_create('it',IntlDateFormatter::FULL,IntlDateFormatter::SHORT,$conf['timezone'],IntlDateFormatter::GREGORIAN,"eeee d MMMM '"._('alle')."' HH:mm");
2024-10-26 23:11:05 +02:00
// Main loop: for every announcement in the feed decide whether it is new,
// changed or old, build the text of the post, shrink it to the maximum
// length, upload the first attachment (if any) and publish the status.
// The updated references are written to a temporary file («<state>.tmp»);
// test mode handles only the first announcement and never touches the state.
$tsfp = "{$conf['state_file_absolute_path']}.tmp";
if (!$opts['test'] && ($fh = @fopen($tsfp, 'w')) === false) {
    dieYoung("Error: could not open «{$tsfp}» in «write» mode.\n", 1);
}
$itemsToPost = 0;
$goodPostsCount = 0;
foreach ($feed as $item) {
    $now = time();
    $postUrl = "https://{$conf['feed_hostname']}/event/{$item['slug']}";

    // «new»: never seen; «changed»: «updatedAt» differs from the recorded one
    // and the event is not over yet; «old»: anything else.
    if (!array_key_exists($item['slug'], $refs)) {
        $state = 'new';
    } elseif ($item['updatedAt'] != $refs[$item['slug']]['updatedAt']) {
        if ($item['start_datetime'] > $now || (!is_null($item['end_datetime']) && $item['end_datetime'] > $now)) {
            $state = 'changed';
        } else {
            $state = 'old';
        }
    } else {
        $state = 'old';
    }

    if ($state == 'old' && !$opts['test']) {
        if ($opts['do-post']) {
            vecho($opts['verbose'], "Info: won’ t try to post status for {$state} announcement «{$postUrl}».\n");
        } else {
            vecho($opts['verbose'], "Info: wouldn’ t try to post status for {$state} announcement «{$postUrl}».\n");
        }
    } else {
        $itemsToPost++;

        // --- Head: title, date(s) and time(s), place, online locations ---
        $postHead = "{$item['title']}\n\n";
        if ($item['multidate']) {
            $dfmt = datefmt_create($conf['posts_language'], 0, 0, $conf['timezone'], null, "eeee d MMMM");
            $postHead .= _('Tutti i giorni da') . ' ' . datefmt_format($dfmt, $item['start_datetime']) . ' ' . _('a') . ' ' . datefmt_format($dfmt, $item['end_datetime']) . ', ';
            $dfmt = datefmt_create($conf['posts_language'], 0, 0, $conf['timezone'], null, "HH:mm");
            $postHead .= _('dalle') . ' ' . datefmt_format($dfmt, $item['start_datetime']) . ' ' . _('alle') . ' ' . datefmt_format($dfmt, $item['end_datetime']);
        } else {
            $dfmt = datefmt_create($conf['posts_language'], 0, 0, $conf['timezone'], null, "eeee d MMMM '" . _('dalle') . "' HH:mm");
            $postHead .= mb_ucfirst(datefmt_format($dfmt, $item['start_datetime']));
            if (!is_null($item['end_datetime'])) {
                $dfmt = datefmt_create($conf['posts_language'], 0, 0, $conf['timezone'], null, "HH:mm");
                $postHead .= ' ' . _('alle') . ' ' . datefmt_format($dfmt, $item['end_datetime']);
            }
        }
        $postHead .= ', ' . _('presso') . " {$item['place']['name']}, {$item['place']['address']}";
        if (is_array($item['online_locations']) && count($item['online_locations']) > 0) {
            $postHead .= '; ' . _('e anche online su ') . implode(' - ', $item['online_locations']);
        }

        // --- Body: the description, converted through html2text() ---
        $postBody = '';
        if (!is_null($item['description']) && $item['description'] != '' && $item['description'] != '<p></p>') {
            $postBody .= html2text($item['description']);
        }
        if ($postBody != '') {
            $postBody = "\n\n{$postBody}";
        }

        // --- Link to the original announcement and hashtags ---
        $postLink = "\n\n{$postUrl}";
        $postTags = '';
        if (isset($item['tags']) && is_array($item['tags']) && count($item['tags']) > 0) {
            $hashtags = [];
            foreach ($item['tags'] as $val) {
                $hashtags[] = hashtag($val['tag']);
            }
            $postTags .= implode(' ', $hashtags);
            if ($postTags != '') {
                $postTags = "\n\n{$postTags}";
            }
        }

        // --- Fit the post into the maximum length ---
        $post = "{$postHead}{$postBody}{$postLink}{$postTags}";
        $postLen = postLength($post, $tldsregex['tlds']);
        // Everything fits: unless configured otherwise, drop the link.
        if (!$conf['always_link_gancio_post'] && $postLen <= $conf['max_post_length']) {
            $postLink = '';
            $post = "{$postHead}{$postBody}{$postTags}";
            $postLen = postLength($post, $tldsregex['tlds']);
        }
        // Too long: first sacrifice the hashtags...
        if ($postLen > $conf['max_post_length']) {
            $postTags = '';
            $post = "{$postHead}{$postBody}{$postLink}";
            $postLen = postLength($post, $tldsregex['tlds']);
        }
        // ...then cut the body one word at a time from its end.
        while ($postLen > $conf['max_post_length'] && $postBody != '') {
            $shorterBody = preg_replace('#\S+\W*$#', '', $postBody);
            // Once only whitespace is left (the leading "\n\n") the pattern
            // stops matching and the body would never become empty: without
            // this guard the loop would spin forever.
            if (!is_string($shorterBody) || $shorterBody === $postBody) {
                $shorterBody = '';
            }
            $postBody = $shorterBody;
            $post = "{$postHead}{$postBody}[…]{$postLink}{$postTags}";
            $postLen = postLength($post, $tldsregex['tlds']);
        }

        if ($postLen > $conf['max_post_length']) {
            fwrite(STDERR, "Warning: could not shrink post for {$state} announcement «{$postUrl}» into {$conf['max_post_length']} characters; won’ t try to post.\n");
        } elseif (!$opts['do-post'] && !$opts['test']) {
            // «-p n»: only record the reference, as if it had been posted.
            vecho($opts['verbose'], "Info: would try to post status for {$state} announcement «{$postUrl}».\n");
            if ($state == 'new' || $state == 'changed') {
                $refs[$item['slug']] = ['updatedAt' => $item['updatedAt'], 'postedAt' => time()];
            }
            $goodPostsCount++;
        } else {
            vecho($opts['verbose'], "Info: trying to post status for {$state} announcement «{$postUrl}».\n");
            // Start from a clean slate for every announcement: otherwise the
            // «media_ids[]» of a previous announcement would be sent again
            // with the next one that has no attachment.
            $postData = [];
            $doPost = false;
            if (isset($item['media']) && count($item['media']) > 0) {
                // Only the first attachment is handled.
                $media = $item['media'][0];
                vecho($opts['verbose'], "Info: {$state} announcement «{$postUrl}» has an attachment; processing.\n");
                if ($media['size'] > $conf['max_image_size']) {
                    fwrite(STDERR, "Warning: attachment size is greater than «{$conf['fedi_hostname']}» maximum image size; won’ t try to post.\n");
                } else {
                    $url = "https://{$conf['feed_hostname']}/media/{$media['url']}";
                    $res = curl($url);
                    if ($res['content'] === false) {
                        fwrite(STDERR, "Warning: could not connect to «{$url}» to fetch attachment: «{$res['error']}»; won’ t try to post.\n");
                    } elseif ($res['httpcode'] != '200') {
                        fwrite(STDERR, "Warning: could not fetch attachment «{$url}»: the server returned «{$res['httpcode']}»; won’ t try to post.\n");
                    } else {
                        // we don't use CURLStringFile because in php 7.3 it is not available
                        // basename(): the file name comes from the remote feed
                        // and must not be able to escape the «storage» directory.
                        $tfp = __DIR__ . '/storage/' . basename($media['url']);
                        if (@file_put_contents($tfp, $res['content']) === false) {
                            fwrite(STDERR, "Warning: could not save attachment into «{$tfp}»; won’ t try to post.\n");
                        } else {
                            if (($type = mime_content_type($tfp)) === false) {
                                fwrite(STDERR, "Warning: could not identify the MIME type of «{$tfp}»; won’ t try to post.\n");
                            } else {
                                $uploadData = ['file' => curl_file_create($tfp, $type, 'file'), 'description' => $media['name']];
                                $url = "https://{$conf['fedi_hostname']}/api/v2/media";
                                $res = curl($url, '/api/v2/media', ["Authorization: Bearer {$conf['fedi_token']}", 'Accept: application/json'], $uploadData);
                                unset($uploadData);
                                if ($res['content'] === false) {
                                    fwrite(STDERR, "Warning: could not connect to «{$url}»: «{$res['error']}»; won’ t try to post.\n");
                                } elseif (is_null($res['content'] = @json_decode($res['content'], true))) {
                                    fwrite(STDERR, "Warning: «{$url}» did not return valid JSON; won’ t try to post.\n");
                                } elseif ($res['httpcode'] != '200' && $res['httpcode'] != '202') {
                                    (isset($res['content']['error'])) ? $buff = " (error: «{$res['content']['error']}»)" : $buff = '';
                                    fwrite(STDERR, "Warning: «{$url}» returned http code «{$res['httpcode']}»{$buff}; won’ t try to post.\n");
                                } elseif (!isset($res['content']['id'])) {
                                    fwrite(STDERR, "Warning: no «id» in JSON from «{$url}»; file has not been uploaded successfully; won’ t try to post.\n");
                                } else {
                                    // 200: the attachment is ready. 202: it was
                                    // accepted but not processed yet, so poll it
                                    // (at most 4 times, 2 seconds apart).
                                    // Remember the id before $res is overwritten
                                    // by the polling requests.
                                    $mediaId = $res['content']['id'];
                                    $id = ($res['httpcode'] == '200') ? $mediaId : null;
                                    for ($try = 1; is_null($id) && $try < 5; $try++) {
                                        sleep(2);
                                        $url = "https://{$conf['fedi_hostname']}/api/v1/media/{$mediaId}";
                                        $res = curl($url, '/api/v1/media', ["Authorization: Bearer {$conf['fedi_token']}", 'Accept: application/json']);
                                        if ($res['content'] !== false && $res['httpcode'] == '200') {
                                            $polled = @json_decode($res['content'], true);
                                            if (is_array($polled) && isset($polled['id'])) {
                                                $id = $polled['id'];
                                            }
                                        }
                                    }
                                    if (!is_null($id)) {
                                        vecho($opts['verbose'], "Info: successfully posted attachment for {$state} announcement «{$postUrl}».\n");
                                        $postData['media_ids[]'] = $id;
                                        $doPost = true;
                                    } else {
                                        fwrite(STDERR, "Warning: server took too long to process file, or could not; won’ t try to post.\n");
                                    }
                                }
                            }
                            if (@unlink($tfp) === false) {
                                fwrite(STDERR, "Warning: could not delete temporary attachment file «{$tfp}».\n");
                            }
                        }
                    }
                }
            } else {
                vecho($opts['verbose'], "Info: {$state} announcement «{$postUrl}» has no attachment.\n");
                $doPost = true;
            }
            if ($doPost) {
                $postData['status'] = $post;
                $postData['visibility'] = $conf['posts_visibility'];
                $postData['language'] = $conf['posts_language'];
                $url = "https://{$conf['fedi_hostname']}/api/v1/statuses";
                $headers = ["Authorization: Bearer {$conf['fedi_token']}", 'Accept: application/json', 'Idempotency-Key: ' . md5(implode('-', $postData) . time())];
                $res = curl($url, '/api/v1/statuses', $headers, $postData);
                if ($res['content'] === false) {
                    fwrite(STDERR, "Warning: could not connect to «{$url}»: «{$res['error']}»; could not post status for {$state} announcement «{$postUrl}».\n");
                } elseif (is_null($res['content'] = @json_decode($res['content'], true))) {
                    fwrite(STDERR, "Warning: «{$url}» did not return good JSON; could not post status for {$state} announcement «{$postUrl}».\n");
                } elseif ($res['httpcode'] != '200') {
                    (isset($res['content']['error'])) ? $buff = " (error: «{$res['content']['error']}»)" : $buff = '';
                    fwrite(STDERR, "Warning: «{$url}» returned http code «{$res['httpcode']}»{$buff}; could not post status for {$state} announcement «{$postUrl}».\n");
                } elseif (!isset($res['content']['url'])) {
                    fwrite(STDERR, "Warning: JSON from «{$url}» had unexpected format; could not post status for {$state} announcement «{$postUrl}».\n");
                } else {
                    vecho($opts['verbose'], "Info: successfully posted status for {$state} announcement «{$postUrl}» (post URL: «{$res['content']['url']}»).\n");
                    $refs[$item['slug']] = ['updatedAt' => $item['updatedAt'], 'postedAt' => time()];
                    $goodPostsCount++;
                }
            }
        }
    }

    if (!$opts['test']) {
        // Carry the (possibly updated) reference over to the new state file.
        if (array_key_exists($item['slug'], $refs)) {
            fwrite($fh, "{$item['slug']}\t{$refs[$item['slug']]['updatedAt']}\t{$refs[$item['slug']]['postedAt']}\n");
        }
    } else {
        // Test mode: only the first announcement is handled.
        break;
    }
}
2024-10-29 13:54:18 +01:00
2024-10-26 23:11:05 +02:00
// Wrap up the run. Outside test mode: close the handle the per-item records
// were written to, move the temporary file over the configured state file and
// print the totals. In test mode (where the loop above stops after its first
// item) only report whether that single post succeeded.
if (!$opts['test']) {
    fclose($fh);
    // A failed rename leaves the configured state file without this run's
    // records: warn about it instead of failing silently.
    if (@rename($tsfp,$conf['state_file_absolute_path'])===false)
        fwrite(STDERR,"Warning: could not move «{$tsfp}» to «{$conf['state_file_absolute_path']}»; state was not saved.\n");
    if ($opts['do-post'])
        vecho($opts['verbose'],"Info: successfully posted {$goodPostsCount} of {$itemsToPost} statuses for new or changed announcement(s) (of {$itemsCount} total announcement(s) in the feed).\n");
    else
        vecho($opts['verbose'],"Info: would have tried to post {$itemsToPost} statuses for new or changed announcement(s) of {$itemsCount} total announcement(s) in the feed.\n");
} elseif ($goodPostsCount==1) {
    vecho($opts['verbose'],"Info: successfully posted status for the first of {$itemsCount} total announcements in the feed.\n");
} else {
    vecho($opts['verbose'],"Info: failed to post status for the first of {$itemsCount} total announcements in the feed.\n");
}
exit(0);
/**
 * Loads a «key=value» configuration file into $conf.
 *
 * On entry $conf maps every known key to an array holding a 'required' flag
 * and a 'default' value; on return every key maps to the (trimmed) value read
 * from the file, or to its default when the file does not set it.
 * Blank lines and lines whose first non-blank character is «#» are ignored.
 * Unknown keys, lines that are not «key=value» and missing required keys are
 * collected and listed on STDERR, after which the script exits with code 1.
 *
 * @param array  $conf   known keys => ['required'=>..., 'default'=>...]; rewritten in place to key => value
 * @param string $confFP path of the configuration file
 */
function getConf(&$conf,&$confFP) {
    $errors=[];
    $nconf=[];
    // Blank lines are skipped in the loop rather than by file(), so that the
    // line numbers quoted in error messages match the actual file.
    $lines=@file($confFP,FILE_IGNORE_NEW_LINES);
    if ($lines===false) dieYoung("Error: could not read configuration file «{$confFP}».\n",1);
    foreach ($lines as $idx=>$line) {
        $lineNum=$idx+1;
        $stripped=trim($line);
        if ($stripped==='' || $stripped[0]==='#') continue;
        if (preg_match('#^([^=]+)=(.+)$#',$line,$matches)!==1) {
            $errors[]="could not interpret line {$lineNum} («{$line}»)";
            continue;
        }
        $key=trim($matches[1]);
        $value=trim($matches[2]);
        if (array_key_exists($key,$conf))
            $nconf[$key]=$value;
        else
            $errors[]="line {$lineNum}: «{$key}» is an unknown key";
    }
    foreach ($conf as $key=>$spec) {
        if ($spec['required'] && !array_key_exists($key,$nconf))
            $errors[]="«{$key}» is not defined";
    }
    if (count($errors)>0) {
        fwrite(STDERR,"Sorry, there are errors in configuration file «{$confFP}»:\n");
        foreach ($errors as $n=>$error)
            fwrite(STDERR," ".($n+1).". {$error}\n");
        fwrite(STDERR,"Use «-h» or «--help» to display help.\n");
        exit(1);
    }
    // Only reached when the file is valid: replace each key's spec with its
    // effective value.
    foreach ($conf as $key=>$spec)
        $conf[$key]=array_key_exists($key,$nconf) ? $nconf[$key] : $spec['default'];
}
/**
 * Waits out a pending rate-limit pause for the given id.
 *
 * Reads the global $naps array: when it holds, for $napid, a timestamp later
 * than the current time, prints an info message, sleeps for the difference
 * and then resets that entry to 0. Otherwise does nothing.
 *
 * @param string $napid key into the global $naps array
 */
function cknap($napid) {
    global $naps;
    $current=time();
    // Nothing recorded for this id, or its deadline is not in the future
    if (!isset($naps[$napid]) || !($naps[$napid]>$current)) return;
    $wakeAt=$naps[$napid];
    $pause=$wakeAt-$current;
    echo "Info: reached rate limit on «{$napid}»; sleeping until ".date('c',$wakeAt).' ...';
    sleep($pause);
    echo "\n";
    $naps[$napid]=0;
}
/**
 * Decodes HTML entities in a string, including both single- and
 * double-quote entities, producing UTF-8 output.
 *
 * @param string $str text possibly containing HTML entities
 * @return string the decoded text
 */
function hent($str) {
    $decoded=html_entity_decode($str,ENT_QUOTES,'UTF-8');
    return $decoded;
}
2024-10-26 23:11:05 +02:00
/**
 * Conditional echo: prints $msg only when $do is truthy.
 *
 * @param mixed  $do  flag deciding whether to print (callers pass the verbose option)
 * @param string $msg text to print
 */
function vecho($do,$msg) {
    if (!$do) return;
    echo $msg;
}
2024-10-24 12:04:37 +02:00
/**
 * Signal handler: closes the global file handle $fh if it is still open,
 * prints a notice and terminates the script with exit code 99.
 *
 * @param int $sig number of the received signal
 */
function sighandler($sig) {
    global $fh;
    // is_resource() is false both when $fh was never opened and when it has
    // already been closed; isset() alone would let fclose() run on a closed
    // handle, which throws a TypeError on PHP 8.
    if (is_resource($fh)) fclose($fh);
    echo "\nInfo: received signal {$sig}, shutting down.\n";
    exit(99);
}
2024-10-24 06:53:20 +02:00
/**
 * Aborts the script: closes the global file handle $fh if it is open, writes
 * $msg to STDERR and terminates.
 *
 * @param string $msg message written to STDERR
 * @param int    $ec  value handed to die(); an integer becomes the exit status
 */
function dieYoung($msg,$ec) {
    // Without this declaration $fh would be an (always unset) local variable
    // and the handle would never be closed here.
    global $fh;
    if (is_resource($fh)) fclose($fh);
    fwrite(STDERR,$msg);
    die($ec);
}
/**
 * Ends the script early but successfully: closes the global file handle $fh
 * if it is open, prints $msg on standard output and exits with code 0.
 *
 * @param string $msg message printed before exiting
 */
function exitYoung($msg) {
    // Without this declaration $fh would be an (always unset) local variable
    // and the handle would never be closed here.
    global $fh;
    if (is_resource($fh)) fclose($fh);
    echo $msg;
    exit(0);
}
?>