An announcement is now considered modified if its “pubDate” has changed (hash creation has been dropped); announcements whose start datetime is before now are skipped; edited posts get a “[MODIFICATO]” line on top; added a «timezone» definition to the config file; added an option to list the supported timezones; bumped version to 0.4.3

This commit is contained in:
pezcurrel 2024-10-28 10:14:10 +01:00
parent 7e897ac840
commit 12fc2c0963

View file

@ -18,7 +18,7 @@
$SNAME='GancioFF';
$ENAME=strtolower($SNAME);
$SVERS='0.4.2';
$SVERS='0.4.3';
require __DIR__.'/lib/gettlds.php';
require __DIR__.'/lib/mastodon-postLength.php';
@ -87,6 +87,12 @@ fedi_token = w6oQ_Ot2LSAm_Q31hrvp0asfl22ip3O4ipYq1kV1ceY
# For example:
state_file_absolute_path = /var/local/cache/gancio.some.domain.feed.state
# «timezone» is required to specify the timezone of the Gancio instance, in
# order for {$SNAME} to calculate the correct datetimes. You can list the
# supported timezones using option «-T» or «--timezones» (see the related
# entry in the «OPTIONS» section). For example:
timezone = Europe/Rome
# «posts_language» is required to specify the ISO 639-1 code for the language
# of posts (see https://www.loc.gov/standards/iso639-2/php/code_list.php for
# a complete list). For example:
@ -153,6 +159,8 @@ always_link_gancio_post = true
-v / --verbose
When this option is not set {$SNAME} prints only warning and error messages;
when it is set it also prints informational messages about what it's doing.
-T / --timezones
List all the supported timezones.
--
Treat every subsequent argument as a non-option. Useful only in the very
improbable case that your config file is named «--help» or like another option.
@ -176,6 +184,7 @@ $conf=[
'fedi_hostname'=>['required'=>true, 'default'=>null],
'fedi_token'=>['required'=>true, 'default'=>null],
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
'timezone'=>['required'=>true, 'default'=>null],
'posts_language'=>['required'=>true, 'default'=>null],
'posts_visibility'=>['required'=>false, 'default'=>'public'],
'max_post_length'=>['required'=>false, 'default'=>null],
@ -189,6 +198,8 @@ $opts=[
'update-language-codes'=>false
];
$timezones=timezone_identifiers_list();
$canBeOpt=true;
for ($i=1; $i<$argc; $i++) {
if ($canBeOpt && $argv[$i][0]=='-') {
@ -211,6 +222,10 @@ for ($i=1; $i<$argc; $i++) {
$i++;
} elseif ($argv[$i]=='-v' || $argv[$i]=='--verbose') {
$opts['verbose']=true;
} elseif ($argv[$i]=='-T' || $argv[$i]=='--timezones') {
foreach ($timezones as $val)
echo "$val\n";
exit(0);
} elseif ($argv[$i]=='-u' || $argv[$i]=='--update-language-codes') {
$opts['update-language-codes']=true;
} elseif ($argv[$i]=='--make-readme') {
@ -259,6 +274,7 @@ if (!is_file($confFP)) dieYoung("Error: «{$confFP}» is not a file.\n",1);
if (!is_readable($confFP)) dieYoung("Error: «{$confFP}» is not readable.\n",1);
getConf($conf,$confFP);
if (preg_match('#^/.*$#',$conf['state_file_absolute_path'])!==1) dieYoung("Error: in configuration file: «state_file_absolute_path» must be an absolute path.\n",1);
if (!in_array($conf['timezone'],$timezones)) dieYoung("Error: in configuration file: «timezone»: «{$conf['timezone']}» is not one of the supported timezones (use «-T» or «--timezones» to list them).\n",1);
if (!in_array($conf['posts_language'],$langs)) dieYoung("Error: in configuration file: «posts_language»: «{$conf['posts_language']}» is not a known language code.\n",1);
if (!in_array($conf['posts_visibility'],['public', 'unlisted', 'private', 'direct'])) dieYoung("Error: in configuration file: «posts_visibility» must be one of «public», «unlisted», «private» or «direct».\n",1);
if (!is_null($conf['max_post_length'])) {
@ -308,9 +324,9 @@ if (file_exists($conf['state_file_absolute_path'])) {
$i=0;
$buff=file($conf['state_file_absolute_path'],FILE_IGNORE_NEW_LINES);
foreach ($buff as $key=>$val) {
if (preg_match('#^(\d+)\t([a-z0-9]{64})\t(\S+)$#',$val,$matches)===1) {
if (preg_match('#^(\d+)\t(\d+)\t(\S+)$#',$val,$matches)===1) {
if ($matches[1]+0>=$graceLine)
$refs[$matches[3]]=['timestamp'=>$matches[1], 'hash'=>$matches[2]];
$refs[$matches[3]]=['postdate'=>$matches[1], 'pubdate'=>$matches[2]];
else
$i++;
} else {
@ -320,7 +336,7 @@ if (file_exists($conf['state_file_absolute_path'])) {
unset($buff);
$fh=fopen($conf['state_file_absolute_path'],'w');
foreach ($refs as $key=>$val)
fwrite($fh,"{$val['timestamp']}\t{$val['hash']}\t{$key}\n");
fwrite($fh,"{$val['postdate']}\t{$val['pubdate']}\t{$key}\n");
fclose($fh);
vecho($opts['verbose'],'Info: got '.count($refs)." reference(s) to already posted announcement(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} reference(s) older than one year.\n");
} elseif (is_null($opts['do-post']) && !$opts['test']) {
@ -341,64 +357,78 @@ if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
$itemsCount=$feed->channel->item->count();
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
//file_put_contents(__DIR__.'/storage/dump-'.time(),print_r($feed,true));
date_default_timezone_set($conf['timezone']);
$tsfp="{$conf['state_file_absolute_path']}.tmp";
if (!$opts['test'] && ($fh=@fopen($tsfp,'w'))===false) dieYoung("Error: could not open «{$tsfp}» in «write» mode.\n",1);
$itemsToPost=0;
$goodPostsCount=0;
$index=0;
//2024-10-27T22:01:28+01:00
foreach ($feed->channel->item as $item) {
$index++;
if (!isset($item->guid) || !isset($item->title) || !isset($item->link) || !isset($item->description) || !isset($item->pubDate)) {
fwrite(STDERR,"Warning: announcement #{$index} has unexpected format, skipping.\n");
} else {
//print_r($item);
$now=time();
$guid=$item->guid->__toString();
//$slug=preg_replace('#^.*/(.*)$#','$1',$guid);
$pubdate=strtotime($item->pubDate->__toString());
(preg_match('#^\[(\d{4,}-\d{2}-\d{2})\] #',$item->title,$matches)===1) ? $evdate=$matches[1] : $evdate=false;
$file=null;
$imgalt='';
$hash=$item->title.$item->pubDate;
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length'])) {
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
if (preg_match('#<img [^>]*alt="([^"]+)"#',$item->description->__toString(),$matches)===1) $imgalt=trim($matches[1]);
$hash.=$item->enclosure[0]['url'].$item->enclosure[0]['type'].$item->enclosure[0]['length'];
}
if ($imgalt=='') $imgalt='Flyer dellevento';
//<h3>Raawwr Beats</h3><strong>Kassel - Werner-Hilpert-Straße 22</strong><br/><small>(samedi, 26 octobre 22:00)</small><br/><img alt="This is the alt-text" src="https://demo.gancio.org/media/fcb4ac7e55cb5a53a4008e7c49200dbd.jpg"/><p></p>
$buff=$item->description->__toString();
if ($buff=='') {
$ptext='';
} elseif (preg_match('#^\n?<h3>(.+)</h3><strong>(.+)</strong><br/><small>\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)</small><br/>(.+)$#iuU',$buff,$matches)===1) {
} elseif (preg_match('#^\n?<h3>(.+)</h3><strong>(.+)</strong><br/><small>\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)</small><br/>(.*)$#iuU',$buff,$matches)===1) {
//print_r($matches);
$matches[1]=hent($matches[1]);
$matches[2]=hent($matches[2]);
$ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} alle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]);
$ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} dalle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]);
if ($evdate!==false) $evdate.="T{$matches[6]}:00";
} else {
$ptext=html2text($item->description);
$evdate=false;
}
$hash.=$buff;
//echo "evdate: {$evdate}\n";
$evdate=strtotime($evdate);
//echo "{$now}: ".date('c',$now)." (now)\n{$pubdate}: ".date('c',$pubdate)." (pubdate: {$item->pubDate})\n{$evdate}: ".date('c',$evdate)." (evdate)\n";
//exitYoung("Ciao\n");
$plink="\n\n".$item->link->__toString();
$hash.=$item->link;
if (isset($item->category) && is_countable($item->category) && is_iterable($item->category) && $item->category->count()>0) {
$pcats=[];
foreach ($item->category as $val) {
foreach ($item->category as $val)
$pcats[]=hashtag($val->__toString());
$hash.=$val;
}
$pcats="\n\n".implode(' ',$pcats);
} else {
$pcats='';
}
$hash=hash('sha256',$hash);
if ($opts['test']) {
vecho($opts['verbose'],"Info: considering announcement «{$guid}» as new because we are in «test mode»; processing.\n");
$state='new';
} elseif ($evdate===false) {
fwrite(STDERR,"Warning: could not identify the event start datetime in announcement «{$guid}»; skipping.\n");
$state='error';
} elseif ($evdate<$now) {
vecho($opts['verbose'],"Info: announcement «{$guid}» has a start datetime of ".date('c',$evdate).", which is before now, ".date('c',$now)."; skipping.\n");
$state='error';
} elseif (array_key_exists($guid,$refs)) {
if ($hash==$refs[$guid]['hash']) {
if ($pubdate==$refs[$guid]['pubdate']) {
vecho($opts['verbose'],"Info: announcement «{$guid}» is not new and has not changed; skipping.\n");
$state='old';
} else {
vecho($opts['verbose'],"Info: announcement «{$guid}» is not new, but it has changed; processing.\n");
$state='changed';
$itemsToPost++;
$ptext="[MODIFICATO]\n\n{$ptext}";
}
} else {
vecho($opts['verbose'],"Info: announcement «{$guid}» is new; processing.\n");
@ -500,17 +530,17 @@ foreach ($feed->channel->item as $item) {
} else {
vecho($opts['verbose'],"Info: successfully posted status for {$state} announcement «{$guid}» (URL: «{$res['content']['url']}»).\n");
//print_r($res['content']);
$refs[$guid]=['timestamp'=>time(), 'hash'=>$hash];
$refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate];
$goodPostsCount++;
}
}
} else {
vecho($opts['verbose'],"Info: would have tried to post status for {$state} announcement «{$guid}».\n");
if ($state=='new' || $state=='changed') $refs[$guid]=['timestamp'=>time(), 'hash'=>$hash];
if ($state=='new' || $state=='changed') $refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate];
$goodPostsCount++;
}
}
if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['timestamp']}\t{$refs[$guid]['hash']}\t{$guid}\n");
if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['postdate']}\t{$refs[$guid]['pubdate']}\t{$guid}\n");
}
if ($opts['test']) break;// to test a single post
}