Now an announcement is considered modified if its “pubDate” has changed (dropped hash creation), and if its start datetime is before now, it gets skipped; edited posts get a “[MODIFICATO]” line on top; added «timezone» definition in config file; added option to list supported timezones; bumped version to 0.4.3
This commit is contained in:
parent
7e897ac840
commit
12fc2c0963
1 changed file with 48 additions and 18 deletions
66
gancioff
66
gancioff
|
@ -18,7 +18,7 @@
|
|||
|
||||
$SNAME='GancioFF';
|
||||
$ENAME=strtolower($SNAME);
|
||||
$SVERS='0.4.2';
|
||||
$SVERS='0.4.3';
|
||||
|
||||
require __DIR__.'/lib/gettlds.php';
|
||||
require __DIR__.'/lib/mastodon-postLength.php';
|
||||
|
@ -87,6 +87,12 @@ fedi_token = w6oQ_Ot2LSAm_Q31hrvp0asfl22ip3O4ipYq1kV1ceY
|
|||
# For example:
|
||||
state_file_absolute_path = /var/local/cache/gancio.some.domain.feed.state
|
||||
|
||||
# «timezone» is required to specify the timezone of the Gancio instance, in
|
||||
# order for {$SNAME} to calculate the correct datetimes. You can list the
|
||||
# supported timezones using option «-T» or «--timezones» (see the related
|
||||
# entry in the «OPTIONS» section). For example:
|
||||
timezone = Europe/Rome
|
||||
|
||||
# «posts_language» is required to specify the ISO 639-1 code for the language
|
||||
# of posts (see https://www.loc.gov/standards/iso639-2/php/code_list.php for
|
||||
# a complete list). For example:
|
||||
|
@ -153,6 +159,8 @@ always_link_gancio_post = true
|
|||
-v / --verbose
|
||||
When this option is not set {$SNAME} prints only warning and error messages;
|
||||
when it is set it also prints informational messages about what it’s doing.
|
||||
-T / --timezones
|
||||
List all the supported timezones.
|
||||
--
|
||||
Treat every possible subsequent argument as non-options. Useful only in the
|
||||
very improbable case your config file is named «--help» or as another option.
|
||||
|
@ -176,6 +184,7 @@ $conf=[
|
|||
'fedi_hostname'=>['required'=>true, 'default'=>null],
|
||||
'fedi_token'=>['required'=>true, 'default'=>null],
|
||||
'state_file_absolute_path'=>['required'=>true, 'default'=>null],
|
||||
'timezone'=>['required'=>true, 'default'=>null],
|
||||
'posts_language'=>['required'=>true, 'default'=>null],
|
||||
'posts_visibility'=>['required'=>false, 'default'=>'public'],
|
||||
'max_post_length'=>['required'=>false, 'default'=>null],
|
||||
|
@ -189,6 +198,8 @@ $opts=[
|
|||
'update-language-codes'=>false
|
||||
];
|
||||
|
||||
$timezones=timezone_identifiers_list();
|
||||
|
||||
$canBeOpt=true;
|
||||
for ($i=1; $i<$argc; $i++) {
|
||||
if ($canBeOpt && $argv[$i][0]=='-') {
|
||||
|
@ -211,6 +222,10 @@ for ($i=1; $i<$argc; $i++) {
|
|||
$i++;
|
||||
} elseif ($argv[$i]=='-v' || $argv[$i]=='--verbose') {
|
||||
$opts['verbose']=true;
|
||||
} elseif ($argv[$i]=='-T' || $argv[$i]=='--timezones') {
|
||||
foreach ($timezones as $val)
|
||||
echo "$val\n";
|
||||
exit(0);
|
||||
} elseif ($argv[$i]=='-u' || $argv[$i]=='--update-language-codes') {
|
||||
$opts['update-language-codes']=true;
|
||||
} elseif ($argv[$i]=='--make-readme') {
|
||||
|
@ -259,6 +274,7 @@ if (!is_file($confFP)) dieYoung("Error: «{$confFP}» is not a file.\n",1);
|
|||
if (!is_readable($confFP)) dieYoung("Error: «{$confFP}» is not readable.\n",1);
|
||||
getConf($conf,$confFP);
|
||||
if (preg_match('#^/.*$#',$conf['state_file_absolute_path'])!==1) dieYoung("Error: in configuration file: «state_file_absolute_path» must be an absolute path.\n",1);
|
||||
if (!in_array($conf['timezone'],$timezones)) dieYoung("Error: in configuration file: «timezone»: «{$conf['timezone']}» is not one of the supported timezones (use «-T» or «--timezones» to list them).\n",1);
|
||||
if (!in_array($conf['posts_language'],$langs)) dieYoung("Error: in configuration file: «posts_language»: «{$conf['posts_language']}» is not a known language code.\n",1);
|
||||
if (!in_array($conf['posts_visibility'],['public', 'unlisted', 'private', 'direct'])) dieYoung("Error: in configuration file: «posts_visibility» must be one of «public», «unlisted», «private» or «direct».\n",1);
|
||||
if (!is_null($conf['max_post_length'])) {
|
||||
|
@ -308,9 +324,9 @@ if (file_exists($conf['state_file_absolute_path'])) {
|
|||
$i=0;
|
||||
$buff=file($conf['state_file_absolute_path'],FILE_IGNORE_NEW_LINES);
|
||||
foreach ($buff as $key=>$val) {
|
||||
if (preg_match('#^(\d+)\t([a-z0-9]{64})\t(\S+)$#',$val,$matches)===1) {
|
||||
if (preg_match('#^(\d+)\t(\d+)\t(\S+)$#',$val,$matches)===1) {
|
||||
if ($matches[1]+0>=$graceLine)
|
||||
$refs[$matches[3]]=['timestamp'=>$matches[1], 'hash'=>$matches[2]];
|
||||
$refs[$matches[3]]=['postdate'=>$matches[1], 'pubdate'=>$matches[2]];
|
||||
else
|
||||
$i++;
|
||||
} else {
|
||||
|
@ -320,7 +336,7 @@ if (file_exists($conf['state_file_absolute_path'])) {
|
|||
unset($buff);
|
||||
$fh=fopen($conf['state_file_absolute_path'],'w');
|
||||
foreach ($refs as $key=>$val)
|
||||
fwrite($fh,"{$val['timestamp']}\t{$val['hash']}\t{$key}\n");
|
||||
fwrite($fh,"{$val['postdate']}\t{$val['pubdate']}\t{$key}\n");
|
||||
fclose($fh);
|
||||
vecho($opts['verbose'],'Info: got '.count($refs)." reference(s) to already posted announcement(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} reference(s) older than one year.\n");
|
||||
} elseif (is_null($opts['do-post']) && !$opts['test']) {
|
||||
|
@ -341,64 +357,78 @@ if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']}
|
|||
if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1);
|
||||
$itemsCount=$feed->channel->item->count();
|
||||
if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n");
|
||||
//file_put_contents(__DIR__.'/storage/dump-'.time(),print_r($feed,true));
|
||||
|
||||
date_default_timezone_set($conf['timezone']);
|
||||
$tsfp="{$conf['state_file_absolute_path']}.tmp";
|
||||
if (!$opts['test'] && ($fh=@fopen($tsfp,'w'))===false) dieYoung("Error: could not open «{$tsfp}» in «write» mode.\n",1);
|
||||
$itemsToPost=0;
|
||||
$goodPostsCount=0;
|
||||
$index=0;
|
||||
//2024-10-27T22:01:28+01:00
|
||||
foreach ($feed->channel->item as $item) {
|
||||
$index++;
|
||||
if (!isset($item->guid) || !isset($item->title) || !isset($item->link) || !isset($item->description) || !isset($item->pubDate)) {
|
||||
fwrite(STDERR,"Warning: announcement #{$index} has unexpected format, skipping.\n");
|
||||
} else {
|
||||
//print_r($item);
|
||||
$now=time();
|
||||
$guid=$item->guid->__toString();
|
||||
//$slug=preg_replace('#^.*/(.*)$#','$1',$guid);
|
||||
$pubdate=strtotime($item->pubDate->__toString());
|
||||
(preg_match('#^\[(\d{4,}-\d{2}-\d{2})\] #',$item->title,$matches)===1) ? $evdate=$matches[1] : $evdate=false;
|
||||
$file=null;
|
||||
$imgalt='';
|
||||
$hash=$item->title.$item->pubDate;
|
||||
if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length'])) {
|
||||
$file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()];
|
||||
if (preg_match('#<img [^>]*alt="([^"]+)"#',$item->description->__toString(),$matches)===1) $imgalt=trim($matches[1]);
|
||||
$hash.=$item->enclosure[0]['url'].$item->enclosure[0]['type'].$item->enclosure[0]['length'];
|
||||
}
|
||||
if ($imgalt=='') $imgalt='Flyer dell’evento';
|
||||
//<h3>Raawwr Beats</h3><strong>Kassel - Werner-Hilpert-Straße 22</strong><br/><small>(samedi, 26 octobre 22:00)</small><br/><img alt="This is the alt-text" src="https://demo.gancio.org/media/fcb4ac7e55cb5a53a4008e7c49200dbd.jpg"/><p></p>
|
||||
$buff=$item->description->__toString();
|
||||
if ($buff=='') {
|
||||
$ptext='';
|
||||
} elseif (preg_match('#^\n?<h3>(.+)</h3><strong>(.+)</strong><br/><small>\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)</small><br/>(.+)$#iuU',$buff,$matches)===1) {
|
||||
} elseif (preg_match('#^\n?<h3>(.+)</h3><strong>(.+)</strong><br/><small>\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)</small><br/>(.*)$#iuU',$buff,$matches)===1) {
|
||||
//print_r($matches);
|
||||
$matches[1]=hent($matches[1]);
|
||||
$matches[2]=hent($matches[2]);
|
||||
$ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} alle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]);
|
||||
$ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} dalle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]);
|
||||
if ($evdate!==false) $evdate.="T{$matches[6]}:00";
|
||||
} else {
|
||||
$ptext=html2text($item->description);
|
||||
$evdate=false;
|
||||
}
|
||||
$hash.=$buff;
|
||||
//echo "evdate: {$evdate}\n";
|
||||
$evdate=strtotime($evdate);
|
||||
//echo "{$now}: ".date('c',$now)." (now)\n{$pubdate}: ".date('c',$pubdate)." (pubdate: {$item->pubDate})\n{$evdate}: ".date('c',$evdate)." (evdate)\n";
|
||||
//exitYoung("Ciao\n");
|
||||
$plink="\n\n".$item->link->__toString();
|
||||
$hash.=$item->link;
|
||||
if (isset($item->category) && is_countable($item->category) && is_iterable($item->category) && $item->category->count()>0) {
|
||||
$pcats=[];
|
||||
foreach ($item->category as $val) {
|
||||
foreach ($item->category as $val)
|
||||
$pcats[]=hashtag($val->__toString());
|
||||
$hash.=$val;
|
||||
}
|
||||
$pcats="\n\n".implode(' ',$pcats);
|
||||
} else {
|
||||
$pcats='';
|
||||
}
|
||||
$hash=hash('sha256',$hash);
|
||||
if ($opts['test']) {
|
||||
vecho($opts['verbose'],"Info: considering announcement «{$guid}» as new because we are in «test mode»; processing.\n");
|
||||
$state='new';
|
||||
} elseif ($evdate===false) {
|
||||
fwrite(STDERR,"Warning: could not identify the event start datetime in announcement «{$guid}»; skipping.\n");
|
||||
$state='error';
|
||||
} elseif ($evdate<$now) {
|
||||
vecho($opts['verbose'],"Info: announcement «{$guid}» has a start datetime of ".date('c',$evdate).", which is before now, ".date('c',$now)."; skipping.\n");
|
||||
$state='error';
|
||||
} elseif (array_key_exists($guid,$refs)) {
|
||||
if ($hash==$refs[$guid]['hash']) {
|
||||
if ($pubdate==$refs[$guid]['pubdate']) {
|
||||
vecho($opts['verbose'],"Info: announcement «{$guid}» is not new and has not changed; skipping.\n");
|
||||
$state='old';
|
||||
} else {
|
||||
vecho($opts['verbose'],"Info: announcement «{$guid}» is not new, but it has changed; processing.\n");
|
||||
$state='changed';
|
||||
$itemsToPost++;
|
||||
$ptext="[MODIFICATO]\n\n{$ptext}";
|
||||
}
|
||||
} else {
|
||||
vecho($opts['verbose'],"Info: announcement «{$guid}» is new; processing.\n");
|
||||
|
@ -500,17 +530,17 @@ foreach ($feed->channel->item as $item) {
|
|||
} else {
|
||||
vecho($opts['verbose'],"Info: successfully posted status for {$state} announcement «{$guid}» (URL: «{$res['content']['url']}»).\n");
|
||||
//print_r($res['content']);
|
||||
$refs[$guid]=['timestamp'=>time(), 'hash'=>$hash];
|
||||
$refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate];
|
||||
$goodPostsCount++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vecho($opts['verbose'],"Info: would have tried to post status for {$state} announcement «{$guid}».\n");
|
||||
if ($state=='new' || $state=='changed') $refs[$guid]=['timestamp'=>time(), 'hash'=>$hash];
|
||||
if ($state=='new' || $state=='changed') $refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate];
|
||||
$goodPostsCount++;
|
||||
}
|
||||
}
|
||||
if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['timestamp']}\t{$refs[$guid]['hash']}\t{$guid}\n");
|
||||
if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['postdate']}\t{$refs[$guid]['pubdate']}\t{$guid}\n");
|
||||
}
|
||||
if ($opts['test']) break;// to test a single post
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue