From 12fc2c0963f1bde2d781a606b8c4e3a8101e8823 Mon Sep 17 00:00:00 2001 From: pezcurrel Date: Mon, 28 Oct 2024 10:14:10 +0100 Subject: [PATCH] =?UTF-8?q?Now=20an=20announcement=20is=20considered=20mod?= =?UTF-8?q?ified=20if=20its=20=E2=80=9CpubDate=E2=80=9D=20has=20changed=20?= =?UTF-8?q?(dropped=20hash=20creation),=20and=20if=20its=20start=20datetim?= =?UTF-8?q?e=20is=20before=20now,=20it=20gets=20skipped;=20edited=20posts?= =?UTF-8?q?=20get=20a=20=E2=80=9C[MODIFICATO]=E2=80=9D=20line=20on=20top;?= =?UTF-8?q?=20added=20=C2=ABtimezone=C2=BB=20definition=20in=20config=20fi?= =?UTF-8?q?le;=20added=20option=20to=20list=20supported=20timezones;=20bum?= =?UTF-8?q?ped=20version=20to=200.4.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gancioff | 66 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/gancioff b/gancioff index a727260..3c4f3f7 100755 --- a/gancioff +++ b/gancioff @@ -18,7 +18,7 @@ $SNAME='GancioFF'; $ENAME=strtolower($SNAME); -$SVERS='0.4.2'; +$SVERS='0.4.3'; require __DIR__.'/lib/gettlds.php'; require __DIR__.'/lib/mastodon-postLength.php'; @@ -87,6 +87,12 @@ fedi_token = w6oQ_Ot2LSAm_Q31hrvp0asfl22ip3O4ipYq1kV1ceY # For example: state_file_absolute_path = /var/local/cache/gancio.some.domain.feed.state +# «timezone» is required to specify the timezone of the Gancio instance, in +# order for {$SNAME} to calculate the correct datetimes. You can list the +# supported timezones using option «-T» or «--timezones» (see the related +# entry in the «OPTIONS» section. For example: +timezone = Europe/Rome + # «posts_language» is required to specify the ISO 639-1 code for the language # of posts (see https://www.loc.gov/standards/iso639-2/php/code_list.php for # a complete list). For example: @@ -153,6 +159,8 @@ always_link_gancio_post = true -v / --verbose When this option is not set {$SNAME} prints only warning and error messages; when it is set it also prints informational messages about what it’s doing. +-T / --timezones + List all the supported timezones. -- Treat every possible subsequent argument as non-options. Useful only in the very improbable case your config file is named «--help» or as another option. @@ -176,6 +184,7 @@ $conf=[ 'fedi_hostname'=>['required'=>true, 'default'=>null], 'fedi_token'=>['required'=>true, 'default'=>null], 'state_file_absolute_path'=>['required'=>true, 'default'=>null], + 'timezone'=>['required'=>true, 'default'=>null], 'posts_language'=>['required'=>true, 'default'=>null], 'posts_visibility'=>['required'=>false, 'default'=>'public'], 'max_post_length'=>['required'=>false, 'default'=>null], @@ -189,6 +198,8 @@ $opts=[ 'update-language-codes'=>false ]; +$timezones=timezone_identifiers_list(); + $canBeOpt=true; for ($i=1; $i<$argc; $i++) { if ($canBeOpt && $argv[$i][0]=='-') { @@ -211,6 +222,10 @@ for ($i=1; $i<$argc; $i++) { $i++; } elseif ($argv[$i]=='-v' || $argv[$i]=='--verbose') { $opts['verbose']=true; + } elseif ($argv[$i]=='-T' || $argv[$i]=='--timezones') { + foreach ($timezones as $val) + echo "$val\n"; + exit(0); } elseif ($argv[$i]=='-u' || $argv[$i]=='--update-language-codes') { $opts['update-language-codes']=true; } elseif ($argv[$i]=='--make-readme') { @@ -259,6 +274,7 @@ if (!is_file($confFP)) dieYoung("Error: «{$confFP}» is not a file.\n",1); if (!is_readable($confFP)) dieYoung("Error: «{$confFP}» is not readable.\n",1); getConf($conf,$confFP); if (preg_match('#^/.*$#',$conf['state_file_absolute_path'])!==1) dieYoung("Error: in configuration file: «state_file_absolute_path» must be an absolute path.\n",1); +if (!in_array($conf['timezone'],$timezones)) dieYoung("Error: in configuration file: «timezone»: «{$conf['timezone']}» is not one of the supported timezones (use «-T» or «--timezones» to list them).\n",1); if (!in_array($conf['posts_language'],$langs)) dieYoung("Error: in configuration file: «posts_language»: «{$conf['posts_language']}» is not a known language code.\n",1); if (!in_array($conf['posts_visibility'],['public', 'unlisted', 'private', 'direct'])) dieYoung("Error: in configuration file: «posts_visibility» must be one of «public», «unlisted», «private» or «direct».\n",1); if (!is_null($conf['max_post_length'])) { @@ -308,9 +324,9 @@ if (file_exists($conf['state_file_absolute_path'])) { $i=0; $buff=file($conf['state_file_absolute_path'],FILE_IGNORE_NEW_LINES); foreach ($buff as $key=>$val) { - if (preg_match('#^(\d+)\t([a-z0-9]{64})\t(\S+)$#',$val,$matches)===1) { + if (preg_match('#^(\d+)\t(\d+)\t(\S+)$#',$val,$matches)===1) { if ($matches[1]+0>=$graceLine) - $refs[$matches[3]]=['timestamp'=>$matches[1], 'hash'=>$matches[2]]; + $refs[$matches[3]]=['postdate'=>$matches[1], 'pubdate'=>$matches[2]]; else $i++; } else { @@ -320,7 +336,7 @@ if (file_exists($conf['state_file_absolute_path'])) { unset($buff); $fh=fopen($conf['state_file_absolute_path'],'w'); foreach ($refs as $key=>$val) - fwrite($fh,"{$val['timestamp']}\t{$val['hash']}\t{$key}\n"); + fwrite($fh,"{$val['postdate']}\t{$val['pubdate']}\t{$key}\n"); fclose($fh); vecho($opts['verbose'],'Info: got '.count($refs)." reference(s) to already posted announcement(s) from state file «{$conf['state_file_absolute_path']}»; removed {$i} reference(s) older than one year.\n"); } elseif (is_null($opts['do-post']) && !$opts['test']) { @@ -341,64 +357,78 @@ if ($feed===false) dieYoung("Error: got no valid XML from «{$conf['feed_url']} if (!isset($feed->channel->item) || !is_iterable($feed->channel->item) || !is_countable($feed->channel->item)) dieYoung("Error: feed from «{$conf['feed_url']}» had unexpected format.\n",1); $itemsCount=$feed->channel->item->count(); if ($itemsCount==0) exitYoung("Info: feed from «{$conf['feed_url']}» was empty, bye.\n"); +//file_put_contents(__DIR__.'/storage/dump-'.time(),print_r($feed,true)); +date_default_timezone_set($conf['timezone']); $tsfp="{$conf['state_file_absolute_path']}.tmp"; if (!$opts['test'] && ($fh=@fopen($tsfp,'w'))===false) dieYoung("Error: could not open «{$tsfp}» in «write» mode.\n",1); $itemsToPost=0; $goodPostsCount=0; $index=0; +//2024-10-27T22:01:28+01:00 foreach ($feed->channel->item as $item) { $index++; if (!isset($item->guid) || !isset($item->title) || !isset($item->link) || !isset($item->description) || !isset($item->pubDate)) { fwrite(STDERR,"Warning: announcement #{$index} has unexpected format, skipping.\n"); } else { + //print_r($item); + $now=time(); $guid=$item->guid->__toString(); + //$slug=preg_replace('#^.*/(.*)$#','$1',$guid); + $pubdate=strtotime($item->pubDate->__toString()); + (preg_match('#^\[(\d{4,}-\d{2}-\d{2})\] #',$item->title,$matches)===1) ? $evdate=$matches[1] : $evdate=false; $file=null; $imgalt=''; - $hash=$item->title.$item->pubDate; if (isset($item->enclosure[0]['url']) && isset($item->enclosure[0]['type']) && isset($item->enclosure[0]['length'])) { $file=['url'=>$item->enclosure[0]['url']->__toString(), 'type'=>$item->enclosure[0]['type']->__toString(), 'length'=>$item->enclosure[0]['length']->__toString()]; if (preg_match('#]*alt="([^"]+)"#',$item->description->__toString(),$matches)===1) $imgalt=trim($matches[1]); - $hash.=$item->enclosure[0]['url'].$item->enclosure[0]['type'].$item->enclosure[0]['length']; } if ($imgalt=='') $imgalt='Flyer dell’evento'; //

Raawwr Beats

Kassel - Werner-Hilpert-Straße 22
(samedi, 26 octobre 22:00)
This is the alt-text

$buff=$item->description->__toString(); if ($buff=='') { $ptext=''; - } elseif (preg_match('#^\n?

(.+)

(.+)
\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)
(.+)$#iuU',$buff,$matches)===1) { + } elseif (preg_match('#^\n?

(.+)

(.+)
\((\w+)\W+(\d+)\W+(\w+)\W+(\d+:\d+)\)
(.*)$#iuU',$buff,$matches)===1) { //print_r($matches); $matches[1]=hent($matches[1]); $matches[2]=hent($matches[2]); - $ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} alle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]); + $ptext="{$matches[1]}\n\n".ucfirst($matches[3])." {$matches[4]} {$matches[5]} dalle {$matches[6]} presso {$matches[2]}\n\n".html2text($matches[7]); + if ($evdate!==false) $evdate.="T{$matches[6]}:00"; } else { $ptext=html2text($item->description); + $evdate=false; } - $hash.=$buff; + //echo "evdate: {$evdate}\n"; + $evdate=strtotime($evdate); + //echo "{$now}: ".date('c',$now)." (now)\n{$pubdate}: ".date('c',$pubdate)." (pubdate: {$item->pubDate})\n{$evdate}: ".date('c',$evdate)." (evdate)\n"; + //exitYoung("Ciao\n"); $plink="\n\n".$item->link->__toString(); - $hash.=$item->link; if (isset($item->category) && is_countable($item->category) && is_iterable($item->category) && $item->category->count()>0) { $pcats=[]; - foreach ($item->category as $val) { + foreach ($item->category as $val) $pcats[]=hashtag($val->__toString()); - $hash.=$val; - } $pcats="\n\n".implode(' ',$pcats); } else { $pcats=''; } - $hash=hash('sha256',$hash); if ($opts['test']) { vecho($opts['verbose'],"Info: considering announcement «{$guid}» as new because we are in «test mode»; processing.\n"); $state='new'; + } elseif ($evdate===false) { + fwrite(STDERR,"Warning: could not identify the event start datetime in announcement «{$guid}»; skipping.\n"); + $state='error'; + } elseif ($evdate<$now) { + vecho($opts['verbose'],"Info: announcement «{$guid}» has a start datetime of ".date('c',$evdate).", which is before now, ".date('c',$now)."; skipping.\n"); + $state='error'; } elseif (array_key_exists($guid,$refs)) { - if ($hash==$refs[$guid]['hash']) { + if ($pubdate==$refs[$guid]['pubdate']) { vecho($opts['verbose'],"Info: announcement «{$guid}» is not new and has not changed; skipping.\n"); $state='old'; } else { vecho($opts['verbose'],"Info: announcement «{$guid}» is not new, but it has changed; processing.\n"); $state='changed'; $itemsToPost++; + $ptext="[MODIFICATO]\n\n{$ptext}"; } } else { vecho($opts['verbose'],"Info: announcement «{$guid}» is new; processing.\n"); @@ -500,17 +530,17 @@ foreach ($feed->channel->item as $item) { } else { vecho($opts['verbose'],"Info: successfully posted status for {$state} announcement «{$guid}» (URL: «{$res['content']['url']}»).\n"); //print_r($res['content']); - $refs[$guid]=['timestamp'=>time(), 'hash'=>$hash]; + $refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate]; $goodPostsCount++; } } } else { vecho($opts['verbose'],"Info: would have tried to post status for {$state} announcement «{$guid}».\n"); - if ($state=='new' || $state=='changed') $refs[$guid]=['timestamp'=>time(), 'hash'=>$hash]; + if ($state=='new' || $state=='changed') $refs[$guid]=['postdate'=>time(), 'pubdate'=>$pubdate]; $goodPostsCount++; } } - if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['timestamp']}\t{$refs[$guid]['hash']}\t{$guid}\n"); + if (!$opts['test'] && array_key_exists($guid,$refs)) fwrite($fh,"{$refs[$guid]['postdate']}\t{$refs[$guid]['pubdate']}\t{$guid}\n"); } if ($opts['test']) break;// to test a single post }