123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- #!/bin/php
- <?php
- /*
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
// Newline shorthand used when composing console and log messages.
define('N', "\n");

// Ticks let the pcntl handlers registered below be dispatched between
// statements of this script.
declare(ticks=1);

// SIGTERM ('kill' was called), SIGHUP (terminal log-out) and SIGINT (Ctrl-C)
// all go through the same clean-up routine.
foreach (array(SIGTERM, SIGHUP, SIGINT) as $signo) {
    pcntl_signal($signo, 'signalHandler');
}
/**
 * Handler for SIGTERM, SIGHUP and SIGINT: releases open resources and exits.
 *
 * Closes the MySQL connection, writes the closing "Fine?" member and brace to
 * the JSON dump before closing it, closes the log file, then terminates the
 * script with exit status 2.
 *
 * @param int $signal Number of the signal received (not used).
 */
function signalHandler($signal) {
    global $link, $logf, $jsonf;
    lecho(N.'Sono stato interrotto.'.N);
    if ($link) {
        lecho('La connessione MySQL è aperta, la chiudo.'.N);
        mysqli_close($link);
    }
    if ($jsonf) {
        // Reported through lecho() like the other messages so the log file
        // records it too (it used to go to the console only).
        lecho('Il file di dump json è aperto, lo chiudo.'.N);
        fwrite($jsonf,'"Fine?": true'.N.'}'.N);
        fclose($jsonf);
    }
    if ($logf) {
        // Must be logged before the handle is closed.
        lecho('Il file di log è aperto, lo chiudo.'.N);
        fclose($logf);
    }
    exit(2);
}
// Run-time options of the crawler.
$opts = [
    // Value passed as the HTTP stream-context timeout.
    'timeout'   => 3,
    // When true, messages are also written to the log file.
    'log'       => true,
    // File name of the JSON dump, resolved against this script's directory.
    'jsonfp'    => 'instances.json',
    // When true, the fetched instance info is dumped to 'jsonfp'.
    'jsonwrite' => true,
    // Not read anywhere in the visible code.
    'jsonread'  => false,
];
- use function mysqli_real_escape_string as myesc;
/**
 * Converts a duration such as "90", "15m" or "2g" into seconds.
 *
 * The string is a run of digits optionally followed by ONE unit letter:
 *   (none) or s = seconds, m = x60, o = x3600 (hours), g = x86400 (days),
 *   S = x7 days, M = x30 days, A = x365 days.
 *
 * The whole string must match: input with trailing characters (e.g. "5x")
 * is rejected instead of being read as plain seconds.
 *
 * @param string $str Duration to parse.
 * @return int|float|false Seconds (float only on integer overflow), or false
 *                         when $str is not a valid duration.
 */
function tosec($str) {
    static $multipliers = array(
        ''  => 1,
        's' => 1,
        'm' => 60,
        'o' => 3600,
        'g' => 86400,
        'S' => 604800,
        'M' => 2592000,
        'A' => 31536000,
    );
    // The D modifier makes "$" match only at the very end (no trailing newline).
    if (!is_scalar($str) || preg_match('/^([0-9]+)([smogSMA]?)$/D', (string)$str, $parts) !== 1) {
        return(false);
    }
    // Multiplying the numeric string always yields a number, so the
    // plain-seconds case no longer returns a string.
    return($parts[1] * $multipliers[$parts[2]]);
}
/**
 * Prints/logs a final message, releases resources and terminates the script.
 *
 * @param string $msg     Message handed to lecho() before exiting.
 * @param int    $code    Exit status of the script.
 * @param bool   $closemy When true, the global MySQL connection is closed first.
 */
function mexit($msg,$code,$closemy=false) {
    // $logf has to be imported too: without it the log handle was never
    // visible here and therefore never closed.
    global $link, $logf;
    lecho($msg);
    if ($closemy && $link)
        mysqli_close($link);
    if (is_resource($logf))
        fclose($logf);
    exit($code);
}
/**
 * Writes a message to standard output and, when logging is on, to the log file.
 *
 * @param string $msg     Text to emit (no newline is appended).
 * @param bool   $logonly When true the message is written to the log only.
 */
function lecho($msg,$logonly=false) {
    global $opts, $logf;
    if (!$logonly)
        echo($msg);
    // The handle is checked as well as the option: this function is also
    // reached (through mexit() or the signal handler) when the log file could
    // not be opened or has not been opened yet.
    if (!empty($opts['log']) && is_resource($logf))
        fwrite($logf,$msg);
}
// --- Log file ---------------------------------------------------------------
$logfp='crawler.log';
if ($opts['log']) {
    $logf=@fopen(__DIR__.'/'.$logfp,'w')
        or mexit('Non ho potuto aprire in scrittura il file di log «'.$logfp.'».',1);
}

// --- Configuration ----------------------------------------------------------
// NOTE(review): unlike the log file, this path is relative to the current
// working directory, not to __DIR__ - confirm that this is intended.
$inifp='../sec/mastostartadmin.ini';
$iniarr=parse_ini_file($inifp)
    or mexit('Impossibile aprire il file di configurazione «'.$inifp.'»'.N,1);

// --- Database ---------------------------------------------------------------
// The whole script handles failures with "... or mexit(...)". PHP 8.1+ makes
// mysqli throw exceptions by default, which would bypass those handlers, so
// the classic return-value reporting is requested explicitly.
mysqli_report(MYSQLI_REPORT_OFF);
$link=mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket'])
    // On a failed connection $link is false, so mysqli_error($link) cannot be
    // used here; mysqli_connect_error() is the call meant for this case.
    or mexit(mysqli_connect_error().N,1);
mysqli_set_charset($link,'utf8');

// --- HTTP context shared by every file_get_contents() call ------------------
$contextopts=array(
    'http'=>array(
        'timeout'=>$opts['timeout']
    ),
    'socket'=>array(
        'tcp_nodelay'=>true
    )
);
$context=stream_context_create($contextopts);

// --- Blacklist already stored in the DB, indexed by domain ------------------
$blacklist=array();
lecho('Carico la blacklist dal database...'.N);
$res=mysqli_query($link,'SELECT * FROM Blacklist')
    or mexit(mysqli_error($link).N,3,true);
lecho(mysqli_num_rows($res).' istanze nella blacklist.'.N);
while($row=mysqli_fetch_assoc($res)) {
    $blacklist[$row['Domain']]=$row;
}
/**
 * Parses a "YYYY-MM-DD hh:mm:ss[.fraction][Z]" timestamp (date and time may be
 * separated by a space or by "T") into a Unix timestamp with fractional part.
 *
 * The fractional seconds are optional, so "2019-12-11 10:20:30" is accepted
 * as well as "2019-12-11T10:20:30.500Z".
 *
 * NOTE(review): the fields are passed to mktime(), i.e. interpreted in the
 * script's default timezone, even when the input carries a "Z" suffix -
 * confirm whether gmmktime() is what is actually wanted.
 *
 * @param string $pgdate Timestamp to parse.
 * @return float|false Unix timestamp, or false when $pgdate does not match.
 */
function pgdatetomy($pgdate) {
    if (!is_string($pgdate)
        || preg_match('/^(\d+)-(\d+)-(\d+)[ T](\d+):(\d+):(\d+)(?:\.(\d+))?Z?$/',$pgdate,$m)!==1) {
        return(false);
    }
    // Group 7 is absent from $m when the input has no fractional seconds.
    $fraction=isset($m[7]) ? floatval('0.'.$m[7]) : 0.0;
    return(mktime((int)$m[4],(int)$m[5],(int)$m[6],(int)$m[2],(int)$m[3],(int)$m[1])+$fraction);
}
/**
 * Turns one tab-separated line of a domain-block dump into a Blacklist row.
 *
 * Expected columns, in order: domain, created-at, modified-at, severity,
 * reject-media flag, reject-reports flag, public comment. The two flags are
 * read as 'f'/'t' and stored as 0/1.
 *
 * Lines with fewer than seven columns are padded, so missing columns come out
 * as null (false for the two dates) instead of raising undefined-offset
 * warnings; flags other than 'f'/'t' come out as null.
 *
 * @param string $line One data line of the dump.
 * @return array Row keyed by Domain, CreatedAt, ModifiedAt, Severity,
 *               RejectMedia, RejectReports, PublicComment.
 */
function blpgdumplinetomy($line) {
    static $truefalse=array('f'=>0,'t'=>1);
    // A trailing CR/LF would otherwise end up inside the last column.
    $cols=array_pad(explode("\t",rtrim($line,"\r\n")),7,null);
    $flags=array();
    foreach (array(4,5) as $pos) {
        $raw=$cols[$pos];
        $flags[$pos]=(is_string($raw) && isset($truefalse[$raw])) ? $truefalse[$raw] : null;
    }
    return(array(
        'Domain'=>$cols[0],
        'CreatedAt'=>is_null($cols[1]) ? false : pgdatetomy($cols[1]),
        'ModifiedAt'=>is_null($cols[2]) ? false : pgdatetomy($cols[2]),
        'Severity'=>$cols[3],
        'RejectMedia'=>$flags[4],
        'RejectReports'=>$flags[5],
        'PublicComment'=>$cols[6]
    ));
}
// Blacklist entries found on the start nodes that are not in the DB yet.
$blacklistnew=array();
// Domains to crawl, used as keys (the values are always null).
$insts=array();

lecho('Carico le istanze di partenza...'.N);
$res=mysqli_query($link,'SELECT Domain FROM StartNodes')
    or mexit(mysqli_error($link).N,3,true);
lecho(mysqli_num_rows($res).' istanze di partenza.'.N);
while($row=mysqli_fetch_assoc($res)) {
    $insts[$row['Domain']]=null;

    // Peers known to this start node.
    lecho('Recupero la lista delle istanze note a «'.$row['Domain'].'» ... ');
    $buf=@file_get_contents('https://'.$row['Domain'].'/api/v1/instance/peers',false,$context);
    // A reply that is not a JSON array counts as a failure: iterating over it
    // would raise warnings (or a TypeError for non-string entries).
    $peers=($buf!==false) ? json_decode($buf,true) : null;
    if (is_array($peers)) {
        lecho('OK :-)'.N);
        foreach ($peers as $pdom) {
            if (is_string($pdom) && strlen($pdom)<=64 && !array_key_exists($pdom,$insts)) {
                $insts[$pdom]=null;
            }
        }
    } else {
        lecho('ERRORE :-('.N);
    }

    // Domain blocks published by this start node.
    lecho('Recupero la blacklist di «'.$row['Domain'].'» ... ');
    $buf=@file_get_contents('https://'.$row['Domain'].'/domain_blocks.txt',false,$context);
    if ($buf!==false) {
        lecho('OK :-)'.N);
        foreach (explode(N,$buf) as $line) {
            // Tolerate CRLF line endings.
            $line=rtrim($line,"\r");
            // Skip comment lines and blank lines.
            if (preg_match('/(^#.*$)|(^\s*$)/',$line)!==0)
                continue;
            $brow=blpgdumplinetomy($line);
            // A line without a domain cannot be keyed nor stored.
            if (!is_string($brow['Domain']) || $brow['Domain']==='')
                continue;
            if (!array_key_exists($brow['Domain'],$blacklist)) {
                $blacklistnew[$brow['Domain']]=$brow;
            }
            $blacklist[$brow['Domain']]=$brow;
        }
    } else {
        lecho('ERRORE :-('.N);
    }
}

// Instances already known to the DB are added to the ones to check.
$res=mysqli_query($link,'SELECT URI FROM Instances')
    or mexit(mysqli_error($link).N,3,true);
while($row=mysqli_fetch_assoc($res)) {
    if (!array_key_exists($row['URI'],$insts))
        $insts[$row['URI']]=null;
}

ksort($insts);
ksort($blacklist);
ksort($blacklistnew);
lecho('Istanze recuperate: '.count($insts).N);
lecho('Istanze blacklistate: '.count($blacklist).', di cui '.count($blacklistnew).' nuove da aggiungere al DB.'.N);

// Store the newly discovered blacklist entries.
foreach ($blacklistnew as $row) {
    foreach($row as $key=>$val) {
        // The escaper expects a string; values here may be null, false, int or float.
        $row[$key]=myesc($link,(string)$val);
    }
    mysqli_query($link,'INSERT INTO Blacklist (ID, Domain, CreatedAt, ModifiedAt, Severity, RejectMedia, RejectReports, PrivateComment, PublicComment) VALUES (NULL, \''.$row['Domain'].'\', \''.$row['CreatedAt'].'\', \''.$row['ModifiedAt'].'\', \''.$row['Severity'].'\', \''.$row['RejectMedia'].'\', \''.$row['RejectReports'].'\', NULL, \''.$row['PublicComment'].'\')')
        or mexit(mysqli_error($link).N,3,true);
}
- //INSERT INTO `Instances` (`ID`, `New`, `Chosen`, `Visible`, `BlackListed`, `URI`, `Title`, `ShortDesc`, `LongDesc`, `OurDesc`, `PlaceID`, `Email`, `Software`, `Version`, `UserCount`, `StatusCount`, `DomainCount`, `ActiveUsersMonth`, `ActiveUsersHalfYear`, `Thumb`, `RegOpen`, `RegReqApproval`, `MaxTootChars`, `AdmAccount`, `AdmDisplayName`, `AdmCreatedAt`, `AdmNote`, `AdmURL`, `AdmAvatar`, `AdmHeader`) VALUES (NULL, '1', '0', '0', '0', 'pantagruel.dnsup.net', 'Pantagruel', 'Descrizione breve', 'Descrizione lunga', 'Istanza molto carina senza soffitto, senza cucina', '1', 'Graume <graume@inventati.org>', 'mastodon', '3.0.1', '2', '12', '345', '5', '10', 'http://www.iedm.it', '1', '0', '540', 'admin', 'Admin', '2019-12-11', 'Note \'admin\'', 'https://rame.altervista.org', 'http://www.iedm.it', 'http://www.iedm.it');
/**
 * Maps a truthy value to 1 and a falsy value to 0.
 *
 * @param mixed $bool Value evaluated for truthiness.
 * @return int 1 or 0.
 */
function b2i($bool) {
    return($bool ? 1 : 0);
}
/**
 * "Array Key Exists And Value Is Not Null".
 *
 * Also returns false when $arr is not an array at all: the data inspected
 * with this helper comes from decoded remote JSON, where a member expected to
 * be an object may just as well be a string or a boolean.
 *
 * @param int|string $key Key to look up.
 * @param mixed      $arr Array to inspect (taken by reference, never modified).
 * @return bool True when $arr is an array holding a non-null value at $key.
 */
function akeavinn($key,&$arr) {
    // isset() is exactly "key exists and value is not null".
    return(is_array($arr) && isset($arr[$key]));
}
/**
 * Returns null for values that are empty or whitespace-only, otherwise the
 * value itself, unchanged.
 *
 * Non-scalar values (null, arrays, objects) also yield null: they cannot be
 * matched against the pattern and are of no use to callers expecting text.
 *
 * @param mixed $str Value to check.
 * @return mixed|null $str, or null when it is blank or not a scalar.
 */
function nempty($str) {
    if (!is_scalar($str))
        return(null);
    return(preg_match('/^\s*$/',(string)$str)===1 ? null : $str);
}
/**
 * "Sub-array implode": joins the $key element of every row of $arr with $glue.
 *
 * @param string     $glue Separator placed between consecutive values.
 * @param int|string $key  Key read from each row.
 * @param array      $arr  List of rows (taken by reference, never modified).
 * @return string The joined values; an empty string for an empty list.
 */
function subarim($glue,$key,&$arr) {
    $pieces=array();
    foreach ($arr as $inner) {
        $pieces[]=$inner[$key];
    }
    return(implode($glue,$pieces));
}
/**
 * Stores a notification in the Notifications table, stamped with the current
 * time; terminates the script through mexit() when the query fails.
 *
 * @param string $msg Notification text (escaped before being stored).
 * @param int    $sev Severity, stored as an integer.
 */
function notify($msg,$sev) {
    global $link;
    // microtime() without argument returns the string "msec sec" (two numbers
    // separated by a space), which breaks the VALUES list; microtime(true)
    // returns one float. It is formatted with a fixed number of decimals so
    // that no exponent notation can reach the SQL text.
    $now=sprintf('%.6F',microtime(true));
    $query='INSERT INTO Notifications (ID, Notification, Severity, Microtime) VALUES (NULL, \''
        .myesc($link,$msg).'\', '.(int)$sev.', '.$now.')';
    mysqli_query($link,$query)
        or mexit(mysqli_error($link).N,3,true);
}
/*
 * Nodeinfo ('https://'.$dom.'/nodeinfo/2.0') was added in 3.0.0
 * Trends ('https://'.$dom.'/api/v1/trends') was added in 3.0.0
 * Activity ('https://'.$dom.'/api/v1/instance/activity') was added in 2.1.2
 */
// Open the JSON dump (when enabled) and start the top-level object; its
// members are appended while the instances are being crawled.
if ($opts['jsonwrite']) {
    $jsonf=@fopen(__DIR__.'/'.$opts['jsonfp'],'w');
    if (!$jsonf) {
        mexit('Non ho potuto aprire in scrittura il file di dump delle info json «'.$opts['jsonfp'].'».',1);
    }
    fwrite($jsonf,'{'.N);
}
// ---------------------------------------------------------------------------
// Main crawl: query every known domain, collect its data into a row shaped
// like the Instances table and build the matching INSERT/UPDATE statement.
// NOTE: as in the original code, the statements are only printed, not run.
// ---------------------------------------------------------------------------

// Announces a request, fetches $url and returns the decoded JSON, or false
// when the request failed.
$fetchjson=function($url,$announce) use ($context) {
    lecho($announce);
    $raw=@file_get_contents($url,false,$context);
    if ($raw===false) {
        lecho('ERRORE :-('.N);
        return(false);
    }
    lecho('OK :-)'.N);
    return(json_decode($raw,true));
};

// Follows $path through nested arrays; returns null as soon as a step is
// missing, null, or not an array (the data is untrusted remote JSON).
$dig=function($data,$path) {
    foreach ($path as $step) {
        if (!is_array($data) || !isset($data[$step]))
            return(null);
        $data=$data[$step];
    }
    return($data);
};

// Instances column => array(kind, path inside the fetched info).
// 'text' values go through nempty(), 'raw' values are stored as they are,
// 'bool' values are converted with b2i().
$fieldmap=array(
    'Title'=>array('text',array('title')),
    'ShortDesc'=>array('text',array('short_description')),
    'LongDesc'=>array('text',array('description')),
    'Email'=>array('text',array('email')),
    'Version'=>array('text',array('version')),
    'UserCount'=>array('raw',array('stats','user_count')),
    'StatusCount'=>array('raw',array('stats','status_count')),
    'DomainCount'=>array('raw',array('stats','domain_count')),
    'Thumb'=>array('text',array('thumbnail')),
    'MaxTootChars'=>array('raw',array('max_toot_chars')),
    'RegOpen'=>array('bool',array('registrations')),
    'RegReqApproval'=>array('bool',array('approval_required')),
    'AdmAccount'=>array('text',array('contact_account','acct')),
    'AdmDisplayName'=>array('text',array('contact_account','display_name')),
    'AdmURL'=>array('text',array('contact_account','url')),
    'AdmAvatar'=>array('text',array('contact_account','avatar')),
    'AdmHeader'=>array('text',array('contact_account','header')),
    'Software'=>array('text',array('x-nodeinfo','software','name')),
    'ActiveUsersMonth'=>array('raw',array('x-nodeinfo','usage','users','activeMonth')),
    'ActiveUsersHalfYear'=>array('raw',array('x-nodeinfo','usage','users','activeHalfyear'))
);

$cinsts=count($insts);
$i=0;
$ok=0;
foreach ($insts as $dom=>$row) {
    $i++;
    lecho('~~~~~~~~~~~~~~~'.N);
    lecho('Provo a recuperare info su «'.$dom.'» ['.$i.'/'.$cinsts.' ('.$ok.' OK) - '.round(100/$cinsts*$i).'%]'.N);

    lecho('Provo a recuperare le informazioni API sull’istanza ... ');
    $buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context);
    // A body that does not decode to an array is treated like a failed request.
    $info=($buf!==false) ? json_decode($buf,true) : null;
    if (!is_array($info)) {
        lecho('ERRORE :-('.N);
        continue;
    }
    $ok++;
    lecho('OK :-)'.N);

    // Optional endpoints, queried only where the declared version has them.
    // version_compare() is needed: comparing the strings with >= orders them
    // character by character, so e.g. '10.0.0' would sort before '2.1.2'.
    if (isset($info['version']) && is_string($info['version'])) {
        if (version_compare($info['version'],'2.1.2','>=')) {
            $extra=$fetchjson('https://'.$dom.'/api/v1/instance/activity','Provo a recuperare le informazioni API sull’attività dell’istanza ... ');
            if ($extra!==false)
                $info['x-activity']=$extra;
        }
        if (version_compare($info['version'],'3.0.0','>=')) {
            $extra=$fetchjson('https://'.$dom.'/nodeinfo/2.0','Provo a recuperare le informazioni Nodeinfo sull’istanza ... ');
            if ($extra!==false)
                $info['x-nodeinfo']=$extra;
            $extra=$fetchjson('https://'.$dom.'/api/v1/trends','Provo a recuperare le informazioni API sui trends dell’istanza ... ');
            if ($extra!==false)
                $info['x-trends']=$extra;
        }
    }

    // Without a usable URI the instance cannot be identified: skip it.
    if (!isset($info['uri']) || !is_string($info['uri']) || is_null(nempty($info['uri'])))
        continue;

    lecho(json_encode($info,JSON_PRETTY_PRINT).N,true);
    if ($opts['jsonwrite']) {
        // The key is JSON-encoded too, so quotes or backslashes in the URI
        // cannot corrupt the dump.
        fwrite($jsonf,json_encode($info['uri'],JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE).': '.json_encode($info,JSON_PRETTY_PRINT).','.N);
    }

    // Row in the column order of the Instances table.
    $instrow=array(
        'ID'=>null, 'New'=>0, 'Chosen'=>0, 'Visible'=>0, 'BlackListed'=>0,
        'URI'=>null, 'Title'=>null, 'ShortDesc'=>null, 'LongDesc'=>null, 'OurDesc'=>null,
        'PlaceID'=>null, 'Email'=>null, 'Software'=>null, 'Version'=>null,
        'UserCount'=>null, 'StatusCount'=>null, 'DomainCount'=>null,
        'ActiveUsersMonth'=>null, 'ActiveUsersHalfYear'=>null, 'Thumb'=>null,
        'RegOpen'=>null, 'RegReqApproval'=>null, 'MaxTootChars'=>null,
        'AdmAccount'=>null, 'AdmDisplayName'=>null, 'AdmCreatedAt'=>null, 'AdmNote'=>null,
        'AdmURL'=>null, 'AdmAvatar'=>null, 'AdmHeader'=>null
    );
    if (array_key_exists($info['uri'],$blacklist))
        $instrow['BlackListed']=1;
    $instrow['URI']=nempty($info['uri']);

    foreach ($fieldmap as $column=>$spec) {
        $value=$dig($info,$spec[1]);
        if (is_null($value))
            continue;
        if ($spec[0]==='bool') {
            $instrow[$column]=b2i($value);
        } elseif (is_scalar($value)) {
            // Arrays/objects where a plain value is expected are ignored.
            $instrow[$column]=($spec[0]==='text') ? nempty($value) : $value;
        }
    }

    // Administrator fields that need a conversion of their own.
    $created=$dig($info,array('contact_account','created_at'));
    if (is_string($created)) {
        $created=pgdatetomy($created);
        // An unparsable date stays NULL instead of being stored as false.
        if ($created!==false)
            $instrow['AdmCreatedAt']=$created;
    }
    $note=$dig($info,array('contact_account','note'));
    if (is_string($note))
        $instrow['AdmNote']=nempty(strip_tags($note,'<a>'));

    $res=mysqli_query($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'')
        or mexit(mysqli_error($link).N,3,true);
    if (mysqli_num_rows($res)>0) {
        lecho('«'.$instrow['URI'].'» è già presente nel DB, la aggiorno...'.N);
        $oldinstrow=mysqli_fetch_assoc($res);
        $sets=array();
        foreach ($instrow as $field=>$value) {
            // The primary key is what the WHERE clause selects on: never rewrite it.
            if ($field==='ID')
                continue;
            // SQL NULL must not be quoted, or the text 'NULL' would be stored.
            $sets[]=$field.'='.(is_null($value) ? 'NULL' : '\''.myesc($link,(string)$value).'\'');
        }
        // NOTE(review): this also resets New, Chosen, Visible, OurDesc and
        // PlaceID, which the crawler never fills in - confirm they should be
        // left out before this statement is actually executed.
        $query='UPDATE Instances SET '.implode(', ',$sets).' WHERE Instances.ID='.(int)$oldinstrow['ID'];
        echo('QUERONA DI UPDATE: «'.$query.'».'.N);
        // TODO: synchronise the InstLangs table with $info['languages'].
        // The disabled draft that used to sit here: loaded the instance's
        // current InstLangs rows (joined with Languages, ordered by Pos);
        // inserted unknown language codes into Languages and raised a
        // notify() pointing to editlang.php; and, when the ordered list of
        // languages had changed, raised a notify() and replaced the InstLangs
        // rows. When reviving it: use mysqli_* (the draft called mysql_query
        // and mysql_fetch_assoc), read $info['uri'] (not 'URI'), and pass the
        // new list as the second subarim() argument in the "changed from/to"
        // message (the draft printed the old list twice).
    } else {
        lecho('«'.$info['uri'].'» non è già presente nel DB, la aggiungo...'.N);
        $instrow['New']=1;
        $values=array();
        foreach ($instrow as $value) {
            $values[]=is_null($value) ? 'NULL' : '\''.myesc($link,(string)$value).'\'';
        }
        $query='INSERT INTO Instances ('.implode(', ',array_keys($instrow)).') VALUES ('.implode(', ',$values).')';
        echo('QUERONA DI INSERT: «'.$query.'»'.N);
    }
}
// Orderly shutdown: release the DB connection, terminate the JSON dump with
// its closing member and brace, close the log and report success.
mysqli_close($link);
if ($opts['jsonwrite']) {
    $jsontail='"Fine?": true'.N.'}'.N;
    fwrite($jsonf,$jsontail);
    fclose($jsonf);
}
if ($opts['log']) {
    fclose($logf);
}
exit(0);
- ?>
|