#!/bin/php
<?php
/*
 * Fediverse instance crawler.
 *
 * Workflow:
 *  1. Loads the known blacklist from the `Blacklist` table.
 *  2. For every domain in `StartNodes`, downloads its peers list
 *     (/api/v1/instance/peers) and its blacklist dump (/domain_blocks.txt).
 *  3. Adds every URI already stored in `Instances` to the list to be checked.
 *  4. Inserts the newly discovered blacklist entries into `Blacklist`.
 *  5. For every collected domain, downloads the instance info (plus activity,
 *     nodeinfo and trends when the declared version supports them), dumps it
 *     to a JSON file and builds the UPDATE/INSERT query for `Instances`
 *     (the queries are currently only printed, not executed).
 *
 * Exit codes: 0 = done, 1 = start-up failure (log/ini/DB connection/json dump),
 * 2 = interrupted by a signal, 3 = database query error.
 *
 * NOTE(review): the original header comment of this file was truncated in
 * this copy (only its closing delimiter survived); restore it from version
 * control.
 */

use function mysqli_real_escape_string as myesc;

define('N',"\n");

// Ticks are needed so that pending signals are dispatched to the handlers
// registered with pcntl_signal() while the script is running.
declare(ticks=1);
pcntl_signal(SIGTERM,'signalHandler'); // Termination ('kill' was called)
pcntl_signal(SIGHUP,'signalHandler'); // Terminal log-out
pcntl_signal(SIGINT,'signalHandler'); // Interrupted (Ctrl-C is pressed)

/**
 * Signal handler: closes whatever is still open (MySQL link, JSON dump,
 * log file) and terminates the script with exit code 2.
 *
 * @param int $signal the received signal number (not used)
 */
function signalHandler($signal)
{
    global $link, $logf, $jsonf;
    lecho(N.'Sono stato interrotto.'.N);
    if ($link) {
        lecho('La connessione MySQL è aperta, la chiudo.'.N);
        mysqli_close($link);
    }
    // is_resource() is false for handles that were never opened AND for
    // handles already closed, so we never write to a dead stream.
    if (is_resource($jsonf)) {
        echo('Il file di dump json è aperto, lo chiudo.'.N);
        // Terminate the JSON object so the partial dump stays parseable.
        fwrite($jsonf,'"Fine?": true'.N.'}'.N);
        fclose($jsonf);
    }
    if (is_resource($logf)) {
        echo('Il file di log è aperto, lo chiudo.'.N);
        fclose($logf);
    }
    exit(2);
}

// Script options.
$opts=array(
    'timeout'=>3,               // HTTP timeout (seconds) used for every request
    'log'=>true,                // whether lecho() also writes to the log file
    'jsonfp'=>'instances.json', // JSON dump file name (relative to this script)
    'jsonwrite'=>true,          // whether to write the JSON dump
    'jsonread'=>false           // not used in this file
);

/**
 * Converts a duration such as "90", "15m" or "2g" to seconds.
 *
 * The optional one-letter suffix selects the multiplier: s = 1, m = 60,
 * o = 3600, g = 86400, S = 7 days, M = 30 days, A = 365 days (presumably the
 * Italian initials for minutes, hours, days, weeks, months, years).
 *
 * @param string $str the duration to convert
 * @return mixed the number of seconds (the untouched numeric string when the
 *               suffix is missing or "s"), or false if $str does not start
 *               with digits
 */
function tosec($str)
{
    if (preg_match('/^([0-9]+)([smogSMA]?)/',$str,$buf)===1) {
        switch ($buf[2]) {
            case '':
            case 's':
                return($buf[1]);
            case 'm':
                return($buf[1]*60);
            case 'o':
                return($buf[1]*60*60);
            case 'g':
                return($buf[1]*60*60*24);
            case 'S':
                return($buf[1]*60*60*24*7);
            case 'M':
                return($buf[1]*60*60*24*30);
            case 'A':
                return($buf[1]*60*60*24*365);
        }
    } else {
        return(false);
    }
}

/**
 * Prints/logs a message, releases the open resources and exits.
 *
 * @param string $msg     message to print and log
 * @param int    $code    process exit code
 * @param bool   $closemy whether the MySQL connection must be closed first
 */
function mexit($msg,$code,$closemy=false)
{
    // $logf must be declared global here, otherwise the log is never closed.
    global $link, $logf;
    lecho($msg);
    if ($closemy && $link) mysqli_close($link);
    if (is_resource($logf)) fclose($logf);
    exit($code);
}

/**
 * Echoes a message and, when logging is enabled, appends it to the log file.
 *
 * @param string $msg     the message
 * @param bool   $logonly when true the message goes only to the log file
 */
function lecho($msg,$logonly=false)
{
    global $opts, $logf;
    if (!$logonly) echo($msg);
    // The log may be unavailable (e.g. we are reporting that opening it
    // failed): never write to something that is not an open stream.
    if ($opts['log'] && is_resource($logf)) fwrite($logf,$msg);
}

$logfp='crawler.log';
if ($opts['log']) {
    $logf=@fopen(__DIR__.'/'.$logfp,'w') or mexit('Non ho potuto aprire in scrittura il file di log «'.$logfp.'».',1);
}

$inifp='../sec/mastostartadmin.ini';
$iniarr=parse_ini_file($inifp) or mexit('Impossibile aprire il file di configurazione «'.$inifp.'»'.N,1);

// This script checks every mysqli return value itself ("... or mexit(...)"),
// so make sure mysqli reports failures through return values rather than
// exceptions (exceptions are the default since PHP 8.1).
mysqli_report(MYSQLI_REPORT_OFF);
// On failure $link is false, so the error must come from mysqli_connect_error().
$link=mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']) or mexit(mysqli_connect_error().N,1);
mysqli_set_charset($link,'utf8');

// Stream context shared by every HTTP request made below.
$contextopts=array(
    'http'=>array(
        'timeout'=>$opts['timeout']
    ),
    'socket'=>array(
        'tcp_nodelay'=>true
    )
);
$context=stream_context_create($contextopts);

// Known blacklist, indexed by domain.
$blacklist=array();
lecho('Carico la blacklist dal database...'.N);
$res=mysqli_query($link,'SELECT * FROM Blacklist') or mexit(mysqli_error($link).N,3,true);
lecho(mysqli_num_rows($res).' istanze nella blacklist.'.N);
while ($row=mysqli_fetch_assoc($res)) {
    $blacklist[$row['Domain']]=$row;
}

/**
 * Converts a "YYYY-MM-DD hh:mm:ss.fff" (or "YYYY-MM-DDThh:mm:ss.fffZ") date
 * to a Unix timestamp with fractional seconds.
 *
 * NOTE(review): mktime() interprets the fields in the script's default
 * timezone even when the input ends in "Z" — confirm whether gmmktime() is
 * what is wanted here.
 *
 * @param string $pgdate the date to convert
 * @return float|false the timestamp, or false if the format is not recognised
 */
function pgdatetomy($pgdate)
{
    if (preg_match('/^(\d+)-(\d+)-(\d+)[ T]{1}(\d+):(\d+):(\d+)\.(\d+)Z?$/',$pgdate,$buf)===1) {
        return(mktime((int)$buf[4],(int)$buf[5],(int)$buf[6],(int)$buf[2],(int)$buf[3],(int)$buf[1])+floatval('0.'.$buf[7]));
    } else {
        return(false);
    }
}

/**
 * Converts one tab-separated line of a domain blocks dump to an associative
 * array using the column names of the `Blacklist` table.
 *
 * @param string $line the dump line
 * @return array|false the row, or false when the line has fewer than the 7
 *                     expected tab-separated fields
 */
function blpgdumplinetomy($line)
{
    $truefalse=array('f'=>0,'t'=>1);
    $row=explode("\t",$line);
    // Fields 0..6 are read below: anything shorter is not a dump row
    // (e.g. a line of an HTML error page served instead of the dump).
    if (count($row)<7) return(false);
    return(array(
        'Domain'=>$row[0],
        'CreatedAt'=>pgdatetomy($row[1]),
        'ModifiedAt'=>pgdatetomy($row[2]),
        'Severity'=>$row[3],
        'RejectMedia'=>(isset($truefalse[$row[4]]) ? $truefalse[$row[4]] : null),
        'RejectReports'=>(isset($truefalse[$row[5]]) ? $truefalse[$row[5]] : null),
        'PublicComment'=>$row[6]
    ));
}

// Blacklist entries found on the start nodes that are not yet in the DB.
$blacklistnew=array();
// Domains to be checked (only the keys are used).
$insts=array();
lecho('Carico le istanze di partenza...'.N);
$res=mysqli_query($link,'SELECT Domain FROM StartNodes') or mexit(mysqli_error($link).N,3,true);
lecho(mysqli_num_rows($res).' istanze di partenza.'.N);
while ($row=mysqli_fetch_assoc($res)) {
    $insts[$row['Domain']]=null;

    lecho('Recupero la lista delle istanze note a «'.$row['Domain'].'» ... ');
    $buf=@file_get_contents('https://'.$row['Domain'].'/api/v1/instance/peers',false,$context);
    if ($buf!==false) {
        lecho('OK :-)'.N);
        $peers=json_decode($buf,true);
        // The body may be invalid JSON or not a list: iterate only real arrays.
        if (is_array($peers)) {
            foreach ($peers as $pdom) {
                if (is_string($pdom) && !array_key_exists($pdom,$insts) && strlen($pdom)<=64) {
                    $insts[$pdom]=null;
                }
            }
        }
    } else {
        lecho('ERRORE :-('.N);
    }

    lecho('Recupero la blacklist di «'.$row['Domain'].'» ... ');
    $buf=@file_get_contents('https://'.$row['Domain'].'/domain_blocks.txt',false,$context);
    if ($buf!==false) {
        lecho('OK :-)'.N);
        $buf=explode(N,$buf);
        foreach ($buf as $line) {
            // Skip comment lines and blank lines.
            if (preg_match('/(^#.*$)|(^\s*$)/',$line)===0) {
                $brow=blpgdumplinetomy($line);
                if ($brow===false) continue; // malformed line
                if (!array_key_exists($brow['Domain'],$blacklist)) {
                    $blacklistnew[$brow['Domain']]=$brow;
                }
                $blacklist[$brow['Domain']]=$brow;
            }
        }
    } else {
        lecho('ERRORE :-('.N);
    }
}

// Load the instances already known to the DB and add the missing ones to the
// list of those to be checked.
//lecho('Carico le istanze note dal DB e aggiungo alla lista di quelle da controllare quelle che non ci sono già.'.N);
$res=mysqli_query($link,'SELECT URI FROM Instances') or mexit(mysqli_error($link).N,3,true);
while ($row=mysqli_fetch_assoc($res)) {
    if (!array_key_exists($row['URI'],$insts)) $insts[$row['URI']]=null;
}

ksort($insts);
ksort($blacklist);
ksort($blacklistnew);
lecho('Istanze recuperate: '.count($insts).N);
lecho('Istanze blacklistate: '.count($blacklist).', di cui '.count($blacklistnew).' nuove da aggiungere al DB.'.N);

// Store the newly discovered blacklist entries.
foreach ($blacklistnew as $row) {
    // The explicit string cast keeps null/false/number values acceptable
    // for mysqli_real_escape_string().
    foreach ($row as $key=>$val) $row[$key]=myesc($link,(string)$val);
    mysqli_query($link,'INSERT INTO Blacklist (ID, Domain, CreatedAt, ModifiedAt, Severity, RejectMedia, RejectReports, PrivateComment, PublicComment) VALUES (NULL, \''.$row['Domain'].'\', \''.$row['CreatedAt'].'\', \''.$row['ModifiedAt'].'\', \''.$row['Severity'].'\', \''.$row['RejectMedia'].'\', \''.$row['RejectReports'].'\', NULL, \''.$row['PublicComment'].'\')') or mexit(mysqli_error($link).N,3,true);
}

// Sample query kept as a reference for the columns of `Instances`:
//INSERT INTO `Instances` (`ID`, `New`, `Chosen`, `Visible`, `BlackListed`, `URI`, `Title`, `ShortDesc`, `LongDesc`, `OurDesc`, `PlaceID`, `Email`, `Software`, `Version`, `UserCount`, `StatusCount`, `DomainCount`, `ActiveUsersMonth`, `ActiveUsersHalfYear`, `Thumb`, `RegOpen`, `RegReqApproval`, `MaxTootChars`, `AdmAccount`, `AdmDisplayName`, `AdmCreatedAt`, `AdmNote`, `AdmURL`, `AdmAvatar`, `AdmHeader`) VALUES (NULL, '1', '0', '0', '0', 'pantagruel.dnsup.net', 'Pantagruel', 'Descrizione breve', 'Descrizione lunga', 'Istanza molto carina senza soffitto, senza cucina', '1', 'Graume ', 'mastodon', '3.0.1', '2', '12', '345', '5', '10', 'http://www.iedm.it', '1', '0', '540', 'admin', 'Admin', '2019-12-11', 'Note \'admin\'', 'https://rame.altervista.org', 'http://www.iedm.it', 'http://www.iedm.it');

/**
 * Converts a boolean to the integer 1 or 0.
 *
 * @param mixed $bool value evaluated for truthiness
 * @return int 1 if $bool is truthy, 0 otherwise
 */
function b2i($bool)
{
    return($bool ? 1 : 0);
}

/**
 * "Array Key Exists And Value Is Not Null".
 *
 * @param mixed $key the key to look for
 * @param mixed $arr the array to inspect (anything that is not an array
 *                   simply yields false)
 * @return bool true if $arr is an array holding a non-null value at $key
 */
function akeavinn($key,&$arr)
{
    return(is_array($arr) && array_key_exists($key,$arr) && !is_null($arr[$key]));
}

/**
 * Returns null for empty or whitespace-only values, the value itself otherwise.
 *
 * @param mixed $str the value to check
 * @return mixed null if $str is null, not scalar, empty or only whitespace;
 *               $str otherwise
 */
function nempty($str)
{
    // Arrays/objects may show up in remote JSON where a string is expected.
    if (is_null($str) || !is_scalar($str)) return(null);
    if (preg_match('/^\s*$/',(string)$str)===1) return(null);
    return($str);
}

/**
 * "SUB-ARray IMplode": joins the $key element of every sub-array of $arr.
 *
 * @param string $glue separator
 * @param mixed  $key  key read from every sub-array
 * @param array  $arr  array of arrays
 * @return string the joined values
 */
function subarim($glue,$key,&$arr)
{
    $parts=array();
    foreach ($arr as $inarr) $parts[]=$inarr[$key];
    return(implode($glue,$parts));
}

/**
 * Stores a notification in the `Notifications` table; exits with code 3 on
 * database error.
 *
 * @param string $msg notification text
 * @param int    $sev notification severity
 */
function notify($msg,$sev)
{
    global $link;
    // microtime(true) yields a float; the bare microtime() returns the string
    // "msec sec", which is not a valid SQL number. sprintf('%F') keeps the
    // decimal separator locale-independent.
    mysqli_query($link,'INSERT INTO Notifications (ID, Notification, Severity, Microtime) VALUES (NULL, \''.myesc($link,$msg).'\', '.(int)$sev.', '.sprintf('%.6F',microtime(true)).')') or mexit(mysqli_error($link).N,3,true);
}

/*
 * Nodeinfo ('https://'.$dom.'/nodeinfo/2.0') was added in 3.0.0
 * Trends ('https://'.$dom.'/api/v1/trends') was added in 3.0.0
 * Activity ('https://'.$dom.'/api/v1/instance/activity') was added in 2.1.2
 */

if ($opts['jsonwrite']) {
    $jsonf=@fopen(__DIR__.'/'.$opts['jsonfp'],'w') or mexit('Non ho potuto aprire in scrittura il file di dump delle info json «'.$opts['jsonfp'].'».',1);
    fwrite($jsonf,'{'.N);
}

$cinsts=count($insts);
$i=0;  // instances processed so far
$ok=0; // instances that answered the main API call
foreach ($insts as $dom=>$row) {
    $i++;
    $info=null;
    lecho('~~~~~~~~~~~~~~~'.N);
    lecho('Provo a recuperare info su «'.$dom.'» ['.$i.'/'.$cinsts.' ('.$ok.' OK) - '.round(100/$cinsts*$i).'%]'.N);
    lecho('Provo a recuperare le informazioni API sull’istanza ... ');
    $buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context);
    if ($buf!==false) {
        $ok++;
        lecho('OK :-)'.N);
        $info=json_decode($buf,true);
        // Invalid JSON or a non-object answer is treated as "no info".
        if (!is_array($info)) $info=null;
        if (!is_null($info) && array_key_exists('version',$info) && is_string($info['version'])) {
            // version_compare() instead of a string comparison: as strings,
            // '10.0.0' sorts before '2.1.2'.
            if (version_compare($info['version'],'2.1.2','>=')) {
                lecho('Provo a recuperare le informazioni API sull’attività dell’istanza ... ');
                $buf=@file_get_contents('https://'.$dom.'/api/v1/instance/activity',false,$context);
                if ($buf!==false) {
                    lecho('OK :-)'.N);
                    $info['x-activity']=json_decode($buf,true);
                } else {
                    lecho('ERRORE :-('.N);
                }
            }
            if (version_compare($info['version'],'3.0.0','>=')) {
                lecho('Provo a recuperare le informazioni Nodeinfo sull’istanza ... ');
                $buf=@file_get_contents('https://'.$dom.'/nodeinfo/2.0',false,$context);
                if ($buf!==false) {
                    lecho('OK :-)'.N);
                    $info['x-nodeinfo']=json_decode($buf,true);
                } else {
                    lecho('ERRORE :-('.N);
                }
                lecho('Provo a recuperare le informazioni API sui trends dell’istanza ... ');
                $buf=@file_get_contents('https://'.$dom.'/api/v1/trends',false,$context);
                if ($buf!==false) {
                    lecho('OK :-)'.N);
                    $info['x-trends']=json_decode($buf,true);
                } else {
                    lecho('ERRORE :-('.N);
                }
            }
        }
    } else {
        lecho('ERRORE :-('.N);
    }

    if (!is_null($info) && akeavinn('uri',$info) && is_string($info['uri']) && !is_null(nempty($info['uri']))) {
        lecho(json_encode($info,JSON_PRETTY_PRINT).N,true);
        // json_encode() on the key too, so that quotes/backslashes in a
        // remote-supplied uri cannot corrupt the dump.
        if ($opts['jsonwrite']) fwrite($jsonf,json_encode($info['uri']).': '.json_encode($info,JSON_PRETTY_PRINT).','.N);

        // Sample query kept as a reference for the columns of `Instances`:
        //INSERT INTO `Instances` (`ID`, `New`, `Chosen`, `Visible`, `BlackListed`, `URI`, `Title`, `ShortDesc`, `LongDesc`, `OurDesc`, `PlaceID`, `Email`, `Software`, `Version`, `UserCount`, `StatusCount`, `DomainCount`, `ActiveUsersMonth`, `ActiveUsersHalfYear`, `Thumb`, `RegOpen`, `RegReqApproval`, `MaxTootChars`, `AdmAccount`, `AdmDisplayName`, `AdmCreatedAt`, `AdmNote`, `AdmURL`, `AdmAvatar`, `AdmHeader`) VALUES (NULL, '1', '0', '0', '0', 'pantagruel.dnsup.net', 'Pantagruel', 'Descrizione breve', 'Descrizione lunga', 'Istanza molto carina senza soffitto, senza cucina', '1', 'Graume ', 'mastodon', '3.0.1', '2', '12', '345', '5', '10', 'http://www.iedm.it', '1', '0', '540', 'admin', 'Admin', '2019-12-11', 'Note \'admin\'', 'https://rame.altervista.org', 'http://www.iedm.it', 'http://www.iedm.it');
        $instrow=array(
            'ID'=>null,
            'New'=>0,
            'Chosen'=>0,
            'Visible'=>0,
            'BlackListed'=>0,
            'URI'=>null,
            'Title'=>null,
            'ShortDesc'=>null,
            'LongDesc'=>null,
            'OurDesc'=>null,
            'PlaceID'=>null,
            'Email'=>null,
            'Software'=>null,
            'Version'=>null,
            'UserCount'=>null,
            'StatusCount'=>null,
            'DomainCount'=>null,
            'ActiveUsersMonth'=>null,
            'ActiveUsersHalfYear'=>null,
            'Thumb'=>null,
            'RegOpen'=>null,
            'RegReqApproval'=>null,
            'MaxTootChars'=>null,
            'AdmAccount'=>null,
            'AdmDisplayName'=>null,
            'AdmCreatedAt'=>null,
            'AdmNote'=>null,
            'AdmURL'=>null,
            'AdmAvatar'=>null,
            'AdmHeader'=>null
        );

        // Map the API answer onto the table columns.
        if (array_key_exists($info['uri'],$blacklist)) $instrow['BlackListed']=1;
        $instrow['URI']=nempty($info['uri']);
        if (akeavinn('title',$info)) $instrow['Title']=nempty($info['title']);
        if (akeavinn('short_description',$info)) $instrow['ShortDesc']=nempty($info['short_description']);
        if (akeavinn('description',$info)) $instrow['LongDesc']=nempty($info['description']);
        if (akeavinn('email',$info)) $instrow['Email']=nempty($info['email']);
        if (akeavinn('version',$info)) $instrow['Version']=nempty($info['version']);
        if (akeavinn('stats',$info)) {
            if (akeavinn('user_count',$info['stats'])) $instrow['UserCount']=$info['stats']['user_count'];
            if (akeavinn('status_count',$info['stats'])) $instrow['StatusCount']=$info['stats']['status_count'];
            if (akeavinn('domain_count',$info['stats'])) $instrow['DomainCount']=$info['stats']['domain_count'];
        }
        if (akeavinn('thumbnail',$info)) $instrow['Thumb']=nempty($info['thumbnail']);
        if (akeavinn('max_toot_chars',$info)) $instrow['MaxTootChars']=$info['max_toot_chars'];
        if (akeavinn('registrations',$info)) $instrow['RegOpen']=b2i($info['registrations']);
        if (akeavinn('approval_required',$info)) $instrow['RegReqApproval']=b2i($info['approval_required']);
        if (akeavinn('contact_account',$info)) {
            if (akeavinn('acct',$info['contact_account'])) $instrow['AdmAccount']=nempty($info['contact_account']['acct']);
            if (akeavinn('display_name',$info['contact_account'])) $instrow['AdmDisplayName']=nempty($info['contact_account']['display_name']);
            if (akeavinn('created_at',$info['contact_account']) && is_string($info['contact_account']['created_at'])) {
                // An unparsable date must stay NULL instead of being stored
                // as an empty value.
                $admcreated=pgdatetomy($info['contact_account']['created_at']);
                if ($admcreated!==false) $instrow['AdmCreatedAt']=$admcreated;
            }
            if (akeavinn('note',$info['contact_account']) && is_string($info['contact_account']['note'])) $instrow['AdmNote']=nempty(strip_tags($info['contact_account']['note'],''));
            if (akeavinn('url',$info['contact_account'])) $instrow['AdmURL']=nempty($info['contact_account']['url']);
            if (akeavinn('avatar',$info['contact_account'])) $instrow['AdmAvatar']=nempty($info['contact_account']['avatar']);
            if (akeavinn('header',$info['contact_account'])) $instrow['AdmHeader']=nempty($info['contact_account']['header']);
        }
        if (akeavinn('x-nodeinfo',$info)) {
            if (akeavinn('software',$info['x-nodeinfo']) && akeavinn('name',$info['x-nodeinfo']['software'])) $instrow['Software']=nempty($info['x-nodeinfo']['software']['name']);
            if (akeavinn('usage',$info['x-nodeinfo']) && akeavinn('users',$info['x-nodeinfo']['usage'])) {
                if (akeavinn('activeMonth',$info['x-nodeinfo']['usage']['users'])) $instrow['ActiveUsersMonth']=$info['x-nodeinfo']['usage']['users']['activeMonth'];
                if (akeavinn('activeHalfyear',$info['x-nodeinfo']['usage']['users'])) $instrow['ActiveUsersHalfYear']=$info['x-nodeinfo']['usage']['users']['activeHalfyear'];
            }
        }

        $res=mysqli_query($link,'SELECT * FROM Instances WHERE URI=\''.myesc($link,$instrow['URI']).'\'') or mexit(mysqli_error($link).N,3,true);
        if (mysqli_num_rows($res)>0) {
            lecho('«'.$instrow['URI'].'» è già presente nel DB, la aggiorno...'.N);
            $oldinstrow=mysqli_fetch_assoc($res);
            // NOTE(review): New/Chosen/Visible/OurDesc/PlaceID are rewritten
            // with their defaults here — confirm this is wanted before the
            // query gets actually executed.
            $sets=array();
            foreach ($instrow as $field=>$value) {
                // The primary key is what the row is selected by: never rewrite it.
                if ($field==='ID') continue;
                if (!is_null($value))
                    $sets[]=$field.'=\''.myesc($link,(string)$value).'\'';
                else
                    $sets[]=$field.'=NULL'; // SQL NULL, not the string 'NULL'
            }
            $query='UPDATE Instances SET '.implode(', ',$sets).' WHERE Instances.ID='.$oldinstrow['ID'];
            // The query is only shown for now, not executed.
            echo('QUERONA DI UPDATE: «'.$query.'».'.N);

            // Work in progress, disabled: synchronisation of the languages
            // declared by the instance.
            // TODO(review) before enabling: it calls mysql_query()/
            // mysql_fetch_assoc() instead of the mysqli_* functions, reads
            // $info['URI'] while the key used elsewhere is 'uri', and the
            // "changed from ... to ..." notification passes $oldinstlangs to
            // subarim() twice (the second one looks like it should be
            // $instlangs).
            /*
            $res=mysql_query($link,'SELECT InstID, LangID, Pos, Code FROM InstLangs LEFT JOIN Languages ON Languages.ID=LangID WHERE InstID='.$oldinstrow['ID'].' ORDER BY Pos ASC') or mexit(mysqli_error($link).N,3,true);
            $oldinstlangs=array();
            while ($row=mysql_fetch_assoc($res)) $oldinstlangs[]=$row;
            if (akeavinn('languages',$info)) {
                $instlangs=array();
                $pos=0;
                foreach ($info['languages'] as $lang) {
                    $res=mysqli_query($link,'SELECT * FROM Languages WHERE Code=\''.myesc($link,$lang).'\'') or mexit(mysqli_error($link).N,3,true);
                    if (mysqli_num_rows($res)<1) {
                        mysqli_query($link,'INSERT INTO Languages (ID, Code, Name) VALUES (NULL, \''.myesc($link,$lang).'\', NULL)') or mexit(mysqli_error($link).N,3,true);
                        $langid=mysqli_insert_id($link);
                        notify('L’aggiornamento dei dati relativi all’istanza «'.$info['URI'].'» ha aggiunto un codice lingua non ancora noto, «'.$lang.'», di cui non conosco il nome per esteso. Puoi editarlo qui.',1);
                    } else {
                        $row=mysqli_fetch_assoc($res);
                        $langid=$row['ID'];
                    }
                    $pos++;
                    $instlangs[]=array('InstID'=>$oldinstrow['ID'],'LangID'=>$langid,'Pos'=>$pos,'Code'=>$lang);
                }
                print_r($instlangs);
                print_r($oldinstlangs);
                if ($instlangs!=$oldinstlangs) {
                    notify('La lista delle lingue utilizzate dichiarate dall’istanza «'.$info['URI'].'» è cambiata da «'.subarim(', ','Code',$oldinstlangs).'» a «'.subarim(', ','Code',$oldinstlangs).'».',1);
                    mysqli_query($link,'DELETE FROM InstLangs WHERE InstID='.$oldinstrow['ID']) or mexit(mysqli_error($link).N,3,true);
                    foreach ($instlangs as $row) {
                        mysqli_query($link,'INSERT INTO InstLangs (InstID, LangID, Pos) VALUES ('.$row['InstID'].', '.$row['LangID'].', '.$row['Pos'].')') or mexit(mysqli_error($link).N,3,true);
                    }
                }
            }*/
        } else {
            lecho('«'.$info['uri'].'» non è già presente nel DB, la aggiungo...'.N);
            $instrow['New']=1;
            $fields=array();
            $values=array();
            foreach ($instrow as $field=>$value) {
                $fields[]=$field;
                if (!is_null($value))
                    $values[]='\''.myesc($link,(string)$value).'\'';
                else
                    $values[]='NULL';
            }
            $query='INSERT INTO Instances ('.implode(', ',$fields).') VALUES ('.implode(', ',$values).')';
            // The query is only shown for now, not executed.
            echo('QUERONA DI INSERT: «'.$query.'»'.N);
        }
        // var_dump($instrow);
    }
}

mysqli_close($link);
// Forget the closed link, so that a signal arriving from now on does not
// make the handler close it a second time.
$link=null;
if ($opts['jsonwrite']) {
    fwrite($jsonf,'"Fine?": true'.N.'}'.N);
    fclose($jsonf);
}
if ($opts['log']) fclose($logf);
exit(0);