peerscrawl.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. #!/usr/bin/php
  2. <?php
  3. /*
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  // N: the newline appended to every message written by this script.
  const N = "\n";
  // SNAME: this script's file name; BNAME: the same name without its last extension
  // (used further down to build the lockfile name and the start-up banner).
  define('SNAME', basename(__FILE__));
  define('BNAME', pathinfo(SNAME, PATHINFO_FILENAME));
  18. require __DIR__.'/../lib/gurl.php';
  19. require __DIR__.'/../lib/ghs.php';
  20. require __DIR__.'/../lib/ght.php';
  21. require __DIR__.'/../lib/grace.php';
  22. require __DIR__.'/../lib/parsetime.php';
  23. use function mysqli_real_escape_string as myesc;
  // Default values for every option; the command line parsing below may override them.
  // NOTE(review): $gracetime is not defined in this file, presumably it comes from
  // lib/grace.php (required above) - confirm.
  $opts = [
      'inifp'        => __DIR__.'/../conf/mustard.ini', // ini file holding the database settings
      'startinst'    => 'mastodon.social',              // first instance to crawl
      'gracetime'    => $gracetime,                     // see «-g» in the help text
      'peersfp'      => __DIR__.'/peers.responding',    // output: responding instances
      'ckpeersfp'    => __DIR__.'/peers.checked',       // output: all checked instances
      'excludefp'    => null,                           // optional file of exclusion regexes
      'conntimeout'  => 4,                              // passed to gurl() for every request
      'functimeout'  => 7,                              // passed to gurl() for every request
      'loop'         => false,
      'excludedead'  => true,                           // «-i» turns this off
      'ignorelock'   => false,                          // «-I» turns this on
      'minmsgimplev' => 1,                              // index into $msglevs («Info»)
  ];
  // Message importance levels, by index: lecho() prints a message only when its
  // level is >= $opts['minmsgimplev'].
  $msglevs = ['Debug', 'Info', 'Warning', 'Error', 'None'];
  // Help text shown by «-h»; the defaults it mentions are taken from $opts above.
  $help = 'SYNOPSIS
'.SNAME.' [options]
DESCRIPTION
This program tries to build a fairly complete list of fediverse instances
exposing the [instance]/api/v1/instance/peers endpoint.
OPTIONS
-s, --startinst <domain>
Defines the first instance to crawl.
DEFAULT: «'.$opts['startinst'].'»
-e, --excludefp <file>
Defines a file containing exclusion rules: one regular expression per
line (empty lines are ignored). Any instance matching any defined regex
will be ignored by the program. Changes made to this file during program
execution will be taken into account.
-i, --includedead
Include “dead” instances (see next two options descriptions to know which
instances are considered “dead”).
-g, --gracetime <time>
If an instance has not been responding for longer than this time, consider
it dead and avoid checking it. See section «TIME SPECIFICATION» below to see
how to specify time.
DEFAULT: '.ght($opts['gracetime'],null,0).'
-G, --graceline
Return the “graceline” (0:0:0 of today minus gracetime: see option above) in
unix time and local time, then exit.
-p, --peersfp <file>
Defines the file into which the ordered list of responding instances
will be saved.
DEFAULT: «'.$opts['peersfp'].'»
-c, --ckpeersfp <file>
Defines the file into which the ordered list of all checked instances will
be saved.
DEFAULT: «'.$opts['ckpeersfp'].'»
-I, --ignorelock
Normally, if its lockfile exists, the program exits with an error before
doing anything. With this option the lockfile is ignored. Please verify
that the program is not already running before using it.
-t, --conntimeout <time>
Defines the timeout in seconds for every connection attempt. See section
«TIME SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['conntimeout'],null,0).'
-T, --functimeout <time>
Defines the timeout in seconds for every download. See section «TIME
SPECIFICATION» below to see how to specify time.
DEFAULT: '.ght($opts['functimeout'],null,0).'
-m, --minmsgimplev <«debug»|«info»|«warning»|«error»|«none»>
Defines the minimum “importance level” of messages to be written to the text
user interface. There are 4 “importance levels”, in this order of
importance: «debug», «info», «warning», «error». Setting this option to any
of these values will write to the text user interface all the messages with
the specified or a greater level; setting it to the special value «none»
will completely disable message writing.
DEFAULT: '.lcfirst($msglevs[$opts['minmsgimplev']]).'
-h, --help
Show this help text and exit.
TIME SPECIFICATION
An example is better than ~5148 words :-)
To specify 1 year, 6 months (made of 31 days), 2 weeks, 3 days, 5 hours,
7 minutes and 12 seconds you can use «1y,6M,2w,3d,5h,7m,12s»; but you can
also use «12s,7m,5h,3d,2w,6M,1y», or even «18M,1w,1w,2d,1d,3h,2h,7m,12s».
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
conditions; see <http://www.gnu.org/licenses/> for details.'.N;
  // Command line parsing. Every argument is either a flag or an option followed by
  // its value; anything unknown, or an option with a missing/invalid value, ends the
  // program through mexit() (exit code 1, lockfile untouched).
  // NOTE(review): «-g» only changes $opts['gracetime']; $graceline is never
  // recomputed in this file - confirm whether that is intended.
  for ($i=1; $i<$argc; $i++) {
      switch ($argv[$i]) {
          case '-s':
          case '--startinst':
              if ($i+1>=$argc)
                  mexit(3,'option «'.$argv[$i].'» has to be followed by a domain name (use «-h» for more info).'.N,1,false);
              $opts['startinst']=$argv[++$i];
              break;

          case '-g':
          case '--gracetime':
              if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
                  mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
              $i++;
              $opts['gracetime']=$time;
              break;

          case '-p':
          case '--peersfp':
              if ($i+1>=$argc)
                  mexit(3,'option «'.$argv[$i].'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
              $opts['peersfp']=$argv[++$i];
              break;

          case '-c':
          case '--ckpeersfp':
              if ($i+1>=$argc)
                  mexit(3,'option «'.$argv[$i].'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
              $opts['ckpeersfp']=$argv[++$i];
              break;

          case '-I':
          case '--ignorelock':
              $opts['ignorelock']=true;
              break;

          case '-e':
          case '--excludefp':
              if ($i+1>=$argc)
                  mexit(3,'option «'.$argv[$i].'» has to be followed by a file’s path (use «-h» for more info).'.N,1,false);
              $opts['excludefp']=$argv[++$i];
              break;

          case '-t':
          case '--conntimeout':
              if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
                  mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
              $i++;
              $opts['conntimeout']=$time;
              break;

          case '-T':
          case '--functimeout':
              if ($i+1>=$argc || ($time=parsetime($argv[$i+1]))===false)
                  mexit(3,'option «'.$argv[$i].'» requires a valid time specification as an argument (use «-h» to read help).'.N,1,false);
              $i++;
              $opts['functimeout']=$time;
              break;

          case '-i':
          case '--includedead':
              $opts['excludedead']=false;
              break;

          case '-m':
          case '--minmsgimplev':
              // level names are matched case-insensitively against $msglevs
              if ($i+1>=$argc || !in_array(ucfirst(strtolower($argv[$i+1])),$msglevs))
                  mexit(3,'option «'.$argv[$i].'» requires a valid “message importance level” value as an argument (use «-h» to read help).'.N,1,false);
              $i++;
              $opts['minmsgimplev']=array_search(ucfirst(strtolower($argv[$i])),$msglevs);
              break;

          case '-G':
          case '--graceline':
              echo 'Graceline: '.$graceline.' ('.date('Y-m-d H:i:s',$graceline).').'.N;
              exit(0);

          case '-h':
          case '--help':
              echo $help;
              exit(0);

          default:
              mexit(3,'don’t know how to interpret «'.$argv[$i].'» (use «-h» to read the help text).'.N,1,false);
      }
  }
  // Lockfile: refuse to start when it already exists, unless «-I» was given.
  $lockfp=__DIR__.'/'.BNAME.'.lock';
  if (is_file($lockfp) && !$opts['ignorelock'])
      mexit(3,'lockfile exists: it seems the program is already running; if you’re sure it’s not, you can use «-I» to force execution.'.N,1,false);
  if (@touch($lockfp)===false)
      mexit(3,'could not create lockfile «'.$lockfp.'».'.N,1,false);

  // Signals are dispatched asynchronously, so no declare(ticks=1) is needed.
  // From here on every mexit() call passes $remlock=true so the lockfile is removed.
  pcntl_async_signals(true);
  pcntl_signal(SIGTERM,'sighandler');// Termination ('kill' was called)
  pcntl_signal(SIGHUP,'sighandler');// Terminal log-out
  pcntl_signal(SIGINT,'sighandler');// Interrupted (Ctrl-C is pressed)

  lecho(1,'###### Starting '.BNAME.' ######'.N);

  // Database settings are read from the ini file.
  $iniarr=@parse_ini_file($opts['inifp']);
  if ($iniarr===false) mexit(3,'couldn’t open «'.$opts['inifp'].'».'.N,1,true);

  // Failures are handled twice on purpose: as exceptions, and as a false return
  // value for php versions < 8.
  try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
  catch (Exception $error) { mexit(3,'couldn’t connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
  // for php versions < 8
  if ($link===false) mexit(3,'couldn’t connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);

  try { $res=mysqli_set_charset($link,'utf8mb4'); }
  // this message used to miss its closing parenthesis; it now matches the one below
  catch (Exception $error) { mexit(3,'couldn’t set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true); }
  // for php versions < 8
  if ($res===false) mexit(3,'couldn’t set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,1,true);
  // List of “dead” hostnames: every row of “Instances” and “Peers” whose
  // LastOkCheckTS is NULL or older than $graceline. Stays empty with «-i».
  $deadinsts=[];
  if ($opts['excludedead']) {
      lecho(1,'loading dead instances from “Instances” and “Peers” table (gracetime: '.ght($opts['gracetime'],null,0).'; graceline: '.date('Y-m-d H:i:s',$graceline).').'.N);

      // Hostnames already collected, used as a hash set: de-duplicating with
      // in_array() on the growing list was quadratic, and its loose comparison
      // could treat two different numeric-looking hostnames as equal.
      $deadseen=[];

      $res=myq($link,'SELECT URI FROM Instances WHERE LastOkCheckTS IS NULL OR LastOkCheckTS < '.$graceline);
      lecho(1,'got '.mysqli_num_rows($res).' dead instances from “Instances” table.'.N);
      while ($row=mysqli_fetch_assoc($res)) {
          $deadkey=(string)$row['URI'];
          if (!isset($deadseen[$deadkey])) {
              $deadseen[$deadkey]=true;
              $deadinsts[]=$row['URI'];
          }
      }

      $res=myq($link,'SELECT Hostname FROM Peers WHERE LastOkCheckTS IS NULL OR LastOkCheckTS < '.$graceline);
      lecho(1,'got '.mysqli_num_rows($res).' dead instances from “Peers” table.'.N);
      while ($row=mysqli_fetch_assoc($res)) {
          $deadkey=(string)$row['Hostname'];
          if (!isset($deadseen[$deadkey])) {
              $deadseen[$deadkey]=true;
              $deadinsts[]=$row['Hostname'];
          }
      }

      unset($res,$row,$deadseen,$deadkey);
      lecho(1,'loaded list of dead instances ('.count($deadinsts).').'.N);
  }
  193. //mexit(1,'bau!'.N,0,true);
  // State shared with crawl() and the helper functions through "global":
  // $insts   - instances that responded
  // $ckinsts - every instance that was considered
  // $exarr   - exclusion regexes currently loaded by updexarr()
  $insts=$ckinsts=$exarr=[];
  $totnewc=0;   // instances newly added to the “Instances” table
  $maxround=1;  // highest round number reached
  $tini=time(); // crawl start time
  $list=[$opts['startinst']];

  // go: crawl() recurses until a round produces no instances for the next one
  crawl($list,1);
  lecho(1,'done crawling! :-)'.N);

  $now=time();
  lecho(
      1,
      'crawl started on '.date('Y-m-d H:i:s',$tini)
      .' and ended on '.date('Y-m-d H:i:s',$now)
      .'; took '.ght($now-$tini,null,0)
      .' in '.$maxround.' rounds; '
      .count($insts).' instances responded; '
      .count($ckinsts).' instances were considered; '
      .$totnewc.' new instances were found; max. memory usage: '
      .ghs(memory_get_peak_usage(true)).'.'.N
  );

  // Write both lists, then release the database link and the lockfile.
  sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
  sortcheckandsave($ckinsts,'list of checked instances',$opts['ckpeersfp']);
  mysqli_close($link);
  unlink($lockfp);
  lecho(1,'###### DONE :-) ######'.N);
  exit(0);
  212. // functions
  /**
   * Crawls one round of instances, then recurses into the next round.
   *
   * For every hostname in $list this function:
   *  - reloads the exclusion rules and waits until the network is reachable;
   *  - loads the instance's “peers” endpoint and queues for the next round every
   *    peer that is a valid hostname, is not excluded, is not (optionally) dead and
   *    has not been seen in the checked, current or next-round lists;
   *  - if the peers could not be loaded, tries the “instance” endpoint instead;
   *  - records the outcome in the “Instances” and “Peers” tables.
   *
   * Uses the globals $insts, $ckinsts, $deadinsts, $tini, $opts, $maxround,
   * $totnewc and $link; appends to $insts and $ckinsts, updates $maxround and
   * $totnewc.
   *
   * @param array $list hostnames to check in this round (taken by reference)
   * @param int   $id   number of this round, starting from 1
   * @return void
   */
  function crawl(&$list,$id) {
      global $insts, $ckinsts, $deadinsts, $tini, $opts, $maxround, $totnewc, $link;
      // wouldn't make sense to filter $list here: filtering already happens before adding an instance to next round list
      $newc=0;
      lecho(1,'###### START OF ROUND '.$id.' ######'.N);
      $clist=count($list);
      lecho(1,'will check '.$clist.' instance(s).'.N);
      $nlist=[];

      // Hash sets mirroring the lists consulted for every single peer. Scanning the
      // lists with in_array() cost one full pass per peer per list; in_array() also
      // compares loosely, so numeric-looking hostnames such as "1e3" and "1000"
      // were considered the same. isset() on a keyed array is exact.
      $listset=array_fill_keys($list,true);
      $nlistset=[];
      $ckset=array_fill_keys($ckinsts,true);
      $deadset=($opts['excludedead']) ? array_fill_keys($deadinsts,true) : [];

      $i=0;
      $rtini=time();
      foreach ($list as $inst) {
          $responded=false;
          $i++;
          lecho(1,'round '.$id.': working on instance «'.$inst.'» ('.$i.'/'.$clist.').'.N);
          // the exclude file may have changed while the program is running
          updexarr($id);
          waituntilonline($id);

          lecho(1,'round '.$id.': trying to load instance «'.$inst.'»’s peers...'.N);
          $peers=gurl('https://'.$inst.'/api/v1/instance/peers',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
          if ($peers['cont']===false) {
              lecho(2,'round '.$id.': could not load instance «'.$inst.'»’s peers: '.$peers['emsg'].'.'.N);
          } else {
              $peers=@json_decode($peers['cont'],true);
              if (!is_array($peers)) {
                  lecho(2,'round '.$id.': expecting instance «'.$inst.'»’s peers, got bad JSON instead.'.N);
              } else {
                  $responded=true;
                  $cpeers=count($peers);
                  lecho(1,'round '.$id.': successfully loaded instance «'.$inst.'»’s peers ('.$cpeers.') :-)'.N);
                  $pi=1;
                  foreach ($peers as $key=>$peer) {
                      // a proper JSON list decodes to keys 0,1,2...; the strict comparison
                      // also rejects string keys on php < 8, where 'foo'==0 is true
                      if ($key!==$pi-1) {
                          lecho(2,'round '.$id.': instance «'.$inst.'»’s peers: entity '.$pi.'/'.$cpeers.'’s key is not sequential: not checking further.'.N);
                          break;
                      }
                      if (!is_string($peer)) {
                          lecho(2,'round '.$id.': instance «'.$inst.'»’s peers: entity '.$pi.'/'.$cpeers.' is not a string: not checking further.'.N);
                          break;
                      }
                      // collect every reason for skipping this peer, so the debug message is complete
                      $whynot=[];
                      if (isset($ckset[$peer])) $whynot[]='it has already been checked';
                      if (!validhostname($peer)) $whynot[]='its hostname is not valid';
                      if (ckexarr($peer)) $whynot[]='its hostname matches an exclusion regexp';
                      if (isset($listset[$peer])) $whynot[]='it is already present in current list';
                      if (isset($nlistset[$peer])) $whynot[]='it is already present in next round list';
                      if ($opts['excludedead'] && isset($deadset[$peer])) $whynot[]='it’s dead';
                      if (count($whynot)>0) {
                          lecho(0,'round '.$id.': instance «'.$inst.'»: not adding peer «'.$peer.'» ('.$pi.'/'.$cpeers.') to next round list: '.implode(', ',$whynot).'.'.N);
                      } else {
                          lecho(1,'round '.$id.': instance «'.$inst.'»: adding peer «'.$peer.'» ('.$pi.'/'.$cpeers.') to next round list :-)'.N);
                          $nlist[]=$peer;
                          $nlistset[$peer]=true;
                      }
                      $pi++;
                  }
              }
          }

          // no usable peers list: the instance still counts as responding if its
          // “instance” endpoint returns JSON that decodes to an array
          if (!$responded) {
              lecho(1,'round '.$id.': instance «'.$inst.'» didn’t respond at its “peers” endpoint; trying to load its info from “instance” endpoint...'.N);
              $instinfo=gurl('https://'.$inst.'/api/v1/instance',$opts['conntimeout'],$opts['functimeout'],['Accept: application/json']);
              if ($instinfo['cont']===false) {
                  lecho(2,'round '.$id.': could not load instance «'.$inst.'»’s info: '.$instinfo['emsg'].'.'.N);
              } else {
                  $instinfo=@json_decode($instinfo['cont'],true);
                  if (is_array($instinfo))
                      $responded=true;
                  else
                      lecho(2,'round '.$id.': expecting instance «'.$inst.'»’s info, got bad JSON instead.'.N);
              }
          }

          $now=time();
          // “Instances” table: only responding instances are added
          if ($responded) {
              lecho(1,'round '.$id.': instance «'.$inst.'» responded :-)'.N);
              $insts[]=$inst;
              $res=myq($link,'SELECT ID FROM Instances WHERE URI=\''.myesc($link,$inst).'\'');
              $cres=mysqli_num_rows($res);
              if ($cres<1) {
                  lecho(1,'round '.$id.': instance «'.$inst.'» is new to “Instances” table, adding it :-)'.N);
                  myq($link,'INSERT INTO Instances SET URI=\''.myesc($link,$inst).'\', InsertTS='.$now);
                  $totnewc++;
                  $newc++;
              } elseif ($cres>1) {
                  lecho(2,'round '.$id.': instance «'.$inst.'» has '.$cres.' records in “Instances” table! :-('.N);
              } else {
                  lecho(1,'round '.$id.': instance «'.$inst.'» is already present in “Instances” table.'.N);
              }
          } else {
              lecho(1,'round '.$id.': instance «'.$inst.'» didn’t respond :-('.N);
          }

          // “Peers” table: every checked instance gets a record; LastOkCheckTS is
          // only written when the instance responded
          $res=myq($link,'SELECT * FROM Peers WHERE Hostname=\''.myesc($link,$inst).'\'');
          $cres=mysqli_num_rows($res);
          if ($cres<1) {
              lecho(1,'round '.$id.': instance «'.$inst.'» is new to “Peers” table, adding it :-)'.N);
              $query='INSERT INTO Peers SET Hostname=\''.myesc($link,$inst).'\', FirstCheckTS='.$now;
              if ($responded) $query.=', LastOkCheckTS='.$now;
              myq($link,$query);
          } else {
              if ($cres>1) lecho(2,'round '.$id.': instance «'.$inst.'» has '.$cres.' records in “Peers” table! :-('.N);
              if ($responded) {
                  lecho(1,'round '.$id.': instance «'.$inst.'» is already present in “Peers” table, but it responded: updating its record’s “LastOkCheckTS” value...'.N);
                  // with duplicated records only the first one returned is updated
                  $row=mysqli_fetch_assoc($res);
                  myq($link,'UPDATE Peers SET LastOkCheckTS='.$now.' WHERE ID='.$row['ID']);
              }
          }

          $ckinsts[]=$inst;
          $ckset[$inst]=true;
          $now=time();
          $rtela=$now-$rtini;
          // remaining time is extrapolated from the average time per instance so far
          lecho(1,'round '.$id.': finished working on instance «'.$inst.'» ('.$i.'/'.$clist.'); RoundElapsedTime: '.ght($rtela,null,0).'; RoundEstimatedTimeRemaining: '.ght($rtela/$i*$clist-$rtela,null,0).'; RoundNewInsts: '.$newc.'; NextRoundInsts: '.count($nlist).'; TotElapsedTime: '.ght($now-$tini,null,0).'; TotConsideredInsts: '.count($ckinsts).'; TotRespondingInsts: '.count($insts).'; TotNewInsts: '.$totnewc.'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).').'.N);
      }

      // this frame stays alive while the next rounds run: drop what is no longer needed
      unset($list,$listset,$nlistset,$ckset,$deadset);

      $now=time();
      $rtela=$now-$rtini;
      $cnlist=count($nlist);
      lecho(1,'END OF ROUND STATS: RoundCheckedInsts: '.$clist.'; RoundElapsedTime: '.ght($rtela,null,0).'; RoundNewInsts: '.$newc.'; NextRoundInsts: '.$cnlist.'; TotElapsedTime: '.ght($now-$tini,null,0).'; TotConsideredInsts: '.count($ckinsts).'; TotRespondingInsts: '.count($insts).'; TotNewInsts: '.$totnewc.'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).').'.N);
      if ($cnlist<1) lecho(1,'next round list is empty.'.N);
      lecho(1,'###### END OF ROUND '.$id.' ######'.N);
      if ($cnlist>0) {
          crawl($nlist,$id+1);
          if ($id+1>$maxround) $maxround=$id+1;
      }
  }
  /**
   * Saves whatever has been collected so far, writes a final message and exits.
   *
   * The two instance lists are saved only when they already exist as arrays (they
   * do not while the command line is being parsed).
   *
   * @param int    $lev     importance level of the message (index into $msglevs)
   * @param string $msg     message to write
   * @param int    $code    exit code
   * @param bool   $remlock whether the lockfile has to be removed
   * @return void this function never returns: it always calls exit()
   */
  function mexit($lev,$msg,$code,$remlock) {
      global $link, $insts, $ckinsts, $lockfp, $opts;
      // "global" makes an unset variable read as null, so is_array() alone is enough
      if (is_array($insts)) {
          sortcheckandsave($insts,'list of responding instances',$opts['peersfp']);
      }
      if (is_array($ckinsts)) {
          sortcheckandsave($ckinsts,'list of checked instances',$opts['ckpeersfp']);
      }
      if ($remlock) {
          if (isset($lockfp) && is_file($lockfp)) {
              unlink($lockfp);
          }
      }
      lecho($lev,$msg);
      exit($code);
  }
  /**
   * Writes a timestamped message, if its level is important enough.
   *
   * Messages below $opts['minmsgimplev'] are dropped; «Debug» and «Info» messages
   * go to standard output, «Warning» and «Error» ones to standard error.
   *
   * @param int    $lev importance level (index into $msglevs)
   * @param string $msg text to write; it is written as is, no newline is added
   * @return void
   */
  function lecho($lev,$msg) {
      global $opts, $msglevs;
      if ($lev<$opts['minmsgimplev']) {
          return;
      }
      $line=microdate().' '.$msglevs[$lev].': '.$msg;
      if ($lev>=2) {
          fwrite(STDERR,$line);
      } else {
          echo $line;
      }
  }
  /**
   * Runs a query and returns its result; any failure ends the program through
   * mexit() with exit code 2.
   *
   * @param mysqli $link  open database connection
   * @param string $query SQL to execute
   * @return mixed whatever mysqli_query() returned (never false)
   */
  function myq(&$link,$query) {
      try {
          $result=mysqli_query($link,$query);
      } catch (Exception $error) {
          mexit(3,'query «'.$query.'» failed: '.$error->getMessage().' ('.$error->getCode().').'.N,2,true);
      }
      // for php versions < 8, which seem to not catch mysql exceptions
      if ($result===false) {
          mexit(3,'query «'.$query.'» failed: '.mysqli_error($link).' ('.mysqli_errno($link).').'.N,2,true);
      }
      return $result;
  }
  /**
   * Formats a timestamp as «Y-m-d H:i:s.uuuuuu» (local time, microseconds).
   *
   * @param string|null $time a «0.uuuuuuuu seconds» string as returned by
   *                          microtime(false); the current time when null
   * @return string
   */
  function microdate($time=null) {
      if ($time===null) {
          $time=microtime(false);
      }
      $parts=explode(' ',$time);
      // «0.12345600» -> «123456»: drop the leading «0.» and the last two digits
      $micros=substr($parts[0],2,-2);
      return date('Y-m-d H:i:s',$parts[1]).'.'.$micros;
  }
  /**
   * De-duplicates and sorts a list in place, then writes it to a file, one entry
   * per line.
   *
   * A warning is logged when duplicates were found, or when the file cannot be
   * opened for writing (in which case nothing is written).
   *
   * @param array  $arr     list to save; left de-duplicated and sorted
   * @param string $arrdesc description of the list, used in messages
   * @param string $fp      path of the destination file
   * @return void
   */
  function sortcheckandsave(&$arr,$arrdesc,&$fp) {
      $before=count($arr);
      $arr=array_unique($arr);
      if (count($arr)!=$before) {
          lecho(2,$arrdesc.' contained duplicates, better check the code ;-)'.N);
      }
      lecho(1,'saving ordered '.$arrdesc.' into «'.$fp.'».'.N);
      sort($arr);
      $handle=@fopen($fp,'w');
      if ($handle===false) {
          lecho(2,'couldn’t open «'.$fp.'» for writing.'.N);
          return;
      }
      foreach ($arr as $entry) {
          fwrite($handle,$entry.N);
      }
      fclose($handle);
  }
  /**
   * Handler installed for SIGTERM, SIGHUP and SIGINT: ends the program through
   * mexit() (exit code 0, lockfile removed).
   *
   * @param int $signal number of the received signal
   * @return void
   */
  function sighandler($signal) {
      // start on a fresh line
      echo N;
      $reason='interrupted (signal: '.$signal.').'.N;
      mexit(1,$reason,0,true);
  }
  /**
   * Tells whether a string is empty or made of whitespace only.
   *
   * @param string $val
   * @return bool
   */
  function isempty($val) {
      return preg_match('/^\s*$/',$val)===1;
  }
  /**
   * Blocks until a TCP connection to www.google.com:80 succeeds.
   *
   * Every failed attempt (1 second timeout) is logged as a warning and followed
   * by a 10 seconds pause; when the connection finally succeeds after at least
   * one failure, that is logged too.
   *
   * @param int $roundid number of the current round, used in messages
   * @return void
   */
  function waituntilonline($roundid) {
      $url='www.google.com';
      $gotoff=false;
      for (;;) {
          $f=@fsockopen($url,80,$errno,$errstr,1);
          if ($f!==false) {
              break;
          }
          $gotoff=true;
          lecho(2,'round '.$roundid.': it seems we are offline, waiting 10 seconds before retrying...'.N);
          sleep(10);
      }
      fclose($f);
      if ($gotoff) {
          lecho(1,'round '.$roundid.': it seems we are back online! :-)'.N);
      }
  }
  /**
   * Reloads the exclusion rules into the global $exarr from the file set in
   * $opts['excludefp'] (does nothing when no file is set).
   *
   * Each non-empty line is taken as one regular expression; lines that are not
   * valid regular expressions are reported and skipped. When the file cannot be
   * opened a warning is logged and the rules already loaded are kept.
   *
   * @param int $roundid number of the current round, used in messages
   * @return void
   */
  function updexarr($roundid) {
      global $exarr, $opts;
      if (is_null($opts['excludefp'])) {
          return;
      }
      $f=@fopen($opts['excludefp'],'r');
      if ($f===false) {
          lecho(2,'round '.$roundid.': could not open exclude file «'.$opts['excludefp'].'» for reading.'.N);
          return;
      }
      $i=0;
      $exarr=[];
      // stop at the first failed read, rather than passing fgets()'s false to trim()
      while (($line=fgets($f))!==false) {
          $i++;
          $line=trim($line);
          if (isempty($line)) {
              continue;
          }
          // preg_match() returns false (and raises a warning, silenced here) when the
          // pattern itself is not valid
          if (@preg_match($line,'foo')!==false)
              $exarr[]=$line;
          else
              lecho(2,'round '.$roundid.': exclude file «'.$opts['excludefp'].'» contains an invalid regular expression on line '.$i.': «'.$line.'».'.N);
      }
      // this function runs once per crawled instance: release the handle explicitly
      fclose($f);
  }
  /**
   * Tells whether a hostname matches any of the exclusion regexes currently held
   * in the global $exarr.
   *
   * @param string $inst hostname to test
   * @return bool
   */
  function ckexarr($inst) {
      global $exarr;
      foreach ($exarr as $re) {
          if (preg_match($re,$inst)!==1) {
              continue;
          }
          return true;
      }
      return false;
  }
  /**
   * Tells whether a string contains at least one byte outside the ASCII range,
   * i.e. at least one multibyte (or invalid) UTF-8 sequence.
   *
   * The previous implementation counted the matches of «/./u» and compared the
   * count with strlen(): as «.» does not match a newline, a pure ASCII string
   * containing one was reported as multibyte.
   *
   * @param string $s
   * @return bool
   */
  function ismultibyte($s) {
      return preg_match('/[^\x00-\x7F]/',$s)===1;
  }
  /**
   * Tells whether a string is a syntactically valid hostname.
   *
   * A hostname containing non-ASCII characters is first converted to its ASCII
   * form with idn_to_ascii(). The result must be at most 253 bytes long and made
   * of dot-separated labels, each 1 to 63 bytes long, containing only letters,
   * digits and hyphens and neither starting nor ending with a hyphen.
   *
   * @param string $hostname
   * @return bool
   */
  function validhostname($hostname) {
      if (ismultibyte($hostname)) {
          $hostname=idn_to_ascii($hostname,IDNA_DEFAULT,INTL_IDNA_VARIANT_UTS46);
          // idn_to_ascii() returns false when the conversion fails
          if ($hostname===false) return(false);
      }
      if (strlen($hostname)>253) return(false);
      foreach (explode('.',$hostname) as $label) {
          $len=strlen($label);
          if ($len<1 || $len>63) return(false);
          // \A and \z anchor at the very ends of the label: «$» also matches just
          // before a trailing newline, which let labels such as "com\n" through
          if (preg_match('#\A[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\z#',$label)!==1) return(false);
      }
      return(true);
  }
  450. //$url='www.team.starschlep.com/'; if (validhostname($url)) echo('OK: '.$url.N); else echo('KO: '.$url.N); die();
  451. ?>