mustool.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. #!/usr/bin/php
  2. <?php
  3. /*
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. define('N',"\n");
  16. require __DIR__.'/../lib/delinstbyid.php';
  17. $levs=['Debug', 'Info', 'Warning', 'Error'];
  18. $opts=array(
  19. 'checkspam'=>false,
  20. 'minent'=>10,
  21. 'revive'=>false,
  22. 'shuffle'=>false,
  23. 'updstats'=>false,
  24. 'clean'=>false,
  25. 'clean_notifs_before_weeks'=>24,
  26. 'clean_keep_checks'=>16,
  27. 'deleteinstswhere'=>false,
  28. 'optimize'=>false
  29. );
  30. $help='mustool.php
  31. DESCRIPTION
  32. mustool.php can do lots of things on Mastodon Help’s database.
  33. SYNOPSIS
  34. mustool.php [options] <action> [parameters] ...
  35. ACTIONS
  36. checkspam
  37. Tries to detect spam instances, i.e. those with the same TLD, same second
  38. level domain, variable higher domains, both in «Instances» and «Peers»
  39. tables.
  40. deleteinstswhere <condition[s]>
  41. First, return a list of Instances records matching “condition”, then let
  42. you choose whether you want to delete them and all records referencing them
  43. in other tables.
  44. Example: mustool.php deleteinstswhere "IsMastodon!=1"
  45. revive
  46. Set a new, succesful check with current time for every instance that
  47. succesfully responded to last check.
  48. shuffle
  49. Randomize instances list (values in «RPos» column).
  50. updstats
  51. Update site’s statistics.
  52. clean
  53. Delete records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table, and for each
  54. «Instances» record delete all but the most recent '.$opts['clean_keep_checks'].' records from
  55. «InstChecks» table.
  56. optimize
  57. Optimize all the tables in the database.
  58. OPTIONS
  59. -h, --help
  60. Shows this help text and exits.
  61. This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
  62. This is free software, and you are welcome to redistribute it under
  63. certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
  64. $dosome=false;
  65. for ($i=1; $i<$argc; $i++) {
  66. if ($argv[$i]=='-h' || $argv[$i]=='--help') {
  67. echo $help;
  68. exit(0);
  69. } elseif ($argv[$i]=='deleteinstswhere') {
  70. if ($i==$argc-1) mexit('«'.$argv[$i].'» requires a MySQL condition as an argument (use «-h» to read help).'.N,1);
  71. $i++;
  72. $dosome=true;
  73. $opts['deleteinstswhere']=true;
  74. $opts['deleteinstswhereconds']=$argv[$i];
  75. } elseif ($argv[$i]=='checkspam') {
  76. $dosome=true;
  77. $opts['checkspam']=true;
  78. } elseif ($argv[$i]=='shuffle') {
  79. $dosome=true;
  80. $opts['shuffle']=true;
  81. } elseif ($argv[$i]=='revive') {
  82. $dosome=true;
  83. $opts['revive']=true;
  84. } elseif ($argv[$i]=='updstats') {
  85. $dosome=true;
  86. $opts['updstats']=true;
  87. } elseif ($argv[$i]=='clean') {
  88. $dosome=true;
  89. $opts['clean']=true;
  90. } elseif ($argv[$i]=='optimize') {
  91. $dosome=true;
  92. $opts['optimize']=true;
  93. } else {
  94. mexit('don’t know how to interpret «'.$argv[$i].'» (use «-h» to read help).'.N,1);
  95. }
  96. }
  97. if (!$dosome) mexit('no action was specified (use «-h» to read help).'.N,1);
  98. use function mysqli_real_escape_string as myesc;
  99. $inifp=__DIR__.'/../conf/mustard.ini';
  100. $iniarr=@parse_ini_file($inifp);
  101. if ($iniarr==false) mexit('Could not open configuration file «'.$inifp.'»'.N,1);
  102. try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
  103. catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
  104. // for php versions < 8
  105. if ($link===false) mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
  106. try { $res=mysqli_set_charset($link,'utf8mb4'); }
  107. catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true); }
  108. // for php versions < 8
  109. if ($res===false) mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
  110. if ($opts['checkspam']) {
  111. $res=myq($link,'SELECT ID, URI FROM Instances');
  112. checkspam($res,'ID','URI','Instances');
  113. $res=myq($link,'SELECT ID, Hostname FROM Peers');
  114. checkspam($res,'ID','Hostname','Peers');
  115. }
  116. function checkspam(&$res,$idcol,$domcol,$tabnam) {
  117. global $opts;
  118. $buf=[];
  119. while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
  120. unset($res);
  121. $cbuf=count($buf);
  122. if ($cbuf>0) {
  123. $doms=[];
  124. foreach ($buf as $row)
  125. if (preg_match('#[^.]+\.[^.]+$#',$row[$domcol],$matches)===1)
  126. $doms[$matches[0]][]=['dom'=>$row[$domcol], 'id'=>$row[$idcol]];
  127. /* echo('Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ');
  128. $inp=rtrim(fgets(STDIN));
  129. if ($inp=='YES') {
  130. $i=0;
  131. foreach ($buf as $row) {
  132. $i++;
  133. eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100/$cbuf*$i,2).'%)'.N,1);
  134. $res=delinstbyid($link,$row['ID'],'eecho',N);
  135. if (!$res) mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N,2);
  136. }
  137. }*/
  138. } else {
  139. eecho('no '.$tabnam.' records found.'.N,2);
  140. }
  141. uasort($doms,'cmp');
  142. foreach ($doms as $key=>$arr) {
  143. $carr=count($arr);
  144. if ($carr>$opts['minent']) {
  145. eecho('>>> '.$tabnam.': '.$key.': '.$carr.' entries'.N,1);
  146. foreach ($arr as $entry)
  147. eecho(' '.$domcol.': '.$entry['dom'].'; '.$idcol.': '.$entry['id'].N,1);
  148. }
  149. }
  150. }
  151. function cmp($a,$b) {
  152. $a=count($a);
  153. $b=count($b);
  154. if ($a==$b)
  155. return 0;
  156. return ($a<$b) ? 1 : -1;// reverse :-)
  157. }
  158. if ($opts['deleteinstswhere']) {
  159. $res=myq($link,'SELECT ID, URI FROM Instances WHERE '.$opts['deleteinstswhereconds']);
  160. $buf=[];
  161. while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
  162. $cbuf=count($buf);
  163. if ($cbuf>0) {
  164. foreach ($buf as $row) eecho($row['URI'].' (ID='.$row['ID'].')'.N,1);
  165. echo 'Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ';
  166. $inp=rtrim(fgets(STDIN));
  167. if ($inp=='YES') {
  168. $i=0;
  169. foreach ($buf as $row) {
  170. $i++;
  171. eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100/$cbuf*$i,2).'%)'.N,1);
  172. $res=delinstbyid($link,$row['ID'],'eecho',N);
  173. if (!$res) mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N,2);
  174. }
  175. }
  176. } else {
  177. eecho('no Instances records match expression «'.$opts['deleteinstswhereconds'].'».'.N,2);
  178. }
  179. }
  180. if ($opts['revive']) {
  181. $now=time();
  182. $res=myq($link,'SELECT * FROM Instances WHERE WasLastCheckOk IS TRUE;');
  183. $buf=[];
  184. while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
  185. $cbuf=count($buf);
  186. $i=0;
  187. foreach ($buf as $row) {
  188. $i++;
  189. $res=myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 1);');
  190. if ($res!==false) {
  191. $res=myq($link,'UPDATE Instances SET TotChecks='.($row['TotChecks']+1).', OkChecks='.($row['OkChecks']+1).', WasLastCheckOk=1, LastOkCheckTS='.$now.' WHERE ID='.$row['ID'].';');
  192. if ($res===false)
  193. mexit('could not update instance record with ID='.$row['ID'].'; shutting down.'.N,3);
  194. else
  195. eecho("{$i}/{$cbuf}\n",1);
  196. } else {
  197. mexit('could not insert new check record into InstChecks for instance with ID='.$row['ID'].'; shutting down.'.N,3);
  198. }
  199. }
  200. eecho('done! Affected rows: '.$cbuf.'.'.N,1);
  201. }
  202. if ($opts['shuffle']) {
  203. eecho('randomizing values in «RPos» column...'.N,1);
  204. $res=myq($link,'SELECT ID FROM Instances');
  205. $i=0;
  206. while ($row=mysqli_fetch_assoc($res)) {
  207. $i++;
  208. $buf[$row['ID']]=$i;
  209. }
  210. shuffle($buf);
  211. foreach ($buf as $key=>$val)
  212. myq($link,'UPDATE Instances SET RPos='.$val.' WHERE ID='.$key);
  213. eecho('done! Affected rows: '.count($buf).'.'.N,1);
  214. }
  215. if ($opts['updstats']) {
  216. require __DIR__.'/../lib/supplangs.php';
  217. $day=24*60*60;
  218. $now=time();
  219. $tdstart=gmmktime(0,0,0,gmdate('n',$now),gmdate('j',$now),gmdate('Y',$now));
  220. //eecho('Today started at '.$tdstart.' ('.gmdate('d M Y H:i:s',$tdstart).').'.N,1);
  221. // this below, if uncommented, populates DESTRUCTIVELY table ZHits for testing purposes
  222. /*myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
  223. myq($link,'DELETE FROM ZStats');
  224. $uids=array(
  225. 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
  226. 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
  227. 'cccccccccccccccccccccccccccccccc',
  228. 'dddddddddddddddddddddddddddddddd',
  229. 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
  230. 'ffffffffffffffffffffffffffffffff'
  231. );
  232. $urls=array('home','instances','users','about','links','stats','contribute','404');
  233. for ($i=0; $i<1460; $i++) myq($link,'INSERT INTO ZHits (UID,URL,Lang,TS) VALUES ("'.$uids[rand(0,count($uids)-1)].'","'.$urls[rand(0,count($urls)-1)].'","'.$supplangscodes[rand(0,count($supplangscodes)-1)].'",'.rand($now-365*24*60*60,$tdstart).')');*/
  234. $res=myq($link,'SELECT * FROM ZHits WHERE TS < '.$tdstart.' ORDER BY TS ASC');
  235. if (mysqli_num_rows($res)>0) {
  236. $row=mysqli_fetch_assoc($res);
  237. $dstart=gmmktime(0,0,0,gmdate('n',$row['TS']),gmdate('j',$row['TS']),gmdate('Y',$row['TS']));
  238. eecho('updating statistics...'.N,1);
  239. } else {
  240. mexit('statistics are already up to date :-)'.N,0);
  241. }
  242. $inserts=0;
  243. while ($dstart<$tdstart) {
  244. //eecho('-------- '.gmdate('d M Y H:i:s',$dstart).' ---------'.N,1);
  245. $inserts++;
  246. $hits=0;
  247. foreach ($supplangs as $key=>$val)
  248. $supplangs[$key]['hc']=0;
  249. // this line below must be synced with the urls we consider, see ../site/index.php
  250. $hitspage=array('home'=>0, 'instances'=>0, 'users'=>0, 'about'=>0, 'links'=>0, 'stats'=>0, 'contribute'=>0, '404'=>0);
  251. $visits=0;
  252. $buf=array();
  253. $res=myq($link,'SELECT * FROM ZHits WHERE TS >= '.$dstart.' AND TS < '.($dstart+$day).' ORDER BY TS ASC');
  254. while ($row=mysqli_fetch_assoc($res)) {
  255. //eecho($row['UID'].' '.$row['URL'].' '.$row['Lang'].' '.$row['TS'].N,1);
  256. $hits++;
  257. if (array_key_exists($row['Lang'],$supplangs))
  258. $supplangs[$row['Lang']]['hc']++;
  259. $hitspage[$row['URL']]++;
  260. if (!in_array($row['UID'],$buf)) {
  261. $buf[]=$row['UID'];
  262. $visits++;
  263. }
  264. }
  265. $buf='';
  266. foreach ($supplangs as $key=>$val) $buf.=$key.':'.$val['hc'].';';
  267. $hitslang=substr($buf,0,-1);
  268. $buf='';
  269. foreach ($hitspage as $key=>$val) $buf.=$key.':'.$val.';';
  270. $hitspage=substr($buf,0,-1);
  271. //eecho('>>> hits: '.$hits.', hitslang: '.$hitslang.', hitspage: '.$hitspage.', visits: '.$visits.' <<<'.N,1);
  272. $query='INSERT INTO ZStats (TS, Hits, HitsLang, HitsPage, Visits) VALUES ('.$dstart.', '.$hits.', "'.$hitslang.'", "'.$hitspage.'", '.$visits.')';
  273. //eecho('QUERY: '.$query.N,1);
  274. myq($link,$query);
  275. $dstart+=$day;
  276. }
  277. myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
  278. eecho('done! Affected rows: '.$inserts.'.'.N,1);
  279. }
  280. if ($opts['clean']) {
  281. $ago=time()-($opts['clean_notifs_before_weeks']*7*24*60*60);
  282. eecho('cleaning records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table...'.N,1);
  283. $res=myq($link,'DELETE FROM Notifications WHERE Microtime < '.$ago);
  284. eecho('done! Affected rows: '.mysqli_affected_rows($link).'.'.N,1);
  285. eecho('cleaning records from «InstChecks» table, keeping only the most recent '.$opts['clean_keep_checks'].' for each instance...'.N,1);
  286. $res=myq($link,'SELECT ID FROM Instances WHERE TotChecks > '.$opts['clean_keep_checks']);
  287. $nrows=mysqli_num_rows($res);
  288. $p=0;
  289. $totar=0;
  290. while ($row=mysqli_fetch_assoc($res)) {
  291. $p++;
  292. //eecho('working on instance '.$p.'/'.$nrows.' (ID = '.$row['ID'].')...'.N,1);
  293. $rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' ORDER BY Time DESC');
  294. $instchecks=[];
  295. while ($rrow=mysqli_fetch_assoc($rres)) $instchecks[]=$rrow;
  296. if (count($instchecks)>$opts['clean_keep_checks']) {
  297. while(count($instchecks)>$opts['clean_keep_checks']) $bef=array_pop($instchecks);
  298. $bef=$bef['Time'];
  299. myq($link,'DELETE FROM InstChecks WHERE InstID='.$row['ID'].' AND Time<='.$bef);
  300. $ar=mysqli_affected_rows($link);
  301. //eecho('deleted '.$ar.' records from “InstChecks” table.'.N,1);
  302. $totar+=$ar;
  303. }/* else {
  304. eecho('no “InstChecks” records to delete.'.N,1);
  305. }*/
  306. }
  307. eecho('done! Total affected rows: '.$totar.'.'.N,1);
  308. }
  309. if ($opts['optimize']) {
  310. eecho('optimizing all the tables in the database...'.N,1);
  311. $res=myq($link,'SHOW TABLES');
  312. while ($row=mysqli_fetch_row($res)) {
  313. $rres=myq($link,'OPTIMIZE TABLE '.$row[0]);
  314. $rrow=mysqli_fetch_assoc($rres);
  315. if ($rrow['Msg_type']=='error' || $rrow['Msg_type']=='warning')
  316. eecho(kimplode($rrow).N,2);
  317. }
  318. eecho('done!'.N,1);
  319. }
  320. mysqli_close($link);
  321. exit(0);
  322. // functions
  323. function kimplode(&$arr) {
  324. $buf=[];
  325. foreach ($arr as $key=>$val)
  326. $buf[]=$key.': '.$val;
  327. return(implode('; ',$buf));
  328. }
  329. function myq(&$link,$query) {
  330. try { $res=mysqli_query($link,$query); }
  331. catch (Exception $error) { mexit('Query «'.$query.'» failed: '.$error->getMessage().'.'.N,2); }
  332. // for php versions < 8, which seem to not catch mysql exceptions
  333. if ($res===false) mexit('Query «'.$query.'» failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,2);
  334. return($res);
  335. }
  336. function mexit($msg,$code) {
  337. global $link;
  338. if (isset($link) && $link!==false) mysqli_close($link);
  339. if ($code==0)
  340. eecho($msg,1);
  341. else
  342. eecho($msg,3);
  343. exit($code);
  344. }
  345. function eecho($msg,$lev=1) {
  346. global $levs;
  347. $time=microtime(false);
  348. $time=explode(' ',$time);
  349. $time=date('Y-m-d H:i:s',$time[1]).'.'.substr($time[0],2,-2);
  350. $msg=$time.' '.$levs[$lev].': '.$msg;
  351. if ($lev<2)
  352. echo $msg;
  353. else
  354. fwrite(STDERR,$msg);
  355. }
  356. ?>