#!/usr/bin/php
<?php
/*
 * mustool.php - command line maintenance tool for the Mastodon Help database.
 *
 * Actions (see $help below): checkspam, deleteinstswhere, revive, shuffle,
 * updstats, clean, optimize. Several actions may be given in one invocation;
 * they are executed in the fixed order in which they appear in this file.
 *
 * NOTE(review): this copy of the file reached review without its PHP opening
 * tag and with only the closing fragment of what looks like a header comment.
 * If the project ships a license header, restore it from upstream.
 */

use function mysqli_real_escape_string as myesc;

define('N', "\n");

require __DIR__.'/../lib/delinstbyid.php';

// Names of the log levels understood by eecho(); the array index is the level.
$levs = ['Debug', 'Info', 'Warning', 'Error'];

// Defaults: every action is off until it is requested on the command line.
$opts = [
    'checkspam' => false,
    'minent' => 10, // checkspam only reports groups with more entries than this
    'revive' => false,
    'shuffle' => false,
    'updstats' => false,
    'clean' => false,
    'clean_notifs_before_weeks' => 24,
    'clean_keep_checks' => 16,
    'deleteinstswhere' => false,
    'optimize' => false,
];

// NOTE(review): the wording below is reproduced as found. "[options] [parameters]"
// and "see for details" look like they lost an angle-bracketed placeholder/URL;
// confirm against upstream before changing the text.
$help = 'mustool.php

DESCRIPTION
  mustool.php can do lots of things on Mastodon Help’s database.

SYNOPSIS
  mustool.php [options] [parameters] ...

ACTIONS
  checkspam
    Tries to detect spam instances, i.e. those with the same TLD, same second
    level domain, variable higher domains, both in «Instances» and «Peers»
    tables.
  deleteinstswhere
    First, return a list of Instances records matching “condition”, then let
    you choose whether you want to delete them and all records referencing
    them in other tables.
    Example: mustool.php deleteinstswhere "IsMastodon!=1"
  revive
    Set a new, succesful check with current time for every instance that
    succesfully responded to last check.
  shuffle
    Randomize instances list (values in «RPos» column).
  updstats
    Update site’s statistics.
  clean
    Delete records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table, and for
    each «Instances» record delete all but the most recent '.$opts['clean_keep_checks'].' records
    from «InstChecks» table.
  optimize
    Optimize all the tables in the database.

OPTIONS
  -h, --help
    Shows this help text and exits.

This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under certain
conditions; see for details.'.N;

// ---------------------------------------------------------------------------
// Command line parsing
// ---------------------------------------------------------------------------

// Actions that take no argument: the action name is also its key in $opts.
$simpleactions = ['checkspam', 'shuffle', 'revive', 'updstats', 'clean', 'optimize'];

$dosome = false;
for ($i = 1; $i < $argc; $i++) {
    $arg = $argv[$i];
    if ($arg === '-h' || $arg === '--help') {
        echo($help);
        exit(0);
    } elseif ($arg === 'deleteinstswhere') {
        // this action consumes the next argument as its SQL condition
        if ($i == $argc - 1)
            mexit('«'.$arg.'» requires a MySQL condition as an argument (use «-h» to read help).'.N, 1);
        $i++;
        $dosome = true;
        $opts['deleteinstswhere'] = true;
        $opts['deleteinstswhereconds'] = $argv[$i];
    } elseif (in_array($arg, $simpleactions, true)) {
        $dosome = true;
        $opts[$arg] = true;
    } else {
        mexit('don’t know how to interpret «'.$arg.'» (use «-h» to read help).'.N, 1);
    }
}

if (!$dosome)
    mexit('no action was specified (use «-h» to read help).'.N, 1);

// ---------------------------------------------------------------------------
// Configuration and database connection
// ---------------------------------------------------------------------------

$inifp = __DIR__.'/../conf/mustard.ini';
$iniarr = @parse_ini_file($inifp);
if (!$iniarr)
    mexit('Could not open configuration file «'.$inifp.'»'.N, 1);

try {
    $link = @mysqli_connect($iniarr['db_host'], $iniarr['db_admin_name'], $iniarr['db_admin_password'], $iniarr['db_name'], $iniarr['db_port'], $iniarr['db_socket']);
} catch (Exception $error) {
    mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N, 1);
}
// for php versions < 8
if ($link === false)
    mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N, 1);

try {
    $res = mysqli_set_charset($link, 'utf8mb4');
} catch (Exception $error) {
    mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N, 1);
}
// for php versions < 8
if ($res === false)
    mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N, 1);

// ---------------------------------------------------------------------------
// Action: checkspam
// ---------------------------------------------------------------------------

if ($opts['checkspam']) {
    $res = myq($link, 'SELECT ID, URI FROM Instances');
    checkspam($res, 'ID', 'URI', 'Instances');
    $res = myq($link, 'SELECT ID, Hostname FROM Peers');
    checkspam($res, 'ID', 'Hostname', 'Peers');
}

// ---------------------------------------------------------------------------
// Action: deleteinstswhere
// ---------------------------------------------------------------------------

if ($opts['deleteinstswhere']) {
    // NOTE: the condition is spliced verbatim into the query. That is the
    // documented purpose of this action, so it is not escaped; never feed it
    // text that does not come from the operator.
    $res = myq($link, 'SELECT ID, URI FROM Instances WHERE '.$opts['deleteinstswhereconds']);
    $buf = [];
    while ($row = mysqli_fetch_assoc($res))
        $buf[] = $row;
    $cbuf = count($buf);
    if ($cbuf > 0) {
        foreach ($buf as $row)
            echo($row['URI'].' (ID='.$row['ID'].')'.N);
        echo('Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ');
        // fgets() returns false on EOF: treat that as "anything else"
        $inp = rtrim((string)fgets(STDIN));
        if ($inp === 'YES') {
            $i = 0;
            foreach ($buf as $row) {
                $i++;
                eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100 / $cbuf * $i, 2).'%)'.N, 1);
                $res = delinstbyid($link, $row['ID'], 'eecho', N);
                if (!$res)
                    mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N, 2);
            }
        }
    } else {
        eecho('no Instances records match expression «'.$opts['deleteinstswhereconds'].'».'.N, 2);
    }
}

// ---------------------------------------------------------------------------
// Action: revive
// ---------------------------------------------------------------------------

if ($opts['revive']) {
    $now = time();
    $res = myq($link, 'SELECT * FROM Instances WHERE WasLastCheckOk IS TRUE;');
    $buf = [];
    while ($row = mysqli_fetch_assoc($res))
        $buf[] = $row;
    $cbuf = count($buf);
    $i = 0;
    foreach ($buf as $row) {
        $i++;
        // myq() terminates the script on any query failure, so no result
        // checks are needed here.
        myq($link, 'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 1);');
        myq($link, 'UPDATE Instances SET TotChecks='.($row['TotChecks'] + 1).', OkChecks='.($row['OkChecks'] + 1).', WasLastCheckOk=1, LastOkCheckTS='.$now.' WHERE ID='.$row['ID'].';');
        eecho("{$i}/{$cbuf}\n", 1);
    }
    eecho('done! Affected rows: '.$cbuf.'.'.N, 1);
}

// ---------------------------------------------------------------------------
// Action: shuffle
// ---------------------------------------------------------------------------

if ($opts['shuffle']) {
    eecho('randomizing values in «RPos» column...'.N, 1);
    $res = myq($link, 'SELECT ID FROM Instances');
    $ids = [];
    while ($row = mysqli_fetch_assoc($res))
        $ids[] = $row['ID'];
    // shuffle() renumbers the array it is given, so it must be applied to the
    // list of positions (1..n) and never to an array keyed by instance ID.
    // range(1, 0) would yield [1, 0], hence the guard for an empty table.
    $positions = (count($ids) > 0) ? range(1, count($ids)) : [];
    shuffle($positions);
    foreach ($ids as $idx => $id)
        myq($link, 'UPDATE Instances SET RPos='.$positions[$idx].' WHERE ID='.$id);
    eecho('done! Affected rows: '.count($ids).'.'.N, 1);
}

// ---------------------------------------------------------------------------
// Action: updstats
// ---------------------------------------------------------------------------

if ($opts['updstats']) {
    // provides $supplangs (and $supplangscodes, used by the test snippet below)
    require __DIR__.'/../lib/supplangs.php';
    $day = 24 * 60 * 60;
    $now = time();
    // start of the current day, UTC: only hits older than this are aggregated
    $tdstart = gmmktime(0, 0, 0, (int)gmdate('n', $now), (int)gmdate('j', $now), (int)gmdate('Y', $now));

    // this below, if uncommented, populates DESTRUCTIVELY table ZHits for testing purposes
    /*myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
    myq($link,'DELETE FROM ZStats');
    $uids=array(
        'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
        'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
        'cccccccccccccccccccccccccccccccc',
        'dddddddddddddddddddddddddddddddd',
        'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
        'ffffffffffffffffffffffffffffffff'
    );
    $urls=array('home','instances','users','about','links','stats','contribute','404');
    for ($i=0; $i<1460; $i++)
        myq($link,'INSERT INTO ZHits (UID,URL,Lang,TS) VALUES ("'.$uids[rand(0,count($uids)-1)].'","'.$urls[rand(0,count($urls)-1)].'","'.$supplangscodes[rand(0,count($supplangscodes)-1)].'",'.rand($now-365*24*60*60,$tdstart).')');*/

    // only the oldest pending hit is needed to know where to start
    $res = myq($link, 'SELECT TS FROM ZHits WHERE TS < '.$tdstart.' ORDER BY TS ASC LIMIT 1');
    $row = mysqli_fetch_assoc($res);
    if ($row) {
        $firstts = (int)$row['TS'];
        $dstart = gmmktime(0, 0, 0, (int)gmdate('n', $firstts), (int)gmdate('j', $firstts), (int)gmdate('Y', $firstts));
        eecho('updating statistics...'.N, 1);
        $inserts = 0;
        // one ZStats record per UTC day, from the oldest pending hit up to yesterday
        while ($dstart < $tdstart) {
            $inserts++;
            $hits = 0;
            foreach ($supplangs as $key => $val)
                $supplangs[$key]['hc'] = 0;
            // this line below must be synced with the urls we consider, see ../site/index.php
            $hitspage = ['home' => 0, 'instances' => 0, 'users' => 0, 'about' => 0, 'links' => 0, 'stats' => 0, 'contribute' => 0, '404' => 0];
            // set of distinct UIDs seen during the day (UID => true)
            $seenuids = [];
            $res = myq($link, 'SELECT * FROM ZHits WHERE TS >= '.$dstart.' AND TS < '.($dstart + $day).' ORDER BY TS ASC');
            while ($row = mysqli_fetch_assoc($res)) {
                $hits++;
                if (array_key_exists($row['Lang'], $supplangs))
                    $supplangs[$row['Lang']]['hc']++;
                // URLs missing from the list above are still counted, under their own key
                if (!isset($hitspage[$row['URL']]))
                    $hitspage[$row['URL']] = 0;
                $hitspage[$row['URL']]++;
                $seenuids[$row['UID']] = true;
            }
            $visits = count($seenuids);

            // serialize the counters as "key:count;key:count;..."
            $parts = [];
            foreach ($supplangs as $key => $val)
                $parts[] = $key.':'.$val['hc'];
            $hitslang = implode(';', $parts);
            $parts = [];
            foreach ($hitspage as $key => $val)
                $parts[] = $key.':'.$val;
            $hitspagestr = implode(';', $parts);

            // both strings contain text read back from the database: escape them
            $query = 'INSERT INTO ZStats (TS, Hits, HitsLang, HitsPage, Visits) VALUES ('.$dstart.', '.$hits.', "'.myesc($link, $hitslang).'", "'.myesc($link, $hitspagestr).'", '.$visits.')';
            myq($link, $query);
            $dstart += $day;
        }
        myq($link, 'DELETE FROM ZHits WHERE TS < '.$tdstart);
        eecho('done! Affected rows: '.$inserts.'.'.N, 1);
    } else {
        // nothing to aggregate; keep going so that any further requested
        // action (clean, optimize) still runs
        eecho('statistics are already up to date :-)'.N, 1);
    }
}

// ---------------------------------------------------------------------------
// Action: clean
// ---------------------------------------------------------------------------

if ($opts['clean']) {
    $ago = time() - ($opts['clean_notifs_before_weeks'] * 7 * 24 * 60 * 60);
    eecho('cleaning records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table...'.N, 1);
    myq($link, 'DELETE FROM Notifications WHERE Microtime < '.$ago);
    eecho('done! Affected rows: '.mysqli_affected_rows($link).'.'.N, 1);

    eecho('cleaning records from «InstChecks» table, keeping only the most recent '.$opts['clean_keep_checks'].' for each instance...'.N, 1);
    $res = myq($link, 'SELECT ID FROM Instances WHERE TotChecks > '.$opts['clean_keep_checks']);
    $totar = 0;
    while ($row = mysqli_fetch_assoc($res)) {
        // Time of the newest check that must go, i.e. the one right after the
        // N most recent; no row means the instance has N checks or fewer.
        $rres = myq($link, 'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' ORDER BY Time DESC LIMIT 1 OFFSET '.$opts['clean_keep_checks']);
        $rrow = mysqli_fetch_assoc($rres);
        if ($rrow) {
            myq($link, 'DELETE FROM InstChecks WHERE InstID='.$row['ID'].' AND Time<='.$rrow['Time']);
            $totar += mysqli_affected_rows($link);
        }
    }
    eecho('done! Total affected rows: '.$totar.'.'.N, 1);
}

// ---------------------------------------------------------------------------
// Action: optimize
// ---------------------------------------------------------------------------

if ($opts['optimize']) {
    eecho('optimizing all the tables in the database...'.N, 1);
    $res = myq($link, 'SHOW TABLES');
    while ($row = mysqli_fetch_row($res)) {
        // quote the identifier so that unusual table names cannot break the statement
        $rres = myq($link, 'OPTIMIZE TABLE `'.str_replace('`', '``', $row[0]).'`');
        // the statement may return several rows per table: report every
        // error or warning, not just the first row
        while ($rrow = mysqli_fetch_assoc($rres)) {
            if ($rrow['Msg_type'] == 'error' || $rrow['Msg_type'] == 'warning')
                eecho(kimplode($rrow).N, 2);
        }
    }
    eecho('done!'.N, 1);
}

mysqli_close($link);

exit(0);

// ---------------------------------------------------------------------------
// functions
// ---------------------------------------------------------------------------

/**
 * Groups the rows of a result set by the last two labels of a host name and
 * prints every group having more than $opts['minent'] entries, largest first.
 *
 * @param mysqli_result $res    result set to read; rows are fetched as associative arrays
 * @param string        $idcol  name of the column holding the record ID
 * @param string        $domcol name of the column holding the host name
 * @param string        $tabnam table name, used only in the printed messages
 */
function checkspam(&$res, $idcol, $domcol, $tabnam)
{
    global $opts;

    // always start from an empty map, so that an empty table is handled too
    $doms = [];
    $nrows = 0;
    while ($row = mysqli_fetch_assoc($res)) {
        $nrows++;
        // key = "secondlevel.tld"; values without a dot are ignored
        if (preg_match('#[^.]+\.[^.]+$#', (string)$row[$domcol], $matches) === 1)
            $doms[$matches[0]][] = ['dom' => $row[$domcol], 'id' => $row[$idcol]];
    }
    if ($nrows == 0) {
        eecho('no '.$tabnam.' records found.'.N, 2);
        return;
    }

    uasort($doms, 'cmp');
    foreach ($doms as $key => $arr) {
        $carr = count($arr);
        if ($carr > $opts['minent']) {
            echo $tabnam.': '.$key.': '.$carr.' entries'.N;
            foreach ($arr as $entry)
                echo ' '.$domcol.': '.$entry['dom'].'; '.$idcol.': '.$entry['id'].N;
            echo N;
        }
    }
}

/**
 * Comparison callback ordering arrays by element count, largest first.
 *
 * @param array $a
 * @param array $b
 * @return int -1, 0 or 1
 */
function cmp($a, $b)
{
    $ca = count($a);
    $cb = count($b);
    if ($ca == $cb)
        return 0;
    return ($ca < $cb) ? 1 : -1; // reverse :-)
}

/**
 * Renders an associative array as "key: value; key: value; ...".
 *
 * @param array $arr
 * @return string
 */
function kimplode(&$arr)
{
    $buf = [];
    foreach ($arr as $key => $val)
        $buf[] = $key.': '.$val;
    return implode('; ', $buf);
}

/**
 * Runs a query and terminates the script (exit code 2) if it fails, so that
 * callers never receive false.
 *
 * @param mysqli $link  open connection
 * @param string $query SQL statement
 * @return mysqli_result|bool whatever mysqli_query() returned on success
 */
function myq(&$link, $query)
{
    try {
        $res = mysqli_query($link, $query);
    } catch (Exception $error) {
        mexit('Query «'.$query.'» failed: '.$error->getMessage().'.'.N, 2);
    }
    // for php versions < 8, which seem to not catch mysql exceptions
    if ($res === false)
        mexit('Query «'.$query.'» failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N, 2);
    return $res;
}

/**
 * Closes the global database connection (if any), logs $msg and exits.
 *
 * @param string $msg  message to log; logged as «Info» when $code is 0, as «Error» otherwise
 * @param int    $code process exit code
 */
function mexit($msg, $code)
{
    global $link;

    if (isset($link) && $link !== false)
        mysqli_close($link);
    if ($code == 0)
        eecho($msg, 1);
    else
        eecho($msg, 3);
    exit($code);
}

/**
 * Writes a timestamped log line: levels below 2 go to standard output, the
 * others to standard error.
 *
 * @param string $msg message; no newline is appended
 * @param int    $lev index into the global $levs array
 */
function eecho($msg, $lev = 1)
{
    global $levs;

    // microtime(false) returns "0.uuuuuuuu ssssssssss"
    list($frac, $secs) = explode(' ', microtime(false));
    $stamp = date('Y-m-d H:i:s', (int)$secs).'.'.substr($frac, 2);
    $line = $stamp.' '.$levs[$lev].': '.$msg;
    if ($lev < 2)
        echo($line);
    else
        fwrite(STDERR, $line);
}