MastodonHelp/web/clitools/mustool.php
2024-08-23 23:35:15 +02:00

383 lines
14 KiB
PHP
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
define('N',"\n");
require __DIR__.'/../lib/delinstbyid.php';
$levs=['Debug', 'Info', 'Warning', 'Error'];
$opts=array(
'checkspam'=>false,
'minent'=>10,
'revive'=>false,
'shuffle'=>false,
'updstats'=>false,
'clean'=>false,
'clean_notifs_before_weeks'=>24,
'clean_keep_checks'=>16,
'deleteinstswhere'=>false,
'optimize'=>false
);
$help='mustool.php
DESCRIPTION
mustool.php can do lots of things on Mastodon Helps database.
SYNOPSIS
mustool.php [options] <action> [parameters] ...
ACTIONS
checkspam
Tries to detect spam instances, i.e. those with the same TLD, same second
level domain, variable higher domains, both in «Instances» and «Peers»
tables.
deleteinstswhere <condition[s]>
First, return a list of Instances records matching “condition”, then let
you choose whether you want to delete them and all records referencing them
in other tables.
Example: mustool.php deleteinstswhere "IsMastodon!=1"
revive
Set a new, succesful check with current time for every instance that
succesfully responded to last check.
shuffle
Randomize instances list (values in «RPos» column).
updstats
Update sites statistics.
clean
Delete records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table, and for each
«Instances» record delete all but the most recent '.$opts['clean_keep_checks'].' records from
«InstChecks» table.
optimize
Optimize all the tables in the database.
OPTIONS
-h, --help
Shows this help text and exits.
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
$dosome=false;
for ($i=1; $i<$argc; $i++) {
if ($argv[$i]=='-h' || $argv[$i]=='--help') {
echo $help;
exit(0);
} elseif ($argv[$i]=='deleteinstswhere') {
if ($i==$argc-1) mexit('«'.$argv[$i].'» requires a MySQL condition as an argument (use «-h» to read help).'.N,1);
$i++;
$dosome=true;
$opts['deleteinstswhere']=true;
$opts['deleteinstswhereconds']=$argv[$i];
} elseif ($argv[$i]=='checkspam') {
$dosome=true;
$opts['checkspam']=true;
} elseif ($argv[$i]=='shuffle') {
$dosome=true;
$opts['shuffle']=true;
} elseif ($argv[$i]=='revive') {
$dosome=true;
$opts['revive']=true;
} elseif ($argv[$i]=='updstats') {
$dosome=true;
$opts['updstats']=true;
} elseif ($argv[$i]=='clean') {
$dosome=true;
$opts['clean']=true;
} elseif ($argv[$i]=='optimize') {
$dosome=true;
$opts['optimize']=true;
} else {
mexit('dont know how to interpret «'.$argv[$i].'» (use «-h» to read help).'.N,1);
}
}
if (!$dosome) mexit('no action was specified (use «-h» to read help).'.N,1);
use function mysqli_real_escape_string as myesc;
$inifp=__DIR__.'/../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp);
if ($iniarr==false) mexit('Could not open configuration file «'.$inifp.'»'.N,1);
try { $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket']); }
catch (Exception $error) { mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true); }
// for php versions < 8
if ($link===false) mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
try { $res=mysqli_set_charset($link,'utf8mb4'); }
catch (Exception $error) { mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true); }
// for php versions < 8
if ($res===false) mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
if ($opts['checkspam']) {
$res=myq($link,'SELECT ID, URI FROM Instances');
checkspam($res,'ID','URI','Instances');
$res=myq($link,'SELECT ID, Hostname FROM Peers');
checkspam($res,'ID','Hostname','Peers');
}
function checkspam(&$res,$idcol,$domcol,$tabnam) {
global $opts;
$buf=[];
while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
unset($res);
$cbuf=count($buf);
if ($cbuf>0) {
$doms=[];
foreach ($buf as $row)
if (preg_match('#[^.]+\.[^.]+$#',$row[$domcol],$matches)===1)
$doms[$matches[0]][]=['dom'=>$row[$domcol], 'id'=>$row[$idcol]];
/* echo('Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ');
$inp=rtrim(fgets(STDIN));
if ($inp=='YES') {
$i=0;
foreach ($buf as $row) {
$i++;
eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100/$cbuf*$i,2).'%)'.N,1);
$res=delinstbyid($link,$row['ID'],'eecho',N);
if (!$res) mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N,2);
}
}*/
} else {
eecho('no '.$tabnam.' records found.'.N,2);
}
uasort($doms,'cmp');
foreach ($doms as $key=>$arr) {
$carr=count($arr);
if ($carr>$opts['minent']) {
eecho('>>> '.$tabnam.': '.$key.': '.$carr.' entries'.N,1);
foreach ($arr as $entry)
eecho(' '.$domcol.': '.$entry['dom'].'; '.$idcol.': '.$entry['id'].N,1);
}
}
}
function cmp($a,$b) {
$a=count($a);
$b=count($b);
if ($a==$b)
return 0;
return ($a<$b) ? 1 : -1;// reverse :-)
}
if ($opts['deleteinstswhere']) {
$res=myq($link,'SELECT ID, URI FROM Instances WHERE '.$opts['deleteinstswhereconds']);
$buf=[];
while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
$cbuf=count($buf);
if ($cbuf>0) {
foreach ($buf as $row) eecho($row['URI'].' (ID='.$row['ID'].')'.N,1);
echo 'Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ';
$inp=rtrim(fgets(STDIN));
if ($inp=='YES') {
$i=0;
foreach ($buf as $row) {
$i++;
eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100/$cbuf*$i,2).'%)'.N,1);
$res=delinstbyid($link,$row['ID'],'eecho',N);
if (!$res) mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N,2);
}
}
} else {
eecho('no Instances records match expression «'.$opts['deleteinstswhereconds'].'».'.N,2);
}
}
if ($opts['revive']) {
$now=time();
$res=myq($link,'SELECT * FROM Instances WHERE WasLastCheckOk IS TRUE;');
$buf=[];
while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
$cbuf=count($buf);
$i=0;
foreach ($buf as $row) {
$i++;
$res=myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 1);');
if ($res!==false) {
$res=myq($link,'UPDATE Instances SET TotChecks='.($row['TotChecks']+1).', OkChecks='.($row['OkChecks']+1).', WasLastCheckOk=1, LastOkCheckTS='.$now.' WHERE ID='.$row['ID'].';');
if ($res===false)
mexit('could not update instance record with ID='.$row['ID'].'; shutting down.'.N,3);
else
eecho("{$i}/{$cbuf}\n",1);
} else {
mexit('could not insert new check record into InstChecks for instance with ID='.$row['ID'].'; shutting down.'.N,3);
}
}
eecho('done! Affected rows: '.$cbuf.'.'.N,1);
}
if ($opts['shuffle']) {
eecho('randomizing values in «RPos» column...'.N,1);
$res=myq($link,'SELECT ID FROM Instances ORDER BY RAND()');
$i=0;
while ($row=mysqli_fetch_assoc($res)) {
myq($link,"UPDATE Instances SET RPos={$i} WHERE ID={$row['ID']}");
$i++;
}
eecho("done! Affected rows: {$i}.\n",1);
}
if ($opts['updstats']) {
require __DIR__.'/../lib/supplangs.php';
$day=24*60*60;
$now=time();
$tdstart=gmmktime(0,0,0,gmdate('n',$now),gmdate('j',$now),gmdate('Y',$now));
//eecho('Today started at '.$tdstart.' ('.gmdate('d M Y H:i:s',$tdstart).').'.N,1);
// this below, if uncommented, populates DESTRUCTIVELY table ZHits for testing purposes
/*myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
myq($link,'DELETE FROM ZStats');
$uids=array(
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'cccccccccccccccccccccccccccccccc',
'dddddddddddddddddddddddddddddddd',
'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
'ffffffffffffffffffffffffffffffff'
);
$urls=array('home','instances','users','about','links','stats','contribute','404');
for ($i=0; $i<1460; $i++) myq($link,'INSERT INTO ZHits (UID,URL,Lang,TS) VALUES ("'.$uids[rand(0,count($uids)-1)].'","'.$urls[rand(0,count($urls)-1)].'","'.$supplangscodes[rand(0,count($supplangscodes)-1)].'",'.rand($now-365*24*60*60,$tdstart).')');*/
$res=myq($link,'SELECT * FROM ZHits WHERE TS < '.$tdstart.' ORDER BY TS ASC');
if (mysqli_num_rows($res)>0) {
$row=mysqli_fetch_assoc($res);
$dstart=gmmktime(0,0,0,gmdate('n',$row['TS']),gmdate('j',$row['TS']),gmdate('Y',$row['TS']));
eecho('updating statistics...'.N,1);
} else {
mexit('statistics are already up to date :-)'.N,0);
}
$inserts=0;
while ($dstart<$tdstart) {
//eecho('-------- '.gmdate('d M Y H:i:s',$dstart).' ---------'.N,1);
$inserts++;
$hits=0;
foreach ($supplangs as $key=>$val)
$supplangs[$key]['hc']=0;
// this line below must be synced with the urls we consider, see ../site/index.php
$hitspage=array('home'=>0, 'instances'=>0, 'users'=>0, 'about'=>0, 'links'=>0, 'stats'=>0, 'contribute'=>0, '404'=>0);
$visits=0;
$buf=array();
$res=myq($link,'SELECT * FROM ZHits WHERE TS >= '.$dstart.' AND TS < '.($dstart+$day).' ORDER BY TS ASC');
while ($row=mysqli_fetch_assoc($res)) {
//eecho($row['UID'].' '.$row['URL'].' '.$row['Lang'].' '.$row['TS'].N,1);
$hits++;
if (array_key_exists($row['Lang'],$supplangs))
$supplangs[$row['Lang']]['hc']++;
$hitspage[$row['URL']]++;
if (!in_array($row['UID'],$buf)) {
$buf[]=$row['UID'];
$visits++;
}
}
$buf='';
foreach ($supplangs as $key=>$val) $buf.=$key.':'.$val['hc'].';';
$hitslang=substr($buf,0,-1);
$buf='';
foreach ($hitspage as $key=>$val) $buf.=$key.':'.$val.';';
$hitspage=substr($buf,0,-1);
//eecho('>>> hits: '.$hits.', hitslang: '.$hitslang.', hitspage: '.$hitspage.', visits: '.$visits.' <<<'.N,1);
$query='INSERT INTO ZStats (TS, Hits, HitsLang, HitsPage, Visits) VALUES ('.$dstart.', '.$hits.', "'.$hitslang.'", "'.$hitspage.'", '.$visits.')';
//eecho('QUERY: '.$query.N,1);
myq($link,$query);
$dstart+=$day;
}
myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
eecho('done! Affected rows: '.$inserts.'.'.N,1);
}
if ($opts['clean']) {
$ago=time()-($opts['clean_notifs_before_weeks']*7*24*60*60);
eecho('cleaning records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table...'.N,1);
$res=myq($link,'DELETE FROM Notifications WHERE Microtime < '.$ago);
eecho('done! Affected rows: '.mysqli_affected_rows($link).'.'.N,1);
eecho('cleaning records from «InstChecks» table, keeping only the most recent '.$opts['clean_keep_checks'].' for each instance...'.N,1);
$res=myq($link,'SELECT ID FROM Instances WHERE TotChecks > '.$opts['clean_keep_checks']);
$nrows=mysqli_num_rows($res);
$p=0;
$totar=0;
while ($row=mysqli_fetch_assoc($res)) {
$p++;
//eecho('working on instance '.$p.'/'.$nrows.' (ID = '.$row['ID'].')...'.N,1);
$rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' ORDER BY Time DESC');
$instchecks=[];
while ($rrow=mysqli_fetch_assoc($rres)) $instchecks[]=$rrow;
if (count($instchecks)>$opts['clean_keep_checks']) {
while(count($instchecks)>$opts['clean_keep_checks']) $bef=array_pop($instchecks);
$bef=$bef['Time'];
myq($link,'DELETE FROM InstChecks WHERE InstID='.$row['ID'].' AND Time<='.$bef);
$ar=mysqli_affected_rows($link);
//eecho('deleted '.$ar.' records from “InstChecks” table.'.N,1);
$totar+=$ar;
}/* else {
eecho('no “InstChecks” records to delete.'.N,1);
}*/
}
eecho('done! Total affected rows: '.$totar.'.'.N,1);
}
if ($opts['optimize']) {
eecho('optimizing all the tables in the database...'.N,1);
$res=myq($link,'SHOW TABLES');
while ($row=mysqli_fetch_row($res)) {
$rres=myq($link,'OPTIMIZE TABLE '.$row[0]);
$rrow=mysqli_fetch_assoc($rres);
if ($rrow['Msg_type']=='error' || $rrow['Msg_type']=='warning')
eecho(kimplode($rrow).N,2);
}
eecho('done!'.N,1);
}
mysqli_close($link);
exit(0);
// functions
function kimplode(&$arr) {
$buf=[];
foreach ($arr as $key=>$val)
$buf[]=$key.': '.$val;
return(implode('; ',$buf));
}
function myq(&$link,$query) {
try { $res=mysqli_query($link,$query); }
catch (Exception $error) { mexit('Query «'.$query.'» failed: '.$error->getMessage().'.'.N,2); }
// for php versions < 8, which seem to not catch mysql exceptions
if ($res===false) mexit('Query «'.$query.'» failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,2);
return($res);
}
function mexit($msg,$code) {
global $link;
if (isset($link) && $link!==false) mysqli_close($link);
if ($code==0)
eecho($msg,1);
else
eecho($msg,3);
exit($code);
}
function eecho($msg,$lev=1) {
global $levs;
$time=microtime(false);
$time=explode(' ',$time);
$time=date('Y-m-d H:i:s',$time[1]).'.'.substr($time[0],2,-2);
$msg=$time.' '.$levs[$lev].': '.$msg;
if ($lev<2)
echo $msg;
else
fwrite(STDERR,$msg);
}
?>