MastodonHelp/web/clitools/mustool.php

387 lines
14 KiB
PHP
Raw Normal View History

2020-10-13 08:21:26 +02:00
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// shorthand for the newline appended to every message
define('N',"\n");
require __DIR__.'/../lib/delinstbyid.php';
// names of the log levels, indexed by the numeric level passed to eecho()
$levs=['Debug', 'Info', 'Warning', 'Error'];
// one boolean flag per action (raised by the command line parser below)
// plus the tunables used by some of the actions
$opts=[
	'checkspam'=>false,
	// checkspam: only groups with more than this many entries are reported
	'minent'=>10,
	'revive'=>false,
	'shuffle'=>false,
	'updstats'=>false,
	'clean'=>false,
	// clean: «Notifications» records older than this many weeks are deleted
	'clean_notifs_before_weeks'=>24,
	// clean: number of most recent «InstChecks» records kept for each instance
	'clean_keep_checks'=>16,
	'deleteinstswhere'=>false,
	'optimize'=>false
];
// text printed by «-h»/«--help»; the numbers in the «clean» paragraph are read
// from $opts so that they always match the values actually used
$help='mustool.php
DESCRIPTION
mustool.php can do lots of things on Mastodon Helps database.
SYNOPSIS
mustool.php [options] <action> [parameters] ...
ACTIONS
checkspam
Tries to detect spam instances, i.e. those with the same TLD, same second
level domain, variable higher domains, both in «Instances» and «Peers»
tables.
deleteinstswhere <condition[s]>
First, return a list of Instances records matching “condition”, then let
you choose whether you want to delete them and all records referencing them
in other tables.
Example: mustool.php deleteinstswhere "IsMastodon!=1"
revive
Set a new, succesful check with current time for every instance that
succesfully responded to last check.
shuffle
Randomize instances list (values in «RPos» column).
updstats
Update sites statistics.
clean
Delete records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table, and for each
«Instances» record delete all but the most recent '.$opts['clean_keep_checks'].' records from
«InstChecks» table.
optimize
Optimize all the tables in the database.
OPTIONS
-h, --help
Shows this help text and exits.
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
// Command line parsing: every argument is either «-h»/«--help» or the name of
// an action; several actions can be requested in a single invocation.
// actions that take no parameter: each of them only raises its own flag in $opts
$simpleacts=['checkspam', 'shuffle', 'revive', 'updstats', 'clean', 'optimize'];
$dosome=false;
for ($i=1; $i<$argc; $i++) {
	$arg=$argv[$i];
	if ($arg=='-h' || $arg=='--help') {
		echo $help;
		exit(0);
	}
	if ($arg=='deleteinstswhere') {
		// this action consumes the next argument as its condition
		if ($i==$argc-1) mexit('«'.$arg.'» requires a MySQL condition as an argument (use «-h» to read help).'.N,1);
		$i++;
		$opts['deleteinstswhereconds']=$argv[$i];
	} elseif (!in_array($arg,$simpleacts,true)) {
		// mexit() does not return, so the flag below is never set for unknown words
		mexit('dont know how to interpret «'.$arg.'» (use «-h» to read help).'.N,1);
	}
	$dosome=true;
	$opts[$arg]=true;
}
if (!$dosome) mexit('no action was specified (use «-h» to read help).'.N,1);
2020-10-13 08:21:26 +02:00
use function mysqli_real_escape_string as myesc;
2020-10-18 06:53:27 +02:00
// Load the database settings from the ini file, open the connection and
// switch it to the «utf8mb4» charset; any failure ends the script.
$inifp=__DIR__.'/../conf/mustard.ini';
$iniarr=@parse_ini_file($inifp);
if (!$iniarr)
	mexit('Could not open configuration file «'.$inifp.'»'.N,1);
try {
	$link=@mysqli_connect(
		$iniarr['db_host'],
		$iniarr['db_admin_name'],
		$iniarr['db_admin_password'],
		$iniarr['db_name'],
		$iniarr['db_port'],
		$iniarr['db_socket']
	);
} catch (Exception $error) {
	mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
}
// for php versions < 8: the failure shows up as a false return value
if ($link===false)
	mexit('could not connect to MySQL server: '.mysqli_connect_error().'.'.N,1,true);
try {
	$res=mysqli_set_charset($link,'utf8mb4');
} catch (Exception $error) {
	mexit('could not set «utf8mb4» charset for MySQL: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
}
// for php versions < 8: the failure shows up as a false return value
if ($res===false)
	mexit('could not set MySQL charset: '.mysqli_error($link).' ['.mysqli_errno($link).'].'.N,1,true);
2020-10-13 08:21:26 +02:00
2023-12-27 21:53:44 +01:00
// checkspam: run the detection once for each table, on the column that holds
// the host name (table name => column name)
if ($opts['checkspam']) {
	foreach (['Instances'=>'URI', 'Peers'=>'Hostname'] as $spamtab=>$spamcol) {
		$res=myq($link,'SELECT ID, '.$spamcol.' FROM '.$spamtab);
		checkspam($res,'ID',$spamcol,$spamtab);
	}
}
/**
 * Reports the groups of records whose host names end with the same two labels.
 *
 * Every row of $res is filed under the last two dot-separated labels of its
 * $domcol value (values without a dot are ignored); the groups having more
 * than $opts['minent'] members are then printed through eecho(), ordered by
 * cmp(), i.e. biggest group first.
 *
 * @param mysqli_result $res    rows to inspect, exposing $idcol and $domcol
 * @param string        $idcol  name of the column holding the record id
 * @param string        $domcol name of the column holding the host name
 * @param string        $tabnam table name, used only in the messages
 */
function checkspam(&$res,$idcol,$domcol,$tabnam) {
	global $opts;
	$doms=[];
	$nrows=0;
	while ($row=mysqli_fetch_assoc($res)) {
		$nrows++;
		if (preg_match('#[^.]+\.[^.]+$#',$row[$domcol],$matches)===1)
			$doms[$matches[0]][]=['dom'=>$row[$domcol], 'id'=>$row[$idcol]];
	}
	if ($nrows==0) {
		eecho('no '.$tabnam.' records found.'.N,2);
		// nothing to group or to sort
		return;
	}
	uasort($doms,'cmp');
	foreach ($doms as $key=>$arr) {
		$carr=count($arr);
		if ($carr>$opts['minent']) {
			eecho('>>> '.$tabnam.': '.$key.': '.$carr.' entries'.N,1);
			foreach ($arr as $entry)
				eecho(' '.$domcol.': '.$entry['dom'].'; '.$idcol.': '.$entry['id'].N,1);
		}
	}
}
/**
 * Comparison callback ordering arrays by size, biggest first.
 *
 * @param array $a
 * @param array $b
 * @return int -1 when $a has more elements than $b, 1 when it has fewer, 0 when equal
 */
function cmp($a,$b) {
	$na=count($a);
	$nb=count($b);
	// descending order: the bigger array has to come first
	if ($na>$nb) return -1;
	if ($na<$nb) return 1;
	return 0;
}
2022-12-18 11:41:09 +01:00
// deleteinstswhere: list the Instances records matching the condition given on
// the command line and, only after an explicit «YES», delete them one by one
// through delinstbyid().
// NOTE(review): the condition is pasted as it is into the query; that is the
// documented purpose of this action, so it must only get trusted input.
if ($opts['deleteinstswhere']) {
	$res=myq($link,'SELECT ID, URI FROM Instances WHERE '.$opts['deleteinstswhereconds']);
	$buf=[];
	while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
	$cbuf=count($buf);
	if ($cbuf>0) {
		foreach ($buf as $row) eecho($row['URI'].' (ID='.$row['ID'].')'.N,1);
		echo 'Do you really want to delete those '.$cbuf.' record(s)? Enter «YES» to do it, anything else to not do it: ';
		// fgets() returns false when STDIN is closed or at EOF: that counts as a «no»
		$inp=fgets(STDIN);
		if ($inp!==false && rtrim($inp)==='YES') {
			$i=0;
			foreach ($buf as $row) {
				$i++;
				eecho('deleting Instances record with ID = '.$row['ID'].' and URI = «'.$row['URI'].'», and all references to it ('.$i.'/'.$cbuf.', '.round(100/$cbuf*$i,2).'%)'.N,1);
				$res=delinstbyid($link,$row['ID'],'eecho',N);
				if (!$res) mexit('error trying to delete Instances record with ID='.$row['ID'].'; see the log above for more info.'.N,2);
			}
		}
	} else {
		eecho('no Instances records match expression «'.$opts['deleteinstswhereconds'].'».'.N,2);
	}
}
// revive: for every instance whose last check was ok, record a new successful
// check at the current time and bump the counters of the instance accordingly.
if ($opts['revive']) {
	$now=time();
	$res=myq($link,'SELECT * FROM Instances WHERE WasLastCheckOk IS TRUE;');
	$buf=[];
	while ($row=mysqli_fetch_assoc($res)) $buf[]=$row;
	$cbuf=count($buf);
	$i=0;
	foreach ($buf as $row) {
		$i++;
		// myq() ends the script by itself when a query fails, so its result
		// does not need to be checked here
		myq($link,'INSERT INTO InstChecks (InstID, Time, Status) VALUES ('.$row['ID'].', '.$now.', 1);');
		myq($link,'UPDATE Instances SET TotChecks='.($row['TotChecks']+1).', OkChecks='.($row['OkChecks']+1).', WasLastCheckOk=1, LastOkCheckTS='.$now.' WHERE ID='.$row['ID'].';');
		// progress indicator
		eecho("{$i}/{$cbuf}\n",1);
	}
	eecho('done! Affected rows: '.$cbuf.'.'.N,1);
}
2020-10-20 22:06:00 +02:00
// shuffle: give every instance a new random position (1..number of instances)
// in the «RPos» column.
if ($opts['shuffle']) {
	eecho('randomizing values in «RPos» column...'.N,1);
	$res=myq($link,'SELECT ID FROM Instances');
	// local list: $buf may still hold the rows collected by a previous action
	$ids=[];
	while ($row=mysqli_fetch_assoc($res)) $ids[]=$row['ID'];
	$cids=count($ids);
	if ($cids>0) {
		// shuffle() renumbers the keys of the array it works on, so the
		// positions are shuffled on their own and then paired with the IDs by
		// index, instead of shuffling an array keyed by ID
		$rpos=range(1,$cids);
		shuffle($rpos);
		foreach ($ids as $k=>$id)
			myq($link,'UPDATE Instances SET RPos='.$rpos[$k].' WHERE ID='.$id);
	}
	eecho('done! Affected rows: '.$cids.'.'.N,1);
}
2020-10-26 16:05:59 +01:00
// updstats: turn the raw hits of every complete (UTC) day stored in «ZHits»
// into one «ZStats» record per day, then delete the hits just consolidated.
if ($opts['updstats']) {
	// presumably defines $supplangs (and $supplangscodes), used below
	require __DIR__.'/../lib/supplangs.php';
	$day=24*60*60;
	$now=time();
	// start of the current day, UTC: only the hits before it are processed
	$tdstart=gmmktime(0,0,0,gmdate('n',$now),gmdate('j',$now),gmdate('Y',$now));
	// this below, if uncommented, populates DESTRUCTIVELY table ZHits for testing purposes
	/*myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
	myq($link,'DELETE FROM ZStats');
	$uids=array(
		'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
		'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
		'cccccccccccccccccccccccccccccccc',
		'dddddddddddddddddddddddddddddddd',
		'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
		'ffffffffffffffffffffffffffffffff'
	);
	$urls=array('home','instances','users','about','links','stats','contribute','404');
	for ($i=0; $i<1460; $i++) myq($link,'INSERT INTO ZHits (UID,URL,Lang,TS) VALUES ("'.$uids[rand(0,count($uids)-1)].'","'.$urls[rand(0,count($urls)-1)].'","'.$supplangscodes[rand(0,count($supplangscodes)-1)].'",'.rand($now-365*24*60*60,$tdstart).')');*/
	// only the oldest pending hit is needed, to know where to start from
	$res=myq($link,'SELECT TS FROM ZHits WHERE TS < '.$tdstart.' ORDER BY TS ASC LIMIT 1');
	$row=mysqli_fetch_assoc($res);
	if (!$row) {
		// just report it: leaving the script here would skip the other actions
		// requested on the same command line
		eecho('statistics are already up to date :-)'.N,1);
	} else {
		// start of the day the oldest pending hit belongs to
		$dstart=gmmktime(0,0,0,gmdate('n',$row['TS']),gmdate('j',$row['TS']),gmdate('Y',$row['TS']));
		eecho('updating statistics...'.N,1);
		$inserts=0;
		while ($dstart<$tdstart) {
			$inserts++;
			$hits=0;
			foreach ($supplangs as $key=>$val)
				$supplangs[$key]['hc']=0;
			// this line below must be synced with the urls we consider, see ../site/index.php
			$hitspage=['home'=>0, 'instances'=>0, 'users'=>0, 'about'=>0, 'links'=>0, 'stats'=>0, 'contribute'=>0, '404'=>0];
			// UIDs seen during the day, kept as keys: the lookup is exact and
			// does not scan the whole list for every hit
			$seen=[];
			$res=myq($link,'SELECT * FROM ZHits WHERE TS >= '.$dstart.' AND TS < '.($dstart+$day).' ORDER BY TS ASC');
			while ($row=mysqli_fetch_assoc($res)) {
				$hits++;
				if (array_key_exists($row['Lang'],$supplangs))
					$supplangs[$row['Lang']]['hc']++;
				// an url missing from the list above is counted anyway, starting from zero
				if (!isset($hitspage[$row['URL']])) $hitspage[$row['URL']]=0;
				$hitspage[$row['URL']]++;
				$seen[$row['UID']]=true;
			}
			$visits=count($seen);
			// both breakdowns are stored as «key:count;key:count;...» strings
			$parts=[];
			foreach ($supplangs as $key=>$val) $parts[]=$key.':'.$val['hc'];
			$hitslang=implode(';',$parts);
			$parts=[];
			foreach ($hitspage as $key=>$val) $parts[]=$key.':'.$val;
			$hitspagestr=implode(';',$parts);
			// the keys come from table data and from an included file: escape them
			$query='INSERT INTO ZStats (TS, Hits, HitsLang, HitsPage, Visits) VALUES ('.$dstart.', '.$hits.', "'.mysqli_real_escape_string($link,$hitslang).'", "'.mysqli_real_escape_string($link,$hitspagestr).'", '.$visits.')';
			myq($link,$query);
			$dstart+=$day;
		}
		myq($link,'DELETE FROM ZHits WHERE TS < '.$tdstart);
		eecho('done! Affected rows: '.$inserts.'.'.N,1);
	}
}
2022-11-29 08:54:41 +01:00
// clean: delete the old «Notifications» records, then trim «InstChecks» so that
// only the most recent $opts['clean_keep_checks'] records of each instance remain.
if ($opts['clean']) {
	$ago=time()-($opts['clean_notifs_before_weeks']*7*24*60*60);
	eecho('cleaning records older than '.$opts['clean_notifs_before_weeks'].' weeks from «Notifications» table...'.N,1);
	myq($link,'DELETE FROM Notifications WHERE Microtime < '.$ago);
	eecho('done! Affected rows: '.mysqli_affected_rows($link).'.'.N,1);
	eecho('cleaning records from «InstChecks» table, keeping only the most recent '.$opts['clean_keep_checks'].' for each instance...'.N,1);
	$keep=$opts['clean_keep_checks'];
	$res=myq($link,'SELECT ID FROM Instances WHERE TotChecks > '.$keep);
	$totar=0;
	while ($row=mysqli_fetch_assoc($res)) {
		// with the newest check first, the record at offset $keep is the most
		// recent one that has to go; there is none when the instance has no
		// more than $keep checks
		$rres=myq($link,'SELECT Time FROM InstChecks WHERE InstID='.$row['ID'].' ORDER BY Time DESC LIMIT 1 OFFSET '.$keep);
		$rrow=mysqli_fetch_assoc($rres);
		if ($rrow) {
			// NOTE(review): checks sharing this very «Time» value are deleted too
			myq($link,'DELETE FROM InstChecks WHERE InstID='.$row['ID'].' AND Time<='.$rrow['Time']);
			$totar+=mysqli_affected_rows($link);
		}
	}
	eecho('done! Total affected rows: '.$totar.'.'.N,1);
}
// optimize: run OPTIMIZE TABLE on everything listed by SHOW TABLES and log the
// errors and warnings the server reports.
if ($opts['optimize']) {
	eecho('optimizing all the tables in the database...'.N,1);
	$res=myq($link,'SHOW TABLES');
	while ($row=mysqli_fetch_row($res)) {
		// quoted identifier: a reserved word or an unusual character in a
		// table name must not break the statement
		$rres=myq($link,'OPTIMIZE TABLE `'.str_replace('`','``',$row[0]).'`');
		// the answer can be made of several rows (e.g. a note followed by a
		// status): look at all of them, not only at the first one
		while ($rrow=mysqli_fetch_assoc($rres)) {
			if ($rrow['Msg_type']=='error' || $rrow['Msg_type']=='warning')
				eecho(kimplode($rrow).N,2);
		}
	}
	eecho('done!'.N,1);
}
2020-10-13 08:21:26 +02:00
// every requested action has been carried out: close the connection and exit with success
mysqli_close($link);
exit(0);
// functions
/**
 * Renders an array as «key: value» pairs joined by «; ».
 *
 * @param array $arr array to render (taken by reference, not modified)
 * @return string the joined pairs, empty string for an empty array
 */
function kimplode(&$arr) {
	$out='';
	foreach ($arr as $name=>$value) {
		// separator goes before every pair but the first
		if ($out!=='') $out.='; ';
		$out.=$name.': '.$value;
	}
	return $out;
}
2022-12-18 11:41:09 +01:00
/**
 * Runs a query and returns its result; ends the script through mexit()
 * (exit code 2) when the query fails.
 *
 * @param mysqli $link  open connection to the database
 * @param string $query SQL statement, sent as it is
 * @return mysqli_result|bool what mysqli_query() returned, never false
 */
function myq(&$link,$query) {
	try {
		$res=mysqli_query($link,$query);
	} catch (Exception $error) {
		mexit('Query «'.$query.'» failed: '.$error->getMessage().'.'.N,2);
	}
	// for php versions < 8, which seem to not catch mysql exceptions
	if ($res===false)
		mexit('Query «'.$query.'» failed: '.mysqli_errno($link).': '.mysqli_error($link).'.'.N,2);
	return $res;
}
/**
 * Closes the database connection, if there is one, logs a last message and
 * ends the script.
 *
 * @param string $msg  message to log
 * @param int    $code exit status; with 0 the message is logged at level 1
 *                     (Info), with any other value at level 3 (Error)
 */
function mexit($msg,$code) {
	global $link;
	// $link does not exist yet, or is false, when the failure happened before
	// or while connecting
	if (isset($link) && $link!==false) mysqli_close($link);
	eecho($msg,($code==0) ? 1 : 3);
	exit($code);
}
/**
 * Writes a log line made of a timestamp with microseconds, the name of the
 * level and the message.
 *
 * @param string $msg message, written as it is (no newline is appended)
 * @param int    $lev index into the global $levs; levels below 2 are written
 *                    to standard output, the others to standard error
 */
function eecho($msg,$lev=1) {
	global $levs;
	// microtime(false) gives «0.uuuuuu00 seconds»
	list($usec,$sec)=explode(' ',microtime(false));
	// skip the leading «0.» and the two trailing digits of the fraction
	$stamp=date('Y-m-d H:i:s',$sec).'.'.substr($usec,2,-2);
	$line=$stamp.' '.$levs[$lev].': '.$msg;
	if ($lev>=2) {
		fwrite(STDERR,$line);
		return;
	}
	echo $line;
}
2020-10-13 08:21:26 +02:00
?>