From 353ad24504775ca2f1319c1a6f71fe2cc6f753d6 Mon Sep 17 00:00:00 2001 From: pezcurrel Date: Sun, 15 Dec 2019 17:06:02 +0100 Subject: [PATCH] ... --- crawler/bidablock.txt | 40 ----- crawler/istanzesorelle | 5 - .../mastblocksdump.sh | 71 +++++--- web/admin/crawler/blacklist_bida.txt | 36 ++++ web/admin/crawler/blacklist_cagi.txt | 157 +++++++++++++++++ {crawler => web/admin/crawler}/crawler.php | 158 +++++++++++++----- web/admin/crawler/crawler_pant.ini | 8 + web/admin/crawler/crawler_sorellanza.ini | 8 + web/admin/crawler/istanzesorelle | 5 + web/admin/crawler/istanzesorelle_pant | 1 + web/theme.css | 2 +- 11 files changed, 381 insertions(+), 110 deletions(-) delete mode 100644 crawler/bidablock.txt delete mode 100644 crawler/istanzesorelle rename domain_blocks_dump/domain_blocks_dump.sh => mastblocksdump/mastblocksdump.sh (52%) create mode 100644 web/admin/crawler/blacklist_bida.txt create mode 100644 web/admin/crawler/blacklist_cagi.txt rename {crawler => web/admin/crawler}/crawler.php (74%) create mode 100644 web/admin/crawler/crawler_pant.ini create mode 100644 web/admin/crawler/crawler_sorellanza.ini create mode 100644 web/admin/crawler/istanzesorelle create mode 100644 web/admin/crawler/istanzesorelle_pant diff --git a/crawler/bidablock.txt b/crawler/bidablock.txt deleted file mode 100644 index 8501f53..0000000 --- a/crawler/bidablock.txt +++ /dev/null @@ -1,40 +0,0 @@ -## Istanze silenziate e sospese in Mastodon.bida.im - -| Istanza | Stato | Motivo | -|----------|:-------------:|------:| -|toot.love|Silenziata|| -|switter.at|Silenziata|https://mastodon.bida.im/@jops/101404791975700441| -|freezepeach.xyz|Sospesa|| -|ediot.social|Sospesa|| -|anitwitter.com|Sospesa|| -|gs.smuglo.li|Sospesa|| -|pawoo.net|Sospesa|| -|unsafe.space|Sospesa|| -|woofer.alfter.us|Sospesa|| -|preteengirls.biz|Sospesa|| -|shitposter.club|Silenziata|| -|social.heldscal.la|Sospesa|| -|gorf.club|Sospesa|| -|sealion.club|Silenziata|| -|baraag.net|Sospesa|| -|social.imirhil.fr|Sospesa|| -|wrongthink.net|Sospesa|| -|porntoot.com|Silenziata|| -|anitwitter.moe|Sospesa|| -|social.targaryen.house|Silenziata|| -|noagendasocial.com|Silenziata|| -|social.au2pb.net|Sospesa|| -|ika.moe|Sospesa|| -|newjack.city|Sospesa|| -|pl.smuglo.li|Sospesa|| -|freespeechextremist.com|Sospesa|| -|babymetal.party|Sospesa|| -|mobile.co|Sospesa|https://mastodon.bida.im/@Ca_Gi/101355947506820592| -|mastodon.starrevolution.org|Sospesa|https://a.nom.pl/notice/450131| -|humblr.social|Silenziata|https://mastodon.bida.im/@cirku17/101399587014096355| -|quodverum.com|Sospesa|https://mastodon.bida.im/@Ca_Gi/101514801964087604| -|neckbeard.xyz|Sospesa|Instance suspended: neckbeard.xyz - anime nazi shit, irony bro admin| -|social.quodverum.com|Sospesa|Razzisti| -|freespeech.firedragonstudios.com|Sospesa|https://mastodon.bida.im/@Ca_Gi/101344114624456297| -|shitposter.club|Sospesa|https://mastodon.bida.im/@Ca_Gi/101270762003908554| -|bsd.moe|Sospesa|https://mastodon.bida.im/@Ca_Gi/101270762003908554| diff --git a/crawler/istanzesorelle b/crawler/istanzesorelle deleted file mode 100644 index 995366a..0000000 --- a/crawler/istanzesorelle +++ /dev/null @@ -1,5 +0,0 @@ -mastodon.bida.im|https://git.lattuga.net/bida/suspended-silenced-instances/raw/master/README.md -mastodon.cisti.org| -nebbia.fail| -stereodon.social| -snapj.saja.freemyip.com| diff --git a/domain_blocks_dump/domain_blocks_dump.sh b/mastblocksdump/mastblocksdump.sh similarity index 52% rename from domain_blocks_dump/domain_blocks_dump.sh rename to mastblocksdump/mastblocksdump.sh index 2ad3a3e..9a25592 100755 --- a/domain_blocks_dump/domain_blocks_dump.sh +++ b/mastblocksdump/mastblocksdump.sh @@ -16,21 +16,36 @@ MASTENVFP="$MASTHOME/live/.env.production" DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt" HELP="SINTASSI - domain_blocks_dump.sh [opzioni] + mastblocksdump.sh [opzioni] DESCRIZIONE Questo script rimedia all'attuale mancanza, in mastodon, di un endpoint dell'API che ritorni una lista dei domini bloccati da un'istanza in un formato univoco e parsabile. Legge i dati necessari alla connessione al - db di mastodon dal file di configurazione di mastodon, esegue tramite - psql una query sulla tabella domain_blocks e ne salva i risultati in un - file di testo pubblicamente accessibile - (da https://[dominio]/domain_blocks.txt). - È pensato per essere eseguito periodicamente da un cron job. + db di mastodon dal file di configurazione di mastodon, per default + \"$MASTENVFP\" + poi esegue tramite psql una query sulla tabella domain_blocks + e ne salva i risultati in un file di testo, per default + \"$DOMBLOCKSFP\" + che sarà accessibile da \"https://[dominio]/domain_blocks.txt\". + È pensato per essere eseguito periodicamente da un cron job, come + utente mastodon oppure root oppure altro utente che abbia accesso in + lettura al file di configurazione di mastodon e in scrittura a quello + in cui scrivere la lista delle istanze bloccate. OPZIONI -H, --home Definisce la home di mastodon (per default \"$MASTHOME\") - e di conseguenza la posizione del suo file di configurazione - (per default \"$MASTENVFP\") e del file in cui scrivere la + e di conseguenza il percorso del suo file di configurazione + (per default \"$MASTENVFP\") e del file + in cui scrivere la lista delle istanze bloccate (per default + \"$DOMBLOCKSFP\"). + È comunque possibile specificare individualmente il percorso + del file di configurazione di mastodon e di quello in cui scrivere + la lista delle istanze bloccate con le due opzioni che seguono. + -e, --envfp + Definisce il percorso del file di configurazione di mastodon in uso. + -b, --blocksfp + Definisce il percorso del file in cui scrivere la lista delle istanze + bloccate. -h, --help Mostra questo aiuto ed esce." @@ -40,12 +55,32 @@ while [ $i -lt ${#args[@]} ]; do if [ "${args[$i]:0:1}" == "-" ]; then case "${args[$i]}" in "-H" | "--home" ) - if [ -z "${args[$i+1]}" ] || [ ! -d "${args[$i+1]}" ]; then - echo "L'opzione \"${args[$i]}\" richiede un parametro di tipo directory (usa \"-h\" per l'aiuto)." + if [ -z "${args[$i+1]}" ]; then + echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)." exit 1 else ((i++)) MASTHOME=$(echo "${args[$i]}" | sed -e 's/\/$//') + MASTENVFP="$MASTHOME/live/.env.production" + DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt" + fi + ;; + "-e" | "--envfp" ) + if [ -z "${args[$i+1]}" ]; then + echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)." + exit 1 + else + ((i++)) + MASTENVFP="${args[$i]}" + fi + ;; + "-b" | "--blocksfp" ) + if [ -z "${args[$i+1]}" ]; then + echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)." + exit 1 + else + ((i++)) + DOMBLOCKSFP="${args[$i]}" fi ;; "-h" | "--help" ) @@ -64,20 +99,18 @@ while [ $i -lt ${#args[@]} ]; do ((i++)) done -MASTENVFP="$MASTHOME/live/.env.production" [ ! -e "$MASTENVFP" ] && echo "\"$MASTENVFP\" non esiste, muoio (usa \"-h\" per l'aiuto)." && exit 1 [ ! -f "$MASTENVFP" ] && echo "\"$MASTENVFP\" non è un file, muoio (usa \"-h\" per l'aiuto)." && exit 1 [ ! -r "$MASTENVFP" ] && echo "\"$MASTENVFP\" non è leggibile, muoio (usa \"-h\" per l'aiuto)." && exit 1 -DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt" -touch "$DOMBLOCKSFP" 2>/dev/null -[ ! $? -eq 0 ] && echo "Non ho potuto creare/modificare il file \"$DOMBLOCKSFP\", muoio (usa \"-h\" per l'aiuto)." && exit 1 +DB_HOST=`grep 'DB_HOST' "$MASTENVFP"|sed -e 's/[^=]*=//'` +DB_PORT=`grep 'DB_PORT' "$MASTENVFP"|sed -e 's/[^=]*=//'` +DB_NAME=`grep 'DB_NAME' "$MASTENVFP"|sed -e 's/[^=]*=//'` +DB_USER=`grep 'DB_USER' "$MASTENVFP"|sed -e 's/[^=]*=//'` +DB_PASS=`grep 'DB_PASS' "$MASTENVFP"|sed -e 's/[^=]*=//'` -DB_HOST=`cat "$MASTENVFP"|grep 'DB_HOST'|sed -e 's/[^=]*=//'` -DB_PORT=`cat "$MASTENVFP"|grep 'DB_PORT'|sed -e 's/[^=]*=//'` -DB_NAME=`cat "$MASTENVFP"|grep 'DB_NAME'|sed -e 's/[^=]*=//'` -DB_USER=`cat "$MASTENVFP"|grep 'DB_USER'|sed -e 's/[^=]*=//'` -DB_PASS=`cat "$MASTENVFP"|grep 'DB_PASS'|sed -e 's/[^=]*=//'` +touch "$DOMBLOCKSFP" 2>/dev/null +[ ! $? -eq 0 ] && echo "Non posso creare/modificare il file \"$DOMBLOCKSFP\", muoio (usa \"-h\" per l'aiuto)." && exit 1 echo "# generato $(date -u)" > "$DOMBLOCKSFP" echo "# formato di output: dominio bloccato|data ultima modifica blocco|tipo blocco|commento pubblico" >> "$DOMBLOCKSFP" diff --git a/web/admin/crawler/blacklist_bida.txt b/web/admin/crawler/blacklist_bida.txt new file mode 100644 index 0000000..71efa21 --- /dev/null +++ b/web/admin/crawler/blacklist_bida.txt @@ -0,0 +1,36 @@ +anitwitter.com|2019-12-11 18:44:06.710862|1| +anitwitter.moe|2019-12-11 18:44:06.710862|1| +babymetal.party|2019-12-11 18:44:06.710862|1| +baraag.net|2019-12-11 18:44:06.710862|1| +bsd.moe|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554 +ediot.social|2019-12-11 18:44:06.710862|1| +freespeechextremist.com|2019-12-11 18:44:06.710862|1| +freespeech.firedragonstudios.com|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101344114624456297 +freezepeach.xyz|2019-12-11 18:44:06.710862|1| +gorf.club|2019-12-11 18:44:06.710862|1| +gs.smuglo.li|2019-12-11 18:44:06.710862|1| +humblr.social|2019-12-11 18:44:06.710862|0|https://mastodon.bida.im/@cirku17/101399587014096355 +ika.moe|2019-12-11 18:44:06.710862|1| +mastodon.starrevolution.org|2019-12-11 18:44:06.710862|1|https://a.nom.pl/notice/450131 +mobile.co|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101355947506820592 +neckbeard.xyz|2019-12-11 18:44:06.710862|1|Instance suspended: neckbeard.xyz - anime nazi shit, irony bro admin +newjack.city|2019-12-11 18:44:06.710862|1| +noagendasocial.com|2019-12-11 18:44:06.710862|0| +pawoo.net|2019-12-11 18:44:06.710862|1| +pl.smuglo.li|2019-12-11 18:44:06.710862|1| +porntoot.com|2019-12-11 18:44:06.710862|0| +preteengirls.biz|2019-12-11 18:44:06.710862|1| +quodverum.com|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101514801964087604 +sealion.club|2019-12-11 18:44:06.710862|0| +shitposter.club|2019-12-11 18:44:06.710862|0| +shitposter.club|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554 +social.au2pb.net|2019-12-11 18:44:06.710862|1| +social.heldscal.la|2019-12-11 18:44:06.710862|1| +social.imirhil.fr|2019-12-11 18:44:06.710862|1| +social.quodverum.com|2019-12-11 18:44:06.710862|1|Razzisti +social.targaryen.house|2019-12-11 18:44:06.710862|0| +switter.at|2019-12-11 18:44:06.710862|0|https://mastodon.bida.im/@jops/101404791975700441 +toot.love|2019-12-11 18:44:06.710862|0| +unsafe.space|2019-12-11 18:44:06.710862|1| +woofer.alfter.us|2019-12-11 18:44:06.710862|1| +wrongthink.net|2019-12-11 18:44:06.710862|1| diff --git a/web/admin/crawler/blacklist_cagi.txt b/web/admin/crawler/blacklist_cagi.txt new file mode 100644 index 0000000..a3beb7b --- /dev/null +++ b/web/admin/crawler/blacklist_cagi.txt @@ -0,0 +1,157 @@ +2.distsn.org|2019-12-11 18:44:06.710862|1|Spam +2hu.club|2019-12-11 18:44:06.710862|1|Pedo – Loli +anitwitter.com|2019-12-11 18:44:06.710862|1| +anitwitter.moe|2019-12-11 18:44:06.710862|1|Bad Moderation +ap.torlipen.net|2019-12-11 18:44:06.710862|1| +ap.uwu.st|2019-12-11 18:44:06.710862|1|Harassment or abuse +babymetal.party|2019-12-11 18:44:06.710862|1| +baraag.net|2019-12-11 18:44:06.710862|1|Pedo – Loli +beehub.org|2019-12-11 18:44:06.710862|0| +blob.cat|2019-12-11 18:44:06.710862|1| +bodybuilding.im|2019-12-11 18:44:06.710862|1| +bofa.lol|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +bsd.moe|2019-12-11 18:44:06.710862|1| +carnal-gabhub.protohype.net|2019-12-11 18:44:06.710862|1| +civiq.social|2019-12-11 18:44:06.710862|1| +cofe.moe|2019-12-11 18:44:06.710862|1|Bad Moderation +comm.network|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +community.halle-leaks.de|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +community.highlandarrow.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +counter.social|2019-12-11 18:44:06.710862|1|Bad Moderation +cryzed.com|2019-12-11 18:44:06.710862|0| +cyzed.com|2019-12-11 18:44:06.710862|1| +daffodil-11.org|2019-12-11 18:44:06.710862|1|Malicious site +dev.civiq.social|2019-12-11 18:44:06.710862|1| +develop.gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +dickshow.social|2019-12-11 18:44:06.710862|1|Bad Moderation +djitter.com|2019-12-11 18:44:06.710862|1| +ediot.social|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +ediot.socialsilence|2019-12-11 18:44:06.710862|1| +ekrem.develop.gab.com|2019-12-11 18:44:06.710862|1| +exited.eu|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +explosion.party|2019-12-11 18:44:06.710862|1|Conspiracy theories +fedichive.tk|2019-12-11 18:44:06.710862|1| +feminism.lgbt|2019-12-11 18:44:06.710862|1|Pedo – Loli +freefedifollowers.ga|2019-12-11 18:44:06.710862|1| +freehold.earth|2019-12-11 18:44:06.710862|1|Bad Moderation +freespeechextremist.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +freespeech.firedragonstudios.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +freespeech.host|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +freevoice.space|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +freezepeach.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.ai|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gabble.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gabfed.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.io|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.polaris-1.work|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.protohype.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gab.sleek.eu|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gameliberty.club|2019-12-11 18:44:06.710862|1|Pedo – Loli +gasthe.lgbt|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gnusocial.no|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +goldandblack.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gorf.club|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +gorf.club|2019-12-11 18:44:06.710862|1| +gs.archae.me|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gs.kawa-kun.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gs.mon5t3r.info|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +gs.smuglo.li|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +hakui.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +homura.space|2019-12-11 18:44:06.710862|1|Harassment or abuse +ika.moe|2019-12-11 18:44:06.710862|1|Bad Moderation +impeccable.social|2019-12-11 18:44:06.710862|1| +inditoot.com|2019-12-11 18:44:06.710862|1|Bad Moderation +jabb.in|2019-12-11 18:44:06.710862|1|Conspiracy theories +juche.town|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +karolat.press|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +kawaiistu.moe|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +kawen.space|2019-12-11 18:44:06.710862|1| +kazvam.com|2019-12-11 18:44:06.710862|1| +kipper.im|2019-12-11 18:44:06.710862|1|Bad Moderation +kiwifarms.cc|2019-12-11 18:44:06.710862|0| +kneegrows.top|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +kowai.youkai.town|2019-12-11 18:44:06.710862|1|Pedo – Loli +kyot.me|2019-12-11 18:44:06.710862|1|Bad Moderation +liberdon.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +libertarianism.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +libre.tube|2019-12-11 18:44:06.710862|0| +loli.estate|2019-12-11 18:44:06.710862|1|Pedo – Loli +lolis.world|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +manx.social|2019-12-11 18:44:06.710862|1|Bad Moderation +mastodon.loliandstuff.moe|2019-12-11 18:44:06.710862|1|Pedo – Loli +mastodon.starrevolution.org|2019-12-11 18:44:06.710862|1| +mast.wholemars.com|2019-12-11 18:44:06.710862|0| +melalandia.tk|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +me.nooruul.xyz|2019-12-11 18:44:06.710862|1| +mobile.co|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +neckbeard.xyz|2019-12-11 18:44:06.710862|1| +neenster.org|2019-12-11 18:44:06.710862|1| +newjack.city|2019-12-11 18:44:06.710862|1|Spam +niu.moe|2019-12-11 18:44:06.710862|1|Bad Moderation +noagendasocial.com|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +not-develop.gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +not.phrack.fyi|2019-12-11 18:44:06.710862|1|Conspiracy theories +npf.mlpol.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +pawoo.net|2019-12-11 18:44:06.710862|1|Pedo – Loli +pleroma.cucked.me|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +pleroma.rareome.ga|2019-12-11 18:44:06.710862|1|Malicious site +pleroma.soykaf.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +pleroma.wolfie.pw|2019-12-11 18:44:06.710862|1| +pleroma.yorha.club|2019-12-11 18:44:06.710862|1|Harassment or abuse +pleville.net|2019-12-11 18:44:06.710862|0| +pl.smuglo.li|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +porntoot.com|2019-12-11 18:44:06.710862|1|Pedo – Loli +preteen.biz|2019-12-11 18:44:06.710862|1|Pedo – Loli +preteengirls.bi|2019-12-11 18:44:06.710862|1|Pedo – Loli +preteengirls.biz|2019-12-11 18:44:06.710862|1|Pedo – Loli +pridelands.io|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +qoto.org|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +quey.org|2019-12-11 18:44:06.710862|0| +quitter.pw|2019-12-11 18:44:06.710862|1| +quodverum.com|2019-12-11 18:44:06.710862|1| +rainbowdash.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +rapefeminists.network|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +sealion.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +shitasstits.life|2019-12-11 18:44:06.710862|1|Pedo – Loli +shitposter.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.allthefallen.ninja|2019-12-11 18:44:06.710862|1|Pedo – Loli +social.au2pb.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.guizzyordi.info|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.heldscal.la|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.hidamari.blue|2019-12-11 18:44:06.710862|0| +social.homunyan.com|2019-12-11 18:44:06.710862|0| +social.i2p.rocks|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.imirhil.fr|2019-12-11 18:44:06.710862|1| +social.louisoft01.moe|2019-12-11 18:44:06.710862|1|Conspiracy theories +social.lucci.xyz|2019-12-11 18:44:06.710862|1|Conspiracy theories +social.quodverum.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +social.raptorengineering.io|2019-12-11 18:44:06.710862|1|Advertisement +social.sunshinegardens.org|2019-12-11 18:44:06.710862|1|Harassment or abuse +social.super-niche.club|2019-12-11 18:44:06.710862|1|Pedo – Loli +social.targaryen.house|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +social.wiuwiu.de|2019-12-11 18:44:06.710862|1|Bad Moderation +socnet.supes.com|2019-12-11 18:44:06.710862|1|Bad Moderation +spinster.dev|2019-12-11 18:44:06.710862|1| +spinster.xyz|2019-12-11 18:44:06.710862|1| +sunshinegardens.org|2019-12-11 18:44:06.710862|1|Harassment or abuse +thechad.zone|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +the.hedgehoghunter.club|2019-12-11 18:44:06.710862|0| +toot.love|2019-12-11 18:44:06.710862|1|Bad Moderation +unsafe.space|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +vampire.estate|2019-12-11 18:44:06.710862|1|Pedo – Loli +video.halle-leaks.de|2019-12-11 18:44:06.710862|1| +vipgirlfriend.xxx|2019-12-11 18:44:06.710862|1|Unflagged porn +voluntaryism.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +wagesofsinisdeath.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +waifu.social|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +warc.space|2019-12-11 18:44:06.710862|0| +weeaboo.space|2019-12-11 18:44:06.710862|1|Pedo – Loli +weedis.life|2019-12-11 18:44:06.710862|0| +wogan.im|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +woofer.alfter.us|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech +wrongthink.net|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech +wxw.moe|2019-12-11 18:44:06.710862|1|Pedo – Loli +yiff.rocks|2019-12-11 18:44:06.710862|1|Harassment or abuse +youkai.town|2019-12-11 18:44:06.710862|1|Pedo – Loli +zerohack.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech diff --git a/crawler/crawler.php b/web/admin/crawler/crawler.php similarity index 74% rename from crawler/crawler.php rename to web/admin/crawler/crawler.php index af550b7..78cf9d2 100755 --- a/crawler/crawler.php +++ b/web/admin/crawler/crawler.php @@ -27,7 +27,8 @@ $opts=array( 'onlinecheck'=>true, 'timeout'=>5, 'biglistfp'=>null, - 'prodlistfp'=>null + 'prodlistfp'=>null, + 'blacklists'=>array() ); $help='DESCRIZIONE @@ -64,13 +65,11 @@ OPZIONI bloccate] Ogni riga vuota o che cominci con il carattere «#» sarà ignorata. Il formato del file delle istanze bloccate è questo: per ogni riga: - ||(Silenziata|Sospesa)| - [riferimento al motivo del blocco. - Le prime 4 righe del file saranno ignorate, così come le righe che non - corrispondessero al formato di cui sopra. - In futuro utilizzeremo un altro formato, per ora ci stiamo adeguando - a quello impiegato da mastodon.bida.im per la sua lista di istanze - bloccate. + |||| + + Esempio di : «2019-12-11 18:44:06.710862» + : «0» per silenziata, «1» per sospesa, «2» per "solo + file media e rapporti". -b, --biglistfp DEVE essere specificata. Imposta il file da cui leggere le istanze già testate in passato @@ -81,6 +80,13 @@ OPZIONI Imposta il file da cui leggere (se esiste) e in cui scrivere i dati relativi alle istanze corrispondenti ai criteri di selezione descritti nel paragrafo «DESCRIZIONE». + -B, --blacklistfp + Imposta un eventuale file di istanze bloccate aggiuntivo. Per il + formato di questi file vedi sopra il paragrafo relativo nella + descrizione dell\'opzione «-s, --startinstancesfp». Questa opzione può + essere utilizzata più volte per specificare più file di istanze + bloccate. Nel file di configurazione è l\'unica opzione con un formato + particolare: «blacklistfp=file1[,file2[,file3[...]]]. -t, --timeout Imposta il timeout delle richieste http(s) in secondi. DEFAULT: '.$opts['timeout'].' secondi. @@ -158,15 +164,19 @@ if (!is_null($inifp)) { $buf=@parse_ini_file($inifp); if ($buf!==false) { foreach ($buf as $key=>$val) { - if (array_key_exists($key,$opts)) - $opts[$key]=$val; - else + if (array_key_exists($key,$opts)) { + if ($key=='excludeafter') { + $opts['excludeafter']=tosec($val); + if ($opts['excludeafter']===false) + mexit('L’opzione «excludeafter» specificata in «'.$inifp.'» non è in un formato corretto (usa «-h» per vedere la guida).'.N,1); + } elseif ($key=='blacklists') { + $opts['blacklists']=explode(',',$val); + } else { + $opts[$key]=$val; + } + } else { echo('Attenzione: l’opzione «'.$key.'» in «'.$inifp.'» è sconosciuta e sarà ignorata.'.N); - } - if (array_key_exists('excludeafter',$opts)) { - $opts['excludeafter']=tosec($opts['excludeafter']); - if ($opts['excludeafter']===false) - mexit('L’opzione «excludeafter» specificata in «'.$inifp.'» non è in un formato corretto (usa «-h» per vedere la guida).'.N,1); + } } } else { mexit('Attenzione: non ho potuto leggere la configurazione dal file «'.$inifp.'».'.N,1); @@ -216,6 +226,13 @@ for ($i=1; $i<$argc; $i++) { $i++; $opts['startinstancesfp']=$argv[$i]; break; + case '-B': + case '--blacklistfp': + if ($i+1>=$argc || $argv[$i+1]=='') + mexit('L’opzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1); + $i++; + $opts['blacklists'][]=$argv[$i]; + break; case '-l': case '--loadbiglist': if ($i+1>=$argc || ($argv[$i+1]!='si' && $argv[$i+1]!='no')) @@ -276,6 +293,27 @@ if ($opts['loadbiglist']) { $blinstances=array(); +function loadblacklist($bluri) { + global $blinstances, $opts; + $context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout']))); + $f=@fopen($bluri,'r',false,$context); + if ($f!==false) { + $i=0; + while (!feof($f)) { + $lin=fgets($f); +//bsd.moe|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554 + if (preg_match('/^([^#\|]{1}[^\|]+)\|{1}([^\|]+)\|{1}([012]{1})\|{1}(.*)$/',$lin,$buf)===1 && !in_array($buf[1],$blinstances) ) { + $i++; + $blinstances[]=$buf[1]; + } + } + fclose($f); + echo('OK :-) (+'.$i.' istanze bloccate caricate; totale: '.count($blinstances).')'.N); + } else { + mexit('ERRORE :-('.N,1); + } +} + if ($opts['onlinecheck']) { $startinstances=array(); @@ -297,33 +335,23 @@ if ($opts['onlinecheck']) { if (count($startinstances)<1) mexit('Il file delle istanze di partenza «'.$opts['startinstancesfp'].'» non contiene alcuna voce, muoio.'.N,1); - $context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout']))); - foreach ($startinstances as $dom=>$bluri) { if (!is_null($bluri)) { echo('Recupero la lista delle istanze bloccate da «'.$dom.'» («'.$bluri.'») ... '); - $f=@fopen($bluri,'r',false,$context); - if ($f!==false) { - // le prime 4 righe non ci interessano - for ($i=0; $i<4; $i++) - fgets($f); - while (!feof($f)) { - $lin=fgets($f); - if (preg_match('/^\|([^\|]*)\|([^\|]*)\|([^\|]*)\|$/',$lin,$buf)===1) - $blinstances[]=$buf[1]; - } - fclose($f); - echo('OK :-)'.N); - } else { - echo('ERRORE :-('.N); - } + loadblacklist($bluri); } else { echo('NON recupero la lista delle istanze bloccate da «'.$dom.'»: la uri della stessa non è definita.'.N); } } - ksort($blinstances); + foreach ($opts['blacklists'] as $bluri) { + echo('Carico le istanze bloccate dall\'URI «'.$bluri.'» ... '); + loadblacklist($bluri); + } + sort($blinstances); echo(count($blinstances).' istanze bloccate.'.N); + $context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout']))); + foreach ($startinstances as $dom=>$bluri) { if (!array_key_exists($dom,$biglist)) $biglist[$dom]=null; @@ -359,8 +387,25 @@ $newbiglist=array(); $i=0; $biglistc=count($biglist); foreach ($biglist as $dom=>$oinfo) { + $i++; + echo('~~~~~~'.N); if ($opts['onlinecheck']) { - echo('Recupero le informazioni su «'.$dom.'» ('.($i+1).'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... '); + + echo('Recupero le informazioni Nodeinfo su «'.$dom.'» ('.$i.'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... '); + $buf=@file_get_contents('https://'.$dom.'/nodeinfo/2.0',false,$context); + if ($buf!==false) { + echo('OK :-)'.N); + $ninfo=json_decode($buf,true); + if (!is_null($ninfo) && array_key_exists('software',$ninfo) && array_key_exists('name',$ninfo['software'])) { + echo('Il software è '.$ninfo['software']['name'].'!'.N); + } else { + echo('Impossibile determinare il software!'.N); + } + } else { + echo('ERRORE :-('.N); + } + + echo('Recupero le informazioni API su «'.$dom.'» ('.$i.'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... '); $buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context); if ($buf!==false) { echo('OK :-)'.N); @@ -386,19 +431,41 @@ foreach ($biglist as $dom=>$oinfo) { } $info=$oinfo; } - $i++; } else { $info=$oinfo; } - if (!is_null($info) - && !in_array($dom,$blinstances) - && array_key_exists('registrations',$info) && $info['registrations']==true - && array_key_exists('stats',$info) && array_key_exists('user_count',$info['stats']) && $info['stats']['user_count']>10 && $info['stats']['user_count']<=30000 - && array_key_exists('domain_count',$info['stats']) && $info['stats']['domain_count']>=500 - && array_key_exists('status_count',$info['stats']) && $info['stats']['status_count']/$info['stats']['user_count']>=10 -/* && array_key_exists('contact_account',$info) && array_key_exists('created_at',$info['contact_account']) - && time()-strtotime($info['contact_account']['created_at'])>=6*30*24*60*60*/ - ) { + $whynot=array(); + if (in_array($dom,$blinstances)) + $whynot[]='Istanza blacklistata'; + if (array_key_exists('uri',$info)) { + if (!array_key_exists('registrations',$info)) + $whynot[]='Stato delle registrazioni non disponibile'; + elseif ($info['registrations']==false) + $whynot[]='Registrazioni chiuse'; + if (!array_key_exists('stats',$info)) + $whynot[]='Stats non disponibili'; + elseif (!array_key_exists('user_count',$info['stats'])) + $whynot[]='Numero utenti non disponibile'; + elseif ($info['stats']['user_count']<10 || $info['stats']['user_count']>30000) + $whynot[]='Numero utenti ('.$info['stats']['user_count'].') non compreso tra 10 e 30000'; + elseif (!array_key_exists('domain_count',$info['stats'])) + $whynot[]='Numero istanze conosciute non disponibile'; + elseif ($info['stats']['domain_count']<500) + $whynot[]='Numero istanze conosciute minore di 500'; + elseif (!array_key_exists('status_count',$info['stats'])) + $whynot[]='Numero di toots non disponibile'; + elseif ($info['stats']['status_count']/$info['stats']['user_count']<10) + $whynot[]='Media dei toots per utente minore di 10'; + if (!array_key_exists('contact_account',$info) || is_null($info['contact_account'])) + $whynot[]='Informazioni sull\'account admin principale non disponibili'; + elseif (!array_key_exists('created_at',$info['contact_account'])) + $whynot[]='Data di creazione dell\'account admin principale non disponibile'; + elseif (time()-strtotime($info['contact_account']['created_at'])<6*31*24*60*60) + $whynot[]='L\'account admin principale risulta esser stato creato meno di 6 mesi fa'; + } elseif (!array_key_exists($dom,$prodlist)) { + $whynot[]='Info non disponibili, e l\'istanza non era già presente nella lista delle istanze occhei'; + } + if (count($whynot)==0) { if (array_key_exists($dom,$prodlist)) { if (array_key_exists('short_description',$info) && (!array_key_exists('short_description',$prodlist[$dom]) || $prodlist[$dom]['short_description']!=$info['short_description'])) { $info['short_description_changed']=true; @@ -430,6 +497,7 @@ foreach ($biglist as $dom=>$oinfo) { } else { echo('«'.$dom.'» non era nella lista delle istanze occhei e NON CI È ENTRATA! :-('.N); } + echo('Motivazioni: '.implode('; ',$whynot).'.'.N); } } diff --git a/web/admin/crawler/crawler_pant.ini b/web/admin/crawler/crawler_pant.ini new file mode 100644 index 0000000..cdf9bea --- /dev/null +++ b/web/admin/crawler/crawler_pant.ini @@ -0,0 +1,8 @@ +excludeafter=1M +startinstancesfp=istanzesorelle_pant +loadbiglist=true +onlinecheck=true +timeout=5 +biglistfp=biglist_pant.json +prodlistfp=prodlist_pant.json +blacklists=blacklist_cagi.txt diff --git a/web/admin/crawler/crawler_sorellanza.ini b/web/admin/crawler/crawler_sorellanza.ini new file mode 100644 index 0000000..6760660 --- /dev/null +++ b/web/admin/crawler/crawler_sorellanza.ini @@ -0,0 +1,8 @@ +excludeafter=1M +startinstancesfp=istanzesorelle +loadbiglist=true +onlinecheck=true +timeout=5 +biglistfp=biglist.json +prodlistfp=prodlist.json +blacklists=blacklist_cagi.txt diff --git a/web/admin/crawler/istanzesorelle b/web/admin/crawler/istanzesorelle new file mode 100644 index 0000000..06acaac --- /dev/null +++ b/web/admin/crawler/istanzesorelle @@ -0,0 +1,5 @@ +mastodon.bida.im|blacklist_bida.txt +mastodon.cisti.org| +nebbia.fail| +stereodon.social| +snapj.saja.freemyip.com| diff --git a/web/admin/crawler/istanzesorelle_pant b/web/admin/crawler/istanzesorelle_pant new file mode 100644 index 0000000..8b56c12 --- /dev/null +++ b/web/admin/crawler/istanzesorelle_pant @@ -0,0 +1 @@ +pantagruel.dnsup.net|blacklist_bida.txt diff --git a/web/theme.css b/web/theme.css index 69d2c7c..fb88d59 100644 --- a/web/theme.css +++ b/web/theme.css @@ -205,7 +205,7 @@ a:active { #summcont { background-color: rgba(0,0,0,0.85); position: fixed; - width: 200pt; + width: 300pt; height: calc(100% - 40px); top: 40px; left: 0;