pezcurrel 4 years ago
parent
commit
353ad24504

+ 0 - 40
crawler/bidablock.txt

@@ -1,40 +0,0 @@
-## Istanze silenziate e sospese in Mastodon.bida.im
-
-| Istanza  |      Stato      |  Motivo  |
-|----------|:-------------:|------:|
-|toot.love|Silenziata||
-|switter.at|Silenziata|https://mastodon.bida.im/@jops/101404791975700441|
-|freezepeach.xyz|Sospesa||
-|ediot.social|Sospesa||
-|anitwitter.com|Sospesa||
-|gs.smuglo.li|Sospesa||
-|pawoo.net|Sospesa||
-|unsafe.space|Sospesa||
-|woofer.alfter.us|Sospesa||
-|preteengirls.biz|Sospesa||
-|shitposter.club|Silenziata||
-|social.heldscal.la|Sospesa||
-|gorf.club|Sospesa||
-|sealion.club|Silenziata||
-|baraag.net|Sospesa||
-|social.imirhil.fr|Sospesa||
-|wrongthink.net|Sospesa||
-|porntoot.com|Silenziata||
-|anitwitter.moe|Sospesa||
-|social.targaryen.house|Silenziata||
-|noagendasocial.com|Silenziata||
-|social.au2pb.net|Sospesa||
-|ika.moe|Sospesa||
-|newjack.city|Sospesa||
-|pl.smuglo.li|Sospesa||
-|freespeechextremist.com|Sospesa||
-|babymetal.party|Sospesa||
-|mobile.co|Sospesa|https://mastodon.bida.im/@Ca_Gi/101355947506820592|
-|mastodon.starrevolution.org|Sospesa|https://a.nom.pl/notice/450131|
-|humblr.social|Silenziata|https://mastodon.bida.im/@cirku17/101399587014096355|
-|quodverum.com|Sospesa|https://mastodon.bida.im/@Ca_Gi/101514801964087604|
-|neckbeard.xyz|Sospesa|Instance suspended: neckbeard.xyz - anime nazi shit, irony bro admin|
-|social.quodverum.com|Sospesa|Razzisti|
-|freespeech.firedragonstudios.com|Sospesa|https://mastodon.bida.im/@Ca_Gi/101344114624456297|
-|shitposter.club|Sospesa|https://mastodon.bida.im/@Ca_Gi/101270762003908554|
-|bsd.moe|Sospesa|https://mastodon.bida.im/@Ca_Gi/101270762003908554|

+ 0 - 5
crawler/istanzesorelle

@@ -1,5 +0,0 @@
-mastodon.bida.im|https://git.lattuga.net/bida/suspended-silenced-instances/raw/master/README.md
-mastodon.cisti.org|
-nebbia.fail|
-stereodon.social|
-snapj.saja.freemyip.com|

+ 52 - 19
domain_blocks_dump/domain_blocks_dump.sh → mastblocksdump/mastblocksdump.sh

@@ -16,21 +16,36 @@ MASTENVFP="$MASTHOME/live/.env.production"
 DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt"
 
 HELP="SINTASSI
- domain_blocks_dump.sh [opzioni]
+ mastblocksdump.sh [opzioni]
 DESCRIZIONE
  Questo script rimedia all'attuale mancanza, in mastodon, di un endpoint
  dell'API che ritorni una lista dei domini bloccati da un'istanza in un
  formato univoco e parsabile. Legge i dati necessari alla connessione al
- db di mastodon dal file di configurazione di mastodon, esegue tramite
- psql una query sulla tabella domain_blocks e ne salva i risultati in un
- file di testo pubblicamente accessibile
- (da https://[dominio]/domain_blocks.txt).
- È pensato per essere eseguito periodicamente da un cron job.
+ db di mastodon dal file di configurazione di mastodon, per default
+ \"$MASTENVFP\"
+ poi esegue tramite psql una query sulla tabella domain_blocks
+ e ne salva i risultati in un file di testo, per default
+ \"$DOMBLOCKSFP\"
+ che sarà accessibile da \"https://[dominio]/domain_blocks.txt\".
+ È pensato per essere eseguito periodicamente da un cron job, come
+ utente mastodon oppure root oppure altro utente che abbia accesso in
+ lettura al file di configurazione di mastodon e in scrittura a quello
+ in cui scrivere la lista delle istanze bloccate.
 OPZIONI
  -H, --home
   Definisce la home di mastodon (per default \"$MASTHOME\")
-  e di conseguenza la posizione del suo file di configurazione
-  (per default \"$MASTENVFP\") e del file in cui scrivere la
+  e di conseguenza il percorso del suo file di configurazione
+  (per default \"$MASTENVFP\") e del file
+  in cui scrivere la lista delle istanze bloccate (per default
+  \"$DOMBLOCKSFP\").
+  È comunque possibile specificare individualmente il percorso
+  del file di configurazione di mastodon e di quello in cui scrivere
+  la lista delle istanze bloccate con le due opzioni che seguono.
+ -e, --envfp
+  Definisce il percorso del file di configurazione di mastodon in uso.
+ -b, --blocksfp
+  Definisce il percorso del file in cui scrivere la lista delle istanze
+  bloccate.
  -h, --help
   Mostra questo aiuto ed esce."
 
@@ -40,12 +55,32 @@ while [ $i -lt ${#args[@]} ]; do
 	if [ "${args[$i]:0:1}" == "-" ]; then
 		case "${args[$i]}" in
 			"-H" | "--home" )
-				if [ -z "${args[$i+1]}" ] || [ ! -d "${args[$i+1]}" ]; then
-					echo "L'opzione \"${args[$i]}\" richiede un parametro di tipo directory (usa \"-h\" per l'aiuto)."
+				if [ -z "${args[$i+1]}" ]; then
+					echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)."
 					exit 1
 				else
 					((i++))
 					MASTHOME=$(echo "${args[$i]}" | sed -e 's/\/$//')
+					MASTENVFP="$MASTHOME/live/.env.production"
+					DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt"
+				fi
+			;;
+			"-e" | "--envfp" )
+				if [ -z "${args[$i+1]}" ]; then
+					echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)."
+					exit 1
+				else
+					((i++))
+					MASTENVFP="${args[$i]}"
+				fi
+			;;
+			"-b" | "--blocksfp" )
+				if [ -z "${args[$i+1]}" ]; then
+					echo "L'opzione \"${args[$i]}\" richiede un parametro (usa \"-h\" per l'aiuto)."
+					exit 1
+				else
+					((i++))
+					DOMBLOCKSFP="${args[$i]}"
 				fi
 			;;
 			"-h" | "--help" )
@@ -64,20 +99,18 @@ while [ $i -lt ${#args[@]} ]; do
 	((i++))
 done
 
-MASTENVFP="$MASTHOME/live/.env.production"
 [ ! -e "$MASTENVFP" ] && echo "\"$MASTENVFP\" non esiste, muoio (usa \"-h\" per l'aiuto)." && exit 1
 [ ! -f "$MASTENVFP" ] && echo "\"$MASTENVFP\" non è un file, muoio (usa \"-h\" per l'aiuto)." && exit 1
 [ ! -r "$MASTENVFP" ] && echo "\"$MASTENVFP\" non è leggibile, muoio (usa \"-h\" per l'aiuto)." && exit 1
 
-DOMBLOCKSFP="$MASTHOME/live/public/domain_blocks.txt"
-touch "$DOMBLOCKSFP" 2>/dev/null
-[ ! $? -eq 0 ] && echo "Non ho potuto creare/modificare il file \"$DOMBLOCKSFP\", muoio (usa \"-h\" per l'aiuto)." && exit 1
+DB_HOST=`grep 'DB_HOST' "$MASTENVFP"|sed -e 's/[^=]*=//'`
+DB_PORT=`grep 'DB_PORT' "$MASTENVFP"|sed -e 's/[^=]*=//'`
+DB_NAME=`grep 'DB_NAME' "$MASTENVFP"|sed -e 's/[^=]*=//'`
+DB_USER=`grep 'DB_USER' "$MASTENVFP"|sed -e 's/[^=]*=//'`
+DB_PASS=`grep 'DB_PASS' "$MASTENVFP"|sed -e 's/[^=]*=//'`
 
-DB_HOST=`cat "$MASTENVFP"|grep 'DB_HOST'|sed -e 's/[^=]*=//'`
-DB_PORT=`cat "$MASTENVFP"|grep 'DB_PORT'|sed -e 's/[^=]*=//'`
-DB_NAME=`cat "$MASTENVFP"|grep 'DB_NAME'|sed -e 's/[^=]*=//'`
-DB_USER=`cat "$MASTENVFP"|grep 'DB_USER'|sed -e 's/[^=]*=//'`
-DB_PASS=`cat "$MASTENVFP"|grep 'DB_PASS'|sed -e 's/[^=]*=//'`
+touch "$DOMBLOCKSFP" 2>/dev/null
+[ ! $? -eq 0 ] && echo "Non posso creare/modificare il file \"$DOMBLOCKSFP\", muoio (usa \"-h\" per l'aiuto)." && exit 1
 
 echo "# generato $(date -u)" > "$DOMBLOCKSFP"
 echo "# formato di output: dominio bloccato|data ultima modifica blocco|tipo blocco|commento pubblico" >> "$DOMBLOCKSFP"

+ 36 - 0
web/admin/crawler/blacklist_bida.txt

@@ -0,0 +1,36 @@
+anitwitter.com|2019-12-11 18:44:06.710862|1|
+anitwitter.moe|2019-12-11 18:44:06.710862|1|
+babymetal.party|2019-12-11 18:44:06.710862|1|
+baraag.net|2019-12-11 18:44:06.710862|1|
+bsd.moe|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554
+ediot.social|2019-12-11 18:44:06.710862|1|
+freespeechextremist.com|2019-12-11 18:44:06.710862|1|
+freespeech.firedragonstudios.com|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101344114624456297
+freezepeach.xyz|2019-12-11 18:44:06.710862|1|
+gorf.club|2019-12-11 18:44:06.710862|1|
+gs.smuglo.li|2019-12-11 18:44:06.710862|1|
+humblr.social|2019-12-11 18:44:06.710862|0|https://mastodon.bida.im/@cirku17/101399587014096355
+ika.moe|2019-12-11 18:44:06.710862|1|
+mastodon.starrevolution.org|2019-12-11 18:44:06.710862|1|https://a.nom.pl/notice/450131
+mobile.co|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101355947506820592
+neckbeard.xyz|2019-12-11 18:44:06.710862|1|Instance suspended: neckbeard.xyz - anime nazi shit, irony bro admin
+newjack.city|2019-12-11 18:44:06.710862|1|
+noagendasocial.com|2019-12-11 18:44:06.710862|0|
+pawoo.net|2019-12-11 18:44:06.710862|1|
+pl.smuglo.li|2019-12-11 18:44:06.710862|1|
+porntoot.com|2019-12-11 18:44:06.710862|0|
+preteengirls.biz|2019-12-11 18:44:06.710862|1|
+quodverum.com|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101514801964087604
+sealion.club|2019-12-11 18:44:06.710862|0|
+shitposter.club|2019-12-11 18:44:06.710862|0|
+shitposter.club|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554
+social.au2pb.net|2019-12-11 18:44:06.710862|1|
+social.heldscal.la|2019-12-11 18:44:06.710862|1|
+social.imirhil.fr|2019-12-11 18:44:06.710862|1|
+social.quodverum.com|2019-12-11 18:44:06.710862|1|Razzisti
+social.targaryen.house|2019-12-11 18:44:06.710862|0|
+switter.at|2019-12-11 18:44:06.710862|0|https://mastodon.bida.im/@jops/101404791975700441
+toot.love|2019-12-11 18:44:06.710862|0|
+unsafe.space|2019-12-11 18:44:06.710862|1|
+woofer.alfter.us|2019-12-11 18:44:06.710862|1|
+wrongthink.net|2019-12-11 18:44:06.710862|1|

+ 157 - 0
web/admin/crawler/blacklist_cagi.txt

@@ -0,0 +1,157 @@
+2.distsn.org|2019-12-11 18:44:06.710862|1|Spam
+2hu.club|2019-12-11 18:44:06.710862|1|Pedo – Loli
+anitwitter.com|2019-12-11 18:44:06.710862|1|
+anitwitter.moe|2019-12-11 18:44:06.710862|1|Bad Moderation
+ap.torlipen.net|2019-12-11 18:44:06.710862|1|
+ap.uwu.st|2019-12-11 18:44:06.710862|1|Harassment or abuse
+babymetal.party|2019-12-11 18:44:06.710862|1|
+baraag.net|2019-12-11 18:44:06.710862|1|Pedo – Loli
+beehub.org|2019-12-11 18:44:06.710862|0|
+blob.cat|2019-12-11 18:44:06.710862|1|
+bodybuilding.im|2019-12-11 18:44:06.710862|1|
+bofa.lol|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+bsd.moe|2019-12-11 18:44:06.710862|1|
+carnal-gabhub.protohype.net|2019-12-11 18:44:06.710862|1|
+civiq.social|2019-12-11 18:44:06.710862|1|
+cofe.moe|2019-12-11 18:44:06.710862|1|Bad Moderation
+comm.network|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+community.halle-leaks.de|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+community.highlandarrow.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+counter.social|2019-12-11 18:44:06.710862|1|Bad Moderation
+cryzed.com|2019-12-11 18:44:06.710862|0|
+cyzed.com|2019-12-11 18:44:06.710862|1|
+daffodil-11.org|2019-12-11 18:44:06.710862|1|Malicious site
+dev.civiq.social|2019-12-11 18:44:06.710862|1|
+develop.gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+dickshow.social|2019-12-11 18:44:06.710862|1|Bad Moderation
+djitter.com|2019-12-11 18:44:06.710862|1|
+ediot.social|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+ediot.socialsilence|2019-12-11 18:44:06.710862|1|
+ekrem.develop.gab.com|2019-12-11 18:44:06.710862|1|
+exited.eu|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+explosion.party|2019-12-11 18:44:06.710862|1|Conspiracy theories
+fedichive.tk|2019-12-11 18:44:06.710862|1|
+feminism.lgbt|2019-12-11 18:44:06.710862|1|Pedo – Loli
+freefedifollowers.ga|2019-12-11 18:44:06.710862|1|
+freehold.earth|2019-12-11 18:44:06.710862|1|Bad Moderation
+freespeechextremist.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+freespeech.firedragonstudios.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+freespeech.host|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+freevoice.space|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+freezepeach.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.ai|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gabble.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gabfed.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.io|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.polaris-1.work|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.protohype.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gab.sleek.eu|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gameliberty.club|2019-12-11 18:44:06.710862|1|Pedo – Loli
+gasthe.lgbt|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gnusocial.no|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+goldandblack.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gorf.club|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+gorf.club|2019-12-11 18:44:06.710862|1|
+gs.archae.me|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gs.kawa-kun.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gs.mon5t3r.info|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+gs.smuglo.li|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+hakui.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+homura.space|2019-12-11 18:44:06.710862|1|Harassment or abuse
+ika.moe|2019-12-11 18:44:06.710862|1|Bad Moderation
+impeccable.social|2019-12-11 18:44:06.710862|1|
+inditoot.com|2019-12-11 18:44:06.710862|1|Bad Moderation
+jabb.in|2019-12-11 18:44:06.710862|1|Conspiracy theories
+juche.town|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+karolat.press|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+kawaiistu.moe|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+kawen.space|2019-12-11 18:44:06.710862|1|
+kazvam.com|2019-12-11 18:44:06.710862|1|
+kipper.im|2019-12-11 18:44:06.710862|1|Bad Moderation
+kiwifarms.cc|2019-12-11 18:44:06.710862|0|
+kneegrows.top|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+kowai.youkai.town|2019-12-11 18:44:06.710862|1|Pedo – Loli
+kyot.me|2019-12-11 18:44:06.710862|1|Bad Moderation
+liberdon.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+libertarianism.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+libre.tube|2019-12-11 18:44:06.710862|0|
+loli.estate|2019-12-11 18:44:06.710862|1|Pedo – Loli
+lolis.world|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+manx.social|2019-12-11 18:44:06.710862|1|Bad Moderation
+mastodon.loliandstuff.moe|2019-12-11 18:44:06.710862|1|Pedo – Loli
+mastodon.starrevolution.org|2019-12-11 18:44:06.710862|1|
+mast.wholemars.com|2019-12-11 18:44:06.710862|0|
+melalandia.tk|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+me.nooruul.xyz|2019-12-11 18:44:06.710862|1|
+mobile.co|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+neckbeard.xyz|2019-12-11 18:44:06.710862|1|
+neenster.org|2019-12-11 18:44:06.710862|1|
+newjack.city|2019-12-11 18:44:06.710862|1|Spam
+niu.moe|2019-12-11 18:44:06.710862|1|Bad Moderation
+noagendasocial.com|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+not-develop.gab.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+not.phrack.fyi|2019-12-11 18:44:06.710862|1|Conspiracy theories
+npf.mlpol.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+pawoo.net|2019-12-11 18:44:06.710862|1|Pedo – Loli
+pleroma.cucked.me|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+pleroma.rareome.ga|2019-12-11 18:44:06.710862|1|Malicious site
+pleroma.soykaf.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+pleroma.wolfie.pw|2019-12-11 18:44:06.710862|1|
+pleroma.yorha.club|2019-12-11 18:44:06.710862|1|Harassment or abuse
+pleville.net|2019-12-11 18:44:06.710862|0|
+pl.smuglo.li|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+porntoot.com|2019-12-11 18:44:06.710862|1|Pedo – Loli
+preteen.biz|2019-12-11 18:44:06.710862|1|Pedo – Loli
+preteengirls.bi|2019-12-11 18:44:06.710862|1|Pedo – Loli
+preteengirls.biz|2019-12-11 18:44:06.710862|1|Pedo – Loli
+pridelands.io|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+qoto.org|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+quey.org|2019-12-11 18:44:06.710862|0|
+quitter.pw|2019-12-11 18:44:06.710862|1|
+quodverum.com|2019-12-11 18:44:06.710862|1|
+rainbowdash.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+rapefeminists.network|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+sealion.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+shitasstits.life|2019-12-11 18:44:06.710862|1|Pedo – Loli
+shitposter.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.allthefallen.ninja|2019-12-11 18:44:06.710862|1|Pedo – Loli
+social.au2pb.net|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.guizzyordi.info|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.heldscal.la|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.hidamari.blue|2019-12-11 18:44:06.710862|0|
+social.homunyan.com|2019-12-11 18:44:06.710862|0|
+social.i2p.rocks|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.imirhil.fr|2019-12-11 18:44:06.710862|1|
+social.louisoft01.moe|2019-12-11 18:44:06.710862|1|Conspiracy theories
+social.lucci.xyz|2019-12-11 18:44:06.710862|1|Conspiracy theories
+social.quodverum.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+social.raptorengineering.io|2019-12-11 18:44:06.710862|1|Advertisement
+social.sunshinegardens.org|2019-12-11 18:44:06.710862|1|Harassment or abuse
+social.super-niche.club|2019-12-11 18:44:06.710862|1|Pedo – Loli
+social.targaryen.house|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+social.wiuwiu.de|2019-12-11 18:44:06.710862|1|Bad Moderation
+socnet.supes.com|2019-12-11 18:44:06.710862|1|Bad Moderation
+spinster.dev|2019-12-11 18:44:06.710862|1|
+spinster.xyz|2019-12-11 18:44:06.710862|1|
+sunshinegardens.org|2019-12-11 18:44:06.710862|1|Harassment or abuse
+thechad.zone|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+the.hedgehoghunter.club|2019-12-11 18:44:06.710862|0|
+toot.love|2019-12-11 18:44:06.710862|1|Bad Moderation
+unsafe.space|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+vampire.estate|2019-12-11 18:44:06.710862|1|Pedo – Loli
+video.halle-leaks.de|2019-12-11 18:44:06.710862|1|
+vipgirlfriend.xxx|2019-12-11 18:44:06.710862|1|Unflagged porn
+voluntaryism.club|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+wagesofsinisdeath.com|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+waifu.social|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+warc.space|2019-12-11 18:44:06.710862|0|
+weeaboo.space|2019-12-11 18:44:06.710862|1|Pedo – Loli
+weedis.life|2019-12-11 18:44:06.710862|0|
+wogan.im|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+woofer.alfter.us|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech
+wrongthink.net|2019-12-11 18:44:06.710862|0|Fascism – Hatespeech
+wxw.moe|2019-12-11 18:44:06.710862|1|Pedo – Loli
+yiff.rocks|2019-12-11 18:44:06.710862|1|Harassment or abuse
+youkai.town|2019-12-11 18:44:06.710862|1|Pedo – Loli
+zerohack.xyz|2019-12-11 18:44:06.710862|1|Fascism – Hatespeech

+ 113 - 45
crawler/crawler.php → web/admin/crawler/crawler.php

@@ -27,7 +27,8 @@ $opts=array(
 	'onlinecheck'=>true,
 	'timeout'=>5,
 	'biglistfp'=>null,
-	'prodlistfp'=>null
+	'prodlistfp'=>null,
+	'blacklists'=>array()
 );
 
 $help='DESCRIZIONE
@@ -64,13 +65,11 @@ OPZIONI
   bloccate]
   Ogni riga vuota o che cominci con il carattere «#» sarà ignorata.
   Il formato del file delle istanze bloccate è questo: per ogni riga:
-  |<uri della istanza bloccata>|<tipo di blocco>(Silenziata|Sospesa)|
-  [riferimento al motivo del blocco.
-  Le prime 4 righe del file saranno ignorate, così come le righe che non
-  corrispondessero al formato di cui sopra.
-  In futuro utilizzeremo un altro formato, per ora ci stiamo adeguando
-  a quello impiegato da mastodon.bida.im per la sua lista di istanze
-  bloccate.
+  |<uri della istanza bloccata>|<data del blocco>|<tipo di blocco>|
+  <motivazione pubblica del blocco>
+  Esempio di <data del blocco>: «2019-12-11 18:44:06.710862»
+  <tipo di blocco>: «0» per silenziata, «1» per sospesa, «2» per "solo
+  file media e rapporti".
  -b, --biglistfp <file>
   DEVE essere specificata.
   Imposta il file da cui leggere le istanze già testate in passato
@@ -81,6 +80,13 @@ OPZIONI
   Imposta il file da cui leggere (se esiste) e in cui scrivere i dati
   relativi alle istanze corrispondenti ai criteri di selezione descritti
   nel paragrafo «DESCRIZIONE».
+ -B, --blacklistfp
+  Imposta un eventuale file di istanze bloccate aggiuntivo. Per il
+  formato di questi file vedi sopra il paragrafo relativo nella
+  descrizione dell\'opzione «-s, --startinstancesfp». Questa opzione può
+  essere utilizzata più volte per specificare più file di istanze
+  bloccate. Nel file di configurazione è l\'unica opzione con un formato
+  particolare: «blacklistfp=file1[,file2[,file3[...]]].
  -t, --timeout <secondi>
   Imposta il timeout delle richieste http(s) in secondi.
   DEFAULT: '.$opts['timeout'].' secondi.
@@ -158,15 +164,19 @@ if (!is_null($inifp)) {
 	$buf=@parse_ini_file($inifp);
 	if ($buf!==false) {
 		foreach ($buf as $key=>$val) {
-			if (array_key_exists($key,$opts))
-				$opts[$key]=$val;
-			else
+			if (array_key_exists($key,$opts)) {
+				if ($key=='excludeafter') {
+					$opts['excludeafter']=tosec($val);
+					if ($opts['excludeafter']===false)
+						mexit('L’opzione «excludeafter» specificata in «'.$inifp.'» non è in un formato corretto (usa «-h» per vedere la guida).'.N,1);
+				} elseif ($key=='blacklists') {
+					$opts['blacklists']=explode(',',$val);
+				} else {
+					$opts[$key]=$val;
+				}
+			} else {
 				echo('Attenzione: l’opzione «'.$key.'» in «'.$inifp.'» è sconosciuta e sarà ignorata.'.N);
-		}
-		if (array_key_exists('excludeafter',$opts)) {
-			$opts['excludeafter']=tosec($opts['excludeafter']);
-			if ($opts['excludeafter']===false)
-				mexit('L’opzione «excludeafter» specificata in «'.$inifp.'» non è in un formato corretto (usa «-h» per vedere la guida).'.N,1);
+			}
 		}
 	} else {
 		mexit('Attenzione: non ho potuto leggere la configurazione dal file «'.$inifp.'».'.N,1);
@@ -216,6 +226,13 @@ for ($i=1; $i<$argc; $i++) {
 			$i++;
 			$opts['startinstancesfp']=$argv[$i];
 			break;
+			case '-B':
+			case '--blacklistfp':
+			if ($i+1>=$argc || $argv[$i+1]=='')
+				mexit('L’opzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
+			$i++;
+			$opts['blacklists'][]=$argv[$i];
+			break;
 			case '-l':
 			case '--loadbiglist':
 			if ($i+1>=$argc || ($argv[$i+1]!='si' && $argv[$i+1]!='no'))
@@ -276,6 +293,27 @@ if ($opts['loadbiglist']) {
 
 $blinstances=array();
 
+/**
+ * Load a list of blocked instances from a file or URL into the global
+ * $blinstances array.
+ *
+ * Each line is expected to have four pipe-separated fields:
+ *   <domain>|<block date>|<block type>|<public comment>
+ * where the block type is a single digit 0, 1 or 2. Lines starting with «#»
+ * and lines that do not match this layout are skipped. Only the domain is
+ * kept; date, type and comment are matched but not stored.
+ *
+ * Prints a summary line on success. If the source cannot be opened, calls
+ * mexit() with code 1 (mexit() is defined elsewhere; presumably it prints the
+ * message and terminates the script - TODO confirm).
+ *
+ * @param string $bluri Path or URL handed to fopen().
+ */
+function loadblacklist($bluri) {
+	global $blinstances, $opts;
+	// Stream context carrying the configured timeout as the 'http' wrapper option.
+	$context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout'])));
+	// Errors from fopen() are silenced with @; failure is handled in the else branch below.
+	$f=@fopen($bluri,'r',false,$context);
+	if ($f!==false) {
+		// $i counts only the domains newly added by this call.
+		$i=0;
+		while (!feof($f)) {
+			$lin=fgets($f);
+// Example of a matching line:
+//bsd.moe|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554
+			// The first field must be at least two characters long, must not start
+			// with «#» and must not contain «|»; duplicates already present in
+			// $blinstances are not added again.
+			if (preg_match('/^([^#\|]{1}[^\|]+)\|{1}([^\|]+)\|{1}([012]{1})\|{1}(.*)$/',$lin,$buf)===1 && !in_array($buf[1],$blinstances) ) {
+				$i++;
+				$blinstances[]=$buf[1];
+			}
+		}
+		fclose($f);
+		// Report how many domains this source added and the running total.
+		echo('OK :-) (+'.$i.' istanze bloccate caricate; totale: '.count($blinstances).')'.N);
+	} else {
+		mexit('ERRORE :-('.N,1);
+	}
+}
+
 if ($opts['onlinecheck']) {
 
 	$startinstances=array();
@@ -297,33 +335,23 @@ if ($opts['onlinecheck']) {
 	if (count($startinstances)<1)
 		mexit('Il file delle istanze di partenza «'.$opts['startinstancesfp'].'» non contiene alcuna voce, muoio.'.N,1);
 
-	$context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout'])));
-
 	foreach ($startinstances as $dom=>$bluri) {
 		if (!is_null($bluri)) {
 			echo('Recupero la lista delle istanze bloccate da «'.$dom.'» («'.$bluri.'») ... ');
-			$f=@fopen($bluri,'r',false,$context);
-			if ($f!==false) {
-	// le prime 4 righe non ci interessano
-				for ($i=0; $i<4; $i++)
-					fgets($f);
-				while (!feof($f)) {
-					$lin=fgets($f);
-					if (preg_match('/^\|([^\|]*)\|([^\|]*)\|([^\|]*)\|$/',$lin,$buf)===1)
-						$blinstances[]=$buf[1];
-				}
-				fclose($f);
-				echo('OK :-)'.N);
-			} else {
-				echo('ERRORE :-('.N);
-			}
+			loadblacklist($bluri);
 		} else {
 			echo('NON recupero la lista delle istanze bloccate da «'.$dom.'»: la uri della stessa non è definita.'.N);
 		}
 	}
-	ksort($blinstances);
+	foreach ($opts['blacklists'] as $bluri) {
+		echo('Carico le istanze bloccate dall\'URI «'.$bluri.'» ... ');
+		loadblacklist($bluri);
+	}
+	sort($blinstances);
 	echo(count($blinstances).' istanze bloccate.'.N);
 
+	$context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout'])));
+
 	foreach ($startinstances as $dom=>$bluri) {
 		if (!array_key_exists($dom,$biglist))
 			$biglist[$dom]=null;
@@ -359,8 +387,25 @@ $newbiglist=array();
 $i=0;
 $biglistc=count($biglist);
 foreach ($biglist as $dom=>$oinfo) {
+	$i++;
+	echo('~~~~~~'.N);
 	if ($opts['onlinecheck']) {
-		echo('Recupero le informazioni su «'.$dom.'» ('.($i+1).'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... ');
+
+		echo('Recupero le informazioni Nodeinfo su «'.$dom.'» ('.$i.'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... ');
+		$buf=@file_get_contents('https://'.$dom.'/nodeinfo/2.0',false,$context);
+		if ($buf!==false) {
+			echo('OK :-)'.N);
+			$ninfo=json_decode($buf,true);
+			if (!is_null($ninfo) && array_key_exists('software',$ninfo) && array_key_exists('name',$ninfo['software'])) {
+				echo('Il software è '.$ninfo['software']['name'].'!'.N);
+			} else {
+				echo('Impossibile determinare il software!'.N);
+			}
+		} else {
+			echo('ERRORE :-('.N);
+		}
+
+		echo('Recupero le informazioni API su «'.$dom.'» ('.$i.'/'.$biglistc.' - '.round(100/$biglistc*$i).'%) ... ');
 		$buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context);
 		if ($buf!==false) {
 			echo('OK :-)'.N);
@@ -386,19 +431,41 @@ foreach ($biglist as $dom=>$oinfo) {
 			}
 			$info=$oinfo;
 		}
-		$i++;
 	} else {
 		$info=$oinfo;
 	}
-	if (!is_null($info)
-		&& !in_array($dom,$blinstances)
-		&& array_key_exists('registrations',$info) && $info['registrations']==true
-		&& array_key_exists('stats',$info) && array_key_exists('user_count',$info['stats']) && $info['stats']['user_count']>10 && $info['stats']['user_count']<=30000
-		&& array_key_exists('domain_count',$info['stats']) && $info['stats']['domain_count']>=500
-		&& array_key_exists('status_count',$info['stats']) && $info['stats']['status_count']/$info['stats']['user_count']>=10
-/*		&& array_key_exists('contact_account',$info) && array_key_exists('created_at',$info['contact_account'])
-		&& time()-strtotime($info['contact_account']['created_at'])>=6*30*24*60*60*/
-	) {
+	$whynot=array();
+	if (in_array($dom,$blinstances))
+		$whynot[]='Istanza blacklistata';
+	if (array_key_exists('uri',$info)) {
+		if (!array_key_exists('registrations',$info))
+			$whynot[]='Stato delle registrazioni non disponibile';
+		elseif ($info['registrations']==false)
+			$whynot[]='Registrazioni chiuse';
+		if (!array_key_exists('stats',$info))
+			$whynot[]='Stats non disponibili';
+		elseif (!array_key_exists('user_count',$info['stats']))
+			$whynot[]='Numero utenti non disponibile';
+		elseif ($info['stats']['user_count']<10 || $info['stats']['user_count']>30000)
+			$whynot[]='Numero utenti ('.$info['stats']['user_count'].') non compreso tra 10 e 30000';
+		elseif (!array_key_exists('domain_count',$info['stats']))
+			$whynot[]='Numero istanze conosciute non disponibile';
+		elseif ($info['stats']['domain_count']<500)
+			$whynot[]='Numero istanze conosciute minore di 500';
+		elseif (!array_key_exists('status_count',$info['stats']))
+			$whynot[]='Numero di toots non disponibile';
+		elseif ($info['stats']['status_count']/$info['stats']['user_count']<10)
+			$whynot[]='Media dei toots per utente minore di 10';
+		if (!array_key_exists('contact_account',$info) || is_null($info['contact_account']))
+			$whynot[]='Informazioni sull\'account admin principale non disponibili';
+		elseif (!array_key_exists('created_at',$info['contact_account']))
+			$whynot[]='Data di creazione dell\'account admin principale non disponibile';
+		elseif (time()-strtotime($info['contact_account']['created_at'])<6*31*24*60*60)
+			$whynot[]='L\'account admin principale risulta esser stato creato meno di 6 mesi fa';
+	} elseif (!array_key_exists($dom,$prodlist)) {
+		$whynot[]='Info non disponibili, e l\'istanza non era già presente nella lista delle istanze occhei';
+	}
+	if (count($whynot)==0) {
 		if (array_key_exists($dom,$prodlist)) {
 			if (array_key_exists('short_description',$info) && (!array_key_exists('short_description',$prodlist[$dom]) || $prodlist[$dom]['short_description']!=$info['short_description'])) {
 				$info['short_description_changed']=true;
@@ -430,6 +497,7 @@ foreach ($biglist as $dom=>$oinfo) {
 		} else {
 			echo('«'.$dom.'» non era nella lista delle istanze occhei e NON CI È ENTRATA! :-('.N);
 		}
+		echo('Motivazioni: '.implode('; ',$whynot).'.'.N);
 	}
 }
 

+ 8 - 0
web/admin/crawler/crawler_pant.ini

@@ -0,0 +1,8 @@
+excludeafter=1M
+startinstancesfp=istanzesorelle_pant
+loadbiglist=true
+onlinecheck=true
+timeout=5
+biglistfp=biglist_pant.json
+prodlistfp=prodlist_pant.json
+blacklists=blacklist_cagi.txt

+ 8 - 0
web/admin/crawler/crawler_sorellanza.ini

@@ -0,0 +1,8 @@
+excludeafter=1M
+startinstancesfp=istanzesorelle
+loadbiglist=true
+onlinecheck=true
+timeout=5
+biglistfp=biglist.json
+prodlistfp=prodlist.json
+blacklists=blacklist_cagi.txt

+ 5 - 0
web/admin/crawler/istanzesorelle

@@ -0,0 +1,5 @@
+mastodon.bida.im|blacklist_bida.txt
+mastodon.cisti.org|
+nebbia.fail|
+stereodon.social|
+snapj.saja.freemyip.com|

+ 1 - 0
web/admin/crawler/istanzesorelle_pant

@@ -0,0 +1 @@
+pantagruel.dnsup.net|blacklist_bida.txt

+ 1 - 1
web/theme.css

@@ -205,7 +205,7 @@ a:active {
 #summcont {
 	background-color: rgba(0,0,0,0.85);
 	position: fixed;
-	width: 200pt;
+	width: 300pt;
 	height: calc(100% - 40px);
 	top: 40px;
 	left: 0;