This commit is contained in:
pezcurrel 2020-10-13 15:46:45 +02:00
parent 59a2d3f98a
commit 80eb9cef2c
20 changed files with 8 additions and 274407 deletions

15
.gitignore vendored
View file

@ -1,13 +1,14 @@
/appunti.txt
/web/zzz-materiali/
/web/mustard/crawler/crawler.log
/web/mustard/crawler/currinst.job
/web/mustard/crawler/instances.job
/web/mustard/crawler/instances.json
/web/mustard/crawler/peers
/web/mustard/crawler/zzz-materiali/
/web/mustard/clitools/crawler.log
/web/mustard/clitools/currinst.job
/web/mustard/clitools/instances.job
/web/mustard/clitools/instances.json
/web/mustard/clitools/peers
/web/mustard/clitools/peers.all
/web/mustard/clitools/peerscrawl.log
/web/mustard/clitools/zzz-materiali/
/web/mustard/sec/mustard.ini
/web/mustard/zzz-estemp/
/web/mustard/zzz-materiali/mastostart_solo_struttura.sql.gz
/web/mustard/zzz-materiali/geonames/
/web/mustard/zzz-materiali/locales.json

View file

@ -1 +0,0 @@
a.nti.social 34 32 8

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,788 +0,0 @@
(
[uri] => a11y.info
[title] => a11y.info
[short_description] =>
[description] => An open community for those interested in learning more about digital accessibility.
[email] => spell@spellacy.net
[version] => 3.0.1
[urls] => Array
(
[streaming_api] => wss://a11y.info
)
[stats] => Array
(
[user_count] => 36
[status_count] => 263
[domain_count] => 2321
)
[thumbnail] => https://cf.mastohost.com/v1/AUTH_91eb37814936490c95da7b85993cc2ff/a11yinfo/site_uploads/files/000/000/001/original/p9HR75p9_400x400.jpg
[languages] => Array
(
[0] => en
)
[max_toot_chars] => 2048
[registrations] => 1
[approval_required] =>
[contact_account] => Array
(
[id] => 1
[username] => spellacy
[acct] => spellacy
[display_name] => Michael Spellacy (Spell)
[locked] =>
[bot] =>
[created_at] => 2018-08-20T13:24:38.839Z
[note] => <p>Spell is a web developer, designer and accessibility specialist living in beautiful Allentown, PA. Opinions own. He/Him</p>
[url] => https://a11y.info/@spellacy
[avatar] => https://cf.mastohost.com/v1/AUTH_91eb37814936490c95da7b85993cc2ff/a11yinfo/accounts/avatars/000/000/001/original/8b5b9f0cc3aa2868.jpg
[avatar_static] => https://cf.mastohost.com/v1/AUTH_91eb37814936490c95da7b85993cc2ff/a11yinfo/accounts/avatars/000/000/001/original/8b5b9f0cc3aa2868.jpg
[header] => https://cf.mastohost.com/v1/AUTH_91eb37814936490c95da7b85993cc2ff/a11yinfo/accounts/headers/000/000/001/original/20b5a0caf0069359.jpg
[header_static] => https://cf.mastohost.com/v1/AUTH_91eb37814936490c95da7b85993cc2ff/a11yinfo/accounts/headers/000/000/001/original/20b5a0caf0069359.jpg
[followers_count] => 98
[following_count] => 102
[statuses_count] => 197
[last_status_at] => 2019-12-16T17:44:21.756Z
[emojis] => Array
(
)
[fields] => Array
(
[0] => Array
(
[name] => Website
[value] => <a href="https://michaelspellacy.com" rel="me nofollow noopener" target="_blank"><span class="invisible">https://</span><span class="">michaelspellacy.com</span><span class="invisible"></span></a>
[verified_at] => 2019-08-02T01:31:55.195+00:00
)
)
)
[x-activity] => Array
(
[0] => Array
(
[week] => 1576454400
[statuses] => 1
[logins] => 6
[registrations] => 0
)
[1] => Array
(
[week] => 1575849600
[statuses] => 0
[logins] => 7
[registrations] => 0
)
[2] => Array
(
[week] => 1575244800
[statuses] => 6
[logins] => 5
[registrations] => 0
)
[3] => Array
(
[week] => 1574640000
[statuses] => 0
[logins] => 5
[registrations] => 0
)
[4] => Array
(
[week] => 1574035200
[statuses] => 0
[logins] => 5
[registrations] => 0
)
[5] => Array
(
[week] => 1573430400
[statuses] => 0
[logins] => 5
[registrations] => 0
)
[6] => Array
(
[week] => 1572825600
[statuses] => 5
[logins] => 5
[registrations] => 0
)
[7] => Array
(
[week] => 1572220800
[statuses] => 0
[logins] => 5
[registrations] => 0
)
[8] => Array
(
[week] => 1571616000
[statuses] => 0
[logins] => 4
[registrations] => 0
)
[9] => Array
(
[week] => 1571011200
[statuses] => 0
[logins] => 6
[registrations] => 0
)
[10] => Array
(
[week] => 1570406400
[statuses] => 0
[logins] => 4
[registrations] => 0
)
[11] => Array
(
[week] => 1569801600
[statuses] => 0
[logins] => 4
[registrations] => 0
)
)
[x-nodeinfo] => Array
(
[version] => 2.0
[software] => Array
(
[name] => mastodon
[version] => 3.0.1
)
[protocols] => Array
(
[0] => activitypub
)
[usage] => Array
(
[users] => Array
(
[total] => 36
[activeMonth] => 8
[activeHalfyear] => 11
)
[localPosts] => 263
)
[openRegistrations] => 1
)
[x-trends] => Array
(
[0] => Array
(
[name] => ニックネームの由来を教えてくれ
[url] => https://101010.pl/tags/%E3%83%8B%E3%83%83%E3%82%AF%E3%83%8D%E3%83%BC%E3%83%A0%E3%81%AE%E7%94%B1%E6%9D%A5%E3%82%92%E6%95%99%E3%81%88%E3%81%A6%E3%81%8F%E3%82%8C
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 0
[accounts] => 0
)
[4] => Array
(
[day] => 1576627200
[uses] => 0
[accounts] => 0
)
[5] => Array
(
[day] => 1576540800
[uses] => 0
[accounts] => 0
)
[6] => Array
(
[day] => 1576454400
[uses] => 0
[accounts] => 0
)
)
)
[1] => Array
(
[name] => jamiaprotest
[url] => https://101010.pl/tags/jamiaprotest
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 2
[accounts] => 2
)
[4] => Array
(
[day] => 1576627200
[uses] => 1
[accounts] => 1
)
[5] => Array
(
[day] => 1576540800
[uses] => 11
[accounts] => 5
)
[6] => Array
(
[day] => 1576454400
[uses] => 49
[accounts] => 11
)
)
)
[2] => Array
(
[name] => SOSJAMIA
[url] => https://101010.pl/tags/SOSJAMIA
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 0
[accounts] => 0
)
[4] => Array
(
[day] => 1576627200
[uses] => 0
[accounts] => 0
)
[5] => Array
(
[day] => 1576540800
[uses] => 0
[accounts] => 0
)
[6] => Array
(
[day] => 1576454400
[uses] => 1
[accounts] => 1
)
)
)
[3] => Array
(
[name] => jamia
[url] => https://101010.pl/tags/jamia
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 1
[accounts] => 1
)
[3] => Array
(
[day] => 1576713600
[uses] => 0
[accounts] => 0
)
[4] => Array
(
[day] => 1576627200
[uses] => 2
[accounts] => 2
)
[5] => Array
(
[day] => 1576540800
[uses] => 5
[accounts] => 4
)
[6] => Array
(
[day] => 1576454400
[uses] => 14
[accounts] => 7
)
)
)
[4] => Array
(
[name] => jamiamilia
[url] => https://101010.pl/tags/jamiamilia
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 2
[accounts] => 2
)
[4] => Array
(
[day] => 1576627200
[uses] => 0
[accounts] => 0
)
[5] => Array
(
[day] => 1576540800
[uses] => 5
[accounts] => 4
)
[6] => Array
(
[day] => 1576454400
[uses] => 15
[accounts] => 9
)
)
)
[5] => Array
(
[name] => wearenameless
[url] => https://101010.pl/tags/wearenameless
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 0
[accounts] => 0
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 0
[accounts] => 0
)
[4] => Array
(
[day] => 1576627200
[uses] => 0
[accounts] => 0
)
[5] => Array
(
[day] => 1576540800
[uses] => 0
[accounts] => 0
)
[6] => Array
(
[day] => 1576454400
[uses] => 0
[accounts] => 0
)
)
)
[6] => Array
(
[name] => cuteposting
[url] => https://101010.pl/tags/cuteposting
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 19
[accounts] => 3
)
[1] => Array
(
[day] => 1576886400
[uses] => 26
[accounts] => 4
)
[2] => Array
(
[day] => 1576800000
[uses] => 11
[accounts] => 2
)
[3] => Array
(
[day] => 1576713600
[uses] => 27
[accounts] => 2
)
[4] => Array
(
[day] => 1576627200
[uses] => 22
[accounts] => 3
)
[5] => Array
(
[day] => 1576540800
[uses] => 13
[accounts] => 2
)
[6] => Array
(
[day] => 1576454400
[uses] => 6
[accounts] => 3
)
)
)
[7] => Array
(
[name] => nowplaying
[url] => https://101010.pl/tags/nowplaying
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 9
[accounts] => 4
)
[1] => Array
(
[day] => 1576886400
[uses] => 40
[accounts] => 11
)
[2] => Array
(
[day] => 1576800000
[uses] => 15
[accounts] => 6
)
[3] => Array
(
[day] => 1576713600
[uses] => 21
[accounts] => 4
)
[4] => Array
(
[day] => 1576627200
[uses] => 27
[accounts] => 7
)
[5] => Array
(
[day] => 1576540800
[uses] => 33
[accounts] => 6
)
[6] => Array
(
[day] => 1576454400
[uses] => 24
[accounts] => 4
)
)
)
[8] => Array
(
[name] => christmas
[url] => https://101010.pl/tags/christmas
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 6
[accounts] => 5
)
[1] => Array
(
[day] => 1576886400
[uses] => 4
[accounts] => 4
)
[2] => Array
(
[day] => 1576800000
[uses] => 10
[accounts] => 10
)
[3] => Array
(
[day] => 1576713600
[uses] => 9
[accounts] => 7
)
[4] => Array
(
[day] => 1576627200
[uses] => 10
[accounts] => 8
)
[5] => Array
(
[day] => 1576540800
[uses] => 7
[accounts] => 7
)
[6] => Array
(
[day] => 1576454400
[uses] => 11
[accounts] => 10
)
)
)
[9] => Array
(
[name] => greve17decembre
[url] => https://101010.pl/tags/greve17decembre
[history] => Array
(
[0] => Array
(
[day] => 1576972800
[uses] => 0
[accounts] => 0
)
[1] => Array
(
[day] => 1576886400
[uses] => 1
[accounts] => 1
)
[2] => Array
(
[day] => 1576800000
[uses] => 0
[accounts] => 0
)
[3] => Array
(
[day] => 1576713600
[uses] => 1
[accounts] => 1
)
[4] => Array
(
[day] => 1576627200
[uses] => 1
[accounts] => 1
)
[5] => Array
(
[day] => 1576540800
[uses] => 61
[accounts] => 16
)
[6] => Array
(
[day] => 1576454400
[uses] => 18
[accounts] => 12
)
)
)
)
)

View file

@ -1 +0,0 @@
url blacklist bida: https://git.lattuga.net/bida/suspended-silenced-instances/raw/master/README.md

View file

@ -1,36 +0,0 @@
anitwitter.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
anitwitter.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
babymetal.party 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
baraag.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
bsd.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://mastodon.bida.im/@Ca_Gi/101270762003908554
ediot.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
freespeechextremist.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
freespeech.firedragonstudios.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://mastodon.bida.im/@Ca_Gi/101344114624456297
freezepeach.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
gorf.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
gs.smuglo.li 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
humblr.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f https://mastodon.bida.im/@cirku17/101399587014096355
ika.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
mastodon.starrevolution.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://a.nom.pl/notice/450131
mobile.co 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://mastodon.bida.im/@Ca_Gi/101355947506820592
neckbeard.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f Instance suspended: neckbeard.xyz - anime nazi shit, irony bro admin
newjack.city 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
noagendasocial.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
pawoo.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
pl.smuglo.li 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
porntoot.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
preteengirls.biz 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
quodverum.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://mastodon.bida.im/@Ca_Gi/101514801964087604
sealion.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
shitposter.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
shitposter.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f https://mastodon.bida.im/@Ca_Gi/101270762003908554
social.au2pb.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
social.heldscal.la 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
social.imirhil.fr 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
social.quodverum.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f Razzisti
social.targaryen.house 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
switter.at 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f https://mastodon.bida.im/@jops/101404791975700441
toot.love 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 0 f f
unsafe.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
woofer.alfter.us 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f
wrongthink.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:05.111111 1 f f

View file

@ -1,157 +0,0 @@
2.distsn.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Spam
2hu.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
anitwitter.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
anitwitter.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
ap.torlipen.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
ap.uwu.st 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
babymetal.party 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
baraag.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
beehub.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
blob.cat 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
bodybuilding.im 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
bofa.lol 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
bsd.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
carnal-gabhub.protohype.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
civiq.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
cofe.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
comm.network 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
community.halle-leaks.de 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
community.highlandarrow.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
counter.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
cryzed.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
cyzed.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
daffodil-11.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Malicious site
dev.civiq.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
develop.gab.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
dickshow.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
djitter.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
ediot.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
ediot.socialsilence 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
ekrem.develop.gab.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
exited.eu 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
explosion.party 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Conspiracy theories
fedichive.tk 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
feminism.lgbt 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
freefedifollowers.ga 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
freehold.earth 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
freespeechextremist.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
freespeech.firedragonstudios.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
freespeech.host 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
freevoice.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
freezepeach.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.ai 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gabble.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gabfed.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.io 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.polaris-1.work 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.protohype.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gab.sleek.eu 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gameliberty.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
gasthe.lgbt 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gnusocial.no 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
goldandblack.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gorf.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
gorf.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
gs.archae.me 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gs.kawa-kun.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gs.mon5t3r.info 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
gs.smuglo.li 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
hakui.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
homura.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
ika.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
impeccable.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
inditoot.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
jabb.in 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Conspiracy theories
juche.town 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
karolat.press 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
kawaiistu.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
kawen.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
kazvam.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
kipper.im 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
kiwifarms.cc 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
kneegrows.top 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
kowai.youkai.town 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
kyot.me 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
liberdon.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
libertarianism.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
libre.tube 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
loli.estate 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
lolis.world 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
manx.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
mastodon.loliandstuff.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
mastodon.starrevolution.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
mast.wholemars.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
melalandia.tk 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
me.nooruul.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
mobile.co 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
neckbeard.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
neenster.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
newjack.city 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Spam
niu.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
noagendasocial.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
not-develop.gab.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
not.phrack.fyi 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Conspiracy theories
npf.mlpol.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
pawoo.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
pleroma.cucked.me 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
pleroma.rareome.ga 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Malicious site
pleroma.soykaf.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
pleroma.wolfie.pw 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
pleroma.yorha.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
pleville.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
pl.smuglo.li 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
porntoot.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
preteen.biz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
preteengirls.bi 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
preteengirls.biz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
pridelands.io 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
qoto.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
quey.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
quitter.pw 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
quodverum.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
rainbowdash.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
rapefeminists.network 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
sealion.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
shitasstits.life 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
shitposter.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.allthefallen.ninja 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
social.au2pb.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.guizzyordi.info 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.heldscal.la 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.hidamari.blue 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
social.homunyan.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
social.i2p.rocks 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.imirhil.fr 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
social.louisoft01.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Conspiracy theories
social.lucci.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Conspiracy theories
social.quodverum.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
social.raptorengineering.io 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Advertisement
social.sunshinegardens.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
social.super-niche.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
social.targaryen.house 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
social.wiuwiu.de 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
socnet.supes.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
spinster.dev 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
spinster.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
sunshinegardens.org 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
thechad.zone 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
the.hedgehoghunter.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
toot.love 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Bad Moderation
unsafe.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
vampire.estate 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
video.halle-leaks.de 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f
vipgirlfriend.xxx 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Unflagged porn
voluntaryism.club 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
wagesofsinisdeath.com 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
waifu.social 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
warc.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
weeaboo.space 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
weedis.life 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f
wogan.im 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
woofer.alfter.us 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech
wrongthink.net 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 0 f f Fascism Hatespeech
wxw.moe 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
yiff.rocks 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Harassment or abuse
youkai.town 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Pedo Loli
zerohack.xyz 2019-12-11 18:44:05.111111 2019-12-11 18:44:06.710862 1 f f Fascism Hatespeech

View file

@ -1,612 +0,0 @@
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Shorthand for the newline appended to every message printed by the script.
define('N',"\n");

// Path of the configuration file given with «-i/--inifp»; null when none is given.
$inifp=null;

// Default option values; the configuration file and the command line may override them.
$opts=array();
// Time span, in seconds, after which an instance that does not respond is dropped (30 days).
$opts['excludeafter']=30*24*60*60;
// File listing the start instances and their block lists (-s).
$opts['startinstancesfp']=null;
// Whether to load the list of instances already tested in the past (-l).
$opts['loadbiglist']=true;
// Whether to query the known instances (-c).
$opts['onlinecheck']=true;
// Timeout of the http(s) requests, in seconds (-t).
$opts['timeout']=5;
// File holding the data of every tested instance (-b).
$opts['biglistfp']=null;
// File holding the data of the instances matching the selection criteria (-p).
$opts['prodlistfp']=null;
// Additional block list files (-B, may be repeated).
$opts['blacklists']=array();
// Files listing instances that must never be discarded (-w, may be repeated).
$opts['whitelists']=array();
// Usage text of the script (in Italian): description, syntax and the list of
// command line options. The default timeout shown to the user is interpolated
// from $opts['timeout'], so $opts has to be defined before this statement.
// NOTE(review): presumably printed when «-h/--help» is given; the code that
// prints it is not visible in this part of the file.
$help='DESCRIZIONE
Questo script parte da una selezione di istanze Mastodon («istanze
di partenza»), ciascuna con una relativa lista di istanze da essa
bloccate (che può anche essere omessa), e genera/aggiorna due liste:
una che conterrà i dati di tutte le istanze di partenza e delle istanze
ad esse note (comprese quelle bloccate, escluse soltanto quelle che non
rispondono da un lasso di tempo impostabile), e una che conterrà solo
le istanze non bloccate, la cui piattaforma è mastodon, che danno
possibilità di iscrizione di nuovi utenti, il cui numero di utenti
è compreso tra 11 e 30000, che conoscono almeno altre 500 istanze,
che hanno avuto almeno 10 utenti attivi nell\'ultimo mese o, se questo
dato non è disponibile, la cui media di toot per utente è maggiore
o uguale a 10.
SINTASSI
crawler.php -i <file> | -s <file> -b <file> -p <file> [altre opzioni]
OPZIONI
-i, --inifp <file>
Imposta un file di configurazione da cui leggere le opzioni.
Il formato di questo file è semplice: una opzione per riga in formato
<opzione>=<valore>, dove «opzione» è una qualsiasi tra le opzioni
descritte qui nel suo formato lungo, tranne «inifp» e «help».
Esempio: «startinstancesfp=startinstances.txt».
Il file di configurazione può non contenere tutte le opzioni
disponibili.
Nota bene: tutte le opzioni impostate da riga di comando, che siano
specificate prima o dopo questa, hanno la precedenza su quelle
definite nel file di configurazione.
-s, --startinstancesfp <file>
DEVE essere specificata.
Imposta il file da cui leggere le istanze di partenza e le relative
liste di istanze sospese-silenziate.
Il formato del file è questo: per ogni riga:
<uri della istanza di partenza>|[uri della relativa lista di istanze
bloccate]
Ogni riga vuota o che cominci con il carattere «#» sarà ignorata.
Il formato del file delle istanze bloccate è questo: per ogni riga:
|<uri della istanza bloccata>|<data del blocco>|<tipo di blocco>|
<motivazione pubblica del blocco>
Esempio di <data del blocco>: «2019-12-11 18:44:06.710862»
<tipo di blocco>: «0» per silenziata, «1» per sospesa, «2» per "solo
file media e rapporti".
-b, --biglistfp <file>
DEVE essere specificata.
Imposta il file da cui leggere le istanze già testate in passato
(se il file esiste e non è specificata lopzione «-, vedi sotto)
e in cui scrivere tutti i dati recuperabili delle istanze testate.
-p, --prodlistfp <file>
DEVE essere specificata.
Imposta il file da cui leggere (se esiste) e in cui scrivere i dati
relativi alle istanze corrispondenti ai criteri di selezione descritti
nel paragrafo «DESCRIZIONE».
-B, --blacklistfp
Imposta un eventuale file di istanze bloccate aggiuntivo. Per il
formato di questi file vedi sopra il paragrafo relativo nella
descrizione dell\'opzione «-s, --startinstancesfp». Questa opzione può
essere utilizzata più volte per specificare più file di istanze
bloccate. Nel file di configurazione ha un formato particolare:
«blacklistfp=file1[,file2[,file3[...]]]».
-w, --whitelistfp
Imposta un eventuale file di istanze da non scartare mai, nemmeno
se fanno parte di una delle blacklist utilizzato o non corrispondono
ai criteri di filtraggio. Il formato di questi file è semplice:
un dominio per riga (le righe vuote o che cominciano con il carattere
«#» vengono ignorate. Questa opzione può essere utilizzata più volte
per specificare più whitelist. Nel file di configurazione
ha un formato particolare: «whitelistfp=file1[,file2[,file3[...]]]».
-t, --timeout <secondi>
Imposta il timeout delle richieste http(s) in secondi.
DEFAULT: '.$opts['timeout'].' secondi.
-e, --excludeafter <tempo>
Imposta il lasso di tempo dopo il quale unistanza che non risponde
viene eliminata dal listone di tutte le istanze testate.
«tempo» deve essere specificato come un numero, seguito eventualmente
da un carattere che ne indica lunità di misura: «s» o nessun
carattere per secondi, «m» per minuti, «o» per ore, «g» per giorni,
«S» per settimane, «M» per mesi (30 giorni), «A» per anni.
DEFAULT: 1 mese.
-l, --loadbiglist <si|no>
Dice al programma se caricare o meno il listone delle istanze già
testate in passato.
DEFAULT: «si».
-c, --onlinecheck <si|no>
Dice al programma se interrogare o meno le istanze note.
Se impostato a «no» forza a «si» «loadbiglist» (vedi opzione
precedente).
DEFAULT: «si».
-h, --help
Mostra questo aiuto ed esce.
This program comes with ABSOLUTELY NO WARRANTY; for details see
the source.
This is free software, and you are welcome to redistribute it under
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
/**
 * Prints a final message on standard output and terminates the script.
 *
 * @param string $msg  Text to print before exiting.
 * @param int    $code Process exit status.
 */
function mexit($msg, $code)
{
    print($msg);
    exit($code);
}
/**
 * Converts a duration such as "90", "15m" or "2S" into seconds.
 *
 * Accepted unit suffixes: "s" or none (seconds), "m" (minutes), "o" (hours),
 * "g" (days), "S" (weeks), "M" (months of 30 days), "A" (years of 365 days).
 *
 * @param string $str Duration to convert.
 * @return int|false  Number of seconds, or false when $str is malformed.
 */
function tosec($str)
{
    // Seconds per unit, keyed by the suffix accepted on the command line.
    static $factors = array(
        ''  => 1,
        's' => 1,
        'm' => 60,
        'o' => 3600,
        'g' => 86400,
        'S' => 604800,
        'M' => 2592000,
        'A' => 31536000
    );
    // The pattern is anchored on both ends: trailing garbage ("10x",
    // "5 giorni") must be rejected instead of being read as plain seconds.
    if (preg_match('/^([0-9]+)([smogSMA]?)\z/', $str, $buf) !== 1)
        return(false);
    return($buf[1] * $factors[$buf[2]]);
}
// Preliminary pass over the command line: only the configuration file option
// is looked for here, so that the file can be loaded before every other
// option is parsed (and possibly overridden) further below.
$i = 1;
while ($i < $argc) {
    $isinifpopt = ($argv[$i] == '-i' || $argv[$i] == '--inifp');
    if ($isinifpopt) {
        $hasvalue = ($i + 1 < $argc && $argv[$i + 1] != '');
        if (!$hasvalue)
            mexit('Lopzione «'.$argv[$i].'» richiede di specificare un file di configurazione (usa «-h» per vedere la guida).'.N, 1);
        // Consume the value so that it is not examined as an option.
        $i++;
        $inifp = $argv[$i];
    }
    $i++;
}
// Load the options stored in the configuration file, if one was given.
// Unknown keys only produce a warning; an unreadable file is fatal.
if (!is_null($inifp)) {
    $buf = @parse_ini_file($inifp);
    if ($buf === false)
        mexit('Attenzione: non ho potuto leggere la configurazione dal file «'.$inifp.'».'.N, 1);
    foreach ($buf as $key => $val) {
        if (!array_key_exists($key, $opts)) {
            echo('Attenzione: lopzione «'.$key.'» in «'.$inifp.'» è sconosciuta e sarà ignorata.'.N);
            continue;
        }
        switch ($key) {
            case 'excludeafter':
                // Durations are stored in seconds.
                $opts['excludeafter'] = tosec($val);
                if ($opts['excludeafter'] === false)
                    mexit('Lopzione «excludeafter» specificata in «'.$inifp.'» non è in un formato corretto (usa «-h» per vedere la guida).'.N, 1);
                break;
            case 'blacklists':
                // Comma separated list of files.
                $opts['blacklists'] = explode(',', $val);
                break;
            case 'whitelists':
                // Comma separated list of files.
                $opts['whitelists'] = explode(',', $val);
                break;
            default:
                $opts[$key] = $val;
                break;
        }
    }
}
// Parse the command line. Values given here override those read from the
// configuration file. Every option but «-h» takes exactly one parameter.
for ($i=1; $i<$argc; $i++) {
    if (substr($argv[$i],0,1)=='-') {
        switch($argv[$i]) {
            case '-i':
            case '--inifp':
                // Already handled by the preliminary pass: just skip its value.
                $i++;
                break;
            case '-e':
            case '--excludeafter':
                // FIX: the check for a missing parameter used to have no body of
                // its own, so "$i++" became the body of the "if" and tosec() was
                // fed the option name itself: the option could never be used.
                $optname=$argv[$i];
                if ($i+1>=$argc)
                    mexit('Opzione «'.$optname.'»: formato non corretto (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['excludeafter']=tosec($argv[$i]);
                if ($opts['excludeafter']===false)
                    mexit('Opzione «'.$optname.'»: formato non corretto (usa «-h» per vedere la guida).'.N,1);
                break;
            case '-t':
            case '--timeout':
                if ($i+1>=$argc || preg_match('/^[0-9]+$/',$argv[$i+1])!==1)
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro numerico intero (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['timeout']=$argv[$i];
                break;
            case '-b':
            case '--biglistfp':
                if ($i+1>=$argc || $argv[$i+1]=='')
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['biglistfp']=$argv[$i];
                break;
            case '-p':
            case '--prodlistfp':
                if ($i+1>=$argc || $argv[$i+1]=='')
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['prodlistfp']=$argv[$i];
                break;
            case '-s':
            case '--startinstancesfp':
                if ($i+1>=$argc || $argv[$i+1]=='')
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['startinstancesfp']=$argv[$i];
                break;
            case '-B':
            case '--blacklistfp':
                // May be repeated: every occurrence adds one more blacklist.
                if ($i+1>=$argc || $argv[$i+1]=='')
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['blacklists'][]=$argv[$i];
                break;
            case '-w':
            case '--whitelistfp':
                // May be repeated: every occurrence adds one more whitelist.
                if ($i+1>=$argc || $argv[$i+1]=='')
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro di tipo file (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['whitelists'][]=$argv[$i];
                break;
            case '-l':
            case '--loadbiglist':
                if ($i+1>=$argc || ($argv[$i+1]!='si' && $argv[$i+1]!='no'))
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro («si/no») (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['loadbiglist']=($argv[$i]!='no');
                break;
            case '-c':
            case '--onlinecheck':
                if ($i+1>=$argc || ($argv[$i+1]!='si' && $argv[$i+1]!='no'))
                    mexit('Lopzione «'.$argv[$i].'» richiede un parametro («si/no») (usa «-h» per vedere la guida).'.N,1);
                $i++;
                $opts['onlinecheck']=($argv[$i]!='no');
                break;
            case '-h':
            case '--help':
                mexit($help,1);
                break;
            default:
                mexit('Opzione «'.$argv[$i].'» sconosciuta (usa «-h» per vedere la guida).'.N,1);
                break;
        }
    } else {
        // Bare words are not accepted: every parameter must follow its option.
        mexit('Opzione «'.$argv[$i].'» sconosciuta (usa «-h» per vedere la guida).'.N,1);
    }
}
// Make sure the three mandatory file options were given, collecting one
// complaint per missing option so that they are all reported together.
$required = array(
    'startinstancesfp' => '- Non hai specificato il file delle istanze di partenza («-s/--startinstancesfp»)',
    'biglistfp'        => '- Non hai specificato il file da cui leggere e in cui salvare i dati di tutte le istanze testate («-b/--biglistfp»)',
    'prodlistfp'       => '- Non hai specificato il file da cui leggere e in cui salvare i dati di tutte le istanze testate che corrispondono ai criteri di selezione («-p/--prodlistfp»)'
);
$buf = null;
foreach ($required as $optname => $complaint) {
    if (is_null($opts[$optname]))
        $buf .= $complaint.N;
}
if (!is_null($buf))
    mexit('ERRORI'.N.$buf.'Usa «-h/--help» per leggere la guida.'.N, 1);

// Without online checks the only available data is the stored big list,
// hence loading it becomes mandatory.
if (!$opts['onlinecheck'])
    $opts['loadbiglist'] = true;
// Load the previously saved list of every tested instance ("big list"), if
// requested and if the file is there. $ibiglistc remembers how many entries
// it had, to report the difference at the end of the run.
$biglist=array();
$ibiglistc=0;
if ($opts['loadbiglist']) {
    if (file_exists($opts['biglistfp']) && is_file($opts['biglistfp']) && is_readable($opts['biglistfp'])) {
        echo('Carico la listona pre-esistente («'.$opts['biglistfp'].'») ... ');
        $buf=@file_get_contents($opts['biglistfp']);
        // json_decode() returns null on malformed input: only accept an
        // array, otherwise count() and the main loop would fail later on.
        $decoded=($buf!==false) ? json_decode($buf,true) : null;
        if (is_array($decoded)) {
            echo('OK :-)'.N);
            $biglist=$decoded;
            $ibiglistc=count($biglist);
        } else {
            echo('ERRORE :-('.N);
        }
    }
}
// Domains of blocked and of always-accepted instances, filled further below.
$blinstances=array();
$wlinstances=array();
/**
 * Loads a blacklist from a file or URL and merges its domains into the global
 * $blinstances list, skipping those already present. Terminates the script
 * when the resource cannot be opened.
 *
 * Lines are expected to be made of four «|» separated fields, the first being
 * the domain and the third one of 0, 1 or 2, for example:
 * bsd.moe|2019-12-11 18:44:06.710862|1|https://mastodon.bida.im/@Ca_Gi/101270762003908554
 * Lines starting with «#» and lines not matching this format are ignored.
 *
 * @param string $bluri Path or URL of the blacklist.
 */
function loadblacklist($bluri)
{
    global $blinstances, $opts;
    $ctx = stream_context_create(array('http'=>array('timeout'=>$opts['timeout'])));
    $fh = @fopen($bluri, 'r', false, $ctx);
    if ($fh === false)
        mexit('ERRORE :-('.N, 1);
    $added = 0;
    while (!feof($fh)) {
        $row = fgets($fh);
        if (preg_match('/^([^#\|]{1}[^\|]+)\|{1}([^\|]+)\|{1}([012]{1})\|{1}(.*)$/', $row, $fields) !== 1)
            continue;
        if (in_array($fields[1], $blinstances))
            continue;
        $blinstances[] = $fields[1];
        $added++;
    }
    fclose($fh);
    echo('OK :-) (+'.$added.' istanze bloccate caricate; totale: '.count($blinstances).')'.N);
}
// Load the starting instances: one «domain|blacklist uri» per line, where the
// blacklist uri may be empty; empty lines and lines starting with «#» are
// ignored. The result maps each domain to its blacklist uri (or null).
$startinstances=array();
echo('Carico il file delle istanze di partenza («'.$opts['startinstancesfp'].'») ... ');
$buf=@file_get_contents($opts['startinstancesfp']);
if ($buf!==false) {
    echo('OK :-)'.N);
    $buf=explode(N,$buf);
    foreach ($buf as $val) {
        // Get rid of stray blanks and of the "\r" left by CRLF line endings.
        $val=trim($val);
        if ($val!='' && $val[0]!='#') {
            $kv=explode('|',$val);
            $kv[0]=trim($kv[0]);
            if ($kv[0]=='') continue;
            // A line made of the domain alone has no second field at all.
            $bl=(isset($kv[1])) ? trim($kv[1]) : '';
            $startinstances[$kv[0]]=($bl=='') ? null : $bl;
        }
    }
} else {
    mexit(N.'Non ho potuto caricare il file delle istanze di partenza «'.$opts['startinstancesfp'].'», muoio.'.N,1);
}
if (count($startinstances)<1)
    mexit('Il file delle istanze di partenza «'.$opts['startinstancesfp'].'» non contiene alcuna voce, muoio.'.N,1);
// Blacklists published by the starting instances.
foreach ($startinstances as $dom => $bluri) {
    if (is_null($bluri)) {
        echo('NON recupero la lista delle istanze bloccate da «'.$dom.'»: la uri della stessa non è definita.'.N);
        continue;
    }
    echo('Recupero la lista delle istanze bloccate da «'.$dom.'» («'.$bluri.'») ... ');
    loadblacklist($bluri);
}

// Additional blacklists given through the options.
foreach ($opts['blacklists'] as $bluri) {
    echo('Carico la lista delle istanze bloccate dall\'URI «'.$bluri.'» ... ');
    loadblacklist($bluri);
}
sort($blinstances);
echo(count($blinstances).' istanze bloccate.'.N);

// Whitelists: one domain per line; empty lines and lines starting with «#»
// are skipped, and so are domains already collected.
foreach ($opts['whitelists'] as $wluri) {
    echo('Carico la whitelist delle istanze dall\'URI «'.$wluri.'» ... ');
    $buf = @file_get_contents($wluri);
    if ($buf === false)
        mexit(N.'Non ho potuto caricare la whitelist delle istanze «'.$wluri.'», muoio.'.N, 1);
    echo('OK :-)'.N);
    $buf = explode(N, $buf);
    foreach ($buf as $val) {
        if ($val == '' || $val[0] == '#' || in_array($val, $wlinstances))
            continue;
        $wlinstances[] = $val;
    }
}
sort($wlinstances);
echo(count($wlinstances).' istanze whitelistate.'.N);
// Ask every starting instance for the list of its known peers and add the
// ones not yet known to the big list, with no data (null) attached.
if ($opts['onlinecheck']) {
    $context=stream_context_create(array('http'=>array('timeout'=>$opts['timeout'])));
    foreach ($startinstances as $dom=>$bluri) {
        if (!array_key_exists($dom,$biglist))
            $biglist[$dom]=null;
        echo('Recupero la lista delle istanze note a «'.$dom.'» ... ');
        $buf=@file_get_contents('https://'.$dom.'/api/v1/instance/peers',false,$context);
        // The answer is only usable when it decodes to an array: an error
        // page or a truncated body makes json_decode() return null.
        $peers=($buf!==false) ? json_decode($buf,true) : null;
        if (is_array($peers)) {
            echo('OK :-)'.N);
            foreach ($peers as $pdom) {
                // Only non-empty strings can be used as domain keys.
                if (!is_string($pdom) || $pdom=='')
                    continue;
                if (!array_key_exists($pdom,$biglist))
                    $biglist[$pdom]=null;
            }
        } else {
            echo('ERRORE :-('.N);
        }
    }
    ksort($biglist);
    $diff=count($biglist)-$ibiglistc;
    if ($diff>=0) $diff='+'.$diff;
    echo('Totale istanze note: '.count($biglist).' ('.$diff.' rispetto all\'ultima volta).'.N);
}
// Load the previously saved list of accepted instances, if any. $iprodlistc
// remembers how many entries it had, to report the difference at the end.
$prodlist=array();
$iprodlistc=0;
$buf=@file_get_contents($opts['prodlistfp']);
if ($buf!==false) {
    // A malformed file decodes to null: keep the empty list in that case,
    // otherwise count() and array_key_exists() would fail further below.
    $decoded=json_decode($buf,true);
    if (is_array($decoded)) {
        $prodlist=$decoded;
        $iprodlistc=count($prodlist);
    }
}
// Main loop. For every known instance: refresh its data (when online checks
// are enabled), decide whether it matches the selection criteria and update
// the list of accepted instances ($prodlist) accordingly. $newbiglist
// collects the instances to keep in the big list.
$newbiglist=array();
$i=0;
$biglistc=count($biglist);
foreach ($biglist as $dom=>$oinfo) {
    $i++;
    echo('~~~~~~'.N);
    if ($opts['onlinecheck']) {
        echo('Recupero le informazioni su «'.$dom.'» ('.$i.'/'.$biglistc.' - '.round(100/$biglistc*$i).'%)'.N);
        echo('Recupero le informazioni Nodeinfo ... ');
        $ninfo=null;
        $buf=@file_get_contents('https://'.$dom.'/nodeinfo/2.0',false,$context);
        if ($buf!==false) {
            echo('OK :-)'.N);
            $ninfo=json_decode($buf,true);
        } else {
            echo('ERRORE :-('.N);
        }
        echo('Recupero le informazioni API sull\'attività dell\'istanza ... ');
        $activity=null;
        $buf=@file_get_contents('https://'.$dom.'/api/v1/instance/activity',false,$context);
        if ($buf!==false) {
            echo('OK :-)'.N);
            $activity=json_decode($buf,true);
        } else {
            echo('ERRORE :-('.N);
        }
        // NOTE: the trends are fetched but not used anywhere in this loop.
        echo('Recupero le informazioni API sui trending hashtags dell\'istanza ... ');
        $trending=null;
        $buf=@file_get_contents('https://'.$dom.'/api/v1/trends',false,$context);
        if ($buf!==false) {
            echo('OK :-)'.N);
            $trending=json_decode($buf,true);
        } else {
            echo('ERRORE :-('.N);
        }
        echo('Recupero le informazioni API sull\'istanza ... ');
        $info=null;
        $buf=@file_get_contents('https://'.$dom.'/api/v1/instance',false,$context);
        if ($buf!==false) {
            echo('OK :-)'.N);
            $info=json_decode($buf,true);
            // An answer that is not a JSON object carries no usable data.
            if (!is_array($info))
                $info=array();
            // Carry over the history of the previous checks and record this one.
            if (is_array($oinfo) && array_key_exists('X-Checks',$oinfo))
                $info['X-Checks']=$oinfo['X-Checks'];
            $info['X-Checks'][]=array('time'=>time(),'ok'=>true);
            if (is_array($ninfo)) {
                if (array_key_exists('usage',$ninfo) && is_array($ninfo['usage']) && array_key_exists('users',$ninfo['usage']) && is_array($ninfo['usage']['users'])) {
                    if (array_key_exists('activeMonth',$ninfo['usage']['users']))
                        $info['X-ActiveUsersPerMonth']=$ninfo['usage']['users']['activeMonth'];
                    if (array_key_exists('activeHalfyear',$ninfo['usage']['users']))
                        $info['X-ActiveUsersPerHalfYear']=$ninfo['usage']['users']['activeHalfyear'];
                }
                if (array_key_exists('software',$ninfo) && is_array($ninfo['software'])) {
                    if (array_key_exists('name',$ninfo['software']))
                        $info['X-Software']=$ninfo['software']['name'];
                    if (array_key_exists('version',$ninfo['software']))
                        $info['X-Version']=$ninfo['software']['version'];
                }
            }
            if (!is_null($activity))
                $info['X-Activity']=$activity;
            $newbiglist[$dom]=$info;
        } else {
            echo('ERRORE :-( ... ');
            // Find the most recent successful check, if there ever was one.
            $lastokk=null;
            if (is_array($oinfo) && array_key_exists('X-Checks',$oinfo)) {
                foreach ($oinfo['X-Checks'] as $key=>$val)
                    if ($val['ok']) $lastokk=$key;
            }
            // The instance is dropped from the big list only when its last
            // successful check is older than «excludeafter» seconds.
            if (is_null($oinfo) || is_null($lastokk) || time()-$oinfo['X-Checks'][$lastokk]['time']<=$opts['excludeafter']) {
                echo('ma riproveremo...'.N);
                $oinfo['X-Checks'][]=array('time'=>time(),'ok'=>false);
                $newbiglist[$dom]=$oinfo;
            } else {
                echo('e non riproveremo...'.N);
                $oinfo=null;
            }
            $info=$oinfo;
        }
    } else {
        // Offline run: judge the instance on the stored data only.
        $info=$oinfo;
    }

    // Collect every reason why the instance does not qualify.
    $whynot=array();
    // $info may be null here (instance never reached, or given up on):
    // array_key_exists() must not be called on it.
    if (is_array($info) && array_key_exists('uri',$info)) {
        if (!in_array($dom,$wlinstances)) {
            if (in_array($dom,$blinstances))
                $whynot[]='Istanza blacklistata';
            if (array_key_exists('X-Software',$info) && !in_array($info['X-Software'],array('mastodon','corgidon')))
                $whynot[]='Il software non è Mastodon (ma '.$info['X-Software'].')';
            if (!array_key_exists('registrations',$info))
                $whynot[]='Stato delle registrazioni non disponibile';
            elseif ($info['registrations']==false)
                $whynot[]='Registrazioni chiuse';
            if (!array_key_exists('stats',$info) || !is_array($info['stats'])) {
                $whynot[]='Stats non disponibili';
            } else {
                if (!array_key_exists('user_count',$info['stats']))
                    $whynot[]='Numero utenti non disponibile';
                elseif ($info['stats']['user_count']<10 || $info['stats']['user_count']>30000)
                    $whynot[]='Numero utenti ('.$info['stats']['user_count'].') non compreso tra 10 e 30000';
                if (!array_key_exists('domain_count',$info['stats']))
                    $whynot[]='Numero istanze conosciute non disponibile';
                elseif ($info['stats']['domain_count']<500)
                    $whynot[]='Numero istanze conosciute minore di 500';
                // An unconditional check on the average number of toots per
                // user used to be here; it is now applied below, only when the
                // number of monthly active users is not available.
            }
            if (array_key_exists('X-ActiveUsersPerMonth',$info)) {
                if ($info['X-ActiveUsersPerMonth']<10)
                    $whynot[]='Numero utenti attivi nell\'ultimo mese minore di 10';
            } elseif (array_key_exists('stats',$info) && is_array($info['stats']) && array_key_exists('status_count',$info['stats']) && array_key_exists('user_count',$info['stats']) && $info['stats']['user_count']>0 && $info['stats']['status_count']/$info['stats']['user_count']<10) {
                $whynot[]='Media dei toots per utente minore di 10';
            }
            if (!array_key_exists('contact_account',$info) || is_null($info['contact_account'])) {
                $whynot[]='Informazioni sull\'account admin principale non disponibili';
            }
            // A check on the age of the admin account (at least 6 months) used
            // to be here and is currently disabled.
        } else {
            echo('«'.$dom.'» è whitelistata, la teniamo a prescindere.'.N);
        }
    } elseif (!array_key_exists($dom,$prodlist)) {
        $whynot[]='Info non disponibili, e l\'istanza non era già presente nella lista delle istanze occhei';
    } else {
        // No usable data, but the instance had already been accepted: keep
        // its stored record as it is. Going on with the empty $info would
        // replace the record with a stub and lose all of its data.
        $info=(is_array($prodlist[$dom])) ? $prodlist[$dom] : array();
    }

    if (count($whynot)==0) {
        if (array_key_exists($dom,$prodlist)) {
            $prev=(is_array($prodlist[$dom])) ? $prodlist[$dom] : array();
            // Flag changed descriptions, remembering the previous text (null
            // when there was none).
            if (array_key_exists('short_description',$info) && (!array_key_exists('short_description',$prev) || $prev['short_description']!=$info['short_description'])) {
                $info['X-ShortDescriptionChanged']=true;
                $info['X-PrevShortDescription']=(isset($prev['short_description'])) ? $prev['short_description'] : null;
            } else {
                $info['X-ShortDescriptionChanged']=false;
            }
            if (array_key_exists('description',$info) && (!array_key_exists('description',$prev) || $prev['description']!=$info['description'])) {
                $info['X-DescriptionChanged']=true;
                $info['X-PrevDescription']=(isset($prev['description'])) ? $prev['description'] : null;
            } else {
                $info['X-DescriptionChanged']=false;
            }
            echo('«'.$dom.'» era nella lista delle istanze occhei ed è stata AGGIORNATA! :-)'.N);
            // «X-Show» is carried over from the stored record; -1 is the
            // value given to records that do not have it yet.
            if (array_key_exists('X-Show',$prev))
                $info['X-Show']=$prev['X-Show'];
            else
                $info['X-Show']=-1;
        } else {
            $info['X-ShortDescriptionChanged']=false;
            $info['X-DescriptionChanged']=false;
            $info['X-Show']=-1;
            echo('«'.$dom.'» non era nella lista delle istanze occhei ed è stata AGGIUNTA! :-)'.N);
        }
        $prodlist[$dom]=$info;
    } else {
        if (array_key_exists($dom,$prodlist)) {
            unset($prodlist[$dom]);
            echo('«'.$dom.'» era nella lista delle istanze occhei ma è stata SCARTATA! :-('.N);
        } else {
            echo('«'.$dom.'» non era nella lista delle istanze occhei e NON CI È ENTRATA! :-('.N);
        }
        echo('Motivazioni: '.implode('; ',$whynot).'.'.N);
    }
}
// Final report. The big list is only rewritten after an online run; the list
// of accepted instances is always rewritten.
echo('~~~~~~'.N);
if (!$opts['onlinecheck']) {
    echo('Totale istanze nella listona: '.count($biglist).N);
} else {
    file_put_contents($opts['biglistfp'], json_encode($newbiglist, JSON_PRETTY_PRINT));
    $newbiglistc = count($newbiglist);
    $diff = $newbiglistc - $ibiglistc;
    // Differences are shown with an explicit sign.
    $diff = ($diff >= 0 ? '+' : '').$diff;
    echo('Totale istanze nella listona: '.$newbiglistc.' ('.$diff.' rispetto all\'ultima volta)'.N);
}
file_put_contents($opts['prodlistfp'], json_encode($prodlist, JSON_PRETTY_PRINT));
$diff = count($prodlist) - $iprodlistc;
$diff = ($diff >= 0 ? '+' : '').$diff;
echo('Totale istanze nella listina di quelle occhei: '.count($prodlist).' ('.$diff.' rispetto all\'ultima volta)'.N);
?>

View file

@ -1,9 +0,0 @@
excludeafter=1M
startinstancesfp=istanzesorelle_pant
loadbiglist=true
onlinecheck=true
timeout=5
biglistfp=biglist_pant.json
prodlistfp=prodlist_pant.json
blacklists=blacklist_cagi.txt
whitelists=whitelist_sorellanza.txt

View file

@ -1,9 +0,0 @@
excludeafter=1M
startinstancesfp=istanzesorelle
loadbiglist=true
onlinecheck=true
timeout=3
biglistfp=biglist.json
prodlistfp=prodlist.json
blacklists=blacklist_cagi.txt
whitelists=whitelist_sorellanza.txt

View file

@ -1,5 +0,0 @@
/\.gab\.best$/
/\.4chan\.icu$/
/\.ngrok\.io$/
/^reloaded\.openmtx\.com$/
/\.onion$/

View file

@ -1,197 +0,0 @@
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Dates are printed in Italian; N is the line terminator used everywhere.
setlocale(LC_ALL, 'it_IT.UTF-8');
define('N', "\n");

// Instance the crawl starts from and file holding the exclusion regexes.
$startinst = 'mastodon.social';
$exfp = 'crawlerone.exclude';

// State of the crawl: every instance found, the ones running mastodon, and
// the number of instances seen for each software name.
$insts = array();
$okinsts = array();
$softwares = array();

// Stream context shared by every http(s) request.
$contextopts = array(
    'http' => array('timeout' => 3),
    'socket' => array('tcp_nodelay' => true)
);
$context = stream_context_create($contextopts);

// Output files, filled while crawling and rewritten sorted at the end.
$allfp = 'listaglobale.txt';
$okfp = 'listamastodon.txt';
$softfp = 'listasoft.txt';
$allf = @fopen($allfp, 'w');
$okf = @fopen($okfp, 'w');
$softf = @fopen($softfp, 'w');
/**
 * Tells whether a value is empty or made of whitespace only.
 *
 * @param string $val Value to test.
 * @return bool
 */
function isempty($val)
{
    return(preg_match('/^\s*$/', $val) === 1);
}
/**
 * Returns the current date and time formatted for log lines, according to
 * the current locale.
 *
 * @return string
 */
function cdate()
{
    $stamp = strftime('%a %d %b %Y, %T');
    return($stamp);
}
/**
 * Blocks until a well known web page can be fetched, retrying every 5
 * seconds, so that a connection outage is not mistaken for dead instances.
 */
function waituntilonline()
{
    global $context;
    $url = 'http://www.google.com';
    for (;;) {
        if (@file_get_contents($url, false, $context) !== false)
            break;
        echo(cdate().' - Pare che siamo offline...'.N);
        sleep(5);
    }
    echo(cdate().' - Pare che siamo online! :-)'.N);
}
/**
 * Reloads the exclusion rules (one regular expression per line, empty lines
 * skipped) from the file named by the global $exfp into the global $exarr.
 *
 * When the file cannot be read the rules already in memory are kept; if there
 * are none, $exarr becomes an empty array so that it can always be iterated.
 */
function updexarr()
{
    global $exarr, $exfp;
    // file() returns false on failure: never store that into $exarr.
    $rules = @file($exfp, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
    if ($rules !== false)
        $exarr = $rules;
    elseif (!is_array($exarr))
        $exarr = array();
}
/**
 * Tells whether an instance name matches at least one of the exclusion
 * regular expressions held in the global $exarr.
 *
 * @param string $inst Instance domain.
 * @return bool
 */
function ckexarr($inst)
{
    global $exarr;
    $excluded = false;
    foreach ($exarr as $pattern) {
        if (preg_match($pattern, $inst) === 1) {
            $excluded = true;
            break;
        }
    }
    return($excluded);
}
/**
 * Records an instance, classifies its software through Nodeinfo and then
 * recursively crawls each of its peers that is not excluded and not already
 * recorded.
 *
 * Works on the globals $insts (every instance found), $okinsts (instances
 * whose software name contains "mastodon") and $softwares (software name =>
 * number of instances), and appends to the files open as $allf, $okf, $softf.
 *
 * @param string $inst Domain of the instance to crawl.
 */
function crawl($inst) {
global $insts, $okinsts, $softwares, $allf, $okf, $softf, $context;
// Do nothing while offline, and pick up changes to the exclusion rules.
waituntilonline();
updexarr();
// Print the per-software counters collected so far.
foreach ($softwares as $key=>$val)
echo('Software «'.$key.'»: '.$val.' istanze.'.N);
if (!isempty($inst)) {
// Whether $inst has already been found and crawled, and whether it is excluded, is checked right before the recursive call to crawl(), below.
echo('«'.$inst.'» non è presente nella lista delle istanze scovate, la aggiungo.'.N);
$insts[]=$inst;
fwrite($allf,$inst.N);
echo('«'.$inst.'»: provo a recuperare info da Nodeinfo ... ');
$buf=@file_get_contents('https://'.$inst.'/nodeinfo/2.0',false,$context);
// Loose comparison: an empty body is treated like a failed request.
if ($buf!=false) {
echo('OK :-)'.N);
echo('«'.$inst.'»: Nodeinfo: controllo che il software sia mastodon ... ');
$buf=json_decode($buf,true);
if (is_array($buf) && array_key_exists('software',$buf) && array_key_exists('name',$buf['software'])) {
if (preg_match('/mastodon/i',$buf['software']['name'])===1) {
echo('SI! :-)'.N);
echo('«'.$inst.'»: il software è mastodon, aggiungo listanza alla lista delle istanze OK! :-)'.N);
$okinsts[]=$inst;
fwrite($okf,$inst.N);
} else {
echo('NO :-('.N);
echo('«'.$inst.'»: il software non è mastodon, NON aggiungo listanza alla lista delle istanze ok :-('.N);
}
// Count the instance under its lowercased software name; a name is
// written to the software file the first time it is met.
$software=strtolower($buf['software']['name']);
if (!isempty($software)) {
if (!array_key_exists($software,$softwares)) {
echo('Ho trovato un software che non mi è ancora noto: «'.$software.'»!'.N);
$softwares[$software]=1;
fwrite($softf,$software.N);
} else {
$softwares[$software]++;
}
}
} else {
echo('ERRORE! :-('.N);
}
} else {
echo('ERRORE :-('.N);
echo('«'.$inst.'»: Nodeinfo non ha risposto, NON aggiungo listanza alla lista delle istanze ok :-('.N);
}
echo('«'.$inst.'»: provo a recuperare la lista delle istanze conosciute allistanza ... ');
$peers=@file_get_contents('https://'.$inst.'/api/v1/instance/peers',false,$context);
if ($peers!=false) {
echo('OK :-)'.N);
$peers=json_decode($peers,true);
if (is_array($peers)) {
foreach ($peers as $peer) {
if (@is_string($peer)) {
if (!ckexarr($peer)) {
// Being in $insts means the peer has already been crawled, or is
// being crawled by a call further up the recursion.
if (!in_array($peer,$insts)) {
echo('>>> Crawlo «'.$peer.'».'.N);
crawl($peer);
} else {
echo('>>> NON crawlo «'.$peer.'» perché lho già fatto.'.N);
}
} else {
echo('>>> NON crawlo «'.$peer.'» perché il suo nome corrisponde a unesclusione.'.N);
}
} else {
echo('>>> NON crawlo «'.$peer.'» perché il suo nome non è una stringa.'.N);
}
}
}
} else {
echo('ERRORE :-('.N);
}
} else {
echo('NON aggiungo istanze senza nome.'.N);
}
// Printed at the end of every call, whatever its outcome.
echo('~~~~~~~ Stats: '.count($insts).' istanze note, '.count($okinsts).' istanze mastodon vive, '.count($softwares).' software trovati. ~~~~~~~'.N);
}
// Crawl everything reachable from the starting instance.
crawl($startinst);
echo('FINE CRAWLING! :-)'.N);

// The files filled while crawling are closed and then rewritten sorted.
foreach (array($allf, $okf, $softf) as $handle)
    @fclose($handle);
echo('Salvo i risultati (tutte le istanze, istanze ok, softwares) ordinati nei rispettivi file.'.N);
sort($insts);
sort($okinsts);
arsort($softwares, SORT_NUMERIC);

// The software file holds one «name count» pair per line.
$softlines = array();
foreach ($softwares as $software => $num)
    $softlines[] = $software.' '.$num;

$dumps = array(
    array($allfp, $insts),
    array($okfp, $okinsts),
    array($softfp, $softlines)
);
foreach ($dumps as $dump) {
    list($path, $rows) = $dump;
    $f = @fopen($path, 'w');
    if ($f === false) {
        echo('Non ho potuto aprire in scrittura il file «'.$path.'».'.N);
        continue;
    }
    foreach ($rows as $row)
        fwrite($f, $row.N);
    fclose($f);
}
exit(0);
?>

View file

@ -1,5 +0,0 @@
mastodon.bida.im|blacklist_bida.txt
mastodon.cisti.org|
nebbia.fail|
stereodon.social|
snapj.saja.freemyip.com|

View file

@ -1 +0,0 @@
pantagruel.dnsup.net|blacklist_bida.txt

View file

@ -1,281 +0,0 @@
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
require(__DIR__.'/../include/getfc.php');
// Use the locale of the calling environment; N is the line terminator.
setlocale(LC_ALL,getenv('LANG'));
define('N',"\n");
// Default values of the options.
$opts=array(
    // FIX: resolved against the script directory, like the include above and
    // like «peersfp», so that the script also works when it is launched from
    // another working directory (it used to be relative to the cwd).
    'inifp'=>__DIR__.'/../sec/mustard.ini',
    'startinst'=>'mastodon.social',
    'peersfp'=>__DIR__.'/peers',
    'restore'=>false,
    'excludefp'=>null,
    'timeout'=>5
);
// Text shown by «-h/--help».
$help='peerscrawl.php
DESCRIPTION
This program tries to build a fairly complete list of mastodon instances.
SYNOPSIS
peerscrawl.php [options]
OPTIONS
-s, --startinst <domain>
Defines the first instance to crawl.
DEFAULT: «'.$opts['startinst'].'»
-p, --peersfp <file>
Defines the file into which the ordered list of instances will be saved.
DEFAULT: «'.$opts['peersfp'].'»
-r, --restore
If peers file already exists on programs start it will be loaded into
memory and each instance it contains will be considered “already
crawled”, thus allowing to “restore an interrupted crawling session”.
-e, --excludefp <file>
Defines a file containing exclusion rules: one regular expression per
line (empty lines are ignored). Any instance matching any defined regex
will be ignored by the program. Changes made to this file during program
execution will be taken into account.
-t, --timeout <seconds>
Defines the timeout in seconds for every connection attempt.
DEFAULT: «'.$opts['timeout'].'»
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
// Parse the command line. Arguments not starting with «-» are ignored.
for ($i=1; $i<$argc; $i++) {
    if (substr($argv[$i],0,1)=='-') {
        switch($argv[$i]) {
            case '-s':
            case '--startinst':
                if ($i+1>=$argc)
                    mexit('Option «'.$argv[$i].'» has to be followed by a domain name (use «-h» for more info).'.N,1);
                $i++;
                $opts['startinst']=$argv[$i];
                break;
            case '-p':
            case '--peersfp':
                if ($i+1>=$argc)
                    mexit('Option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
                $i++;
                $opts['peersfp']=$argv[$i];
                break;
            case '-r':
            case '--restore':
                // FIX: this is a flag and takes no parameter; the index must
                // not be advanced here, or the next argument is silently lost.
                $opts['restore']=true;
                break;
            case '-e':
            case '--excludefp':
                if ($i+1>=$argc || !file_exists($argv[$i+1]) || !is_file($argv[$i+1]) || !is_readable($argv[$i+1]))
                    mexit('Option «'.$argv[$i].'» has to be followed by an existing, readable files path (use «-h» for more info).'.N,1);
                $i++;
                $opts['excludefp']=$argv[$i];
                break;
            case '-t':
            case '--timeout':
                if ($i+1>=$argc || preg_match('/^[0-9]+$/',$argv[$i+1])!==1)
                    mexit('Option «'.$argv[$i].'» has to be followed by a number of seconds (use «-h» for more info).'.N,1);
                $i++;
                // Adding 0 turns the numeric string into a number.
                $opts['timeout']=$argv[$i]+0;
                break;
            case '-h':
            case '--help':
                mexit($help,0);
                break;
            default:
                mexit('Option «'.$argv[$i].'» is unknown (use «-h» for more info).'.N,1);
                break;
        }
    }
}
/**
 * Closes the database connection held in the global $link, if any, prints a
 * final message and terminates the script.
 *
 * @param string $msg  Text to print before exiting.
 * @param int    $code Process exit status. It defaults to 1 (failure) because
 *                     this function is also called with the message alone.
 */
function mexit($msg,$code=1) {
    global $link;
    if ($link) mysqli_close($link);
    echo($msg);
    exit($code);
}
/**
 * Closes the peers file and, when asked to, rewrites it with the sorted list
 * of every instance held in the global $insts.
 *
 * @param bool $dosort Whether to save the sorted list of instances.
 */
function shutdown($dosort)
{
    global $opts, $peersf, $insts;
    if ($peersf)
        @fclose($peersf);
    if (!$dosort)
        return;
    echo('Saving ordered instances list into «'.$opts['peersfp'].'».'.N);
    sort($insts);
    $peersf = @fopen($opts['peersfp'], 'w');
    if ($peersf === false) {
        echo('Couldnt open «'.$opts['peersfp'].'» for writing.'.N);
        return;
    }
    foreach ($insts as $inst)
        fwrite($peersf, $inst.N);
    fclose($peersf);
}
// Ticks are enabled so that pending signals get dispatched to the handler.
declare(ticks=1);

/**
 * Reports the interruption, closes the peers file without rewriting it and
 * exits with status 2.
 *
 * @param int $signal Number of the signal received.
 */
function signalHandler($signal)
{
    echo(N.'I got interrupted (signal: '.$signal.').'.N);
    shutdown(false);
    exit(2);
}

// SIGTERM: 'kill' was called; SIGHUP: terminal log-out; SIGINT: Ctrl-C.
foreach (array(SIGTERM, SIGHUP, SIGINT) as $signo)
    pcntl_signal($signo, 'signalHandler');
// Read from the database the instances marked as dead: they won't be crawled.
// FIX: mexit() used to be called without its exit code here.
$iniarr=@parse_ini_file($opts['inifp'])
    or mexit('ERROR: I couldnt open «'.$opts['inifp'].'».'.N,1);
$link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket'])
    or mexit('ERROR: I couldnt connect to MySQL server: '.mysqli_connect_error().N,2);
mysqli_set_charset($link,'utf8mb4')
    or mexit('ERROR trying to set MySQL client charset: '.__LINE__.': '.mysqli_error($link).N,2);
$res=mysqli_query($link,'SELECT URI FROM Instances WHERE Dead=1')
    or mexit('ERROR: '.__LINE__.': '.mysqli_error($link).N,2);
// The rows are fetched before the connection is closed, instead of relying
// on the result set outliving it.
$deadinsts=array();
while ($row=mysqli_fetch_assoc($res))
    $deadinsts[]=$row['URI'];
mysqli_free_result($res);
unset($res);
mysqli_close($link);
// The database is not needed any more: forget the closed connection, so
// that mexit() does not try to close it a second time.
$link=null;
// Stream context for http(s) requests.
$contextopts=array(
    'http'=>array(
        'timeout'=>$opts['timeout']
    ),
    'socket'=>array(
        'tcp_nodelay'=>true
    )
);
$context=stream_context_create($contextopts);
// Instances found so far and exclusion regexes currently in force.
$insts=array();
$exarr=array();
if ($opts['restore']) {
    if (file_exists($opts['peersfp']) && is_file($opts['peersfp']) && is_readable($opts['peersfp'])) {
        echo('Loading «'.$opts['peersfp'].'».'.N);
        $insts=file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
    } else {
        echo('WARNING: I couldnt open «'.$opts['peersfp'].'» for reading.'.N);
    }
}
// FIX: when restoring, the file is opened for appending. Truncating it would
// lose every restored instance if this run gets interrupted too, since the
// whole list is only written back on a regular exit.
$peersf=@fopen($opts['peersfp'],($opts['restore'] ? 'a' : 'w'));
// Without the peers file nothing could be saved: give up right away instead
// of failing on the first write.
if ($peersf===false)
    mexit('ERROR: I could not open «'.$opts['peersfp'].'» for writing.'.N,2);
/**
 * Tells whether a value is empty or made of whitespace only.
 *
 * @param string $val Value to test.
 * @return bool
 */
function isempty($val)
{
    $blank = (preg_match('/^\s*$/', $val) === 1);
    return($blank);
}
/**
 * Blocks until a TCP connection to a well known host can be opened (1 second
 * timeout per attempt), retrying every 5 seconds.
 */
function waituntilonline()
{
    global $context;
    $host = 'www.google.com';
    $sock = @fsockopen($host, 80, $errno, $errstr, 1);
    while ($sock === false) {
        echo(strftime('%c').' - WARNING: it seems we are offline :-('.N);
        sleep(5);
        $sock = @fsockopen($host, 80, $errno, $errstr, 1);
    }
    fclose($sock);
}
/**
 * Reloads the exclusion rules into the global $exarr from the file named by
 * $opts['excludefp'], when that option is set.
 *
 * The file holds one regular expression per line; empty lines are skipped
 * and invalid expressions are reported and left out. When the file cannot be
 * opened the rules already in memory are kept.
 */
function updexarr() {
    global $exarr, $opts;
    if (is_null($opts['excludefp']))
        return;
    $f=@fopen($opts['excludefp'],'r');
    if ($f===false) {
        echo('WARNING: I could not open «'.$opts['excludefp'].'» for reading.'.N);
        return;
    }
    $i=0;
    $exarr=array();
    while (($line=fgets($f))!==false) {
        $i++;
        $line=trim($line);
        if (isempty($line))
            continue;
        // preg_match() returns false only when the pattern does not compile.
        if (@preg_match($line,'foo')!==false)
            $exarr[]=$line;
        else
            echo('WARNING: «'.$opts['excludefp'].'», line '.$i.': «'.$line.'» is not a valid regular expression.'.N);
    }
    // FIX: this function runs once per crawled instance, so a handle left
    // open at every call ends up exhausting the available descriptors.
    fclose($f);
}
/**
 * Tells whether an instance name matches at least one of the exclusion
 * regular expressions held in the global $exarr.
 *
 * @param string $inst Instance domain.
 * @return bool
 */
function ckexarr($inst)
{
    global $exarr;
    $excluded = false;
    foreach ($exarr as $pattern) {
        if (preg_match($pattern, $inst) === 1) {
            $excluded = true;
            break;
        }
    }
    return($excluded);
}
/**
 * Records an instance and recursively crawls each of its peers that is not
 * excluded and not already recorded.
 *
 * Nameless instances and instances listed in the global $deadinsts are
 * neither recorded nor crawled. Found instances are appended to the global
 * $insts and to the file open as $peersf.
 *
 * @param string $inst Domain of the instance to crawl.
 */
function crawl($inst) {
global $insts, $deadinsts, $peersf, $context, $opts;
// Do nothing while offline, and pick up changes to the exclusion rules.
waituntilonline();
updexarr();
if (!isempty($inst) && !in_array($inst,$deadinsts)) {
// The check below is redundant on a normal run, but it prevents the starting instance from ending up in the list twice when the program is run with "--restore"
if (!in_array($inst,$insts)) {
echo('«'.$inst.'» is not a known instance, I add it to the list of known instances.'.N);
$insts[]=$inst;
fwrite($peersf,$inst.N);
}
echo('«'.$inst.'»: trying to load instances peers ... ');
// $peers=@file_get_contents('https://'.$inst.'/api/v1/instance/peers',false,$context);
// getfc() comes from include/getfc.php, which is not visible from here; this
// code reads the 'cont' and 'emsg' keys of the value it returns.
$peers=getfc('https://'.$inst.'/api/v1/instance/peers',$opts['timeout']);
// Loose comparison: an empty body is treated like a failed request.
if ($peers['cont']!=false) {
echo('OK :-)'.N);
$peers=json_decode($peers['cont'],true);
if (is_array($peers)) {
foreach ($peers as $peer) {
if (is_string($peer)) {
if (!ckexarr($peer)) {
// Being in $insts means the peer has already been crawled, or is being
// crawled by a call further up the recursion (or was restored from file).
if (!in_array($peer,$insts)) {
echo('>>> I will crawl «'.$peer.'».'.N);
crawl($peer);
}/* else {
echo('>>> I wont crawl «'.$peer.'» because I already did.'.N);
}*/
} else {
echo('>>> I wont crawl «'.$peer.'» because its name matches with an exclusion regex.'.N);
}
} else {
echo('>>> I wont crawl this peer because its name is not a string.'.N);
}
}
}
} else {
echo('ERROR :-( ['.$peers['emsg'].']'.N);
}
} elseif (isempty($inst)) {
echo('I WONT add nameless instances.'.N);
} else {
echo('I WONT crawl «'.$inst.'» because its a DEAD instance.'.N);
}
// Printed at the end of every call, whatever its outcome.
echo('~~~~~~~ Stats: '.count($insts).' known istances ~~~~~~~'.N);
}
// Crawl everything reachable from the starting instance, then save the
// sorted list of instances and exit successfully.
crawl($opts['startinst']);
print('DONE CRAWLING! :-)'.N);
shutdown(true);
exit(0);
?>

View file

@ -1,467 +0,0 @@
#!/usr/bin/php
<?php
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
require(__DIR__.'/../include/getfc.php');
// Use the locale of the calling environment; N is the line terminator.
setlocale(LC_ALL,getenv('LANG'));
define('N',"\n");
// Default values of the options.
$opts=array(
    // FIX: resolved against the script directory, like the include above and
    // like «peersfp», so that the script also works when it is launched from
    // another working directory (it used to be relative to the cwd).
    'inifp'=>__DIR__.'/../sec/mustard.ini',
    'startinst'=>'mastodon.social',
    'peersfp'=>__DIR__.'/peers',
    'restore'=>false,
    'excludefp'=>null,
    'timeout'=>5,
    'verbose'=>false,
    'excludedead'=>false
);
// Text shown by «-h/--help».
$help='peerscrawl.php
DESCRIPTION
This program tries to build a fairly complete list of mastodon instances.
SYNOPSIS
peerscrawl.php [options]
OPTIONS
-s, --startinst <domain>
Defines the first instance to crawl.
DEFAULT: «'.$opts['startinst'].'»
-p, --peersfp <file>
Defines the file into which the ordered list of instances will be saved.
DEFAULT: «'.$opts['peersfp'].'»
-r, --restore
If peers file already exists on programs start it will be loaded into
memory and each instance it contains will be considered “already
crawled”, thus allowing to “restore an interrupted crawling session”.
-e, --excludefp <file>
Defines a file containing exclusion rules: one regular expression per
line (empty lines are ignored). Any instance matching any defined regex
will be ignored by the program. Changes made to this file during program
execution will be taken into account.
-t, --timeout <seconds>
Defines the timeout in seconds for every connection attempt.
DEFAULT: «'.$opts['timeout'].'»
-v, --verbose
Be more verbose.
-d, --excludedead
Exclude instances marked as "Dead" in the database.
This program comes with ABSOLUTELY NO WARRANTY; for details see the source.
This is free software, and you are welcome to redistribute it under
certain conditions; see <http://www.gnu.org/licenses/> for details.'.N;
// Parse the command line. Arguments not starting with «-» are ignored.
for ($i=1; $i<$argc; $i++) {
    if (substr($argv[$i],0,1)=='-') {
        switch($argv[$i]) {
            case '-s':
            case '--startinst':
                if ($i+1>=$argc)
                    mexit('Option «'.$argv[$i].'» has to be followed by a domain name (use «-h» for more info).'.N,1);
                $i++;
                $opts['startinst']=$argv[$i];
                break;
            case '-p':
            case '--peersfp':
                if ($i+1>=$argc)
                    mexit('Option «'.$argv[$i].'» has to be followed by a files path (use «-h» for more info).'.N,1);
                $i++;
                $opts['peersfp']=$argv[$i];
                break;
            // FIX: «-r», «-v» and «-d» are flags and take no parameter; the
            // index must not be advanced for them, or the argument following
            // each of them is silently lost.
            case '-r':
            case '--restore':
                $opts['restore']=true;
                break;
            case '-e':
            case '--excludefp':
                if ($i+1>=$argc || !file_exists($argv[$i+1]) || !is_file($argv[$i+1]) || !is_readable($argv[$i+1]))
                    mexit('Option «'.$argv[$i].'» has to be followed by an existing, readable files path (use «-h» for more info).'.N,1);
                $i++;
                $opts['excludefp']=$argv[$i];
                break;
            case '-t':
            case '--timeout':
                if ($i+1>=$argc || preg_match('/^[0-9]+$/',$argv[$i+1])!==1)
                    mexit('Option «'.$argv[$i].'» has to be followed by a number of seconds (use «-h» for more info).'.N,1);
                $i++;
                // Adding 0 turns the numeric string into a number.
                $opts['timeout']=$argv[$i]+0;
                break;
            case '-v':
            case '--verbose':
                $opts['verbose']=true;
                break;
            case '-d':
            case '--excludedead':
                $opts['excludedead']=true;
                break;
            case '-h':
            case '--help':
                mexit($help,0);
                break;
            default:
                mexit('Option «'.$argv[$i].'» is unknown (use «-h» for more info).'.N,1);
                break;
        }
    }
}
/**
 * Print a message and terminate the script.
 *
 * If the global $link holds a MySQL connection, it is closed first.
 *
 * @param string $msg  text printed right before exiting
 * @param int    $code process exit status; defaults to 1 so that callers
 *                     passing only a message still exit with an error status
 *                     instead of failing with an ArgumentCountError
 */
function mexit($msg,$code=1) {
    global $link;
    if ($link instanceof mysqli) {
        // The connection may already have been closed elsewhere: on PHP 8
        // closing it again throws, on PHP 7 it only warns. Either way it
        // must not prevent the message from being shown.
        try {
            @mysqli_close($link);
        } catch (Throwable $e) {
            // nothing to do: the connection is gone already
        }
        $link=null;
    }
    echo($msg);
    exit($code);
}
/**
 * Dump the global $softwares and $versions arrays (in print_r format) into
 * «softandvers.list» in the current working directory, overwriting it.
 *
 * If the file cannot be opened a warning is printed and nothing is written:
 * this function is also called from the shutdown path, where a fatal error
 * caused by writing to an invalid handle would abort the cleanup.
 */
function writestrange() {
    global $softwares, $versions;
    $f=@fopen('softandvers.list','w');
    if ($f===false) {
        echo('WARNING: I could not open «softandvers.list» for writing.'."\n");
        return;
    }
    fwrite($f,'All softwares: '.print_r($softwares,1));
    fwrite($f,'Strange versions: '.print_r($versions,1));
    fclose($f);
}
/**
 * Final cleanup: close the peers file and dump the software/version stats.
 *
 * @param bool $dosort when true, the global $insts list is deduplicated,
 *                     sorted and written to the peers file ($opts['peersfp']),
 *                     replacing its previous content
 */
function shutdown($dosort) {
    global $opts, $peersf, $insts;
    if ($peersf) @fclose($peersf);
    if ($dosort) {
        // duplicates are not expected here: report them if any got removed
        $before=count($insts);
        $insts=array_unique($insts);
        $after=count($insts);
        if ($before!=$after) echo('WARNING: $insts contained '.($before-$after).' duplicates, better check my code ;-)'.N);
        echo('Saving ordered instances list into «'.$opts['peersfp'].'».'.N);
        sort($insts);
        $peersf=@fopen($opts['peersfp'],'w');
        if ($peersf===false) {
            echo('Couldnt open «'.$opts['peersfp'].'» for writing.'.N);
        } else {
            foreach ($insts as $hostname) {
                fwrite($peersf,$hostname.N);
            }
            fclose($peersf);
        }
    }
    writestrange();
}
// WARNING: when the script is piped, e.g. "script.php | tee script.log",
// this function is still executed, even though its output is not shown.
/**
 * Signal callback: report the interruption, run the cleanup without
 * rewriting the sorted peers list, and exit with status 2.
 *
 * @param int $signal number of the signal that was received
 */
function signalHandler($signal) {
    $notice=N.'I got interrupted (signal: '.$signal.').'.N;
    echo($notice);
    shutdown(false);
    //touch('KILLED');
    exit(2);
}
//declare(ticks=1);
// Have signals delivered asynchronously and route the usual termination
// signals to signalHandler().
pcntl_async_signals(true);
pcntl_signal(SIGTERM,'signalHandler');// Termination ('kill' was called)
pcntl_signal(SIGHUP,'signalHandler');// Terminal log-out
pcntl_signal(SIGINT,'signalHandler');// Interrupted (Ctrl-C is pressed)

// With «--excludedead», load from the database the URIs of the instances
// marked as dead, so that the crawler can skip them.
$deadinsts=array();
if ($opts['excludedead']) {
    // mexit() is always given an explicit exit code here
    $iniarr=@parse_ini_file($opts['inifp'])
        or mexit('ERROR: I couldnt open «'.$opts['inifp'].'».'.N,2);
    $link=@mysqli_connect($iniarr['db_host'],$iniarr['db_admin_name'],$iniarr['db_admin_password'],$iniarr['db_name'],$iniarr['db_port'],$iniarr['db_socket'])
        or mexit('ERROR: I couldnt connect to MySQL server: '.mysqli_connect_error().N,2);
    mysqli_set_charset($link,'utf8mb4')
        or mexit('ERROR trying to set MySQL client charset: '.__LINE__.': '.mysqli_error($link).N,2);
    $res=mysqli_query($link,'SELECT URI FROM Instances WHERE Dead=1')
        or mexit('ERROR: '.__LINE__.': '.mysqli_error($link).N,2);
    // Read the whole result set BEFORE closing the connection it belongs to.
    while ($row=mysqli_fetch_assoc($res))
        $deadinsts[]=$row['URI'];
    mysqli_free_result($res);
    unset($res);
    mysqli_close($link);
    // Forget the closed connection, so that mexit() will not try to close it again.
    $link=null;
}

// Stream context carrying the connection timeout chosen by the user.
$contextopts=array(
    'http'=>array(
        'timeout'=>$opts['timeout']
    ),
    'socket'=>array(
        'tcp_nodelay'=>true
    )
);
$context=stream_context_create($contextopts);

$insts=array();     // instances verified as mastodon instances
$ainsts=array();    // every instance name met so far
$softwares=array(); // statistics about the softwares found via nodeinfo
$versions=array();  // unexpected "version" values, by instance
$exarr=array();     // exclusion regular expressions

if ($opts['restore']) {
    if (file_exists($opts['peersfp']) && is_file($opts['peersfp']) && is_readable($opts['peersfp'])) {
        echo('Loading «'.$opts['peersfp'].'».'.N);
        $insts=file($opts['peersfp'],FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
    } else {
        echo('WARNING: I couldnt open «'.$opts['peersfp'].'» for reading.'.N);
    }
}

// Opening with 'w' truncates the peers file: fail early if it cannot be
// opened, since every verified instance is written to it.
$peersf=@fopen($opts['peersfp'],'w');
if ($peersf===false)
    mexit('ERROR: I couldnt open «'.$opts['peersfp'].'» for writing.'.N,2);
// Write the restored entries straight back, otherwise they would be lost if
// the run gets interrupted before the final sorted list is saved.
foreach ($insts as $restoredinst)
    fwrite($peersf,$restoredinst.N);
unset($restoredinst);
/**
 * Tell whether a string is empty or made of whitespace only.
 *
 * @param string $val the string to check
 * @return bool true when $val contains nothing but whitespace
 */
function isempty($val) {
    $blank=(preg_match('/^\s*$/',$val)===1);
    return($blank);
}
/**
 * Block until a TCP connection to a well known host succeeds, i.e. until we
 * appear to be online. While offline, a warning is printed before each retry.
 *
 * @param string $host      host used as connectivity probe
 * @param int    $port      TCP port to connect to
 * @param int    $retrywait seconds to sleep between two attempts
 */
function waituntilonline($host='www.google.com',$port=80,$retrywait=5) {
    // each connection attempt gives up after 1 second
    while (false===($f=@fsockopen($host,$port,$errno,$errstr,1))) {
        // date() replaces strftime(), which is deprecated since PHP 8.1
        echo(date('D M j H:i:s Y').' - WARNING: it seems we are offline :-('.N);
        sleep($retrywait);
    }
    fclose($f);
}
/**
 * (Re)load the exclusion rules from the file set in $opts['excludefp'] into
 * the global $exarr.
 *
 * The file holds one regular expression per line; blank lines are skipped and
 * lines that are not valid regular expressions are reported and ignored.
 * Nothing happens when no exclusion file was defined. If the file cannot be
 * opened, a warning is printed and the previously loaded rules are kept.
 */
function updexarr() {
    global $exarr, $opts;
    if (is_null($opts['excludefp']))
        return;
    $f=@fopen($opts['excludefp'],'r');
    if ($f===false) {
        echo('WARNING: I could not open «'.$opts['excludefp'].'» for reading.'.N);
        return;
    }
    $i=0;
    $exarr=array();
    while (($line=fgets($f))!==false) {
        $i++;
        $line=trim($line);
        if (isempty($line))
            continue;
        // preg_match() returns false when the pattern itself is invalid
        if (@preg_match($line,'foo')!==false)
            $exarr[]=$line;
        else
            echo('WARNING: «'.$opts['excludefp'].'», line '.$i.': «'.$line.'» is not a valid regular expression.'.N);
    }
    // this function runs at every crawling round: do not leak the handle
    fclose($f);
}
/**
 * Check an instance name against the exclusion rules in the global $exarr.
 *
 * @param string $inst instance name to check
 * @return bool true if at least one exclusion regex matches $inst
 */
function ckexarr($inst) {
    global $exarr;
    $excluded=false;
    foreach ($exarr as $pattern) {
        if (preg_match($pattern,$inst)===1) {
            $excluded=true;
            break;
        }
    }
    return($excluded);
}
/**
 * Format a number of bytes as a human readable size.
 *
 * The biggest unit among GiB, MiB and KiB whose value, rounded to one
 * decimal, is at least 1 gets used (value shown with up to two decimals);
 * anything smaller is shown as plain bytes.
 *
 * @param int|float $b number of bytes
 * @return string e.g. «1.5 KiB», «512 B»
 */
function ghs($b) {
    // biggest unit first: the first one that fits wins
    $units=array(' GiB'=>1073741824,' MiB'=>1048576,' KiB'=>1024);
    foreach ($units as $suffix=>$size) {
        if (round($b/$size,1) >= 1)
            return(round($b/$size,2).$suffix);
    }
    return($b.' B');
}
/**
 * Format a duration in seconds as a human readable string.
 *
 * @param int|float  $ts amount of seconds (may be a float)
 * @param array|null $fa format: four strings (days, hours, minutes, seconds),
 *                       each one holding the singular and the plural suffix
 *                       separated by «§», for example:
 *                       array(' day, § days, ',' hour, § hours, ',' minute, § minutes, ',' second§ seconds')
 * @param int        $sd number of decimals the seconds get rounded to
 * @return string days, hours and minutes appear only when greater than zero;
 *                seconds are always shown
 */
function ght($ts,$fa=null,$sd=2) {
    if ($fa==null)
        // $fa=array('g, §g, ','o, §o, ','m, §m, ','s§s');
        $fa=array('d, §d, ','h:§h:','m:§m:','s§s');
    // split every format entry into array(singular, plural)
    $suffixes=array();
    foreach ($fa as $idx=>$spec)
        $suffixes[$idx]=explode('§',$spec);
    $out='';
    // seconds contained in a day, an hour and a minute, in format order
    $spans=array(86400,3600,60);
    foreach ($spans as $idx=>$secs) {
        $n=floor($ts/$secs);
        if ($n>0)
            $out.=$n.(($n==1) ? $suffixes[$idx][0] : $suffixes[$idx][1]);
        $ts=$ts-$n*$secs;
    }
    // what is left are the seconds
    $n=round($ts,$sd);
    $out.=$n.(($n==1) ? $suffixes[3][0] : $suffixes[3][1]);
    return($out);
}
/**
 * Check whether the host part of $url is a syntactically valid hostname.
 *
 * Anything from the first «/» on and a trailing «:port» are dropped, then the
 * name is converted with idn_to_ascii() and checked: at most 253 characters
 * overall, labels of 1 to 63 characters made only of letters, digits and
 * «-», not starting nor ending with «-».
 *
 * @param string $url hostname, optionally followed by a port and/or a path
 * @return bool true when the hostname is valid
 */
function validhostname($url) {
    $host=preg_replace('#/.*#','',$url);
    $host=preg_replace('#:[0-9]+$#','',$host);
    $host=idn_to_ascii($host);
    if (strlen($host)>253) return(false);
    foreach (explode('.',$host) as $part) {
        $n=strlen($part);
        if ($n<1 || $n>63) return(false);
        if (preg_match('#^-#',$part)==1 || preg_match('#-$#',$part)==1) return(false);
        //if (preg_match('#--#',$part)==1) return(false);
        if (preg_match('#^[a-zA-Z0-9-]+$#',$part)!==1) return(false);
    }
    return(true);
}
/**
 * Try to tell whether the instance at $uri runs mastodon.
 *
 * First «/nodeinfo/2.0.json» is requested: when it declares a software name,
 * the name is counted in the global $softwares and decides the result (true
 * if it starts with «mastodon» or «corgidon»). Otherwise «/api/v1/instance»
 * is requested and its «version» field is examined; unexpected values are
 * recorded in the global $versions.
 *
 * Both documents come from remote servers, so nothing is assumed about their
 * shape: values of an unexpected type are handled as "not usable" instead of
 * being passed to functions that would raise a TypeError on them.
 *
 * Every request is preceded by a 1 second pause.
 *
 * @param string $uri instance hostname
 * @return bool true if the instance looks like a mastodon instance
 */
function ismastinst($uri) {
    global $softwares, $versions, $opts;
    sleep(1);
    $nodeinfo=getfc('https://'.$uri.'/nodeinfo/2.0.json',$opts['timeout']);
    if ($nodeinfo['cont']!==false) {
        $nodeinfo=json_decode($nodeinfo['cont'],true);
        if (is_array($nodeinfo)
            && isset($nodeinfo['software']) && is_array($nodeinfo['software'])
            && isset($nodeinfo['software']['name']) && is_string($nodeinfo['software']['name'])) {
            $name=$nodeinfo['software']['name'];
            if (array_key_exists($name,$softwares)) {
                $softwares[$name]['count']++;
                $softwares[$name]['lasturi']=$uri;
            } else {
                $softwares[$name]=array('count'=>1,'lasturi'=>$uri);
            }
            return(preg_match('#^(mastodon|corgidon)#',$name)===1);
        }
    }
    // nodeinfo was missing or unusable: fall back to the instance API
    sleep(1);
    $instinfo=getfc('https://'.$uri.'/api/v1/instance',$opts['timeout']);
    if ($instinfo['cont']===false)
        return(false);
    $instinfo=json_decode($instinfo['cont'],true);
    if (!is_array($instinfo))
        return(false);
    if (!array_key_exists('version',$instinfo)) {
        $versions[$uri]=$instinfo;
        return(false);
    }
    if (array_key_exists('pleroma',$instinfo))
        return(false);
    $version=$instinfo['version'];
    if (!is_string($version)) {
        // not something a regex can be applied to: record it as strange
        $versions[$uri]=$version;
        return(false);
    }
    if (preg_match('#(compatible|pleroma|pixelfed)#i',$version)==1)
        return(false);
    if (preg_match('#^[0-9]+\.[0-9]+\.[0-9]+#',$version)!==1) {
        $versions[$uri]=$version;
        return(false);
    }
    return(true);
}
//$url='www.team.starschlep.com/'; if (validhostname($url)) echo('OK: '.$url.N); else echo('KO: '.$url.N); die();
/**
 * Run one crawling round over $list, then recurse on the newly found peers
 * until a round finds none.
 *
 * For every instance in $list that is neither excluded nor dead, its peers
 * list is requested; if it loads and the instance is not yet in the global
 * $insts, the instance is checked with ismastinst() and, when it is a
 * mastodon instance, added to $insts and appended to the peers file. Every
 * valid, not excluded, not dead and never seen peer is queued for the next
 * round.
 *
 * A peer is registered in the global $ainsts as soon as it gets queued, so
 * that a peer listed by several instances of the same round is queued (and
 * later crawled) only once. Lookups use keyed arrays instead of in_array()
 * scans over lists that keep growing.
 *
 * @param array $list instance hostnames to crawl in this round
 * @param int   $id   round number, used only in the log output
 */
function crawl($list,$id) {
    global $ainsts, $insts, $deadinsts, $peersf, $opts, $tini;
    echo('~~~~~~~ BEGINNING '.$id.' ~~~~~~~'.N);
    waituntilonline();
    updexarr();
    // keyed copies of the "already seen" and "dead" lists, for fast lookups
    $seen=array_fill_keys($ainsts,true);
    $dead=($opts['excludedead']) ? array_fill_keys($deadinsts,true) : array();
    foreach ($list as $inst) {
        if (!isset($seen[$inst])) {
            $ainsts[]=$inst;
            $seen[$inst]=true;
        }
    }
    $nlist=array();
    $c=count($list);
    $i=0;
    $rtini=time();
    foreach ($list as $inst) {
        $i++;
        $now=time();
        $rtela=$now-$rtini;
        echo(N.'>>> '.$inst.N);
        // TET: total elapsed time; ETR: estimated time remaining
        echo('@@@ Round '.$id.', '.$i.'/'.$c.': TET: '.ght($now-$tini,null,0).'; ETR of this round: '.ght($rtela/$i*$c-$rtela,null,0).'; using '.ghs(memory_get_usage(true)).' mem. (peak: '.ghs(memory_get_peak_usage(true)).'); '.count($insts).' mastodon insts; '.count($nlist).' insts in $nlist'.N);
        if (ckexarr($inst)) {
            echo('NOTICE: I wont try and load peers from «'.$inst.'» since its in the exclude list.'.N);
            continue;
        }
        if ($opts['excludedead'] && isset($dead[$inst])) {
            echo('NOTICE: I wont try and load peers from «'.$inst.'» since its DEAD.'.N);
            continue;
        }
        echo('Trying to load «'.$inst.'»\'s peers... ');
        $peers=getfc('https://'.$inst.'/api/v1/instance/peers',$opts['timeout']);
        if ($peers['cont']===false) {
            echo('ERROR: '.$peers['emsg'].N);
            continue;
        }
        $peers=json_decode($peers['cont'],true);
        if (!is_array($peers)) {
            echo('ERROR: $peers is not an array (its type is '.gettype($peers).').'.N);
            continue;
        }
        echo('LOADED!'.N);
        if (in_array($inst,$insts)) {
            echo('NOTICE: «'.$inst.'» is not a new instance (it was already in $insts).'.N);
        } else {
            echo('Verifying whether «'.$inst.'» is a mastodon instance... ');
            if (!ismastinst($inst)) {
                echo('NO, ITS NOT :-('.N);
            } else {
                echo('YES, IT IS! :-)'.N);
                $insts[]=$inst;
                // the peers file may be unavailable: never write to an invalid handle
                if (is_resource($peersf))
                    fwrite($peersf,$inst.N);
            }
            writestrange();
        }
        foreach ($peers as $peer) {
            if (!is_string($peer)) {
                echo(' ERROR: I wont add this peer to next round list because its name is not a string.'.N);
            } elseif (!validhostname($peer)) {
                echo(' ERROR: I wont add «'.$peer.'» to next round list because its not a valid hostname.'.N);
            } elseif (ckexarr($peer)) {
                echo(' NOTICE: I wont add «'.$peer.'» to next round list because its name matches with an exclusion regex.'.N);
            } elseif (isset($seen[$peer])) {
                if ($opts['verbose'])
                    echo(' NOTICE: I wont add «'.$peer.'» to next round list because its already in $ainsts.'.N);
            } elseif ($opts['excludedead'] && isset($dead[$peer])) {
                if ($opts['verbose'])
                    echo(' NOTICE: I wont add «'.$peer.'» to next round list because its DEAD.'.N);
            } else {
                // HOORAY!
                echo(' ADDING PEER «'.$peer.'» to to next round list.'.N);
                $nlist[]=$peer;
                // register it right away, so it cannot be queued twice in this round
                $ainsts[]=$peer;
                $seen[$peer]=true;
            }
        }
    }
    echo('~~~~~~~ FINISHED '.$id.' ~~~~~~~'.N);
    if (count($nlist)>0)
        crawl($nlist,$id+1);
    else
        echo('$nlist is empty.'.N);
}
// Script entry point: take note of the starting time (crawl() uses it to
// report the total elapsed time), crawl beginning with the starting instance
// as round 1, then save the sorted list of instances and exit successfully.
$tini=time();
$firstround=array($opts['startinst']);
crawl($firstround,1);
echo('DONE CRAWLING! :-)'.N);
shutdown(true);
exit(0);
?>

View file

@ -1,5 +0,0 @@
mastodon.bida.im
mastodon.cisti.org
nebbia.fail
stereodon.social
snapj.saja.freemyip.com