2022-01-21 11:15:39 +01:00
|
|
|
// import p from '@prisma/client'
|
|
|
|
// const { PrismaClient } = p
|
|
|
|
import fetch from 'node-fetch'
|
|
|
|
import FeedParser from 'feedparser'
|
|
|
|
|
|
|
|
// const prisma = new PrismaClient()
|
|
|
|
|
|
|
|
import { getParams, maybeTranslate, parseContent } from './helper.js'
|
|
|
|
|
|
|
|
// let fetch
|
|
|
|
// import('node-fetch').then(f => {
|
|
|
|
// fetch = f
|
|
|
|
// })
|
|
|
|
// import fetch from 'node-fetch'
|
|
|
|
// const FeedParser = require('feedparser')
|
|
|
|
|
|
|
|
// // get('https://cavallette.noblogs.org/feed/atom')
|
|
|
|
const manager = {
|
|
|
|
/**
|
|
|
|
* check if post is new, updated or unknown
|
|
|
|
* @param {Post} post
|
|
|
|
*/
|
|
|
|
async isPostNew (post) {
|
|
|
|
// console.error(post.link)
|
|
|
|
const ret = await prisma.post.findUnique({ where: { URL: post.link } })
|
|
|
|
return !ret
|
|
|
|
},
|
|
|
|
isValid (post) {
|
|
|
|
return true
|
|
|
|
},
|
|
|
|
async sourceError (err, source) {
|
|
|
|
try {
|
|
|
|
await prisma.source.update({ where: { id: source.id }, data: { status: 'WARNING', lastError: String(err) } })
|
|
|
|
} catch (e) {
|
|
|
|
console.error(source, e)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
async sourceCompleted (source) {
|
|
|
|
try {
|
|
|
|
await prisma.source.update({ where: { id: source.id }, data: { status: 'OK', lastError: null }})
|
|
|
|
} catch(e) {
|
|
|
|
console.error(source, e)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
async get (source) {
|
|
|
|
try {
|
|
|
|
|
|
|
|
// Get a response stream
|
|
|
|
const res = await fetch(source.URL,
|
|
|
|
{
|
|
|
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
|
|
|
|
'accept': 'text/html,application/xhtml+xml'
|
|
|
|
})
|
|
|
|
|
|
|
|
// Setup feedparser stream
|
|
|
|
const feedparser = new FeedParser()
|
|
|
|
feedparser.on('error', e => manager.sourceError(e, source))
|
|
|
|
feedparser.on('end', e => manager.sourceCompleted(source))
|
|
|
|
feedparser.on('readable', async () => {
|
|
|
|
let post
|
|
|
|
while(post = feedparser.read()) {
|
|
|
|
// validate post
|
|
|
|
if (!manager.isValid(post)) return
|
|
|
|
|
|
|
|
// check if already exist and is not updated
|
|
|
|
if (!await manager.isPostNew(post)) return
|
|
|
|
try {
|
|
|
|
|
|
|
|
// dompurify
|
|
|
|
let { html, image } = parseContent(post.description || post.summary)
|
|
|
|
// console.error(post.enclosures)
|
|
|
|
const enclosuresImages = post.enclosures.filter(e => e.type.includes('image'))
|
|
|
|
image = enclosuresImages.length ? enclosuresImages[0].url : image
|
|
|
|
|
2022-02-14 13:06:33 +01:00
|
|
|
const data = {
|
2022-01-21 11:15:39 +01:00
|
|
|
date: post.pubdate,
|
|
|
|
title: post.title,
|
|
|
|
URL: post.link,
|
|
|
|
content: html,
|
|
|
|
image,
|
|
|
|
summary: post.summary,
|
2022-02-14 13:06:33 +01:00
|
|
|
source: { connect: { id: source.id } }
|
|
|
|
}
|
|
|
|
|
|
|
|
if (post.categories.length) {
|
|
|
|
data.tags = { connectOrCreate:
|
|
|
|
post.categories.map(name => ({
|
|
|
|
create: { name },
|
|
|
|
where: { name }
|
|
|
|
}))}
|
|
|
|
}
|
|
|
|
|
|
|
|
await prisma.post.create({ include: { tags: true }, data })
|
|
|
|
|
2022-01-21 11:15:39 +01:00
|
|
|
} catch (e) { console.error(e) }
|
|
|
|
// console.log(post.title)
|
|
|
|
// console.log(JSON.stringify(post, ' ', 4))
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
// Handle our response and pipe it to feedparser
|
|
|
|
if (res.status !== 200) throw new Error('Bad status code')
|
|
|
|
const charset = getParams(res.headers.get('content-type') || '').charset
|
|
|
|
let responseStream = res.body
|
|
|
|
responseStream = maybeTranslate(responseStream, charset)
|
|
|
|
|
|
|
|
// And boom goes the dynamite
|
|
|
|
responseStream.pipe(feedparser)
|
|
|
|
// res.body.pipe(feedparser)
|
|
|
|
} catch (e) {
|
|
|
|
// console.error('sono qui', e)
|
|
|
|
manager.sourceError(e, source)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// const Queue = require('bull');
|
|
|
|
// import pkg from 'bullmq'
|
|
|
|
import Queue from 'bull'
|
|
|
|
import { PrismaClient } from '@prisma/client'
|
|
|
|
// import fetch from 'node-fetch'
|
|
|
|
// import FeedParser from 'feedparser'
|
|
|
|
|
|
|
|
const prisma = new PrismaClient()
|
|
|
|
|
|
|
|
// import { getParams, maybeTranslate } from './helpers.mjs'
|
|
|
|
|
|
|
|
let queue
|
|
|
|
|
|
|
|
export async function add (s) {
|
2022-02-14 13:06:33 +01:00
|
|
|
queue.add(s, { jobId: s.id, repeat: { every: 100000 } })
|
2022-01-21 11:40:58 +01:00
|
|
|
manager.get(s)
|
2022-01-21 11:15:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
async function main () {
|
|
|
|
queue = new Queue('foo6', { limiter: { max : 10, duration: 20000 } })
|
|
|
|
queue.clean(1000)
|
|
|
|
await queue.obliterate({ force: true });
|
|
|
|
|
|
|
|
queue.process(job => manager.get(job.data))
|
|
|
|
|
|
|
|
const sources = await prisma.source.findMany()
|
|
|
|
// sources.forEach(manager.get)
|
|
|
|
// manager.get()
|
2022-02-14 13:06:33 +01:00
|
|
|
sources.forEach( s => queue.add(s, { jobId: s.id, repeat: { every: 100000 } }))
|
2022-01-21 11:15:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
main()
|
|
|
|
// manager.main()
|