chew/server/helper.js

160 lines
No EOL
4.9 KiB
JavaScript

// const iconv = require('iconv-lite')
import iconv from 'iconv-lite'
import FeedParser from 'feedparser'
import { parseHTML } from 'linkedom'
import fetch from 'node-fetch'
import createDOMPurify from 'dompurify'
import { JSDOM } from 'jsdom'
// Server-side sanitizer: a DOMPurify instance bound to the window of an empty jsdom document.
const { window } = new JSDOM('')
const DOMPurify = createDOMPurify(window)
/**
 * Parse the `name=value` parameters of a semicolon-separated header value,
 * e.g. `text/html; charset="utf-8"` gives `{ charset: 'utf-8' }`.
 *
 * - Segments without `=` (such as the media type itself) are ignored.
 * - Only the first `=` separates name from value, so values may contain `=`.
 * - Names are lower-cased, since header parameter names are case-insensitive.
 * - One pair of surrounding double quotes is removed from a value.
 * - Quoted values that themselves contain `;` are not supported.
 *
 * @param {string} str - Raw header value.
 * @returns {Object<string, string>} Parameter map; empty when `str` is not a
 *   string or carries no parameters.
 */
export function getParams (str) {
  if (typeof str !== 'string') {
    return {}
  }
  return str.split(';').reduce((params, param) => {
    const separator = param.indexOf('=')
    if (separator === -1) {
      return params
    }
    const name = param.slice(0, separator).trim().toLowerCase()
    let value = param.slice(separator + 1).trim()
    if (value.length >= 2 && value.startsWith('"') && value.endsWith('"')) {
      value = value.slice(1, -1)
    }
    params[name] = value
    return params
  }, {})
}
// Before an element is sanitized, strip Facebook's `fbclid` tracking parameter
// from its href, and from its text when the text shows the same tracked URL.
DOMPurify.addHook('beforeSanitizeElements', node => {
  // Nodes without the attribute API, or without an href, are not touched.
  if (!node.hasAttribute || !node.hasAttribute('href')) {
    return node
  }
  const href = node.getAttribute('href')
  if (!href.includes('fbclid=')) {
    return node
  }
  try {
    // `URL` is the global WHATWG class. The former `new URL.URL(href)` always
    // threw (`URL.URL` is undefined), so the parameter was never removed.
    const url = new URL(href)
    url.searchParams.delete('fbclid')
    node.setAttribute('href', url.href)
    if (node.textContent.includes('fbclid=')) {
      node.textContent = url.href
    }
  } catch {
    // Relative or malformed hrefs cannot be parsed without a base URL:
    // leave the node exactly as it was.
  }
  return node
})
/**
 * Sanitize an HTML fragment and extract the first image it references.
 *
 * Only a small set of formatting tags plus `href`, `target` and `src`
 * attributes survive sanitization (see the options below).
 *
 * @param {string} html - Untrusted HTML.
 * @returns {{ html: string, image: (string|undefined) }} The sanitized markup
 *   and the `src` of its first `<img>`; `image` is undefined when there is none.
 */
export function parseContent (html) {
  const saneHTML = DOMPurify.sanitize(html, {
    CUSTOM_ELEMENT_HANDLING: {
      tagNameCheck: /^(gancio-.*|display-feed)/,
      attributeNameCheck: /(feed|id|theme)/,
      allowCustomizedBuiltInElements: true // allow customized built-ins
    },
    ALLOWED_TAGS: ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'br', 'i', 'span', 'img', 'figure',
      'h6', 'b', 'a', 'li', 'ul', 'ol', 'code', 'blockquote', 'u', 's', 'strong'],
    ALLOWED_ATTR: ['href', 'target', 'src']
  })

  // Look for the image in the sanitized markup rather than in the raw input,
  // so a `src` the sanitizer dropped can never be handed back to the caller.
  const { document } = new JSDOM(saneHTML).window
  const img = document.querySelector('img[src]')
  const image = img ? img.getAttribute('src') : undefined

  return { html: saneHTML, image }
}
/**
 * Route a stream through an iconv-lite decoder when the declared charset is
 * not a UTF-8 variant.
 *
 * @param {*} res - Source stream; must offer `pipe` and `emit` when decoding is needed.
 * @param {string} [charset] - Charset declared for the stream, if any.
 * @returns {*} The decoded stream, or `res` itself when no charset is given,
 *   the charset is already UTF-8, or the decoder could not be set up (in which
 *   case an `error` event is emitted on `res`).
 */
export function maybeTranslate (res, charset) {
  // Nothing to convert: charset unknown or already UTF-8.
  if (!charset || /utf-*8/i.test(charset)) {
    return res
  }

  try {
    const decoder = iconv.decodeStream(charset)
    console.log('Converting from charset %s to utf-8', charset)
    // From here on the caller consumes the decoder's output instead of the raw stream.
    return res.pipe(decoder)
  } catch (err) {
    res.emit('error', err)
    return res
  }
}
// How many <link rel="alternate"> hops are followed from HTML pages before
// giving up, so pages that keep pointing at other HTML pages cannot cause an
// endless chain of requests.
const MAX_FEED_DISCOVERY_HOPS = 3

// Feed link types looked for in an HTML page, in order of preference.
const FEED_LINK_TYPES = ['application/atom+xml', 'application/rss+xml']

// Map each link type advertised by the page to the absolute URL of the first
// usable <link rel="alternate"> of that type.
function findFeedLinks (document, baseURL) {
  const feeds = new Map()
  document.querySelectorAll('link[rel=alternate]').forEach(link => {
    const type = link.getAttribute('type')
    const href = link.getAttribute('href')
    if (!type || !href || feeds.has(type)) {
      return
    }
    try {
      // Feed links are frequently relative: resolve them against the page URL.
      feeds.set(type, new URL(href, baseURL).href)
    } catch {
      // Unparsable href: ignore this link and keep looking.
    }
  })
  return feeds
}

// Fetch `url`; follow an advertised feed link when the response is HTML
// (at most `hopsLeft` times), otherwise parse the response as a feed.
async function fetchFeedDetails (url, hopsLeft) {
  // NOTE(review): this disables TLS certificate verification for every HTTPS
  // request made by the process from now on, not only for this fetch. Kept for
  // compatibility; consider a per-request agent instead.
  process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'

  const res = await fetch(url, {
    // Headers must live under `headers`; as top-level options they were never sent.
    headers: {
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
      // The same request serves HTML pages and feeds, so accept both.
      accept: 'text/html,application/xhtml+xml,application/atom+xml,application/rss+xml,application/xml;q=0.9,*/*;q=0.8'
    }
  })
  if (res.status !== 200) throw new Error('Bad status code')

  // The header may be absent; treat that as "not HTML" instead of crashing.
  const contentType = res.headers.get('content-type') || ''

  if (contentType.includes('html')) {
    if (hopsLeft <= 0) {
      throw new Error(`Too many feed discovery hops, stopped at ${url}`)
    }
    const { document } = parseHTML(await res.text())
    const feeds = findFeedLinks(document, res.url || url)
    const feedType = FEED_LINK_TYPES.find(type => feeds.has(type))
    if (!feedType) {
      throw new Error(`No atom/rss feed advertised at ${url}`)
    }
    return fetchFeedDetails(feeds.get(feedType), hopsLeft - 1)
  }

  const charset = getParams(contentType).charset

  return new Promise((resolve, reject) => {
    const feedparser = new FeedParser()
    const resolveWithMeta = () => {
      feedparser.meta.URL = url
      resolve(feedparser.meta)
    }

    feedparser.on('readable', () => {
      // Only the metadata is needed, but the parsed items still have to be
      // read: otherwise the parser's buffer fills up and back-pressure stalls
      // the response stream.
      while (feedparser.read() !== null) {
        // discard item
      }
      resolveWithMeta()
    })
    feedparser.on('error', reject)
    // A feed without items may end without a prior `readable`; still return its metadata.
    feedparser.on('end', resolveWithMeta)

    const responseStream = maybeTranslate(res.body, charset)
    // `pipe` does not forward source errors, so reject on them explicitly.
    // The listeners are attached after maybeTranslate, so a failed decoder
    // setup still falls back to parsing the raw stream, as before.
    res.body.on('error', reject)
    if (responseStream !== res.body) {
      responseStream.on('error', reject)
    }
    responseStream.pipe(feedparser)
  })
}

/**
 * Check whether a URL is a valid atom/rss feed and retrieve its details.
 * When the URL serves HTML, look for a feed advertised through
 * `<link rel="alternate">` (atom preferred over rss) and use that instead.
 *
 * @param {string} URL - Address of a feed, or of an HTML page advertising one.
 * @returns {Promise<Object>} The metadata object produced by feedparser, with
 *   an extra `URL` property holding the address of the feed actually parsed.
 * @throws {Error} When the response status is not 200, when an HTML page
 *   advertises no atom/rss feed, when too many HTML hops are needed, or when
 *   the feed cannot be parsed.
 */
export async function getFeedDetails (URL) {
  return fetchFeedDetails(URL, MAX_FEED_DISCOVERY_HOPS)
}
// module.exports = { getParams, getFeedDetails, maybeTranslate }