Compare commits

...

13 commits

Author SHA1 Message Date
boyska
5dff6c5716 [FilterMore] sort 2021-10-16 00:56:19 +02:00
boyska
97da0be9f0 [docker-compose] error_log 2021-10-16 00:13:01 +02:00
boyska
0089605181 [FilterMore] has_media 2021-10-16 00:12:32 +02:00
boyska
82dd4994ce [FilterMore] include enclosures in output 2021-10-16 00:12:29 +02:00
boyska
d1c224b483 [FilterMore] NEW bridge for advanced filtering 2021-10-15 23:34:51 +02:00
boyska
77e4d3a00c docker-compose dev environment 2021-10-10 00:54:26 +02:00
baz
6533d42be9 RadioRaiBridge: FIX missing <link/> 2019-12-10 16:27:20 +01:00
baz
6c92bfc2d3 [RadioRai] get "final" URLs
Without this, every item has an enclosure with the same basename and
different query parameters.
This is correct, but some podcast readers (gPodder) mess it up, writing
every download to the same file.
2019-10-30 12:21:44 +01:00
baz
fd02965fe9 [RadioRai] NEW podcast from RadioRai (Italian) 2019-10-30 11:02:25 +01:00
7e082083b2 [AutoPodcaster] FIX feeds w/o url, but w/ content 2019-09-26 11:01:39 +02:00
1a4aea48df [AutoPodcasterBridge] fix metadata 2019-09-17 12:40:06 +02:00
9529e44326 [AutoPodcasterBridge] FIX nested lists 2019-09-17 12:37:15 +02:00
312250038e new bridge: AutoPodcasterBridge
if you have a feed of a radio show which does not seem to be a valid
podcast, this bridge will transform it into a valid podcast
2019-09-17 12:31:06 +02:00
4 changed files with 432 additions and 0 deletions

View file

@ -0,0 +1,113 @@
<?php
/**
 * Expands a feed and keeps only the items in which audio can be found,
 * attaching that audio to the item as enclosures.
 *
 * Audio is searched in the page the item links to or, when no page can be
 * loaded, in the item's own content. Two sources are recognised: <audio>
 * tags and archive.org embed iframes.
 */
class AutoPodcasterBridge extends FeedExpander {
    const MAINTAINER = 'boyska';
    const NAME = 'Auto Podcaster';
    const URI = '';
    const CACHE_TIMEOUT = 300; // 5 minutes
    const DESCRIPTION = 'Make a "multimedia" podcast out of a normal feed';
    const PARAMETERS = array('url' => array(
        'url' => array(
            'name' => 'URL',
            'required' => true
        )));

    /**
     * Tells whether an archive.org "format" label denotes an audio file.
     *
     * @param mixed $formatString Format label taken from the file listing.
     * @return bool True when the label contains "MP3" or starts with "Ogg".
     */
    private function archiveIsAudioFormat($formatString) {
        if (!is_string($formatString)) {
            return false;
        }
        return strpos($formatString, 'MP3') !== false ||
            strpos($formatString, 'Ogg') === 0;
    }

    /**
     * Tells whether an attribute value can be used as a media URL.
     *
     * The DOM elements are read through ->src, which the original code
     * compared against false; anything that is not a non-empty string is
     * rejected here so that no placeholder value ends up in the feed.
     *
     * @param mixed $src Value read from a "src" attribute.
     * @return bool
     */
    private function isUsableSource($src) {
        return is_string($src) && $src !== '';
    }

    /**
     * 1st extraction method: collects the sources of every <audio> element.
     *
     * @param mixed $dom Parsed document, or a falsy value when parsing failed.
     * @return array Map "first source URL" => ['sources' => [all URLs]].
     */
    private function extractAudio($dom) {
        $audios = [];
        if (!$dom) {
            return $audios;
        }
        foreach ($dom->find('audio') as $audioEl) {
            $sources = [];
            if ($this->isUsableSource($audioEl->src)) {
                $sources[] = $audioEl->src;
            }
            foreach ($audioEl->find('source') as $sourceEl) {
                // <source> tags without a src must not contribute an entry.
                if ($this->isUsableSource($sourceEl->src)) {
                    $sources[] = $sourceEl->src;
                }
            }
            if ($sources) {
                $audios[$sources[0]] = ['sources' => $sources];
            }
        }
        return $audios;
    }

    /**
     * 2nd extraction method: resolves archive.org embed iframes to the audio
     * files of the embedded item.
     *
     * For each matching iframe the "details" JSON listing is downloaded;
     * original audio files become entries and audio derivatives are appended
     * as extra sources of the original they were derived from. Results of
     * all iframes on the page are accumulated.
     *
     * @param mixed $dom Parsed document, or a falsy value when parsing failed.
     * @return array Map "original file URL" => ['sources' => [URLs]].
     */
    private function extractIframeArchive($dom) {
        $audios = [];
        if (!$dom) {
            return $audios;
        }
        foreach ($dom->find('iframe') as $iframeEl) {
            $src = $iframeEl->src;
            if (!is_string($src) || strpos($src, "https://archive.org/embed/") !== 0) {
                continue;
            }
            $listURL = preg_replace("/\/embed\//", "/details/", $src, 1) . "?output=json";
            $baseURL = preg_replace("/\/embed\//", "/download/", $src, 1);

            // A failed download or an unexpected payload only skips this
            // iframe instead of breaking the whole item.
            $body = file_get_contents($listURL);
            $list = ($body === false) ? null : json_decode($body);
            if (!is_object($list) || !isset($list->files)
                || !(is_object($list->files) || is_array($list->files))) {
                continue;
            }

            // First pass: original audio files.
            foreach ($list->files as $name => $data) {
                if (isset($data->source, $data->format)
                    && $data->source === 'original'
                    && $this->archiveIsAudioFormat($data->format)) {
                    $audios[$baseURL . $name] = ['sources' => [$baseURL . $name]];
                }
            }
            // Second pass: audio derivatives, attached to their original.
            foreach ($list->files as $name => $data) {
                if (!isset($data->source, $data->format, $data->original)) {
                    continue;
                }
                $originalKey = $baseURL . "/" . $data->original;
                if ($data->source === 'derivative'
                    && $this->archiveIsAudioFormat($data->format)
                    && isset($audios[$originalKey])) {
                    $audios[$originalKey]['sources'][] = $baseURL . $name;
                }
            }
        }
        return $audios;
    }

    /**
     * Parses a feed item and attaches the audio found for it.
     *
     * @param mixed $newItem Raw feed item, forwarded to the parent parser.
     * @return array|null The item with 'enclosures' set to the first source
     *                    of each audio found, or null when no audio exists
     *                    (presumably the parent then leaves the item out).
     */
    protected function parseItem($newItem){
        $item = parent::parseItem($newItem);

        // Prefer the linked page; fall back to the item content when the
        // page cannot be loaded (e.g. feeds without url, but with content).
        $dom = false;
        if (isset($item['uri']) && $item['uri'] !== '') {
            $dom = getSimpleHTMLDOMCached($item['uri']);
        }
        if (!$dom && isset($item['content']) && $item['content'] !== '') {
            $dom = str_get_html($item['content']);
        }
        if (!$dom) {
            return null;
        }

        $audios = array_merge(
            $this->extractAudio($dom),
            $this->extractIframeArchive($dom)
        );
        if (count($audios) === 0) {
            return null;
        }

        $item['enclosures'] = [];
        foreach ($audios as $audio) {
            $item['enclosures'][] = $audio['sources'][0];
        }
        return $item;
    }

    /**
     * Validates the "url" input and expands the feed it points to.
     */
    public function collectData(){
        if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') {
            // just in case someone finds a way to access local files by playing with the url
            returnClientError('The url parameter must either refer to http or https protocol.');
        }
        $this->collectExpandableDatas($this->getURI());
    }

    /**
     * @return string Bridge name followed by the feed URL when one is set.
     */
    public function getName(){
        if(!is_null($this->getInput('url'))) {
            return self::NAME . ' : ' . $this->getInput('url');
        }
        return parent::getName();
    }

    /**
     * @return mixed The value of the "url" input.
     */
    public function getURI(){
        return $this->getInput('url');
    }
}

View file

@ -0,0 +1,241 @@
<?php
/**
 * Expands a feed and keeps only the items that satisfy the configured
 * filters (title/body/author regular expressions, date bounds, presence of
 * media), optionally inverting the outcome and sorting the result.
 */
class FilterMoreBridge extends FeedExpander {
    const MAINTAINER = 'boyska';
    const NAME = 'FilterMore';
    const CACHE_TIMEOUT = 2;
    const DESCRIPTION = 'Filters a feed of your choice';
    const URI = 'https://git.lattuta.net/boyska/rss-bridge';
    const PARAMETERS = [
        [
            'url' => array(
                'name' => 'Feed URL',
                'required' => true,
            ),
            'conj_type' => array(
                'name' => 'Conjunction type',
                'type' => 'list',
                'required' => false,
                'values' => array(
                    'All conditions must be met' => 'and',
                    'Any condition must be met' => 'or',
                ),
                // Must be one of the values above.
                'defaultValue' => 'and',
            ),
            'title_re' => array(
                'name' => 'Filter item title (regular expression, see php.net/pcre_match for details)',
                'required' => false,
                'exampleValue' => '/breaking\ news/i',
            ),
            'body_re' => array(
                'name' => 'Filter body (regular expression)',
                'required' => false,
            ),
            'author_re' => array(
                'name' => 'Filter author (regular expression)',
                'required' => false,
                'exampleValue' => '/(technology|politics)/i',
            ),
            'newer_than' => array(
                'name' => 'Filter date: ok if newer than the value (see php.net/strtotime for details)',
                'required' => false,
                'exampleValue' => '-14 days',
            ),
            'older_than' => array(
                'name' => 'Filter date: ok if older than the value (see php.net/strtotime for details)',
                'required' => false,
                'exampleValue' => '-1 hour',
            ),
            'has_media' => array(
                'name' => 'Has at least 1 media inside',
                'type' => 'checkbox',
                'required' => false,
                'defaultValue' => false,
            ),
            'invert_filter' => array(
                'name' => 'Invert filter result',
                'type' => 'checkbox',
                'required' => false,
                'defaultValue' => false,
            ),
            'sort_by' => [
                'name' => 'Sort by',
                'type' => 'list',
                'required' => true,
                'values' => [
                    "Don't sort" => 'none',
                    'Date' => 'timestamp',
                    'Title' => 'title',
                    'Random' => 'random',
                ],
                // Must be one of the values above ("Date").
                'defaultValue' => 'timestamp',
            ],
            'sort_dir' => [
                'name' => 'Sort direction',
                'type' => 'list',
                'required' => true,
                'values' => [
                    'Ascending' => 'asc',
                    'Descending' => 'desc',
                ],
                'defaultValue' => 'asc',
            ],
        ]];

    /**
     * Collects the enclosures declared on a raw feed item.
     *
     * Both <enclosure> elements and <link rel="enclosure"> elements are
     * read. Entries without a URL are left out so that the media filter and
     * the emitted enclosure list agree with each other.
     *
     * @param mixed $newItem Raw feed item (accessed like a SimpleXML node).
     * @return array List of arrays, each holding at least a 'url' key.
     */
    private function extractEnclosures($newItem) {
        $found = [];
        if (isset($newItem->enclosure)) {
            foreach ($newItem->enclosure as $encl) {
                $serialized = [];
                foreach ($encl->attributes() as $key => $value) {
                    $serialized[$key] = (string)$value;
                }
                if (!isset($serialized['url']) || $serialized['url'] === '') {
                    continue;
                }
                // The length attribute may be absent.
                $serialized['length'] = isset($serialized['length'])
                    ? intval($serialized['length'])
                    : 0;
                $found[] = $serialized;
            }
        }
        if (isset($newItem->link)) {
            foreach ($newItem->link as $el) {
                if (((string)$el['rel']) !== 'enclosure') {
                    continue;
                }
                $href = (string)$el['href'];
                if ($href === '') {
                    continue;
                }
                $found[] = ['url' => $href];
            }
        }
        return $found;
    }

    /**
     * Parses a feed item and applies every active filter to it.
     *
     * Filters that are not configured return null and are ignored. When no
     * filter is active the item is always kept (the invert option is not
     * applied in that case).
     *
     * @param mixed $newItem Raw feed item, forwarded to the parent parser.
     * @return array|null The item, with 'enclosures' as a list of URLs, or
     *                    null when the item is filtered out.
     */
    protected function parseItem($newItem){
        $item = parent::parseItem($newItem);
        $item['enclosures'] = $this->extractEnclosures($newItem);

        $filters = ['filterByTitle', 'filterByBody', 'filterByAuthor', 'filterByDateNewer', 'filterByDateOlder', 'filterByMedia'];
        $results = [];
        foreach ($filters as $filter) {
            $filter_res = $this->$filter($item);
            if ($filter_res === null) {
                continue;
            }
            $results[] = $filter_res;
        }

        // The filters work on the detailed enclosure data; the emitted item
        // only carries the URLs.
        $urls = [];
        foreach ($item['enclosures'] as $enclosure) {
            $urls[] = $enclosure['url'];
        }
        $item['enclosures'] = $urls;

        if (count($results) === 0) {
            return $item;
        }

        if ($this->getConjType() === 'and') {
            $result = !in_array(false, $results, true);
        } else { // or
            $result = in_array(true, $results, true);
        }
        if ($this->getInvertResult()) {
            $result = !$result;
        }
        return $result ? $item : null;
    }

    /**
     * Returns the value an item is sorted by.
     *
     * @param array $item Parsed item.
     * @return mixed The field selected by the "sort_by" input, or null when
     *               the item does not have it.
     */
    protected function sortItemKey($item) {
        $sort_by = $this->getInput('sort_by');
        return isset($item[$sort_by]) ? $item[$sort_by] : null;
    }

    /**
     * Expands the feed, then orders the collected items.
     *
     * Timestamps are compared as numbers and every other field as a string.
     * The direction is applied after the ordering step, whatever that step
     * was.
     *
     * @param string $url      Feed URL.
     * @param int    $maxItems Forwarded to the parent implementation.
     */
    public function collectExpandableDatas($url, $maxItems = -1){
        parent::collectExpandableDatas($url, $maxItems);

        $sortBy = $this->getInput('sort_by');
        if ($sortBy === 'random') {
            shuffle($this->items);
        } elseif ($sortBy !== null && $sortBy !== '' && $sortBy !== 'none') {
            // String comparison misorders integers of different lengths, so
            // the comparison mode is chosen from the sort field.
            $numeric = ($sortBy === 'timestamp');
            usort($this->items, function($itemA, $itemB) use ($numeric) {
                $valA = $this->sortItemKey($itemA);
                $valB = $this->sortItemKey($itemB);
                if ($numeric) {
                    return $this->cmp((int)$valA, (int)$valB);
                }
                return strcmp((string)$valA, (string)$valB);
            });
        }
        if ($this->getInput('sort_dir') === 'desc') {
            $this->items = array_reverse($this->items);
        }
    }

    /**
     * Three-way comparison.
     *
     * @return int 1 when $a > $b, -1 when $a < $b, 0 otherwise.
     */
    private function cmp($a, $b) {
        if($a > $b) return 1;
        if($a < $b) return -1;
        return 0;
    }

    /**
     * Matches a field against a regular expression.
     *
     * @param mixed $field Text to test.
     * @param mixed $re    Regular expression, delimiters included.
     * @return bool|null Null when no expression is configured (null or empty
     *                   string); otherwise whether the expression matches.
     *                   An expression preg_match() cannot run counts as "no
     *                   match".
     */
    private function filterByFieldRegexp($field, $re){
        if ($re === null || $re === '') {
            return null;
        }
        return preg_match($re, (string)$field) === 1;
    }

    /** Applies the "title_re" input to the item title. */
    protected function filterByTitle($item){
        $re = $this->getInput('title_re');
        return $this->filterByFieldRegexp(isset($item['title']) ? $item['title'] : '', $re);
    }

    /** Applies the "body_re" input to the item content. */
    protected function filterByBody($item){
        $re = $this->getInput('body_re');
        return $this->filterByFieldRegexp(isset($item['content']) ? $item['content'] : '', $re);
    }

    /** Applies the "author_re" input to the item author. */
    protected function filterByAuthor($item){
        $re = $this->getInput('author_re');
        return $this->filterByFieldRegexp(isset($item['author']) ? $item['author'] : '', $re);
    }

    /**
     * Compares the item timestamp with a date given as a strtotime() string.
     *
     * @param array  $item     Parsed item; a missing timestamp counts as 0.
     * @param string $input    Name of the input holding the date.
     * @param int    $expected 1 to require a newer item, -1 an older one.
     * @return bool|null Null when the input is not configured.
     * @throws Exception When the configured value cannot be parsed.
     */
    private function filterByDate($item, $input, $expected){
        $val = $this->getInput($input);
        if ($val === null || $val === '') {
            return null;
        }
        $ts = strtotime($val);
        if ($ts === false) {
            throw new Exception("Invalid time specification: " . $val);
        }
        $itemTs = isset($item['timestamp']) ? $item['timestamp'] : 0;
        $cmp = $this->cmp($itemTs, $ts); // 1 if newer, -1 if older
        return $cmp === $expected;
    }

    /** Keeps items newer than the "newer_than" input. */
    protected function filterByDateNewer($item){
        return $this->filterByDate($item, 'newer_than', 1);
    }

    /** Keeps items older than the "older_than" input. */
    protected function filterByDateOlder($item){
        return $this->filterByDate($item, 'older_than', -1);
    }

    /**
     * Keeps items having at least one enclosure.
     *
     * @return bool|null Null when the "has_media" input is off.
     */
    protected function filterByMedia($item) {
        if (!$this->getInput('has_media')) {
            return null;
        }
        return count($item['enclosures']) > 0;
    }

    /** @return mixed Value of the "conj_type" input. */
    protected function getConjType(){
        return $this->getInput('conj_type');
    }

    /** @return mixed Value of the "invert_filter" input. */
    protected function getInvertResult(){
        return $this->getInput('invert_filter');
    }

    /**
     * @return mixed The "url" input, or the parent's URI when it is empty.
     */
    public function getURI(){
        $url = $this->getInput('url');
        if(empty($url)) {
            $url = parent::getURI();
        }
        return $url;
    }

    /**
     * Validates the "url" input and expands the feed it points to.
     */
    public function collectData(){
        if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') {
            // just in case someone finds a way to access local files by playing with the url
            returnClientError('The url parameter must either refer to http or https protocol.');
        }
        try{
            $this->collectExpandableDatas($this->getURI());
        } catch (HttpException $e) {
            // One immediate retry; a second failure is not caught here.
            $this->collectExpandableDatas($this->getURI());
        }
    }
}

View file

@ -0,0 +1,62 @@
<?php
/**
 * Builds a podcast-style feed from the episode archive page of a Radio Rai
 * programme.
 */
class RadioRaiBridge extends BridgeAbstract {
    const MAINTAINER = 'boyska';
    const NAME = 'Radio Rai';
    const URI = 'https://www.raiplayradio.it';
    const CACHE_TIMEOUT = 900; // 15min
    const DESCRIPTION = 'Segui le trasmissioni radio rai con feed/podcast valido';
    const PARAMETERS = array( array(
        'txname' => array(
            'name' => 'txname',
            'required' => true
        )
    ));

    /**
     * Resolves the URL a media link redirects to.
     *
     * A header-only request is sent without following redirects and the
     * redirect target reported by curl is returned. Without this, the
     * enclosures share the same basename and differ only in their query
     * parameters, which some podcast readers handle badly.
     *
     * @param string $url Media URL found in the page.
     * @return string The redirect target, or $url itself when the request
     *                fails or no redirect is reported.
     */
    private function getFinalURL($url) {
        $ch = curl_init($url);
        if ($ch === false) {
            return $url;
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_NOBODY, true);
        curl_setopt($ch, CURLOPT_HEADER, true);

        if (curl_exec($ch) === false) {
            // Keep the item usable: the unresolved URL is still a valid link.
            return $url;
        }
        $redirect = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        if (!is_string($redirect) || $redirect === '') {
            return $url;
        }
        return $redirect;
    }

    /**
     * Downloads the archive page and turns every element carrying both a
     * data-mediapolis and a data-title attribute into a feed item.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError('No results for this query.');

        foreach ($html->find('[data-mediapolis]') as $episode) {
            $title = $episode->getAttribute('data-title');
            if ($title === false) {
                continue;
            }
            $audiourl = $episode->getAttribute('data-mediapolis');

            $item = array();
            $item['author'] = $this->getInput('txname');
            $item['title'] = $title;
            $item['content'] = $episode->plaintext;
            $item['enclosures'] = [ $this->getFinalURL($audiourl) ];
            $item['uri'] = static::URI . $episode->getAttribute('data-href');
            $this->items[] = $item;
        }
    }

    /**
     * @return string URL of the episode archive of the "txname" programme.
     */
    public function getURI(){
        return 'https://www.raiplayradio.it/programmi/' . $this->getInput('txname') . '/archivio/puntate/';
    }

    /**
     * @return string Bridge name followed by the programme name when set.
     */
    public function getName(){
        if($this->getInput('txname')) {
            return 'Radio Rai - ' . $this->getInput('txname');
        }
        return parent::getName();
    }
}

16
docker-compose.yaml Normal file
View file

@ -0,0 +1,16 @@
# Development environment: a single "php" service built from this directory.
# NOTE(review): the nesting indentation appears to have been stripped from
# this view; restore it (services > php > build/environment/volumes/ports)
# before using the file.
version: "3"
services:
php:
build:
# The build context is the directory containing this file.
context: .
environment:
# The names suggest PHP error settings (display off, logging on, all levels,
# memory-leak reports); how they are applied depends on the image that gets
# built - TODO confirm against the Dockerfile.
- 'PHP_DISPLAY_ERRORS=0'
- 'PHP_LOG_ERRORS=1'
- 'PHP_ERROR_REPORTING=E_ALL'
- 'PHP_REPORT_MEMLEAKS=1'
volumes:
# Mounts the working tree at /app/public/ inside the container.
- ./:/app/public/
ports:
# Host port 8001 is mapped to container port 80.
- "8001:80"