Merge branch 'bridges2feeds' of https://framagit.org/peetah/rss-bridge
This commit is contained in:
commit
2331c4fbab
17 changed files with 222 additions and 637 deletions
|
@ -1,66 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
#ini_set('display_errors', 'On');
|
require_once('WordPressBridge.php');
|
||||||
#error_reporting(E_ALL);
|
|
||||||
class ArstechnicaBridge extends BridgeAbstract {
|
|
||||||
|
|
||||||
const MAINTAINER = "prysme";
|
class ArstechnicaBridge extends WordPressBridge {
|
||||||
const NAME = "ArstechnicaBridge";
|
|
||||||
const URI = "http://arstechnica.com";
|
|
||||||
const DESCRIPTION = "The PC enthusiast's resource. Power users and the tools they love, without computing religion";
|
|
||||||
|
|
||||||
function StripWithDelimiters($string, $start, $end) {
|
|
||||||
while (strpos($string, $start) !== false) {
|
|
||||||
$section_to_remove = substr($string, strpos($string, $start));
|
|
||||||
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
|
|
||||||
$string = str_replace($section_to_remove, '', $string);
|
|
||||||
} return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
function StripCDATA($string) {
|
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
function ExtractContent($url) {
|
|
||||||
#echo $url;
|
|
||||||
$html2 = $this->getSimpleHTMLDOM($url);
|
|
||||||
|
|
||||||
$text = $html2->find("section[id='article-guts']", 0);
|
|
||||||
/*foreach ($text->find('<aside id="social-left">') as $node)
|
|
||||||
{ $node = NULL; }*/
|
|
||||||
$text = $this->StripWithDelimiters($text->innertext,'<aside id="social-left">','</aside>');
|
|
||||||
$text = $this->StripWithDelimiters($text,'<figcaption class="caption">','</figcaption>');
|
|
||||||
$text = $this->StripWithDelimiters($text,'<div class="gallery shortcode-gallery">','</div>');
|
|
||||||
//error_log("ICI", 0);
|
|
||||||
//error_log($text, 0);
|
|
||||||
|
|
||||||
return $text;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(){
|
|
||||||
|
|
||||||
$html = $this->getSimpleHTMLDOM('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
|
|
||||||
$limit = 0;
|
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
if($limit < 5) {
|
|
||||||
$item = array();
|
|
||||||
$item['title'] = $this->StripCDATA($element->find('title', 0)->innertext);
|
|
||||||
$item['uri'] = $this->StripCDATA($element->find('guid', 0)->plaintext);
|
|
||||||
$item['author'] = $this->StripCDATA($element->find('author', 0)->innertext);
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
|
||||||
//$item['content'] = $item['uri'];
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
|
||||||
return 7200; // 2h
|
|
||||||
}
|
|
||||||
|
|
||||||
|
const MAINTAINER = "prysme";
|
||||||
|
const NAME = "ArstechnicaBridge";
|
||||||
|
const URI = "http://arstechnica.com";
|
||||||
|
const DESCRIPTION = "The PC enthusiast's resource. Power users and the tools they love, without computing religion";
|
||||||
|
const PARAMETERS = array();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
class FreenewsBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class FreenewsBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = "mitsukarenai";
|
const MAINTAINER = "mitsukarenai";
|
||||||
const NAME = "Freenews";
|
const NAME = "Freenews";
|
||||||
const URI = "http://freenews.fr";
|
const URI = "http://freenews.fr";
|
||||||
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox)";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
parent::collectExpandableDatas('http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem) {
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
|
|
||||||
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
$content = $articlePage->find('.post-container', 0);
|
|
||||||
$item['content'] = $content->innertext;
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
35
bridges/GizmodoBridge.php
Normal file
35
bridges/GizmodoBridge.php
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
<?php
|
||||||
|
/**
 * Bridge that replaces the content of each Gizmodo feed item with text
 * taken from the article page itself.
 *
 * Feed retrieval and basic item parsing are delegated to the parent
 * FeedExpander class; this class only overrides the 'content' field.
 */
class GizmodoBridge extends FeedExpander {

    const MAINTAINER = "polopollo";
    const NAME = "Gizmodo";
    const URI = "http://gizmodo.com/";
    const DESCRIPTION = "Returns the newest posts from Gizmodo (full text).";

    /**
     * Builds one item and fills its 'content' from the article page.
     *
     * @param mixed $item Raw feed entry, passed unchanged to the parent parser.
     * @return array Parsed item whose 'content' is either the filtered
     *               article HTML or a "Could not load" message.
     */
    protected function parseItem($item){
        $item = parent::parseItem($item);

        $articleHTMLContent = $this->getSimpleHTMLDOMCached($item['uri']);
        if(!$articleHTMLContent){
            // The page could not be loaded: report it in place of the content.
            $item['content'] = 'Could not load '.$item['uri'];
            return $item;
        }

        // The article container may be missing (layout change, error page);
        // fall back to an empty body instead of reading a property on a
        // non-object.
        $body = $articleHTMLContent->find('div.entry-content', 0);
        $text = is_object($body) ? $body->innertext : '';

        // The tag whitelist below drops iframes, so list each one as a
        // plain link first.
        foreach($articleHTMLContent->find('pagespeed_iframe') as $element) {
            $text .= '<p>link to a iframe (could be a video): <a href="'.$element->src.'">'.$element->src.'</a></p><br>';
        }

        // Keep only basic formatting tags.
        $item['content'] = strip_tags($text, '<p><b><a><blockquote><img><em>');
        return $item;
    }

    /**
     * Collects the items of the Gizmodo full-text feed.
     */
    public function collectData(){
        $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full');
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration(){
        return 1800; // 30min
    }
}
|
|
@ -1,48 +0,0 @@
|
||||||
<?php
|
|
||||||
/**
 * Bridge returning the newest GizmodoFR posts with their full text.
 *
 * The site feed is read for titles, links and dates; the body of every
 * item is then fetched from the article page.
 */
class GizmodoFRBridge extends BridgeAbstract{

    const MAINTAINER = "polopollo";
    const NAME = "GizmodoFR";
    const URI = "http://www.gizmodo.fr/";
    const DESCRIPTION = "Returns the 15 newest posts from GizmodoFR (full text).";

    /**
     * Downloads an article page and returns its filtered HTML.
     *
     * This is a method rather than a function nested in collectData():
     * a nested named function has no access to $this, and would be
     * declared a second time if collectData() ran twice.
     *
     * @param string $url Article address.
     * @return string Filtered article HTML, or a "Could not load" message.
     */
    private function GizmodoFRExtractContent($url) {
        $articleHTMLContent = $this->getSimpleHTMLDOM($url);
        if(!$articleHTMLContent){
            return 'Could not load '.$url;
        }

        // Concatenate thumbnail, excerpt and body, skipping any part that
        // is absent from the page.
        $text = '';
        foreach(array('div.entry-thumbnail', 'div.entry-excerpt', 'div.entry-content') as $selector) {
            $node = $articleHTMLContent->find($selector, 0);
            if(is_object($node)){
                $text .= $node->innertext;
            }
        }

        // The tag whitelist below drops iframes, so list each one as a
        // plain link first.
        foreach($articleHTMLContent->find('pagespeed_iframe') as $element) {
            $text .= '<p>link to a iframe (could be a video): <a href="'.$element->src.'">'.$element->src.'</a></p><br>';
        }

        return strip_tags($text, '<p><b><a><blockquote><img><em>');
    }

    /**
     * Reads the site feed and stores up to 15 items in $this->items.
     */
    public function collectData(){
        // URI already ends with '/', so normalise it to avoid a double
        // slash in the feed address.
        $feedUrl = rtrim(self::URI, '/').'/feed';
        $rssFeed = $this->getSimpleHTMLDOM($feedUrl)
            or $this->returnServerError('Could not request '.$feedUrl);

        $limit = 0;
        foreach($rssFeed->find('item') as $element) {
            if($limit >= 15) {
                break; // enough items; no need to walk the rest of the feed
            }

            $item = array();
            $item['title'] = $element->find('title', 0)->innertext;
            $item['uri'] = $element->find('guid', 0)->plaintext;
            $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
            $item['content'] = $this->GizmodoFRExtractContent($item['uri']);
            $this->items[] = $item;
            $limit++;
        }
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration(){
        return 1800; // 30min
    }
}
|
|
|
@ -1,32 +0,0 @@
|
||||||
<?php
|
|
||||||
/**
 * Bridge returning the newest Gurumed posts, using the description
 * carried by the site feed as item content.
 */
class GuruMedBridge extends BridgeAbstract{

    const MAINTAINER = "qwertygc";
    const NAME = "GuruMed";
    const URI = "http://www.gurumed.org";
    const DESCRIPTION = "Returns the 5 newest posts from Gurumed (full text)";

    /**
     * Removes the CDATA opening and closing markers from a feed value.
     *
     * @param string $string Raw text taken from a feed element.
     * @return string The text without '<![CDATA[' and ']]>'.
     */
    private function GurumedStripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Reads the site feed and stores up to 5 items in $this->items.
     */
    public function collectData(){
        // URI has no trailing slash, so the separator must be added here;
        // without it the address becomes "http://www.gurumed.orgfeed".
        $html = $this->getSimpleHTMLDOM(self::URI.'/feed')
            or $this->returnServerError('Could not request Gurumed.');

        $limit = 0;
        foreach($html->find('item') as $element) {
            if($limit >= 5) {
                break; // enough items; no need to walk the rest of the feed
            }

            $item = array();
            $item['title'] = $this->GurumedStripCDATA($element->find('title', 0)->innertext);
            $item['uri'] = $this->GurumedStripCDATA($element->find('guid', 0)->plaintext);
            $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
            // Keep only paragraphs, links and line breaks from the description.
            $item['content'] = $this->GurumedStripCDATA(strip_tags($element->find('description', 0), '<p><a><br>'));
            $this->items[] = $item;
            $limit++;
        }
    }
}
|
|
|
@ -1,38 +1,22 @@
|
||||||
<?php
|
<?php
|
||||||
class KoreusBridge extends BridgeAbstract{
|
class KoreusBridge extends FeedExpander {
|
||||||
|
|
||||||
const MAINTAINER = "pit-fgfjiudghdf";
|
const MAINTAINER = "pit-fgfjiudghdf";
|
||||||
const NAME = "Koreus";
|
const NAME = "Koreus";
|
||||||
const URI = "http://www.koreus.com/";
|
const URI = "http://www.koreus.com/";
|
||||||
const DESCRIPTION = "Returns the 5 newest posts from Koreus (full text)";
|
const DESCRIPTION = "Returns the newest posts from Koreus (full text)";
|
||||||
|
|
||||||
private function KoreusStripCDATA($string) {
|
protected function parseItem($item) {
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
$item = parent::parseItem($item);
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function KoreusExtractContent($url) {
|
$html = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
$html2 = $this->getSimpleHTMLDOM($url);
|
$text = $html->find('p.itemText', 0)->innertext;
|
||||||
$text = $html2->find('p[class=itemText]', 0)->innertext;
|
$item['content'] = utf8_encode($text);
|
||||||
$text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text));
|
|
||||||
return $text;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.');
|
$this->collectExpandableDatas('http://feeds.feedburner.com/Koreus-articles');
|
||||||
$limit = 0;
|
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
if($limit < 5) {
|
|
||||||
$item = array();
|
|
||||||
$item['title'] = $this->KoreusStripCDATA($element->find('title', 0)->innertext);
|
|
||||||
$item['uri'] = $this->KoreusStripCDATA($element->find('guid', 0)->plaintext);
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = $this->KoreusExtractContent($item['uri']);
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,42 +1,13 @@
|
||||||
<?php
|
<?php
|
||||||
class LeJournalDuGeekBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class LeJournalDuGeekBridge extends WordPressBridge{
|
||||||
|
|
||||||
const MAINTAINER = "polopollo";
|
const MAINTAINER = "polopollo";
|
||||||
const NAME = "journaldugeek.com (FR)";
|
const NAME = "journaldugeek.com (FR)";
|
||||||
const URI = "http://www.journaldugeek.com/";
|
const URI = "http://www.journaldugeek.com/";
|
||||||
const DESCRIPTION = "Returns the 5 newest posts from LeJournalDuGeek (full text).";
|
const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text).";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$this->collectExpandableDatas(self::URI . 'rss', 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function LeJournalDuGeekExtractContent($url) {
|
|
||||||
$articleHTMLContent = $this->getSimpleHTMLDOMCached($url);
|
|
||||||
$text = $articleHTMLContent->find('div.post-content', 0)->innertext;
|
|
||||||
|
|
||||||
foreach($articleHTMLContent->find('a.more') as $element) {
|
|
||||||
if ($element->innertext == "Source") {
|
|
||||||
$text = $text . '<p><a href="' . $element->href . '">Source : ' . $element->href . '</a></p>';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($articleHTMLContent->find('iframe') as $element) {
|
|
||||||
if (preg_match("/youtube/i", $element->src)) {
|
|
||||||
$text = $text . '// An IFRAME to Youtube was included in the article: <a href="' . $element->src . '">' . $element->src . '</a><br>';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $text);
|
|
||||||
$text = strip_tags($text, '<p><b><a><blockquote><img><em><br/><br><ul><li>');
|
|
||||||
return $text;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration(){
|
public function getCacheDuration(){
|
||||||
return 1800; // 30min
|
return 1800; // 30min
|
||||||
|
|
|
@ -1,68 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
class NakedSecurityBridge extends BridgeAbstract {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
const MAINTAINER = 'ORelio';
|
class NakedSecurityBridge extends WordPressBridge {
|
||||||
const NAME = 'Naked Security';
|
|
||||||
const URI = 'https://nakedsecurity.sophos.com/';
|
|
||||||
const DESCRIPTION = 'Returns the newest articles.';
|
|
||||||
|
|
||||||
public function collectData(){
|
const MAINTAINER = 'ORelio';
|
||||||
|
const NAME = 'Naked Security';
|
||||||
function StripRecursiveHTMLSection($string, $tag_name, $tag_start) {
|
const URI = 'https://nakedsecurity.sophos.com/';
|
||||||
$open_tag = '<'.$tag_name;
|
const DESCRIPTION = 'Returns the newest articles.';
|
||||||
$close_tag = '</'.$tag_name.'>';
|
const PARAMETERS = array();
|
||||||
$close_tag_length = strlen($close_tag);
|
|
||||||
if (strpos($tag_start, $open_tag) === 0) {
|
|
||||||
while (strpos($string, $tag_start) !== false) {
|
|
||||||
$max_recursion = 100;
|
|
||||||
$section_to_remove = null;
|
|
||||||
$section_start = strpos($string, $tag_start);
|
|
||||||
$search_offset = $section_start;
|
|
||||||
do {
|
|
||||||
$max_recursion--;
|
|
||||||
$section_end = strpos($string, $close_tag, $search_offset);
|
|
||||||
$search_offset = $section_end + $close_tag_length;
|
|
||||||
$section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length);
|
|
||||||
$open_tag_count = substr_count($section_to_remove, $open_tag);
|
|
||||||
$close_tag_count = substr_count($section_to_remove, $close_tag);
|
|
||||||
} while ($open_tag_count > $close_tag_count && $max_recursion > 0);
|
|
||||||
$string = str_replace($section_to_remove, '', $string);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
$feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml';
|
|
||||||
$html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl);
|
|
||||||
$limit = 0;
|
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
|
||||||
if ($limit < 10) {
|
|
||||||
|
|
||||||
//Retrieve article Uri and get that page
|
|
||||||
$article_uri = $element->find('guid', 0)->plaintext;
|
|
||||||
$article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri);
|
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
|
||||||
$article_title = trim($element->find('title', 0)->plaintext);
|
|
||||||
$article_image = $article_html->find('img.wp-post-image', 0)->src;
|
|
||||||
$article_summary = strip_tags(html_entity_decode($element->find('description', 0)->plaintext));
|
|
||||||
$article_content = $article_html->find('div.entry-content', 0)->innertext;
|
|
||||||
$article_content = StripRecursiveHTMLSection($article_content , 'div', '<div class="entry-prefix"');
|
|
||||||
$article_content = StripRecursiveHTMLSection($article_content , 'script', '<script');
|
|
||||||
$article_content = StripRecursiveHTMLSection($article_content , 'aside', '<aside');
|
|
||||||
$article_content = '<p><img src="'.$article_image.'" /></p><p><b>'.$article_summary.'</b></p>'.$article_content;
|
|
||||||
|
|
||||||
//Build and add final item
|
|
||||||
$item = array();
|
|
||||||
$item['uri'] = $article_uri;
|
|
||||||
$item['title'] = $article_title;
|
|
||||||
$item['author'] = $article_html->find('a[rel=author]', 0)->plaintext;
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = $article_content;
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,54 +1,55 @@
|
||||||
<?php
|
<?php
|
||||||
class NeuviemeArtBridge extends BridgeAbstract {
|
class NeuviemeArtBridge extends FeedExpander {
|
||||||
|
|
||||||
const MAINTAINER = "ORelio";
|
const MAINTAINER = "ORelio";
|
||||||
const NAME = '9ème Art Bridge';
|
const NAME = '9ème Art Bridge';
|
||||||
const URI = "http://www.9emeart.fr/";
|
const URI = "http://www.9emeart.fr/";
|
||||||
const DESCRIPTION = "Returns the newest articles.";
|
const DESCRIPTION = "Returns the newest articles.";
|
||||||
|
|
||||||
public function collectData(){
|
private function StripWithDelimiters($string, $start, $end) {
|
||||||
|
while (strpos($string, $start) !== false) {
|
||||||
|
$section_to_remove = substr($string, strpos($string, $start));
|
||||||
|
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
|
||||||
|
$string = str_replace($section_to_remove, '', $string);
|
||||||
|
} return $string;
|
||||||
|
}
|
||||||
|
|
||||||
function StripWithDelimiters($string, $start, $end) {
|
protected function parseItem($item){
|
||||||
while (strpos($string, $start) !== false) {
|
$item = parent::parseItem($item);
|
||||||
$section_to_remove = substr($string, strpos($string, $start));
|
|
||||||
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
|
|
||||||
$string = str_replace($section_to_remove, '', $string);
|
|
||||||
} return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
$feedUrl = self::URI.'9emeart.rss';
|
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
$html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl);
|
if(!$article_html){
|
||||||
$limit = 0;
|
$item['content'] = 'Could not request 9eme Art: '.$item['uri'];
|
||||||
|
return $item;
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
$article_image = '';
|
||||||
if ($limit < 5) {
|
foreach ($article_html->find('img.img_full') as $img){
|
||||||
|
if ($img->alt == $item['title']){
|
||||||
|
$article_image = self::URI.$img->src;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Retrieve article Uri and get that page
|
$article_content='';
|
||||||
$article_uri = $element->find('guid', 0)->plaintext;
|
if($article_image){
|
||||||
$article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri);
|
$article_content = '<p><img src="'.$article_image.'" /></p>';
|
||||||
|
}
|
||||||
|
$article_content .= str_replace(
|
||||||
|
'src="/', 'src="'.self::URI,
|
||||||
|
$article_html->find('div.newsGenerique_con', 0)->innertext
|
||||||
|
);
|
||||||
|
$article_content = $this->StripWithDelimiters($article_content, '<script', '</script>');
|
||||||
|
$article_content = $this->StripWithDelimiters($article_content, '<style', '</style>');
|
||||||
|
$article_content = $this->StripWithDelimiters($article_content, '<link', '>');
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
$item['content'] = $article_content;
|
||||||
$article_title = trim($element->find('title', 0)->plaintext);
|
|
||||||
$article_image = $element->find('enclosure', 0)->url;
|
|
||||||
foreach ($article_html->find('img.img_full') as $img)
|
|
||||||
if ($img->alt == $article_title)
|
|
||||||
$article_image = self::URI.$img->src;
|
|
||||||
$article_content = '<p><img src="'.$article_image.'" /></p>'
|
|
||||||
.str_replace('src="/', 'src="'.self::URI, $article_html->find('div.newsGenerique_con', 0)->innertext);
|
|
||||||
$article_content = StripWithDelimiters($article_content, '<script', '</script>');
|
|
||||||
$article_content = StripWithDelimiters($article_content, '<style', '</style>');
|
|
||||||
$article_content = StripWithDelimiters($article_content, '<link', '>');
|
|
||||||
|
|
||||||
//Build and add final item
|
return $item;
|
||||||
$item = array();
|
}
|
||||||
$item['uri'] = $article_uri;
|
|
||||||
$item['title'] = $article_title;
|
public function collectData(){
|
||||||
$item['author'] = $article_html->find('a[class=upp transition_fast upp]', 0)->plaintext;
|
$feedUrl = self::URI.'9emeart.rss';
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
$this->collectExpandableDatas($feedUrl);
|
||||||
$item['content'] = $article_content;
|
}
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,15 @@
|
||||||
<?php
|
<?php
|
||||||
class NumeramaBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class NumeramaBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = 'mitsukarenai';
|
const MAINTAINER = 'mitsukarenai';
|
||||||
const NAME = 'Numerama';
|
const NAME = 'Numerama';
|
||||||
const URI = 'http://www.numerama.com/';
|
const URI = 'http://www.numerama.com/';
|
||||||
const DESCRIPTION = 'Returns the 5 newest posts from Numerama (full text)';
|
const DESCRIPTION = 'Returns the newest posts from Numerama (full text)';
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$this->collectExpandableDatas(self::URI . 'feed/', 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function ExtractContent($url){
|
|
||||||
$article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url);
|
|
||||||
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
|
||||||
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
|
||||||
return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
|
|
||||||
return 1800; // 30min
|
return 1800; // 30min
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,42 +1,22 @@
|
||||||
<?php
|
<?php
|
||||||
class Rue89Bridge extends BridgeAbstract{
|
class Rue89Bridge extends FeedExpander {
|
||||||
|
|
||||||
const MAINTAINER = "pit-fgfjiudghdf";
|
const MAINTAINER = "pit-fgfjiudghdf";
|
||||||
const NAME = "Rue89";
|
const NAME = "Rue89";
|
||||||
const URI = "http://rue89.nouvelobs.com/";
|
const URI = "http://rue89.nouvelobs.com/";
|
||||||
const DESCRIPTION = "Returns the 5 newest posts from Rue89 (full text)";
|
const DESCRIPTION = "Returns the 5 newest posts from Rue89 (full text)";
|
||||||
|
|
||||||
private function rue89getDatas($url){
|
protected function parseItem($item){
|
||||||
|
$item = parent::parseItem($item);
|
||||||
|
|
||||||
$url = "http://api.rue89.nouvelobs.com/export/mobile2/node/" . str_replace(" ", "", substr($url, -8)) . "/full";
|
$url = "http://api.rue89.nouvelobs.com/export/mobile2/node/" . str_replace(" ", "", substr($item['uri'], -8)) . "/full";
|
||||||
$datas = json_decode($this->getContents($url), true);
|
$datas = json_decode($this->getContents($url), true);
|
||||||
|
$item['content'] = $datas['node']['body'];
|
||||||
|
|
||||||
return $datas["node"];
|
return $item;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
|
$this->collectExpandableDatas('http://api.rue89.nouvelobs.com/feed');
|
||||||
$html = $this->getSimpleHTMLDOM('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.');
|
|
||||||
|
|
||||||
$limit = 0;
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
|
|
||||||
if($limit < 5) {
|
|
||||||
|
|
||||||
$datas = $this->rue89getDatas(str_replace('#commentaires', '', ($element->find('comments', 0)->plaintext)));
|
|
||||||
|
|
||||||
$item = array();
|
|
||||||
$item['title'] = $datas["title"];
|
|
||||||
$item['author'] = $datas["author"][0]["name"];
|
|
||||||
$item['timestamp'] = $datas["updated"];
|
|
||||||
$item['content'] = $datas["body"];
|
|
||||||
$item['uri'] = $datas["url"];
|
|
||||||
|
|
||||||
$this->items[] = $item;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,61 +1,15 @@
|
||||||
<?php
|
<?php
|
||||||
class SiliconBridge extends BridgeAbstract {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class SiliconBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = "ORelio";
|
const MAINTAINER = "ORelio";
|
||||||
const NAME = 'Silicon Bridge';
|
const NAME = 'Silicon Bridge';
|
||||||
const URI = 'http://www.silicon.fr/';
|
const URI = 'http://www.silicon.fr/';
|
||||||
const DESCRIPTION = "Returns the newest articles.";
|
const DESCRIPTION = "Returns the newest articles.";
|
||||||
|
const PARAMETERS = array();
|
||||||
|
|
||||||
public function collectData(){
|
public function getCacheDuration() {
|
||||||
|
return 1800; // 30 minutes
|
||||||
function StripCDATA($string) {
|
}
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
$feedUrl = self::URI.'feed';
|
|
||||||
$html = $this->getSimpleHTMLDOM($feedUrl)
|
|
||||||
or $this->returnServerError('Could not request Silicon: '.$feedUrl);
|
|
||||||
$limit = 0;
|
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
if($limit < 5) {
|
|
||||||
|
|
||||||
//Retrieve article Uri and get that page
|
|
||||||
$article_uri = $element->innertext;
|
|
||||||
$article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
|
|
||||||
$article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
|
|
||||||
$article_html = $this->getSimpleHTMLDOM($article_uri)
|
|
||||||
or $this->returnServerError('Could not request Silicon: '.$article_uri);
|
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
|
||||||
$thumbnailUri = $element->find('enclosure', 0)->url;
|
|
||||||
$article_content = '<p><img src="'.$thumbnailUri.'" /></p>'
|
|
||||||
.'<p><b>'.$article_html->find('div.entry-excerpt', 0)->plaintext.'</b></p>'
|
|
||||||
.$article_html->find('div.entry-content', 0)->innertext;
|
|
||||||
|
|
||||||
//Remove useless scripts left in the page
|
|
||||||
while (strpos($article_content, '<script') !== false) {
|
|
||||||
$script_section = substr($article_content, strpos($article_content, '<script'));
|
|
||||||
$script_section = substr($script_section, 0, strpos($script_section, '</script>') + 9);
|
|
||||||
$article_content = str_replace($script_section, '', $article_content);
|
|
||||||
}
|
|
||||||
|
|
||||||
//Build and add final item
|
|
||||||
$item = array();
|
|
||||||
$item['uri'] = $article_uri;
|
|
||||||
$item['title'] = StripCDATA($element->find('title', 0)->innertext);
|
|
||||||
$item['author'] = StripCDATA($element->find('dc:creator', 0)->innertext);
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = $article_content;
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
|
||||||
return 1800; // 30 minutes
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,59 +1,41 @@
|
||||||
<?php
|
<?php
|
||||||
class WeLiveSecurityBridge extends BridgeAbstract {
|
class WeLiveSecurityBridge extends FeedExpander {
|
||||||
|
|
||||||
const MAINTAINER = 'ORelio';
|
const MAINTAINER = 'ORelio';
|
||||||
const NAME = 'We Live Security';
|
const NAME = 'We Live Security';
|
||||||
const URI = 'http://www.welivesecurity.com/';
|
const URI = 'http://www.welivesecurity.com/';
|
||||||
const DESCRIPTION = 'Returns the newest articles.';
|
const DESCRIPTION = 'Returns the newest articles.';
|
||||||
|
|
||||||
public function collectData(){
|
private function StripWithDelimiters($string, $start, $end) {
|
||||||
|
while (strpos($string, $start) !== false) {
|
||||||
|
$section_to_remove = substr($string, strpos($string, $start));
|
||||||
|
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
|
||||||
|
$string = str_replace($section_to_remove, '', $string);
|
||||||
|
} return $string;
|
||||||
|
}
|
||||||
|
|
||||||
function ExtractFromDelimiters($string, $start, $end) {
|
|
||||||
if (strpos($string, $start) !== false) {
|
|
||||||
$section_retrieved = substr($string, strpos($string, $start) + strlen($start));
|
|
||||||
$section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
|
|
||||||
return $section_retrieved;
|
|
||||||
} return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
function StripWithDelimiters($string, $start, $end) {
|
protected function parseItem($item){
|
||||||
while (strpos($string, $start) !== false) {
|
$item = parent::parseItem($item);
|
||||||
$section_to_remove = substr($string, strpos($string, $start));
|
|
||||||
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
|
|
||||||
$string = str_replace($section_to_remove, '', $string);
|
|
||||||
} return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
$feed = $this->getURI().'feed/';
|
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
|
if(!$article_html){
|
||||||
$limit = 0;
|
$item['content'] .= '<p>Could not request '.$this->getName().': '.$item['uri'].'</p>';
|
||||||
|
return $item;
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
$article_content = $article_html->find('div.wlistingsingletext', 0)->innertext;
|
||||||
if ($limit < 5) {
|
$article_content = $this->StripWithDelimiters($article_content, '<script', '</script>');
|
||||||
|
$article_content = '<p><b>'.$item['content'].'</b></p>'
|
||||||
|
.trim($article_content);
|
||||||
|
|
||||||
$article_image = $element->find('image', 0)->plaintext;
|
$item['content'] = $article_content;
|
||||||
$article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');
|
|
||||||
$article_summary = ExtractFromDelimiters($element->innertext, '<description><![CDATA[<p>', '</p>');
|
|
||||||
$article_html = $this->getContents($article_url) or $this->returnServerError('Could not request '.$this->getName().': '.$article_url);
|
|
||||||
if (substr($article_html, 0, 2) == "\x1f\x8b") //http://www.gzip.org/zlib/rfc-gzip.html#header-trailer -> GZip ID1
|
|
||||||
$article_html = gzdecode($article_html); //Response is GZipped even if we didn't accept GZip!? Let's decompress...
|
|
||||||
$article_html = str_get_html($article_html); //Now we have our HTML data. But still, that's an important HTTP violation...
|
|
||||||
$article_content = $article_html->find('div.wlistingsingletext', 0)->innertext;
|
|
||||||
$article_content = StripWithDelimiters($article_content, '<script', '</script>');
|
|
||||||
$article_content = '<p><img src="'.$article_image.'" /></p>'
|
|
||||||
.'<p><b>'.$article_summary.'</b></p>'
|
|
||||||
.trim($article_content);
|
|
||||||
|
|
||||||
$item = array();
|
return $item;
|
||||||
$item['uri'] = $article_url;
|
}
|
||||||
$item['title'] = $element->find('title', 0)->plaintext;
|
|
||||||
$item['author'] = $article_html->find('a[rel=author]', 0)->plaintext;
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = $article_content;
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
|
|
||||||
}
|
public function collectData(){
|
||||||
}
|
$feed = static::URI.'feed/';
|
||||||
}
|
$this->collectExpandableDatas($feed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,14 +1,9 @@
|
||||||
<?php
|
<?php
|
||||||
define('WORDPRESS_TYPE_ATOM', 1); // Content is of type ATOM
|
class WordPressBridge extends FeedExpander {
|
||||||
define('WORDPRESS_TYPE_RSS', 2); // Content is of type RSS
|
|
||||||
class WordPressBridge extends BridgeAbstract {
|
|
||||||
|
|
||||||
public $sitename; // Name of the site
|
|
||||||
|
|
||||||
const MAINTAINER = "aledeg";
|
const MAINTAINER = "aledeg";
|
||||||
const NAME = "Wordpress Bridge";
|
const NAME = "Wordpress Bridge";
|
||||||
const URI = "https://wordpress.org/";
|
const URI = "https://wordpress.org/";
|
||||||
const DESCRIPTION = "Returns the 3 newest full posts of a Wordpress blog";
|
const DESCRIPTION = "Returns the newest full posts of a Wordpress powered website";
|
||||||
|
|
||||||
const PARAMETERS = array( array(
|
const PARAMETERS = array( array(
|
||||||
'url'=>array(
|
'url'=>array(
|
||||||
|
@ -17,112 +12,62 @@ class WordPressBridge extends BridgeAbstract {
|
||||||
)
|
)
|
||||||
));
|
));
|
||||||
|
|
||||||
// Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url')
|
private function clearContent($content) {
|
||||||
private function ReplaceLinkTagsWithUrlTags($element){
|
|
||||||
// We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom)
|
|
||||||
$element_text = $element->outertext;
|
|
||||||
$element_text = str_replace('<link>', '<url>', $element_text);
|
|
||||||
$element_text = str_replace('</link>', '</url>', $element_text);
|
|
||||||
$element_text = str_replace('<link ', '<url ', $element_text);
|
|
||||||
return str_get_html($element_text);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function StripCDATA($string) {
|
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function ClearContent($content) {
|
|
||||||
$content = preg_replace('/<script[^>]*>[^<]*<\/script>/', '', $content);
|
$content = preg_replace('/<script[^>]*>[^<]*<\/script>/', '', $content);
|
||||||
$content = preg_replace('/<div class="wpa".*/', '', $content);
|
$content = preg_replace('/<div class="wpa".*/', '', $content);
|
||||||
$content = preg_replace('/<form.*\/form>/', '', $content);
|
$content = preg_replace('/<form.*\/form>/', '', $content);
|
||||||
return $content;
|
return $content;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(){
|
protected function parseItem($newItem){
|
||||||
|
$item=parent::parseItem($newItem);
|
||||||
|
|
||||||
$html = $this->getSimpleHTMLDOM($this->getURI().'/feed/atom')
|
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
or $this->returnServerError("Could not request ".$this->getURI().'/feed/atom');
|
|
||||||
|
|
||||||
// Notice: We requested an ATOM feed, however some sites return RSS feeds instead!
|
$article=null;
|
||||||
if($html->find('entry')){
|
switch(true){
|
||||||
$type=WORDPRESS_TYPE_ATOM;
|
case !is_null($article_html->find('article',0)):
|
||||||
}else if($html->find('item')){
|
// most common content div
|
||||||
$type=WORDPRESS_TYPE_RSS;
|
$article = $article_html->find('article', 0);
|
||||||
}else{
|
break;
|
||||||
$type=WORDPRESS_TYPE_ATOM; // Make ATOM default
|
case !is_null($article_html->find('.single-content',0)):
|
||||||
|
// another common content div
|
||||||
|
$article = $article_html->find('.single-content', 0);
|
||||||
|
break;
|
||||||
|
case !is_null($article_html->find('.post-content',0)):
|
||||||
|
// another common content div
|
||||||
|
$article = $article_html->find('.post-content', 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case !is_null($article_html->find('.post',0)):
|
||||||
|
// for old WordPress themes without HTML5
|
||||||
|
$article = $article_html->find('.post', 0);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if($type === WORDPRESS_TYPE_RSS)
|
if(!is_null($article)){
|
||||||
$posts = $html->find('item');
|
$item['content'] = $this->clearContent($article->innertext);
|
||||||
else
|
|
||||||
$posts = $html->find('entry');
|
|
||||||
|
|
||||||
if(!empty($posts) ) {
|
|
||||||
$this->sitename = $html->find('title', 0)->plaintext;
|
|
||||||
$i=0;
|
|
||||||
|
|
||||||
foreach ($posts as $article) {
|
|
||||||
if($i < 3) {
|
|
||||||
|
|
||||||
$item = array();
|
|
||||||
|
|
||||||
$article = $this->ReplaceLinkTagsWithUrlTags($article);
|
|
||||||
|
|
||||||
if($type === WORDPRESS_TYPE_RSS){
|
|
||||||
$item['uri'] = $article->find('url', 0)->innertext; // 'link' => 'url'!
|
|
||||||
$item['title'] = $article->find('title', 0)->plaintext;
|
|
||||||
$item['author'] = trim($this->StripCDATA($article->find('dc:creator', 0)->innertext));
|
|
||||||
$item['timestamp'] = strtotime($article->find('pubDate', 0)->innertext);
|
|
||||||
} else {
|
|
||||||
$item['uri'] = $article->find('url', 0)->getAttribute('href'); // 'link' => 'url'!
|
|
||||||
$item['title'] = $this->StripCDATA($article->find('title', 0)->plaintext);
|
|
||||||
$item['author'] = trim($article->find('author', 0)->innertext);
|
|
||||||
$item['timestamp'] = strtotime($article->find('updated', 0)->innertext);
|
|
||||||
}
|
|
||||||
|
|
||||||
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
|
|
||||||
// Attempt to find most common content div
|
|
||||||
if(!isset($item['content'])){
|
|
||||||
$article = $article_html->find('article', 0);
|
|
||||||
if(!empty($article)){
|
|
||||||
$item['content'] = $this->ClearContent($article->innertext);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// another common content div
|
|
||||||
if(!isset($item['content'])){
|
|
||||||
$article = $article_html->find('.single-content', 0);
|
|
||||||
if(!empty($article)){
|
|
||||||
$item['content'] = $this->ClearContent($article->innertext);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// for old WordPress themes without HTML5
|
|
||||||
if(!isset($item['content'])){
|
|
||||||
$article = $article_html->find('.post', 0);
|
|
||||||
if(!empty($article)){
|
|
||||||
$item['content'] = $this->ClearContent($article->innertext);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->items[] = $item;
|
|
||||||
$i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
$this->returnServerError("Sorry, ".$this->getURI()." doesn't seem to be a Wordpress blog.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getURI(){
|
public function getURI(){
|
||||||
return $this->getInput('url');
|
$url = $this->getInput('url');
|
||||||
|
if(empty($url)){
|
||||||
|
$url = static::URI;
|
||||||
|
}
|
||||||
|
return $url;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getName() {
|
public function collectData(){
|
||||||
return "{$this->sitename} - Wordpress Bridge";
|
if($this->getInput('url') && substr($this->getInput('url'),0,strlen('http'))!=='http'){
|
||||||
|
// just in case someone find a way to access local files by playing with the url
|
||||||
|
$this->returnClientError('The url parameter must either refer to http or https protocol.');
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->collectExpandableDatas($this->getURI().'/feed/atom/');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
|
|
|
@ -1,41 +1,13 @@
|
||||||
<?php
|
<?php
|
||||||
class ZatazBridge extends BridgeAbstract {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class ZatazBridge extends WordPressBridge{
|
||||||
|
|
||||||
const MAINTAINER = "aledeg";
|
const MAINTAINER = "aledeg";
|
||||||
const NAME = 'Zataz Magazine';
|
const NAME = 'Zataz Magazine';
|
||||||
const URI = 'http://www.zataz.com';
|
const URI = 'http://www.zataz.com';
|
||||||
const DESCRIPTION = "ZATAZ Magazine - S'informer, c'est déjà se sécuriser";
|
const DESCRIPTION = "ZATAZ Magazine - S'informer, c'est déjà se sécuriser";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$html = $this->getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI);
|
|
||||||
|
|
||||||
$recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li');
|
|
||||||
foreach ($recent_posts as $article) {
|
|
||||||
if (count($this->items) < 5) {
|
|
||||||
$uri = $article->find('a', 0)->href;
|
|
||||||
$this->items[] = $this->getDetails($uri);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getDetails($uri) {
|
|
||||||
$html = $this->getSimpleHTMLDOM($uri) or exit;
|
|
||||||
|
|
||||||
$item = array();
|
|
||||||
|
|
||||||
$article = $html->find('.gdl-blog-full', 0);
|
|
||||||
$item['uri'] = $uri;
|
|
||||||
$item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext;
|
|
||||||
$item['content'] = $article->find('.blog-content', 0)->innertext;
|
|
||||||
$item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getTimestampFromDate($uri) {
|
|
||||||
preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches);
|
|
||||||
$date = new \DateTime($matches[0]);
|
|
||||||
return $date->format('U');
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
return 7200; // 2h
|
return 7200; // 2h
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
<?php
|
|
||||||
class ZoneTelechargementBridge extends BridgeAbstract {
|
|
||||||
|
|
||||||
const MAINTAINER = 'ORelio';
|
|
||||||
const NAME = 'Zone Telechargement Bridge';
|
|
||||||
const URI = 'https://www.zone-telechargement.com/';
|
|
||||||
const DESCRIPTION = 'RSS proxy returning the newest releases.<br />You may specify a category found in RSS URLs, else main feed is selected.';
|
|
||||||
|
|
||||||
const PARAMETERS = array( array(
|
|
||||||
'category'=>array('name'=>'Category')
|
|
||||||
));
|
|
||||||
|
|
||||||
public function collectData(){
|
|
||||||
|
|
||||||
function StripCDATA($string) {
|
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
$url = $this->getURI().$this->getInput('category').'/rss.xml';
|
|
||||||
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url);
|
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
$item = array();
|
|
||||||
$item['title'] = $element->find('title', 0)->plaintext;
|
|
||||||
$item['uri'] = str_replace('http://', 'https://', $element->find('guid', 0)->plaintext);
|
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item['content'] = StripCDATA($element->find('description', 0)->innertext);
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -151,6 +151,18 @@ abstract class FeedExpander extends BridgeAbstract {
|
||||||
$namespaces = $feedItem->getNamespaces(true);
|
$namespaces = $feedItem->getNamespaces(true);
|
||||||
if(isset($namespaces['dc'])) $dc = $feedItem->children($namespaces['dc']);
|
if(isset($namespaces['dc'])) $dc = $feedItem->children($namespaces['dc']);
|
||||||
|
|
||||||
|
if(isset($feedItem->guid)){
|
||||||
|
foreach($feedItem->guid->attributes() as $attribute=>$value){
|
||||||
|
if($attribute === 'isPermaLink' && (
|
||||||
|
$value === 'true' ||
|
||||||
|
filter_var($feedItem->guid,FILTER_VALIDATE_URL)
|
||||||
|
)){
|
||||||
|
$item['uri'] = $feedItem->guid;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(isset($feedItem->pubDate)){
|
if(isset($feedItem->pubDate)){
|
||||||
$item['timestamp'] = strtotime($feedItem->pubDate);
|
$item['timestamp'] = strtotime($feedItem->pubDate);
|
||||||
} elseif(isset($dc->date)){
|
} elseif(isset($dc->date)){
|
||||||
|
|
Loading…
Reference in a new issue