[bridges] make them WordPressBridge derivatives
The bridge-specific content filtering removed from these bridges will need to be reintegrated later, either in the individual bridge or in WordPressBridge itself if a filter is considered generic enough (like the already existing WordPressBridge <script> removal filter).

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
43ac961284
commit
3f64d2d65a
7 changed files with 32 additions and 209 deletions
|
@ -1,30 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
class ArstechnicaBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class ArstechnicaBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = "prysme";
|
const MAINTAINER = "prysme";
|
||||||
const NAME = "ArstechnicaBridge";
|
const NAME = "ArstechnicaBridge";
|
||||||
const URI = "http://arstechnica.com";
|
const URI = "http://arstechnica.com";
|
||||||
const DESCRIPTION = "The PC enthusiast's resource. Power users and the tools they love, without computing religion";
|
const DESCRIPTION = "The PC enthusiast's resource. Power users and the tools they love, without computing religion";
|
||||||
|
const PARAMETERS = array();
|
||||||
protected function parseItem($item){
|
|
||||||
$item = parent::parseItem($item);
|
|
||||||
|
|
||||||
$html = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
if(!$html){
|
|
||||||
$item['content'] .= '<p>Requesting full article failed.</p>';
|
|
||||||
}else{
|
|
||||||
$item['content'] = $html->find('.article-guts', 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(){
|
|
||||||
$this->collectExpandableDatas('http://feeds.arstechnica.com/arstechnica/index/');
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
|
||||||
return 7200; // 2h
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
class FreenewsBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class FreenewsBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = "mitsukarenai";
|
const MAINTAINER = "mitsukarenai";
|
||||||
const NAME = "Freenews";
|
const NAME = "Freenews";
|
||||||
const URI = "http://freenews.fr";
|
const URI = "http://freenews.fr";
|
||||||
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox)";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
parent::collectExpandableDatas('http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem) {
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
|
|
||||||
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
$content = $articlePage->find('.post-container', 0);
|
|
||||||
$item['content'] = $content->innertext;
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,42 +1,13 @@
|
||||||
<?php
|
<?php
|
||||||
class LeJournalDuGeekBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class LeJournalDuGeekBridge extends WordPressBridge{
|
||||||
|
|
||||||
const MAINTAINER = "polopollo";
|
const MAINTAINER = "polopollo";
|
||||||
const NAME = "journaldugeek.com (FR)";
|
const NAME = "journaldugeek.com (FR)";
|
||||||
const URI = "http://www.journaldugeek.com/";
|
const URI = "http://www.journaldugeek.com/";
|
||||||
const DESCRIPTION = "Returns the 5 newest posts from LeJournalDuGeek (full text).";
|
const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text).";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$this->collectExpandableDatas(self::URI . 'rss', 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function LeJournalDuGeekExtractContent($url) {
|
|
||||||
$articleHTMLContent = $this->getSimpleHTMLDOMCached($url);
|
|
||||||
$text = $articleHTMLContent->find('div.post-content', 0)->innertext;
|
|
||||||
|
|
||||||
foreach($articleHTMLContent->find('a.more') as $element) {
|
|
||||||
if ($element->innertext == "Source") {
|
|
||||||
$text = $text . '<p><a href="' . $element->href . '">Source : ' . $element->href . '</a></p>';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach($articleHTMLContent->find('iframe') as $element) {
|
|
||||||
if (preg_match("/youtube/i", $element->src)) {
|
|
||||||
$text = $text . '// An IFRAME to Youtube was included in the article: <a href="' . $element->src . '">' . $element->src . '</a><br>';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $text);
|
|
||||||
$text = strip_tags($text, '<p><b><a><blockquote><img><em><br/><br><ul><li>');
|
|
||||||
return $text;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration(){
|
public function getCacheDuration(){
|
||||||
return 1800; // 30min
|
return 1800; // 30min
|
||||||
|
|
|
@ -1,61 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
class NakedSecurityBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class NakedSecurityBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = 'ORelio';
|
const MAINTAINER = 'ORelio';
|
||||||
const NAME = 'Naked Security';
|
const NAME = 'Naked Security';
|
||||||
const URI = 'https://nakedsecurity.sophos.com/';
|
const URI = 'https://nakedsecurity.sophos.com/';
|
||||||
const DESCRIPTION = 'Returns the newest articles.';
|
const DESCRIPTION = 'Returns the newest articles.';
|
||||||
|
const PARAMETERS = array();
|
||||||
private function StripRecursiveHTMLSection($string, $tag_name, $tag_start) {
|
|
||||||
$open_tag = '<'.$tag_name;
|
|
||||||
$close_tag = '</'.$tag_name.'>';
|
|
||||||
$close_tag_length = strlen($close_tag);
|
|
||||||
if (strpos($tag_start, $open_tag) === 0) {
|
|
||||||
while (strpos($string, $tag_start) !== false) {
|
|
||||||
$max_recursion = 100;
|
|
||||||
$section_to_remove = null;
|
|
||||||
$section_start = strpos($string, $tag_start);
|
|
||||||
$search_offset = $section_start;
|
|
||||||
do {
|
|
||||||
$max_recursion--;
|
|
||||||
$section_end = strpos($string, $close_tag, $search_offset);
|
|
||||||
$search_offset = $section_end + $close_tag_length;
|
|
||||||
$section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length);
|
|
||||||
$open_tag_count = substr_count($section_to_remove, $open_tag);
|
|
||||||
$close_tag_count = substr_count($section_to_remove, $close_tag);
|
|
||||||
} while ($open_tag_count > $close_tag_count && $max_recursion > 0);
|
|
||||||
$string = str_replace($section_to_remove, '', $string);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected function parseItem($item){
|
|
||||||
$item = parent::parseItem($item);
|
|
||||||
|
|
||||||
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
if(!$article_html){
|
|
||||||
$item['content'] = 'Could not request '.$this->getName().': '.$item['uri'];
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
$article_image = $article_html->find('img.wp-post-image', 0)->src;
|
|
||||||
$article_content = $article_html->find('div.entry-content', 0)->innertext;
|
|
||||||
$article_content = $this->StripRecursiveHTMLSection($article_content , 'div', '<div class="entry-prefix"');
|
|
||||||
$article_content = $this->StripRecursiveHTMLSection($article_content , 'script', '<script');
|
|
||||||
$article_content = $this->StripRecursiveHTMLSection($article_content , 'aside', '<aside');
|
|
||||||
$article_content = '<p><img src="'.$article_image.'" /></p><p><b>'.$item['content'].'</b></p>'.$article_content;
|
|
||||||
|
|
||||||
$item['content'] = $article_content;
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(){
|
|
||||||
|
|
||||||
$feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml';
|
|
||||||
$this->collectExpandableDatas($feedUrl);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,15 @@
|
||||||
<?php
|
<?php
|
||||||
class NumeramaBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class NumeramaBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = 'mitsukarenai';
|
const MAINTAINER = 'mitsukarenai';
|
||||||
const NAME = 'Numerama';
|
const NAME = 'Numerama';
|
||||||
const URI = 'http://www.numerama.com/';
|
const URI = 'http://www.numerama.com/';
|
||||||
const DESCRIPTION = 'Returns the 5 newest posts from Numerama (full text)';
|
const DESCRIPTION = 'Returns the newest posts from Numerama (full text)';
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$this->collectExpandableDatas(self::URI . 'feed/', 5);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
|
||||||
$item = parent::parseItem($newsItem);
|
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function ExtractContent($url){
|
|
||||||
$article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url);
|
|
||||||
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
|
||||||
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
|
||||||
return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
|
|
||||||
return 1800; // 30min
|
return 1800; // 30min
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,39 +1,13 @@
|
||||||
<?php
|
<?php
|
||||||
class SiliconBridge extends FeedExpander {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class SiliconBridge extends WordPressBridge {
|
||||||
|
|
||||||
const MAINTAINER = "ORelio";
|
const MAINTAINER = "ORelio";
|
||||||
const NAME = 'Silicon Bridge';
|
const NAME = 'Silicon Bridge';
|
||||||
const URI = 'http://www.silicon.fr/';
|
const URI = 'http://www.silicon.fr/';
|
||||||
const DESCRIPTION = "Returns the newest articles.";
|
const DESCRIPTION = "Returns the newest articles.";
|
||||||
|
const PARAMETERS = array();
|
||||||
protected function parseItem($item){
|
|
||||||
$item = parent::parseItem($item);
|
|
||||||
|
|
||||||
$article_html = $this->getSimpleHTMLDOMCached($item['uri']);
|
|
||||||
if(!$article_html){
|
|
||||||
$item['content'] .= '<p>Could not request Silicon: '.$item['uri'].'</p>';
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
$article_content = '<p><b>'.$article_html->find('div.entry-excerpt', 0)->plaintext.'</b></p>'
|
|
||||||
.$article_html->find('div.entry-content', 0)->innertext;
|
|
||||||
|
|
||||||
//Remove useless scripts left in the page
|
|
||||||
while (strpos($article_content, '<script') !== false) {
|
|
||||||
$script_section = substr($article_content, strpos($article_content, '<script'));
|
|
||||||
$script_section = substr($script_section, 0, strpos($script_section, '</script>') + 9);
|
|
||||||
$article_content = str_replace($script_section, '', $article_content);
|
|
||||||
}
|
|
||||||
|
|
||||||
$item['content'] = $article_content;
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(){
|
|
||||||
$feedUrl = self::URI.'feed';
|
|
||||||
$this->collectExpandableDatas($feedUrl);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
return 1800; // 30 minutes
|
return 1800; // 30 minutes
|
||||||
|
|
|
@ -1,41 +1,13 @@
|
||||||
<?php
|
<?php
|
||||||
class ZatazBridge extends BridgeAbstract {
|
require_once('WordPressBridge.php');
|
||||||
|
|
||||||
|
class ZatazBridge extends WordPressBridge{
|
||||||
|
|
||||||
const MAINTAINER = "aledeg";
|
const MAINTAINER = "aledeg";
|
||||||
const NAME = 'Zataz Magazine';
|
const NAME = 'Zataz Magazine';
|
||||||
const URI = 'http://www.zataz.com';
|
const URI = 'http://www.zataz.com';
|
||||||
const DESCRIPTION = "ZATAZ Magazine - S'informer, c'est déjà se sécuriser";
|
const DESCRIPTION = "ZATAZ Magazine - S'informer, c'est déjà se sécuriser";
|
||||||
|
const PARAMETERS = array();
|
||||||
public function collectData(){
|
|
||||||
$html = $this->getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI);
|
|
||||||
|
|
||||||
$recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li');
|
|
||||||
foreach ($recent_posts as $article) {
|
|
||||||
if (count($this->items) < 5) {
|
|
||||||
$uri = $article->find('a', 0)->href;
|
|
||||||
$this->items[] = $this->getDetails($uri);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getDetails($uri) {
|
|
||||||
$html = $this->getSimpleHTMLDOM($uri) or exit;
|
|
||||||
|
|
||||||
$item = array();
|
|
||||||
|
|
||||||
$article = $html->find('.gdl-blog-full', 0);
|
|
||||||
$item['uri'] = $uri;
|
|
||||||
$item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext;
|
|
||||||
$item['content'] = $article->find('.blog-content', 0)->innertext;
|
|
||||||
$item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href);
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getTimestampFromDate($uri) {
|
|
||||||
preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches);
|
|
||||||
$date = new \DateTime($matches[0]);
|
|
||||||
return $date->format('U');
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration() {
|
public function getCacheDuration() {
|
||||||
return 7200; // 2h
|
return 7200; // 2h
|
||||||
|
|
Loading…
Reference in a new issue