1
0
Fork 0
forked from blallo/rss-bridge

Merge origin/master

This commit is contained in:
ahiles3005 2016-03-01 20:48:51 +04:00
commit 1d44dafdef
5 changed files with 50 additions and 43 deletions

View file

@ -1,5 +1,5 @@
<?php
class AcrimedBridge extends BridgeAbstract{
class AcrimedBridge extends RssExpander{
public function loadMetadatas() {
@ -10,34 +10,35 @@ class AcrimedBridge extends BridgeAbstract{
$this->update = "2014-05-25";
}
// Collects the items of the Acrimed feed into $this->items.
// NOTE(review): this span comes from a diff hunk and appears to interleave the
// previous implementation (manual scraping loop) with its replacement (the call to
// parent::collectExpandableDatas) -- confirm against the real file before relying on it.
public function collectData(array $param){
// Strips the CDATA opening and closing markers from $string and returns the remainder.
// NOTE(review): a named function declared inside a method is defined when the method
// runs, so running collectData() a second time in one request would redeclare it.
function StripCDATA($string) {
$string = str_replace('<![CDATA[', '', $string);
$string = str_replace(']]>', '', $string);
return $string;
}
// Downloads the page at $url and returns the inner HTML of its first div.texte element.
// No check is made that the download or the lookup succeeded.
function ExtractContent($url) {
$html2 = file_get_html($url);
$text = $html2->find('div.texte', 0)->innertext;
return $text;
}
// `or` binds more loosely than `=`: $html receives the result, and returnError() is
// called only when that result is falsy.
$html = file_get_html('http://www.acrimed.org/spip.php?page=backend') or $this->returnError('Could not request Acrimed.', 404);
$limit = 0;
// Hands the same feed URL to the parent class.
parent::collectExpandableDatas($param, "http://www.acrimed.org/spip.php?page=backend");
// Only the first 10 feed entries become items; later entries are iterated but ignored.
foreach($html->find('item') as $element) {
if($limit < 10) {
$item = new \Item();
$item->title = StripCDATA($element->find('title', 0)->innertext);
$item->uri = StripCDATA($element->find('guid', 0)->plaintext);
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
// One extra HTTP request per item, to the article page named by the guid.
$item->content = ExtractContent($item->uri);
$this->items[] = $item;
$limit++;
}
}
}
/**
 * Builds one bridge item from one entry of the RSS feed.
 *
 * Link and title come from the feed entry; the timestamp comes from the Dublin
 * Core `date` element when the feed declares a `dc` namespace, otherwise from
 * `pubDate`. The content is the `article.article1` element of the linked page,
 * sanitized and passed through HTMLSanitizer::defaultImageSrcTo() with the
 * Acrimed site root. When the page cannot be fetched or holds no such element,
 * the entry's own `description` is used instead of failing on a non-object.
 *
 * @param SimpleXMLElement $newsItem one feed entry (presumably an <item> node
 *                                   supplied by the parent class -- confirm)
 * @return Item the populated item
 */
protected function parseRSSItem($newsItem) {
    $item = new Item();
    $item->uri = trim((string)$newsItem->link);
    $item->title = trim((string)$newsItem->title);

    // Indexing $namespaces['dc'] unconditionally raises a notice and silently
    // reads un-namespaced children when the feed has no Dublin Core prefix.
    $namespaces = $newsItem->getNameSpaces(true);
    if(isset($namespaces['dc'])) {
        $dateText = (string)$newsItem->children($namespaces['dc'])->date;
    } else {
        $dateText = (string)$newsItem->pubDate;
    }
    $item->timestamp = strtotime($dateText);

    // Fetch with the trimmed link: stray whitespace in the feed would otherwise
    // end up in the requested URL.
    $articlePage = file_get_html($item->uri);
    $articleNode = $articlePage ? $articlePage->find('article.article1', 0) : null;
    if(!$articleNode) {
        // Download failed or the page layout changed: keep the item, with the
        // text the feed itself provides.
        $item->content = trim((string)$newsItem->description);
        return $item;
    }

    $hs = new HTMLSanitizer();
    $article = $hs->sanitize($articleNode->innertext);
    // NOTE(review): defaultImageSrcTo() calls ->find('img') on its first argument,
    // so sanitize() must return a DOM object rather than a string -- confirm.
    $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");
    $item->content = $article;
    return $item;
}
public function getName() {
@ -52,7 +53,6 @@ class AcrimedBridge extends BridgeAbstract{
}
// Returns the cache lifetime for this bridge; the unit is presumably seconds
// (3600*2 is annotated "2 hours").
// NOTE(review): two return statements are listed here, apparently the removed and the
// added line of a diff; as written only the first one executes -- confirm which value
// the real file uses.
public function getCacheDuration(){
return 3600*2; // 2 hours
// return 0; // 2 hours
return 4800; // 4800 s = 80 minutes, not 2 hours
}
}

View file

@ -51,7 +51,8 @@ class PinterestBridge extends BridgeAbstract{
$this->username = $param['u'];
$this->board = $param['b'];
$html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Could not request Pinterest.', 404);
$html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Username and/or board not found', 404);
} else if (isset($param['q']))
{
$this->query = $param['q'];
@ -76,13 +77,17 @@ class PinterestBridge extends BridgeAbstract{
if (isset($this->query))
{
$avatar = $div->find('img.creditImg', 0);
$username = $div->find('span.creditName', 0);
$board = $div->find('span.creditTitle', 0);
$avatar = $div->find('div.creditImg', 0)->find('img', 0);
$avatar = $avatar->getAttribute('data-src');
$avatar = str_replace("\\", "", $avatar);
$username = $div->find('div.creditName', 0);
$board = $div->find('div.creditTitle', 0);
$item->username =$username->innertext;
$item->fullname = $board->innertext;
$item->avatar = $avatar->getAttribute('src');
$item->avatar = $avatar;
$item->content .= '<br /><img align="left" style="margin: 2px 4px;" src="'.htmlentities($item->avatar).'" /> <strong>'.$item->username.'</strong>';
$item->content .= '<br />'.$item->fullname;

View file

@ -31,10 +31,10 @@ class YoutubeBridge extends BridgeAbstract {
$this->parameters['By channel id'] =
'[
{
"type" : "number",
"type" : "text",
"identifier" : "c",
"name" : "channel id",
"exampleValue" : "15",
"exampleValue" : "test",
"required" : "required"
}
]';

View file

@ -23,7 +23,7 @@ class AtomFormat extends FormatAbstract{
$extraInfos = $this->getExtraInfos();
$title = xml_encode($extraInfos['name']);
$uri = $extraInfos['uri'];
$icon = xml_encode('http://g.etfv.co/'. $uri .'?icon.jpg');
$icon = xml_encode('http://icons.better-idea.org/icon?url='. $uri .'&size=64');
$uri = xml_encode($uri);
$entries = '';

View file

@ -158,16 +158,16 @@ class HTMLSanitizer {
var $onlyKeepText;
public static $DEFAULT_CLEAR_TAGS = ["script", "iframe"];
public static $DEFAULT_CLEAR_TAGS = ["script", "iframe", "input", "form"];
public static $KEPT_ATTRIBUTES = ["title", "href", "src"];
const ONLY_TEXT = null;
public static $ONLY_TEXT = [];
function __construct($tags_to_remove = HTMLSanitizer::DEFAULT_CLEAR_TAGS, $kept_attributes = HTMLSanitizer::KEPT_ATTRIBUTES, $only_keep_text = HTMLSanitizer::ONLY_TEXT) {
function __construct($tags_to_remove = null, $kept_attributes = null, $only_keep_text = null) {
$this->tagsToRemove = $tags_to_remove;
$this->keptAttributes = $kept_attributes;
$this->onlyKeepText = $only_keep_text;
$this->tagsToRemove = $tags_to_remove == null ? HTMLSanitizer::$DEFAULT_CLEAR_TAGS : $tags_to_remove;
$this->keptAttributes = $kept_attributes == null ? HTMLSanitizer::$KEPT_ATTRIBUTES : $kept_attributes;
$this->onlyKeepText = $only_keep_text == null ? HTMLSanitizer::$ONLY_TEXT : $only_keep_text;
}
@ -175,7 +175,7 @@ class HTMLSanitizer {
$htmlContent = str_get_html($textToSanitize);
foreach($htmlContent->find('*[!j_ai_pas_trouve_comment_tout_demander]') as $element) {
foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) {
if(in_array($element->tag, $this->onlyKeepText)) {
$element->outertext = $element->plaintext;
} else if(in_array($element->tag, $this->tagsToRemove)) {
@ -192,10 +192,12 @@ class HTMLSanitizer {
}
/**
 * Rewrites relative image sources so that they point at $server.
 *
 * Every <img> found in $content whose src is neither absolute (has a URI scheme
 * such as http:, https: or data:) nor protocol-relative (starts with //) gets
 * $server prepended, joined with exactly one slash. Images without a src are
 * left untouched.
 *
 * The earlier test `strpos($src, "http") == NULL` was true both when the needle
 * was missing (false) and when it was found at offset 0, so absolute URLs were
 * prefixed as well; anchoring the check at the start of the string avoids that.
 *
 * @param object $content DOM object exposing find('img'), whose results have a
 *                        writable src property
 * @param string $server  base URL to prepend, with or without a trailing slash
 * @return object the same $content, modified in place
 */
public static function defaultImageSrcTo($content, $server) {
    foreach($content->find('img') as $image) {
        $src = trim((string)$image->src);
        if($src === '') {
            // No usable source: prefixing would only produce the bare server URL.
            continue;
        }
        // "scheme:" at the start, or a protocol-relative "//host/...": already absolute.
        if(preg_match('#^(?:[a-z][a-z0-9+.-]*:|//)#i', $src)) {
            continue;
        }
        $image->src = rtrim($server, '/').'/'.ltrim($src, '/');
    }
    return $content;
}
}