1
0
Fork 0
forked from blallo/rss-bridge

Merge pull request #153 from ORelio/master

Improve FaceBook, NextInpact, Wordpress bridges
This commit is contained in:
Mitsu 2015-09-20 17:03:15 +02:00
commit 36ba93eea3
3 changed files with 102 additions and 80 deletions

View file

@ -1,87 +1,99 @@
<?php
/**
*
* @name Facebook
* @homepage http://facebook.com/
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117
* @update 03/08/2015
* @update 05/09/2015
* @maintainer teromene
* @use1(u="username")
*/
class FacebookBridge extends BridgeAbstract{
// Bridge that requests a Facebook page or profile with the _fb_noscript=1 query parameter and
// turns the post elements it finds into feed items.
// NOTE(review): many statements and methods below occur twice in slightly different forms
// (two collectData() headers, two getName() bodies, ...) — presumably the removed and added
// lines of a diff rendered without +/- markers; confirm against the real file before relying
// on the brace structure shown here.
/**
 * Fetches the page named by $param['u'] and appends one \Item to $this->items for every
 * post element that carries a data-time attribute.
 *
 * When strpos($param['u'], "/") is falsy the page is requested as https://facebook.com/<u>
 * (url-encoded); otherwise as https://facebook.com/pages/<u>. returnError() is called with
 * 404 when file_get_html() yields a falsy result and with 400 when 'u' is not set.
 *
 * @param array $param request parameters; 'u' is the only key read here
 */
public function collectData(array $param){
$html = '';
if(isset($param['u'])) {
// NOTE(review): strpos() returns 0 when "/" is the first character and !0 is true, so a value
// that starts with "/" also takes this "no slash" branch; only === false tells the cases apart.
if(!strpos($param['u'], "/")) {
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
// Holds the page name extracted in collectData(); getName() reads it.
private $name;
public function collectData(array $param){
$html = '';
if(isset($param['u'])) {
if(!strpos($param['u'], "/")) {
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
} else {
// Unlike the branch above, 'u' is concatenated here without urlencode().
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
}
} else {
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
$this->returnError('You must specify a Facebook username.', 400);
}
} else {
$this->returnError('You must specify a Facebook username.', 400);
// NOTE(review): the result of find() is indexed with [0] and children() is chained on it
// before the isset($element) test below runs, so that test cannot protect this chain when
// no element matches the selector.
$element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
}
if(isset($element)) {
// Page name = text of title#pageTitle with the ' | Facebook' suffix removed.
$author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
// Fallback thumbnail built from 'u' via the graph.facebook.com picture URL.
$profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&amp;height=200';
$this->name = $author;
$element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0);
if(isset($element)) {
foreach($element->children() as $post) {
$item = new \Item();
if($post->hasAttribute("data-time")) {
//Clean the content of the page
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', "", $post);
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', "", $content);
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content);
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', "", $content);
$content = strip_tags($content,"<a><img>");
foreach($element->children() as $post) {
$item = new \Item();
$date = $post->find("abbr")[0];
if(isset($date) && $date->hasAttribute("data-utime")) {
$date = $date->getAttribute("data-utime");
} else {
$date = 0;
if($post->hasAttribute("data-time")) {
//Clean the content of the page and convert relative links into absolute links
// $post is passed to preg_replace() as the subject — presumably it converts to its HTML
// when used as a string; confirm against the DOM library.
// The four patterns below delete matching "<div class=..." spans from that markup.
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
// Root-relative hrefs get the https://facebook.com prefix; onmouseover/onclick attributes are
// removed; closing anchor tags that carry extra text are reduced to a plain closing tag.
$content = str_replace(' href="/', ' href="https://facebook.com/', $content);
$content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content);
$content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content);
$content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
// Only <a> and <img> tags survive in the item content.
$content = strip_tags($content,'<a><img>');
//Retrieve date of the post
// The data-utime attribute of the first <abbr> is used as the timestamp; 0 when it is absent.
$date = $post->find("abbr")[0];
if(isset($date) && $date->hasAttribute('data-utime')) {
$date = $date->getAttribute('data-utime');
} else {
$date = 0;
}
//Build title from username and content
// NOTE(review): wordwrap() is called without its cut flag, so a single word longer than the
// width gets no "\n"; strpos() then returns false and substr($title, 0, false) is an empty
// string, leaving only '...'. The same applies to the 64-character step below.
$title = $author;
if (strlen($title) > 24)
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
$title = $title.' | '.strip_tags($content);
if (strlen($title) > 64)
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
//Use first image as thumbnail if available, or profile pic fallback
// NOTE(review): the index passed to find() is 1 although the comment above says "first image";
// if the index is zero-based this is the second <img> — confirm. The result is dereferenced
// (->src) without a null check.
$thumbnail = $post->find('img', 1)->src;
if (strlen($thumbnail) == 0)
$thumbnail = $profilePic;
//Build and add final item
// Item link: href of the first <abbr>'s parent element, with '&amp;' turned back into '&'.
$item->uri = 'https://facebook.com'.str_replace('&amp;', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
$item->thumbnailUri = $thumbnail;
$item->content = $content;
$item->title = $title;
$item->author = $author;
$item->timestamp = $date;
$this->items[] = $item;
}
$item->uri = 'https://facebook.com'.str_replace("&amp;", "&", $post->find("abbr")[0]->parent()->getAttribute("href"));
$item->content = $content;
$item->title = $param['u']." | ".strip_tags($content);
$item->timestamp = $date;
$this->items[] = $item;
}
}
}
/**
 * Returns 'Facebook Bridge', prefixed with the stored page name and ' - ' when
 * $this->name has been set (collectData() assigns it).
 */
public function getName() {
return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
}
}
/**
 * Returns the fixed address 'http://facebook.com'.
 */
public function getURI() {
return 'http://facebook.com';
}
// Variant of getName() that always returns the fixed string 'Facebook Bridge'.
public function getName(){
return 'Facebook Bridge';
}
// Returns the fixed address 'http://facebook.com'.
public function getURI(){
return 'http://facebook.com';
}
// Returns 300, which the inline comment describes as 5 minutes.
public function getCacheDuration(){
return 300; // 5 minutes
}
/**
 * Returns 300, which the inline comment describes as 5 minutes.
 */
public function getCacheDuration() {
return 300; // 5 minutes
}
}
?>

View file

@ -4,10 +4,11 @@
* Returns the newest articles
* 2014-05-25
*
* @name Nextinpact Bridge
* @name NextInpact Bridge
* @homepage http://www.nextinpact.com/
* @description Returns the newest articles.
* @maintainer qwertygc
* @update 2015-09-05
*/
class NextInpactBridge extends BridgeAbstract {
@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract {
/**
 * Loads the article at $url and assembles an HTML excerpt from it.
 *
 * The returned string concatenates, in this order: the text of span.sub_title in an
 * emphasised paragraph, the src of img.dedicated inside div.container_main_image_article
 * as an image paragraph, the inner HTML of div[itemprop=articleBody], and — only when it
 * is non-empty — the text of h2.title_reserve_article in an emphasised paragraph.
 *
 * @param string $url address handed to file_get_html() (presumably the article URL taken
 *                    from the feed — confirm at the call site)
 * @return string the assembled HTML fragment
 */
function ExtractContent($url) {
// NOTE(review): the result of file_get_html() is used without a check here, whereas the feed
// request further down falls back to $this->returnError() on a falsy result.
$html2 = file_get_html($url);
// NOTE(review): two image lines follow that differ only in alt="-" — presumably the removed
// and the added line of a diff; as written, both are concatenated into $text.
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" /></p>'
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
// NOTE(review): ->innertext is read directly on the find() result; the strlen() test below
// suggests the heading is expected to be missing on some articles — confirm what find()
// returns in that case.
$premium_article = $html2->find('h2.title_reserve_article', 0)->innertext;
if (strlen($premium_article) > 0)
$text = $text.'<p><em>'.$premium_article.'</em></p>';
return $text;
}
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404);
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
$limit = 0;
foreach($html->find('item') as $element) {
@ -46,16 +50,16 @@ class NextInpactBridge extends BridgeAbstract {
}
/**
 * Returns the fixed display name 'Nextinpact Bridge'.
 *
 * NOTE(review): the @name annotation and the request error message in this listing spell
 * the site "NextInpact"; this string keeps the lowercase "i" — confirm which is intended.
 */
public function getName(){
return 'Nextinpact Bridge';
}
/**
 * Returns the fixed display name 'Nextinpact Bridge'.
 *
 * NOTE(review): the @name annotation and the request error message in this listing spell
 * the site "NextInpact"; this string keeps the lowercase "i" — confirm which is intended.
 */
public function getName() {
return 'Nextinpact Bridge';
}
/**
 * Returns the fixed address 'http://www.nextinpact.com/'.
 */
public function getURI(){
return 'http://www.nextinpact.com/';
}
/**
 * Returns the fixed address 'http://www.nextinpact.com/'.
 */
public function getURI() {
return 'http://www.nextinpact.com/';
}
// Returns 3600, which the inline comment describes as 1 hour.
// NOTE(review): the header and the return statement appear twice before a single closing
// brace — presumably the removed and added lines of a diff; confirm against the real file.
public function getCacheDuration(){
return 3600; // 1 hour
public function getCacheDuration() {
return 3600; // 1 hour
// return 0;
}
}
}

View file

@ -8,7 +8,7 @@
* @homepage https://wordpress.com/
* @description Returns the 3 newest full posts of a Wordpress blog
* @maintainer aledeg
* @update 2014-05-26
* @update 2015-09-05
* @use1(url="blog URL (required)", name="blog name")
*/
class WordPressBridge extends BridgeAbstract {
@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract {
}
$html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
$posts = $html->find('.post');
$posts = $html->find('.post');
if(!empty($posts) ) {
$i=0;
foreach ($html->find('.post') as $article) {
if($i < 3) {
$uri = $article->find('a', 0)->href;
$this->items[] = $this->getDetails($uri);
$thumbnail = $article->find('img', 0)->src;
$this->items[] = $this->getDetails($uri, $thumbnail);
$i++;
}
}
@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract {
}
}
private function getDetails($uri) {
private function getDetails($uri, $thumbnail) {
$html = file_get_html($uri) or exit;
$article = $html->find('.post', 0);
$title = $article->find('h1', 0)->innertext;
if (strlen($title) == 0)
$title = $article->find('h2', 0)->innertext;
$item = new \Item();
$article = $html->find('.post', 0);
$item->uri = $uri;
$item->title = $article->find('h1', 0)->innertext;
$item->title = htmlspecialchars_decode($title);
$item->author = $article->find('a[rel=author]', 0)->innertext;
$item->thumbnailUri = $thumbnail;
$item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
$item->timestamp = $this->getDate($uri);