forked from blallo/rss-bridge
Merge pull request #153 from ORelio/master
Improve Facebook, NextInpact, WordPress bridges
This commit is contained in:
commit
36ba93eea3
3 changed files with 102 additions and 80 deletions
|
@ -1,16 +1,16 @@
|
||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @name Facebook
|
* @name Facebook
|
||||||
* @homepage http://facebook.com/
|
* @homepage http://facebook.com/
|
||||||
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follows: myExamplePage/132621766841117
|
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follows: myExamplePage/132621766841117
|
||||||
* @update 03/08/2015
|
* @update 05/09/2015
|
||||||
* @maintainer teromene
|
* @maintainer teromene
|
||||||
* @use1(u="username")
|
* @use1(u="username")
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class FacebookBridge extends BridgeAbstract{
|
class FacebookBridge extends BridgeAbstract{
|
||||||
|
|
||||||
|
private $name;
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
$html = '';
|
$html = '';
|
||||||
|
@ -19,60 +19,74 @@ class FacebookBridge extends BridgeAbstract{
|
||||||
if(!strpos($param['u'], "/")) {
|
if(!strpos($param['u'], "/")) {
|
||||||
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
$this->returnError('You must specify a Facebook username.', 400);
|
$this->returnError('You must specify a Facebook username.', 400);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
|
||||||
|
|
||||||
$element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0);
|
|
||||||
|
|
||||||
if(isset($element)) {
|
if(isset($element)) {
|
||||||
|
|
||||||
|
$author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
|
||||||
|
$profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200';
|
||||||
|
$this->name = $author;
|
||||||
|
|
||||||
foreach($element->children() as $post) {
|
foreach($element->children() as $post) {
|
||||||
|
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
||||||
if($post->hasAttribute("data-time")) {
|
if($post->hasAttribute("data-time")) {
|
||||||
|
|
||||||
//Clean the content of the page
|
//Clean the content of the page and convert relative links into absolute links
|
||||||
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', "", $post);
|
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
|
||||||
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', "", $content);
|
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
|
||||||
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content);
|
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
|
||||||
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', "", $content);
|
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
|
||||||
|
$content = str_replace(' href="/', ' href="https://facebook.com/', $content);
|
||||||
$content = strip_tags($content,"<a><img>");
|
$content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content);
|
||||||
|
$content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content);
|
||||||
|
$content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
|
||||||
|
$content = strip_tags($content,'<a><img>');
|
||||||
|
|
||||||
|
//Retrieve date of the post
|
||||||
$date = $post->find("abbr")[0];
|
$date = $post->find("abbr")[0];
|
||||||
if(isset($date) && $date->hasAttribute("data-utime")) {
|
if(isset($date) && $date->hasAttribute('data-utime')) {
|
||||||
$date = $date->getAttribute("data-utime");
|
$date = $date->getAttribute('data-utime');
|
||||||
} else {
|
} else {
|
||||||
$date = 0;
|
$date = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
$item->uri = 'https://facebook.com'.str_replace("&amp;", "&", $post->find("abbr")[0]->parent()->getAttribute("href"));
|
//Build title from username and content
|
||||||
|
$title = $author;
|
||||||
|
if (strlen($title) > 24)
|
||||||
|
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
|
||||||
|
$title = $title.' | '.strip_tags($content);
|
||||||
|
if (strlen($title) > 64)
|
||||||
|
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
|
||||||
|
|
||||||
|
//Use first image as thumbnail if available, or profile pic fallback
|
||||||
|
$thumbnail = $post->find('img', 1)->src;
|
||||||
|
if (strlen($thumbnail) == 0)
|
||||||
|
$thumbnail = $profilePic;
|
||||||
|
|
||||||
|
//Build and add final item
|
||||||
|
$item->uri = 'https://facebook.com'.str_replace('&amp;', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
|
||||||
|
$item->thumbnailUri = $thumbnail;
|
||||||
$item->content = $content;
|
$item->content = $content;
|
||||||
$item->title = $param['u']." | ".strip_tags($content);
|
$item->title = $title;
|
||||||
|
$item->author = $author;
|
||||||
$item->timestamp = $date;
|
$item->timestamp = $date;
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getName() {
|
public function getName() {
|
||||||
return 'Facebook Bridge';
|
return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getURI() {
|
public function getURI() {
|
||||||
|
@ -83,5 +97,3 @@ class FacebookBridge extends BridgeAbstract{
|
||||||
return 300; // 5 minutes
|
return 300; // 5 minutes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
|
|
@ -4,10 +4,11 @@
|
||||||
* Returns the newest articles
|
* Returns the newest articles
|
||||||
* 2014-05-25
|
* 2014-05-25
|
||||||
*
|
*
|
||||||
* @name Nextinpact Bridge
|
* @name NextInpact Bridge
|
||||||
* @homepage http://www.nextinpact.com/
|
* @homepage http://www.nextinpact.com/
|
||||||
* @description Returns the newest articles.
|
* @description Returns the newest articles.
|
||||||
* @maintainer qwertygc
|
* @maintainer qwertygc
|
||||||
|
* @update 2015-09-05
|
||||||
*/
|
*/
|
||||||
class NextInpactBridge extends BridgeAbstract {
|
class NextInpactBridge extends BridgeAbstract {
|
||||||
|
|
||||||
|
@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract {
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = file_get_html($url);
|
||||||
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
|
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
|
||||||
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" /></p>'
|
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
|
||||||
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
|
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
|
||||||
|
$premium_article = $html2->find('h2.title_reserve_article', 0)->innertext;
|
||||||
|
if (strlen($premium_article) > 0)
|
||||||
|
$text = $text.'<p><em>'.$premium_article.'</em></p>';
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404);
|
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
* @homepage https://wordpress.com/
|
* @homepage https://wordpress.com/
|
||||||
* @description Returns the 3 newest full posts of a Wordpress blog
|
* @description Returns the 3 newest full posts of a Wordpress blog
|
||||||
* @maintainer aledeg
|
* @maintainer aledeg
|
||||||
* @update 2014-05-26
|
* @update 2015-09-05
|
||||||
* @use1(url="blog URL (required)", name="blog name")
|
* @use1(url="blog URL (required)", name="blog name")
|
||||||
*/
|
*/
|
||||||
class WordPressBridge extends BridgeAbstract {
|
class WordPressBridge extends BridgeAbstract {
|
||||||
|
@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
|
$html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
|
||||||
|
|
||||||
$posts = $html->find('.post');
|
$posts = $html->find('.post');
|
||||||
|
|
||||||
if(!empty($posts) ) {
|
if(!empty($posts) ) {
|
||||||
$i=0;
|
$i=0;
|
||||||
foreach ($html->find('.post') as $article) {
|
foreach ($html->find('.post') as $article) {
|
||||||
if($i < 3) {
|
if($i < 3) {
|
||||||
$uri = $article->find('a', 0)->href;
|
$uri = $article->find('a', 0)->href;
|
||||||
$this->items[] = $this->getDetails($uri);
|
$thumbnail = $article->find('img', 0)->src;
|
||||||
|
$this->items[] = $this->getDetails($uri, $thumbnail);
|
||||||
$i++;
|
$i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function getDetails($uri) {
|
private function getDetails($uri, $thumbnail) {
|
||||||
$html = file_get_html($uri) or exit;
|
$html = file_get_html($uri) or exit;
|
||||||
|
$article = $html->find('.post', 0);
|
||||||
|
|
||||||
|
$title = $article->find('h1', 0)->innertext;
|
||||||
|
if (strlen($title) == 0)
|
||||||
|
$title = $article->find('h2', 0)->innertext;
|
||||||
|
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
||||||
$article = $html->find('.post', 0);
|
|
||||||
$item->uri = $uri;
|
$item->uri = $uri;
|
||||||
$item->title = $article->find('h1', 0)->innertext;
|
$item->title = htmlspecialchars_decode($title);
|
||||||
|
$item->author = $article->find('a[rel=author]', 0)->innertext;
|
||||||
|
$item->thumbnailUri = $thumbnail;
|
||||||
$item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
|
$item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
|
||||||
$item->timestamp = $this->getDate($uri);
|
$item->timestamp = $this->getDate($uri);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue