forked from blallo/rss-bridge
Merge pull request #153 from ORelio/master
Improve FaceBook, NextInpact, Wordpress bridges
This commit is contained in:
commit
36ba93eea3
3 changed files with 102 additions and 80 deletions
|
@ -1,87 +1,99 @@
|
|||
<?php
|
||||
/**
|
||||
*
|
||||
* @name Facebook
|
||||
* @homepage http://facebook.com/
|
||||
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117
|
||||
* @update 03/08/2015
|
||||
* @update 05/09/2015
|
||||
* @maintainer teromene
|
||||
* @use1(u="username")
|
||||
*/
|
||||
|
||||
class FacebookBridge extends BridgeAbstract{
|
||||
|
||||
public function collectData(array $param){
|
||||
|
||||
$html = '';
|
||||
|
||||
if(isset($param['u'])) {
|
||||
if(!strpos($param['u'], "/")) {
|
||||
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||
private $name;
|
||||
|
||||
public function collectData(array $param){
|
||||
|
||||
$html = '';
|
||||
|
||||
if(isset($param['u'])) {
|
||||
if(!strpos($param['u'], "/")) {
|
||||
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||
} else {
|
||||
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||
}
|
||||
} else {
|
||||
|
||||
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||
|
||||
$this->returnError('You must specify a Facebook username.', 400);
|
||||
}
|
||||
} else {
|
||||
|
||||
$this->returnError('You must specify a Facebook username.', 400);
|
||||
$element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
|
||||
|
||||
}
|
||||
if(isset($element)) {
|
||||
|
||||
$author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
|
||||
$profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200';
|
||||
$this->name = $author;
|
||||
|
||||
|
||||
$element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0);
|
||||
|
||||
if(isset($element)) {
|
||||
|
||||
foreach($element->children() as $post) {
|
||||
|
||||
$item = new \Item();
|
||||
|
||||
if($post->hasAttribute("data-time")) {
|
||||
|
||||
//Clean the content of the page
|
||||
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', "", $post);
|
||||
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', "", $content);
|
||||
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content);
|
||||
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', "", $content);
|
||||
|
||||
$content = strip_tags($content,"<a><img>");
|
||||
foreach($element->children() as $post) {
|
||||
|
||||
$item = new \Item();
|
||||
|
||||
$date = $post->find("abbr")[0];
|
||||
if(isset($date) && $date->hasAttribute("data-utime")) {
|
||||
$date = $date->getAttribute("data-utime");
|
||||
} else {
|
||||
$date = 0;
|
||||
if($post->hasAttribute("data-time")) {
|
||||
|
||||
//Clean the content of the page and convert relative links into absolute links
|
||||
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
|
||||
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
|
||||
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
|
||||
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
|
||||
$content = str_replace(' href="/', ' href="https://facebook.com/', $content);
|
||||
$content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content);
|
||||
$content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content);
|
||||
$content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
|
||||
$content = strip_tags($content,'<a><img>');
|
||||
|
||||
//Retrieve date of the post
|
||||
$date = $post->find("abbr")[0];
|
||||
if(isset($date) && $date->hasAttribute('data-utime')) {
|
||||
$date = $date->getAttribute('data-utime');
|
||||
} else {
|
||||
$date = 0;
|
||||
}
|
||||
|
||||
//Build title from username and content
|
||||
$title = $author;
|
||||
if (strlen($title) > 24)
|
||||
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
|
||||
$title = $title.' | '.strip_tags($content);
|
||||
if (strlen($title) > 64)
|
||||
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
|
||||
|
||||
//Use first image as thumbnail if available, or profile pic fallback
|
||||
$thumbnail = $post->find('img', 1)->src;
|
||||
if (strlen($thumbnail) == 0)
|
||||
$thumbnail = $profilePic;
|
||||
|
||||
//Build and add final item
|
||||
$item->uri = 'https://facebook.com'.str_replace('&amp;', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
|
||||
$item->thumbnailUri = $thumbnail;
|
||||
$item->content = $content;
|
||||
$item->title = $title;
|
||||
$item->author = $author;
|
||||
$item->timestamp = $date;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
$item->uri = 'https://facebook.com'.str_replace("&amp;", "&", $post->find("abbr")[0]->parent()->getAttribute("href"));
|
||||
|
||||
$item->content = $content;
|
||||
$item->title = $param['u']." | ".strip_tags($content);
|
||||
$item->timestamp = $date;
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public function getName() {
|
||||
return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
|
||||
}
|
||||
|
||||
}
|
||||
public function getURI() {
|
||||
return 'http://facebook.com';
|
||||
}
|
||||
|
||||
public function getName(){
|
||||
return 'Facebook Bridge';
|
||||
}
|
||||
|
||||
public function getURI(){
|
||||
return 'http://facebook.com';
|
||||
}
|
||||
|
||||
public function getCacheDuration(){
|
||||
return 300; // 5 minutes
|
||||
}
|
||||
public function getCacheDuration() {
|
||||
return 300; // 5 minutes
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
|
|
@ -4,10 +4,11 @@
|
|||
* Returns the newest articles
|
||||
* 2014-05-25
|
||||
*
|
||||
* @name Nextinpact Bridge
|
||||
* @name NextInpact Bridge
|
||||
* @homepage http://www.nextinpact.com/
|
||||
* @description Returns the newest articles.
|
||||
* @maintainer qwertygc
|
||||
* @update 2015-09-05
|
||||
*/
|
||||
class NextInpactBridge extends BridgeAbstract {
|
||||
|
||||
|
@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract {
|
|||
function ExtractContent($url) {
|
||||
$html2 = file_get_html($url);
|
||||
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
|
||||
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" /></p>'
|
||||
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
|
||||
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
|
||||
$premium_article = $html2->find('h2.title_reserve_article', 0)->innertext;
|
||||
if (strlen($premium_article) > 0)
|
||||
$text = $text.'<p><em>'.$premium_article.'</em></p>';
|
||||
return $text;
|
||||
}
|
||||
|
||||
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404);
|
||||
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
|
||||
$limit = 0;
|
||||
|
||||
foreach($html->find('item') as $element) {
|
||||
|
@ -46,16 +50,16 @@ class NextInpactBridge extends BridgeAbstract {
|
|||
|
||||
}
|
||||
|
||||
public function getName(){
|
||||
return 'Nextinpact Bridge';
|
||||
}
|
||||
public function getName() {
|
||||
return 'Nextinpact Bridge';
|
||||
}
|
||||
|
||||
public function getURI(){
|
||||
return 'http://www.nextinpact.com/';
|
||||
}
|
||||
public function getURI() {
|
||||
return 'http://www.nextinpact.com/';
|
||||
}
|
||||
|
||||
public function getCacheDuration(){
|
||||
return 3600; // 1 hour
|
||||
public function getCacheDuration() {
|
||||
return 3600; // 1 hour
|
||||
// return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* @homepage https://wordpress.com/
|
||||
* @description Returns the 3 newest full posts of a Wordpress blog
|
||||
* @maintainer aledeg
|
||||
* @update 2014-05-26
|
||||
* @update 2015-09-05
|
||||
* @use1(url="blog URL (required)", name="blog name")
|
||||
*/
|
||||
class WordPressBridge extends BridgeAbstract {
|
||||
|
@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract {
|
|||
}
|
||||
|
||||
$html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
|
||||
$posts = $html->find('.post');
|
||||
|
||||
$posts = $html->find('.post');
|
||||
if(!empty($posts) ) {
|
||||
$i=0;
|
||||
foreach ($html->find('.post') as $article) {
|
||||
if($i < 3) {
|
||||
$uri = $article->find('a', 0)->href;
|
||||
$this->items[] = $this->getDetails($uri);
|
||||
$thumbnail = $article->find('img', 0)->src;
|
||||
$this->items[] = $this->getDetails($uri, $thumbnail);
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
|
@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract {
|
|||
}
|
||||
}
|
||||
|
||||
private function getDetails($uri) {
|
||||
private function getDetails($uri, $thumbnail) {
|
||||
$html = file_get_html($uri) or exit;
|
||||
$article = $html->find('.post', 0);
|
||||
|
||||
$title = $article->find('h1', 0)->innertext;
|
||||
if (strlen($title) == 0)
|
||||
$title = $article->find('h2', 0)->innertext;
|
||||
|
||||
$item = new \Item();
|
||||
|
||||
$article = $html->find('.post', 0);
|
||||
$item->uri = $uri;
|
||||
$item->title = $article->find('h1', 0)->innertext;
|
||||
$item->title = htmlspecialchars_decode($title);
|
||||
$item->author = $article->find('a[rel=author]', 0)->innertext;
|
||||
$item->thumbnailUri = $thumbnail;
|
||||
$item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
|
||||
$item->timestamp = $this->getDate($uri);
|
||||
|
||||
|
|
Loading…
Reference in a new issue