From d7436c2d0a5c26fb350fde4f168894e79adc7562 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 5 Sep 2015 14:31:57 +0200 Subject: [PATCH 1/3] Improve Facebook Bridge Retrieve author display name from page title Build short readable title using name and content Convert relative links into absolute links Remove attributes from tags (may cause issues) Remove onclick and onmouseover (javascript code) Retrieve url of first image of post as thumnail Use author avatar as thumnail if no first image Use display name in feed name: name - Fb bridge Minor code indent fixes and use simple quotes --- bridges/FacebookBridge.php | 134 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 61 deletions(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 351dc39..109be9b 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -1,87 +1,99 @@ returnError('No results for this query.', 404); + private $name; + + public function collectData(array $param){ + + $html = ''; + + if(isset($param['u'])) { + if(!strpos($param['u'], "/")) { + $html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } else { + $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } } else { - - $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); - + $this->returnError('You must specify a Facebook username.', 400); } - } else { - $this->returnError('You must specify a Facebook username.', 400); + $element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0); - } + if(isset($element)) { + $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext); + $profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200'; + 
$this->name = $author; - - $element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0); - - if(isset($element)) { - - foreach($element->children() as $post) { - - $item = new \Item(); - - if($post->hasAttribute("data-time")) { - - //Clean the content of the page - $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', "", $post); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', "", $content); - - $content = strip_tags($content,""); + foreach($element->children() as $post) { + $item = new \Item(); - $date = $post->find("abbr")[0]; - if(isset($date) && $date->hasAttribute("data-utime")) { - $date = $date->getAttribute("data-utime"); - } else { - $date = 0; + if($post->hasAttribute("data-time")) { + + //Clean the content of the page and convert relative links into absolute links + $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', '', $post); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', '', $content); + $content = str_replace(' href="/', ' href="https://facebook.com/', $content); + $content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content); + $content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content); + $content = preg_replace('/<\/a [^>]+>/i', '', $content); + $content = strip_tags($content,''); + + //Retrieve date of the post + $date = $post->find("abbr")[0]; + if(isset($date) && $date->hasAttribute('data-utime')) { + $date = $date->getAttribute('data-utime'); + } else { + $date = 0; + } + + //Build title from username and content + $title = $author; + if (strlen($title) > 24) + $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...'; + $title = $title.' | '.strip_tags($content); + if (strlen($title) > 64) + $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...'; + + //Use first image as thumbnail if available, or profile pic fallback + $thumbnail = $post->find('img', 1)->src; + if (strlen($thumbnail) == 0) + $thumbnail = $profilePic; + + //Build and add final item + $item->uri = 'https://facebook.com'.str_replace('&', '&', $post->find('abbr')[0]->parent()->getAttribute('href')); + $item->thumbnailUri = $thumbnail; + $item->content = $content; + $item->title = $title; + $item->author = $author; + $item->timestamp = $date; + $this->items[] = $item; } - - $item->uri = 'https://facebook.com'.str_replace("&", "&", $post->find("abbr")[0]->parent()->getAttribute("href")); - - $item->content = $content; - $item->title = $param['u']." | ".strip_tags($content); - $item->timestamp = $date; - - $this->items[] = $item; } } + } + public function getName() { + return (isset($this->name) ? $this->name.' 
- ' : '').'Facebook Bridge'; + } - } + public function getURI() { + return 'http://facebook.com'; + } - public function getName(){ - return 'Facebook Bridge'; - } - - public function getURI(){ - return 'http://facebook.com'; - } - - public function getCacheDuration(){ - return 300; // 5 minutes - } + public function getCacheDuration() { + return 300; // 5 minutes + } } - -?> From 8b9c40534b2249b7cadd898ccea2af592c31329c Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 5 Sep 2015 14:37:52 +0200 Subject: [PATCH 2/3] NextInpactBridge: Add mention for premium articles Add mention on article bottom (n% to discover) Minor name fix: Nextinpact -> NextInpact Further code indentation fixes --- bridges/NextInpactBridge.php | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 16126fd..c91a483 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -4,10 +4,11 @@ * Returns the newest articles * 2014-05-25 * -* @name Nextinpact Bridge +* @name NextInpact Bridge * @homepage http://www.nextinpact.com/ * @description Returns the newest articles. * @maintainer qwertygc +* @update 2015-09-05 */ class NextInpactBridge extends BridgeAbstract { @@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract { function ExtractContent($url) { $html2 = file_get_html($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' - .'

' + .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; + $premium_article = $html2->find('h2.title_reserve_article', 0)->innertext; + if (strlen($premium_article) > 0) + $text = $text.'

'.$premium_article.'

'; return $text; } - $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404); + $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404); $limit = 0; foreach($html->find('item') as $element) { @@ -46,16 +50,16 @@ class NextInpactBridge extends BridgeAbstract { } - public function getName(){ - return 'Nextinpact Bridge'; - } + public function getName() { + return 'Nextinpact Bridge'; + } - public function getURI(){ - return 'http://www.nextinpact.com/'; - } + public function getURI() { + return 'http://www.nextinpact.com/'; + } - public function getCacheDuration(){ - return 3600; // 1 hour + public function getCacheDuration() { + return 3600; // 1 hour // return 0; - } + } } From 5b2c8e91d07a02c520b040695df7d644c434af81 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 5 Sep 2015 14:40:31 +0200 Subject: [PATCH 3/3] Improve Wordpress Bridge Retrieve post author Retrieve post thumbnail Retrieve title from

<h2> if no <h1>

Minor code indentation fixes --- bridges/WordPressBridge.php | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 308835d..a2eee25 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -8,7 +8,7 @@ * @homepage https://wordpress.com/ * @description Returns the 3 newest full posts of a Wordpress blog * @maintainer aledeg - * @update 2014-05-26 + * @update 2015-09-05 * @use1(url="blog URL (required)", name="blog name") */ class WordPressBridge extends BridgeAbstract { @@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract { } $html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404); + $posts = $html->find('.post'); - $posts = $html->find('.post'); if(!empty($posts) ) { $i=0; foreach ($html->find('.post') as $article) { if($i < 3) { $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); + $thumbnail = $article->find('img', 0)->src; + $this->items[] = $this->getDetails($uri, $thumbnail); $i++; } } @@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract { } } - private function getDetails($uri) { + private function getDetails($uri, $thumbnail) { $html = file_get_html($uri) or exit; + $article = $html->find('.post', 0); + + $title = $article->find('h1', 0)->innertext; + if (strlen($title) == 0) + $title = $article->find('h2', 0)->innertext; $item = new \Item(); - - $article = $html->find('.post', 0); $item->uri = $uri; - $item->title = $article->find('h1', 0)->innertext; + $item->title = htmlspecialchars_decode($title); + $item->author = $article->find('a[rel=author]', 0)->innertext; + $item->thumbnailUri = $thumbnail; $item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext); $item->timestamp = $this->getDate($uri);