diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php new file mode 100644 index 0000000..6526eae --- /dev/null +++ b/bridges/CollegeDeFranceBridge.php @@ -0,0 +1,41 @@ +returnError('Could not request CollegeDeFrance.', 404); + $limit = 0; + foreach($html->find('li.audio') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = $element->find('span.title', 0)->plaintext; + $item->timestamp = strtotime(str_replace($find, $replace, $element->find('span.date', 0)->plaintext)); + $item->content = $element->find('span.lecturer', 0)->innertext . ' - ' . $element->find('span.title', 0)->innertext; + $item->uri = $element->find('a', 0)->href; + $this->items[] = $item; + $limit++; + } + } + + } + public function getName(){ + return 'CollegeDeFrance'; + } + public function getURI(){ + return 'http://www.college-de-france.fr/'; + } + public function getCacheDuration(){ + return 3600*3; // 3 hour + } +} + diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php new file mode 100644 index 0000000..fa274b1 --- /dev/null +++ b/bridges/FlickrTagBridge.php @@ -0,0 +1,53 @@ +returnError('Could not request Flickr.', 404); + if (isset($param['q'])) { /* keyword search mode */ + $this->request = $param['q']; + $html = file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404); + } + elseif (isset($param['u'])) { /* user timeline mode */ + $this->request = $param['u']; + $html = file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404); + } + + else { + $this->returnError('You must specify a keyword or a Flickr username.', 400); + } + + foreach($html->find('span.photo_container') as $element) { + $item = new \Item(); + $item->uri = 'http://flickr.com'.$element->find('a',0)->href; + $item->thumbnailUri = $element->find('img',0)->getAttribute('data-defer-src'); + 
$item->content = ''; // FIXME: Filter javascript ? + $item->title = $element->find('a',0)->title; + $this->items[] = $item; + } + } + + public function getName(){ + return 'Flickr Tag'; + } + + public function getURI(){ + return 'http://www.flickr.com/search/'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} + diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php new file mode 100644 index 0000000..8ae90e8 --- /dev/null +++ b/bridges/KoreusBridge.php @@ -0,0 +1,56 @@ +', '', $string); + return $string; + } + function KoreusExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('p[class=itemText]', 0)->innertext; + $text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text)); + return $text; + } + $html = file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnError('Could not request Koreus.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = KoreusStripCDATA($element->find('title', 0)->innertext); + $item->uri = KoreusStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = KoreusExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Koreus'; + } + + public function getURI(){ + return 'http://www.koreus.com/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + } +} + diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php new file mode 100644 index 0000000..bd7f5b1 --- /dev/null +++ b/bridges/MsnMondeBridge.php @@ -0,0 +1,55 @@ +find('div[id=m6_diaponews_placeholder]', 0)->outertext=''; //Supression de la partie "et aussi" + $text = $html2->find('div[class=svsubtorabs]', 0)->innertext; // ajout du resume + $text .= $html2->find('div[id=page1]', 0)->innertext; // article + $text = preg_replace('/
Lire aussi.*/i','',$text); //Supression de la partie "Lire aussi"
+
+ return $text;
+ }
+
+ $html = file_get_html('http://news.fr.msn.com/m6-actualite/RSS/News_RSS_Monde.aspx') or $this->returnError('Could not request MsnMonde.', 404);
+ $limit = 0;
+
+ foreach($html->find('item') as $element) {
+ if($limit < 10) {
+ $item = new \Item();
+ $item->title = $element->find('title', 0)->innertext;
+ $item->uri = $element->find('guid', 0)->plaintext;
+ $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+ $item->content = MsnMondeExtractContent($item->uri);
+ $this->items[] = $item;
+ $limit++;
+ }
+ }
+
+ }
+
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'MSN Actu Monde';
+ }
+
+ public function getURI(){ // Returns the fixed URL string this bridge reports as its URI.
+ return 'http://news.fr.msn.com/m6-actualite/monde/';
+ }
+
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 3600; // 1 hour
+ }
+}
+
diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php
new file mode 100644
index 0000000..7c0441a
--- /dev/null
+++ b/bridges/NiceMatinBridge.php
@@ -0,0 +1,62 @@
+', '', $string);
+ //$string = str_replace('.+', '', $string);
+ $string = preg_replace('/html.*http.*/i','html',$string);
+ $string = preg_replace('/.*http/i','http',$string);
+ return $string;
+ }
+
+ function NiceMatinExtractContent($url) { // Fetches the article at $url and returns its media figure HTML followed by its body HTML; missing parts contribute an empty string.
+ $html2 = file_get_html($url);
+ $media = $html2 ? $html2->find('figure[itemprop=associatedMedia]', 0) : null; // null when the page failed to load or has no such node
+ $body = $html2 ? $html2->find('div[id=content-article]', 0) : null;
+ return ($media ? $media->innertext : '') . ($body ? $body->innertext : ''); // degrade to '' instead of a fatal member access on false/null
+ }
+
+ $html = file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);
+ $limit = 0;
+
+ foreach($html->find('item') as $element) {
+ if($limit < 10) {
+ $item = new \Item();
+ //$item->title = NiceMatinStripCDATA($element->find('title', 0)->innertext);
+ $item->title = $element->find('title', 0)->innertext;
+ $item->uri = NiceMatinUrl($element->plaintext);
+
+ $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+ $item->content = NiceMatinExtractContent($item->uri);
+ $this->items[] = $item;
+ $limit++;
+ }
+ }
+
+ }
+
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'NiceMatin';
+ }
+
+ public function getURI(){ // Returns the fixed URL string this bridge reports as its URI.
+ return 'http://www.nicematin.com/';
+ }
+
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 3600; // 1 hour
+ }
+}
+
diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php
new file mode 100644
index 0000000..073ec28
--- /dev/null
+++ b/bridges/PlanetLibreBridge.php
@@ -0,0 +1,49 @@
+', '', $string);
+ return $string;
+ }
+ function PlanetLibreExtractContent($url) { // Fetches the page at $url and returns the inner HTML of its first div.post-text, or '' when it is unavailable.
+ $html2 = file_get_html($url);
+ $post = $html2 ? $html2->find('div[class=post-text]', 0) : null; // null when the page failed to load or has no such node
+ return $post ? $post->innertext : ''; // degrade to '' instead of a fatal member access on false/null
+ }
+ $html = file_get_html('http://www.planet-libre.org/rss10.php') or $this->returnError('Could not request PlanetLibre.', 404);
+ $limit = 0;
+ foreach($html->find('item') as $element) {
+ if($limit < 5) {
+ $item = new \Item();
+ $item->title = PlanetLibreStripCDATA($element->find('title', 0)->innertext);
+ $item->uri = PlanetLibreStripCDATA($element->find('guid', 0)->plaintext);
+ $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+ $item->content = PlanetLibreExtractContent($item->uri);
+ $this->items[] = $item;
+ $limit++;
+ }
+ }
+
+ }
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'PlanetLibre';
+ }
+ public function getURI(){ // Returns the fixed URL string this bridge reports as its URI.
+ return 'http://www.planet-libre.org/';
+ }
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 3600*2; // 2 hours
+ }
+}
+
diff --git a/bridges/RaymondBridge.php b/bridges/RaymondBridge.php
new file mode 100644
index 0000000..2d79ebb
--- /dev/null
+++ b/bridges/RaymondBridge.php
@@ -0,0 +1,52 @@
+', '', $string);
+ return $string;
+ }
+ function raymondExtractContent($url) { // Fetches the post at $url and returns its div.entry-content HTML after the ad/tag clean-up below, or '' when it is unavailable.
+ $html2 = file_get_html($url);
+ $entry = $html2 ? $html2->find('div.entry-content', 0) : null; // null when the page failed to load or has no such node
+ $text = preg_replace('/class="ad".*/', '', $entry ? $entry->innertext : ''); // drops each class="ad" occurrence together with the rest of its line
+ $text = strip_tags($text, ' '); // NOTE(review): the allow-list literal may have lost its tag names in transit; confirm against the real file
+ $text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text); // remove the leftover inline ad script text
+ return $text;
+ }
+ $html = file_get_html('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404);
+ $limit = 0;
+ foreach($html->find('item') as $element) {
+ if($limit < 3) {
+ $item = new \Item();
+ $item->title = raymondStripCDATA($element->find('title', 0)->innertext);
+ $item->uri = raymondStripCDATA($element->find('guid', 0)->plaintext);
+ $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+ $item->content = raymondExtractContent($item->uri);
+ $this->items[] = $item;
+ $limit++;
+ }
+ }
+
+ }
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'raymond';
+ }
+ public function getURI(){ // Returns the fixed URL string this bridge reports as its URI.
+ return 'http://www.raymond.cc/blog';
+ }
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 3600*12; // 12 hours
+ }
+}
+
diff --git a/bridges/Sexactu.php b/bridges/Sexactu.php
new file mode 100644
index 0000000..6339095
--- /dev/null
+++ b/bridges/Sexactu.php
@@ -0,0 +1,91 @@
+getURI()) or $this->returnError('Could not request '.$this->getURI(), 404);
+
+ foreach($html->find('.content-holder') as $contentHolder) {
+ // only use first list as second one only contains pages numbers
+ $articles = $contentHolder->find('ul', 0);
+ foreach($articles->find('li') as $element) {
+ // if you ask about that method_exists, there seems to be a bug in simple html dom
+ // see stackoverflow for more details : http://stackoverflow.com/a/10828479/15619
+ if(is_object($element)) {
+ $item = new Item();
+ // various metadata
+ $titleBlock = $element->find('.title-holder', 0);
+ if(is_object($titleBlock)) {
+ $titleDetails = $titleBlock->find('.article-title',0);
+ $titleData = $titleDetails->find('h2', 0)->find('a',0);
+ $titleTimestamp =$titleDetails->find('h4',0);
+ $item->title = $this->correctCase(trim($titleData->innertext));
+ $item->uri = GQ.$titleData->href;
+
+ // Fugly date parsing due to the fact my DNS-323 doesn't support php intl extension
+ $dateText = $titleTimestamp->innertext;
+ $dateText = substr($dateText, strpos($dateText,',')+1);
+ $dateText = str_replace($find, $replace, strtolower($dateText));
+ $date = strtotime($dateText);
+ $item->timestamp = $date;
+
+ $item->name = "Maïa Mazaurette";
+ $elementText = $element->find('.text-container', 0);
+ // don't forget to replace images server url with gq one
+ foreach($elementText->find('img') as $image) {
+ $image->src = GQ.$image->src;
+ }
+ $item->content = $elementText->innertext;
+ $this->items[] = $item;
+ }
+
+ }
+
+ }
+ }
+ }
+
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'Sexactu';
+ }
+
+ public function getURI(){ // Returns the GQ constant (defined outside this view) with '/sexactu' appended.
+ return GQ.'/sexactu';
+ }
+
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 7200; // 2 hours
+ }
+ public function getDescription(){ // Returns this bridge's description as a constant string.
+ return "Sexactu";
+ }
+
+ public function correctCase($str) { // Sentence-cases $str: lower-cases it as UTF-8, splits on '.', capitalises each piece's first character and re-joins with ". ".
+ $sentences=explode('.', mb_strtolower($str, "UTF-8"));
+ $str="";
+ $sep="";
+ foreach ($sentences as $sentence)
+ {
+ // upper-case the first character with mb_* functions: ucfirst() is byte-based and leaves accented UTF-8 initials such as "é" untouched
+ $sentence=trim($sentence);
+ $sentence=mb_strtoupper(mb_substr($sentence, 0, 1, "UTF-8"), "UTF-8").mb_substr($sentence, 1, null, "UTF-8");
+ //append sentence to output
+ $str=$str.$sep.$sentence;
+ $sep=". ";
+ }
+ return $str;
+ }
+}
+
diff --git a/bridges/ThePirateBayBridge.php b/bridges/ThePirateBayBridge.php
new file mode 100644
index 0000000..c0ce493
--- /dev/null
+++ b/bridges/ThePirateBayBridge.php
@@ -0,0 +1,49 @@
+returnError('You must specify a keyword (?q=...)', 400);
+
+ $html = file_get_html('https://thepiratebay.se/search/'.rawurlencode($param['q']).'/0/99/0') or $this->returnError('Could not request TPB.', 404);
+
+ if($html->find('table#searchResult', 0) == FALSE)
+ $this->returnError('No result for this query', 404);
+
+ foreach($html->find('tr') as $element) {
+ $item = new \Item();
+ $item->uri = 'https://thepiratebay.se/'.$element->find('a.detLink',0)->href;
+ $item->id = $item->uri;
+ $item->timestamp = time();
+ $item->title = $element->find('a.detLink',0)->plaintext;
+ $item->content = $element->find('font',0)->plaintext.'
download';
+ if(!empty($item->title))
+ $this->items[] = $item;
+ }
+ }
+
+ public function getName(){ // Returns this bridge's name as a constant string.
+ return 'The Pirate Bay';
+ }
+
+ public function getURI(){ // Returns the fixed URL string this bridge reports as its URI.
+ return 'https://thepiratebay.se/';
+ }
+
+ public function getCacheDuration(){ // Returns the cache lifetime, expressed in seconds.
+ return 3600; // 1 hour
+ }
+}
diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php
new file mode 100644
index 0000000..c61fbb7
--- /dev/null
+++ b/bridges/WordPressBridge.php
@@ -0,0 +1,94 @@
+processParams($param);
+
+ if (!$this->hasUrl()) {
+ $this->returnError('You must specify a URL', 400);
+ }
+
+ $html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
+
+ if(!empty($html->find('.post')) ) {
+ $i=0;
+ foreach ($html->find('.post') as $article) {
+ if($i < 3) {
+ $uri = $article->find('a', 0)->href;
+ $this->items[] = $this->getDetails($uri);
+ $i++;
+ }
+ }
+ }
+ else {
+ $this->returnError("Sorry, {$this->url} doesn't seem to be a Wordpress blog.", 404);
+ }
+ }
+
+ private function getDetails($uri) { // Loads the article page at $uri and builds an Item (uri, title, content, timestamp) from its first ".post" element.
+ $html = file_get_html($uri) or $this->returnError("Could not request {$uri}.", 404); // report the failed fetch like the other requests in this class instead of a silent exit; assumes returnError() aborts, as those call sites rely on
+
+ $item = new \Item();
+
+ $article = $html->find('.post', 0);
+ $item->uri = $uri;
+ $item->title = $article->find('h1', 0)->innertext;
+ $item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
+ $item->timestamp = $this->getDate($uri); // getDate() is defined outside this view
+
+ return $item;
+ }
+
+ private function clearContent($content) {
+ $content = preg_replace('/