From 9e3ef4869edf9a55abe92d113b5be56723a03a17 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 2 Sep 2013 11:35:10 +0200 Subject: [PATCH 001/139] Create FSBridge.php I add the RSS feed of www.futura-sciences.com --- bridges/FSBridge.php | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 bridges/FSBridge.php diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php new file mode 100644 index 0000000..1ffd25d --- /dev/null +++ b/bridges/FSBridge.php @@ -0,0 +1,56 @@ +', '', $string); + return $string; + } + function FS_ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.fiche-actualite', 0)->innertext; + return $text; + } + $html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 20) { + $item = new \Item(); + $item->title = FS_StripCDATA($element->find('title', 0)->innertext); + $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FS_ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Futura Sciences'; + } + + public function getURI(){ + return 'http://www.futura-sciences.com/'; + } + + public function getCacheDuration(){ + // return 3600; // 1 hour + return 0; // 1 hour + } +} From 05e480e6d2aa66909e1e1b7fcbb800fdcf4e2a5f Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 2 Sep 2013 11:37:03 +0200 Subject: [PATCH 002/139] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e922f58..d751a12 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ Supported sites/pages * `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances). * `YouTube` : YouTube user channel feed. * `Cryptome` : Returns the most recent documents from Cryptome.org. + * `Futurasciences` : Returns the most recent documents from futura-sciences.com. + Output format From 365033ac6a10d243a2ac8a48b975f995ffb8f743 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 2 Sep 2013 11:52:49 +0200 Subject: [PATCH 003/139] Create GuruMedBridge.php The bridge for gurumed.org --- bridges/GuruMedBridge.php | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 bridges/GuruMedBridge.php diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php new file mode 100644 index 0000000..d312c24 --- /dev/null +++ b/bridges/GuruMedBridge.php @@ -0,0 +1,55 @@ +', '', $string); + return $string; + } + function GurumedExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.entry', 0)->innertext; + return $text; + } + $html = file_get_html('http://gurumed.org/feed') or $this->returnError('Could not request Gurumed.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = GurumedStripCDATA($element->find('title', 0)->innertext); + $item->uri = GurumedStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = GurumedExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Gurumed'; + } + + public function getURI(){ + return 'http://gurumed.org/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + } +} From aa9a0de6b997b549e9a99379685eb7b87dcdfe94 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 2 Sep 2013 11:53:47 +0200 Subject: [PATCH 004/139] Update README.md Add gurumed.org --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d751a12..9ee4027 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ Supported sites/pages * `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances). * `YouTube` : YouTube user channel feed. * `Cryptome` : Returns the most recent documents from Cryptome.org. - * `Futurasciences` : Returns the most recent documents from futura-sciences.com. + * `Futurasciences` : Returns the most recent articles from futura-sciences.com. + * `GuruMed`: Returns the most recent articles for gurumed.org From 4f59e2441c8bf0138081e6db944b77af4e2a43fa Mon Sep 17 00:00:00 2001 From: Astalaseven Date: Mon, 16 Dec 2013 09:07:48 +0100 Subject: [PATCH 005/139] [new] Bridges for DTC and SDZ --- bridges/DansTonChatBridge.php | 42 ++++++++++++++++++++++++++++++++ bridges/OpenClassroomsBridge.php | 39 +++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 bridges/DansTonChatBridge.php create mode 100644 bridges/OpenClassroomsBridge.php diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php new file mode 100644 index 0000000..06ad5b2 --- /dev/null +++ b/bridges/DansTonChatBridge.php @@ -0,0 +1,42 @@ +returnError('Could not request DansTonChat.', 404); + + foreach($html->find('div.item') as $element) { + $item = new \Item(); + $item->uri = $element->find('a', 0)->href; + $item->title = 'DansTonChat '.$element->find('a', 1)->plaintext; +<<<<<<< HEAD +======= + //$item->content = $element->find('p.item-content', 0)->innertext; +>>>>>>> d194d02... [new] Bridges for DTC and SDZ + $item->content = $element->find('a', 0)->innertext; + $this->items[] = $item; + } + } + + public function getName(){ + return 'DansTonChat'; + } + + public function getURI(){ + return 'http://danstonchat.com'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php new file mode 100644 index 0000000..d2d3c4a --- /dev/null +++ b/bridges/OpenClassroomsBridge.php @@ -0,0 +1,39 @@ +returnError('Could not request OpenClassrooms.', 404); + + foreach($html->find('li.col6') as $element) { + $item = new \Item(); + $item->uri = 'http://fr.openclassrooms.com'.$element->find('a', 0)->href; + $item->title = $element->find('div.courses-content strong', 0)->innertext; + $item->content = $element->find('span.course-tags', 0)->innertext; + $this->items[] = $item; + } + } + + public function getName(){ + return 'OpenClassrooms'; + } + + public function getURI(){ + return 'http://fr.openclassrooms.com'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} From 33cf5a1cd50d88a9fd5c2dc68422a823694370a5 Mon Sep 17 00:00:00 2001 From: Astalaseven Date: Mon, 16 Dec 2013 09:11:33 +0100 Subject: [PATCH 006/139] [enh] Remove old comment --- bridges/DansTonChatBridge.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 06ad5b2..3bb6e0b 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -19,10 +19,6 @@ class DansTonChatBridge extends BridgeAbstract{ $item = new \Item(); $item->uri = $element->find('a', 0)->href; $item->title = 'DansTonChat '.$element->find('a', 1)->plaintext; -<<<<<<< HEAD -======= - //$item->content = $element->find('p.item-content', 0)->innertext; ->>>>>>> d194d02... [new] Bridges for DTC and SDZ $item->content = $element->find('a', 0)->innertext; $this->items[] = $item; } From fb44d4d90df3b2bbd21294dc28ae0df6abea660c Mon Sep 17 00:00:00 2001 From: Astalaseven Date: Mon, 16 Dec 2013 15:29:33 +0100 Subject: [PATCH 007/139] [new] DuckDuckGo Bridge --- bridges/DuckDuckGoBridge.php | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 bridges/DuckDuckGoBridge.php diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php new file mode 100644 index 0000000..a194a5c --- /dev/null +++ b/bridges/DuckDuckGoBridge.php @@ -0,0 +1,39 @@ +returnError('Could not request DuckDuckGo.', 404); + + foreach($html->find('div.results_links') as $element) { + $item = new \Item(); + $item->uri = $element->find('a', 0)->href; + $item->title = $element->find('a', 1)->innertext; + $item->content = $element->find('div.snippet', 0)->plaintext; + $this->items[] = $item; + } + } + + public function getName(){ + return 'DuckDuckGo'; + } + + public function getURI(){ + return 'https://duckduckgo.com'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} From 25185cff4ee3875ddb58f85ed37aeb8f14b607fe Mon Sep 17 00:00:00 2001 From: Astalaseven Date: Mon, 16 Dec 2013 17:51:49 +0100 Subject: [PATCH 008/139] [new] Scmb Bridge --- bridges/ScmbBridge.php | 49 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 bridges/ScmbBridge.php diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php new file mode 100644 index 0000000..e58088d --- /dev/null +++ b/bridges/ScmbBridge.php @@ -0,0 +1,49 @@ +returnError('Could not request Se Coucher Moins Bete.', 404); + + foreach($html->find('article') as $article) { + $item = new \Item(); + $item->uri = 'http://secouchermoinsbete.fr'.$article->find('p.summary a',0)->href; + $item->title = $article->find('header h1 a',0)->innertext; + + $article->find('span.read-more',0)->outertext=''; // remove text "En savoir plus" from anecdote content + $content = $article->find('p.summary a',0)->innertext; + $content =substr($content,0,strlen($content)-17); // remove superfluous spaces at the end + + // get publication date + $str_date = $article->find('time',0)->datetime; + list($date, $time) = explode(' ', $str_date); + list($y, $m, $d) = explode('-', $date); + list($h, $i) = explode(':', $time); + $timestamp = mktime($h,$i,0,$m,$d,$y); + $item->timestamp = $timestamp; + + + $item->content = $content; + $this->items[] = $item; + } + } + + public function getName(){ + return 'Se Coucher Moins Bête Bridge'; + } + + public function getURI(){ + return 'http://secouchermoinsbete.fr/'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} From f01bc29f574260b309e126832183290d1991a163 Mon Sep 17 00:00:00 2001 From: gsurrel Date: Thu, 2 Jan 2014 11:26:15 +0100 Subject: [PATCH 009/139] Added Wikipedia FR bridge --- bridges/WikipediaFRBridge.php | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 bridges/WikipediaFRBridge.php diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php new file mode 100755 index 0000000..3dad0c7 --- /dev/null +++ b/bridges/WikipediaFRBridge.php @@ -0,0 +1,39 @@ +returnError('Could not request Wikipedia FR.', 404); + + $element = $html->find('div[id=accueil-lumieresur]', 0); + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + $item->content = str_replace('href="', 'href="'.$host, $html->find('div[id=mf-lumieresur]', 0)->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia FR "Lumière sur..."'; + } + + public function getURI(){ + return 'https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal'; + } + + public function getCacheDuration(){ + return 0*3600*4; // 4 hours + } +} From 5288756d04bb407742db39440b564a4ee548f1ce Mon Sep 17 00:00:00 2001 From: gsurrel Date: Thu, 2 Jan 2014 11:42:02 +0100 Subject: [PATCH 010/139] Corrected mistake in Wikipedia FR bridge --- bridges/WikipediaFRBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php index 3dad0c7..9d9dd94 100755 --- a/bridges/WikipediaFRBridge.php +++ b/bridges/WikipediaFRBridge.php @@ -21,7 +21,7 @@ class WikipediaFRBridge extends BridgeAbstract{ $item = new \Item(); $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; $item->title = $element->find('p',0)->find('a',0)->title; - $item->content = str_replace('href="', 'href="'.$host, $html->find('div[id=mf-lumieresur]', 0)->innertext); + $item->content = str_replace('href="', 'href="'.$host, $element->find('div[id=mf-lumieresur]', 0)->innertext); $this->items[] = $item; } @@ -34,6 +34,6 @@ class WikipediaFRBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 0*3600*4; // 4 hours + return 3600*4; // 4 hours } } From d7e2d65ff7787507cc4071f6b7214e91ecf4e72f Mon Sep 17 00:00:00 2001 From: gsurrel Date: Thu, 2 Jan 2014 11:42:39 +0100 Subject: [PATCH 011/139] Added Wikipedia EN bridge (Today's featured article) --- bridges/WikipediaENBridge.php | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100755 bridges/WikipediaENBridge.php diff --git a/bridges/WikipediaENBridge.php b/bridges/WikipediaENBridge.php new file mode 100755 index 0000000..ecd2ccf --- /dev/null +++ b/bridges/WikipediaENBridge.php @@ -0,0 +1,41 @@ +returnError('Could not request Wikipedia EN.', 404); + + $element = $html->find('div[id=mp-tfa]', 0); + // Clean the bottom of the featured article + $element->find('div', -1)->outertext = ''; + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EN "Today\'s Featued Article"'; + } + + public function getURI(){ + return 'https://en.wikipedia.org/wiki/Main_Page'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} From 02c7b2be87ab2c4d79acc018c37cc14da502ec49 Mon Sep 17 00:00:00 2001 From: gsurrel Date: Thu, 2 Jan 2014 11:43:21 +0100 Subject: [PATCH 012/139] Corrected mistake in Wikipedia FR bridge if using absolute links --- bridges/WikipediaFRBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php index 9d9dd94..20376da 100755 --- a/bridges/WikipediaFRBridge.php +++ b/bridges/WikipediaFRBridge.php @@ -21,7 +21,7 @@ class WikipediaFRBridge extends BridgeAbstract{ $item = new \Item(); $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; $item->title = $element->find('p',0)->find('a',0)->title; - $item->content = str_replace('href="', 'href="'.$host, $element->find('div[id=mf-lumieresur]', 0)->innertext); + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->find('div[id=mf-lumieresur]', 0)->innertext); $this->items[] = $item; } From 07d27e042178f61c52e2317ad4fa5bfb70ab19b1 Mon Sep 17 00:00:00 2001 From: gsurrel Date: Thu, 2 Jan 2014 11:58:10 +0100 Subject: [PATCH 013/139] Added Wikipedia Esperanto bridge --- bridges/WikipediaEOBridge.php | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 bridges/WikipediaEOBridge.php diff --git a/bridges/WikipediaEOBridge.php b/bridges/WikipediaEOBridge.php new file mode 100644 index 0000000..19f8cee --- /dev/null +++ b/bridges/WikipediaEOBridge.php @@ -0,0 +1,41 @@ +returnError('Could not request Wikipedia EO.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + // Link to article + $link = $element->find('p', -2)->find('a', 0); + $item = new \Item(); + $item->uri = $host.$link->href; + $item->title = $link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EO "Artikolo de la semajno"'; + } + + public function getURI(){ + return 'https://eo.wikipedia.org/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} From 6b9074da2a5a0f1af599f2877e7e27022502c3f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20SAUVAGE?= Date: Sat, 1 Feb 2014 14:15:49 +0100 Subject: [PATCH 014/139] Update README.md --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ee4027..1baf5d2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,17 @@ Supported sites/pages * `Cryptome` : Returns the most recent documents from Cryptome.org. * `Futurasciences` : Returns the most recent articles from futura-sciences.com. * `GuruMed`: Returns the most recent articles for gurumed.org - + * `DansTonChat`: Most recent quotes from danstonchat.com + * `DuckDuckGo`: Most recent results from DuckDuckGo.com + * `FSBridge`: Most recent article (full text) from futura-sciences.com + * `GuruMed`: Most recent entries (full text) from gurumed.org + * `Instagram`: Most recent photos from an instagram.com user. + * `OpenClassrooms`: Lastest tutorials from fr.openclassrooms.com. + * `Pinterest`: Most recent photos from user or search. + * `ScmbBridge`: Newest stories from secouchermoinsbete.fr + * `WikipediaENLatest`: highlighted articles from Wikipedia in English. + * `WikipediaFRLatest`: highlighted articles from Wikipedia in French. + * `WikipediaEOLatest`: highlighted articles from Wikipedia in Esperanto. Output format @@ -55,6 +65,9 @@ Patch/contributors : * [Mitsukarenai](https://github.com/Mitsukarenai) : Initial inspiration, TwitterBridge, IdenticaBridge, YoutubeBridge. * [ArthurHoaro](https://github.com/ArthurHoaro) * [BoboTiG](https://github.com/BoboTiG) + * [Astalaseven](https://github.com/Astalaseven) + * [qwertygc](https://github.com/qwertygc) + * [Djuuu](https://github.com/Djuuu) Licence === From a9d6fd3e462d651a462547c7deb6a23579bfda24 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sat, 8 Feb 2014 18:12:49 +0100 Subject: [PATCH 015/139] Default to http for DDG bridge Because some hosts do not support https. --- bridges/DuckDuckGoBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php index a194a5c..d9f47d7 100644 --- a/bridges/DuckDuckGoBridge.php +++ b/bridges/DuckDuckGoBridge.php @@ -12,7 +12,7 @@ class DuckDuckGoBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $link = 'https://duckduckgo.com/html/?q='.$param[u].'+sort:date'; + $link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date'; $html = file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404); From 50a96a3f215607d4112eae71f324e7d20ec4b0eb Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sat, 8 Feb 2014 18:18:09 +0100 Subject: [PATCH 016/139] Corrected default number of articles in CryptomeBridge. The bridge was outputting a single entry when parameter n was empty. --- bridges/CryptomeBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index 1409ecf..39e3136 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -18,10 +18,11 @@ class CryptomeBridge extends BridgeAbstract{ //$link = 'https://secure.netsolhost.com/cryptome.org/'; $html = file_get_html($link) or $this->returnError('Could not request Cryptome.', 404); - if (isset($param['n'])) { /* number of documents */ + if (!empty($param['n'])) { /* number of documents */ $num = min(max(1, $param['n']+0), $num); } + foreach($html->find('pre') as $element) { for ( $i = 0; $i < $num; ++$i ) { $item = new \Item(); From 531dddecdcb587caeec1e863743931a38e6f159b Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 10:23:50 +0100 Subject: [PATCH 017/139] Changed default number of article from 90 to 20 in CryptomeBridge. --- bridges/CryptomeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index 39e3136..87df363 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -12,7 +12,7 @@ class CryptomeBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $num = 90; + $num = 20; $link = 'http://cryptome.org/'; // If you want HTTPS access instead, uncomment the following line: //$link = 'https://secure.netsolhost.com/cryptome.org/'; From 7c5118959deeee9a8cb5833fd509d25db7ba2727 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:08:03 +0100 Subject: [PATCH 018/139] Removed non-working bridges --- README.md | 3 --- bridges/FSBridge.php | 56 --------------------------------------- bridges/GuruMedBridge.php | 55 -------------------------------------- 3 files changed, 114 deletions(-) delete mode 100644 bridges/FSBridge.php delete mode 100644 bridges/GuruMedBridge.php diff --git a/README.md b/README.md index 1baf5d2..e8b5151 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,8 @@ Supported sites/pages * `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances). * `YouTube` : YouTube user channel feed. * `Cryptome` : Returns the most recent documents from Cryptome.org. - * `Futurasciences` : Returns the most recent articles from futura-sciences.com. - * `GuruMed`: Returns the most recent articles for gurumed.org * `DansTonChat`: Most recent quotes from danstonchat.com * `DuckDuckGo`: Most recent results from DuckDuckGo.com - * `FSBridge`: Most recent article (full text) from futura-sciences.com * `GuruMed`: Most recent entries (full text) from gurumed.org * `Instagram`: Most recent photos from an instagram.com user. * `OpenClassrooms`: Lastest tutorials from fr.openclassrooms.com. diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php deleted file mode 100644 index 1ffd25d..0000000 --- a/bridges/FSBridge.php +++ /dev/null @@ -1,56 +0,0 @@ -', '', $string); - return $string; - } - function FS_ExtractContent($url) { - $html2 = file_get_html($url); - $text = $html2->find('div.fiche-actualite', 0)->innertext; - return $text; - } - $html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); - $limit = 0; - - foreach($html->find('item') as $element) { - if($limit < 20) { - $item = new \Item(); - $item->title = FS_StripCDATA($element->find('title', 0)->innertext); - $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = FS_ExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } - } - - } - - public function getName(){ - return 'Futura Sciences'; - } - - public function getURI(){ - return 'http://www.futura-sciences.com/'; - } - - public function getCacheDuration(){ - // return 3600; // 1 hour - return 0; // 1 hour - } -} diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php deleted file mode 100644 index d312c24..0000000 --- a/bridges/GuruMedBridge.php +++ /dev/null @@ -1,55 +0,0 @@ -', '', $string); - return $string; - } - function GurumedExtractContent($url) { - $html2 = file_get_html($url); - $text = $html2->find('div.entry', 0)->innertext; - return $text; - } - $html = file_get_html('http://gurumed.org/feed') or $this->returnError('Could not request Gurumed.', 404); - $limit = 0; - - foreach($html->find('item') as $element) { - if($limit < 10) { - $item = new \Item(); - $item->title = GurumedStripCDATA($element->find('title', 0)->innertext); - $item->uri = GurumedStripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = GurumedExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } - } - - } - - public function getName(){ - return 'Gurumed'; - } - - public function getURI(){ - return 'http://gurumed.org/'; - } - - public function getCacheDuration(){ - return 3600; // 1 hour - } -} From a1152aee96838bb3a2a328539b4d94f94c7a4838 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:15:15 +0100 Subject: [PATCH 019/139] Added parameter check of OpenClassRooms. --- bridges/OpenClassroomsBridge.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php index d2d3c4a..966fd4e 100644 --- a/bridges/OpenClassroomsBridge.php +++ b/bridges/OpenClassroomsBridge.php @@ -11,8 +11,13 @@ class OpenClassroomsBridge extends BridgeAbstract{ public function collectData(array $param){ + if ($param['u']!='informatique' && $param['u']!='sciences') + { + $this->returnError('Error: You must chose "informatique" or "science".', 404); + } + $html = ''; - $link = 'http://fr.openclassrooms.com/'.$param[u].'/cours?title=&sort=updatedAt+desc'; + $link = 'http://fr.openclassrooms.com/'.$param['u'].'/cours?title=&sort=updatedAt+desc'; $html = file_get_html($link) or $this->returnError('Could not request OpenClassrooms.', 404); From b4b3c4b0acb38715da8f2d4414a835f78dfcbebb Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:20:52 +0100 Subject: [PATCH 020/139] Added parameters checks for PinterestBridge --- bridges/PinterestBridge.php | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 8c24ecd..7f58e1d 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -16,7 +16,18 @@ class PinterestBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - if (isset($param['u']) && isset($param['b'])) { + if (isset($param['u']) || isset($param['b'])) { + + if (empty($param['u'])) + { + $this->returnError('You must specify a Pinterest username (?u=...).', 400); + } + + if (empty($param['b'])) + { + $this->returnError('You must specify a Pinterest board for this username (?b=...).', 400); + } + $this->username = $param['u']; $this->board = $param['b']; $html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Could not request Pinterest.', 404); From a5591d9f5431cf9b24664f42a13327c3c34dbb88 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:33:02 +0100 Subject: [PATCH 021/139] Added Bandcamp Merged manually from https://github.com/sebsauvage/rss-bridge/pull/24 --- README.md | 1 + bridges/BandcampBridge.php | 45 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 bridges/BandcampBridge.php diff --git a/README.md b/README.md index e8b5151..a6dd6da 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Supported sites/pages * `WikipediaENLatest`: highlighted articles from Wikipedia in English. * `WikipediaFRLatest`: highlighted articles from Wikipedia in French. * `WikipediaEOLatest`: highlighted articles from Wikipedia in Esperanto. + * `Bandcamp` : Returns last release from bandcamp for a tag Output format diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php new file mode 100644 index 0000000..98eaf9f --- /dev/null +++ b/bridges/BandcampBridge.php @@ -0,0 +1,45 @@ +request = $param['tag']; + $html = file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404); + } + else { + $this->returnError('You must specify tag (/tag/...)', 400); + } + + foreach($html->find('li.item') as $release) { + $item = new \Item(); + $item->name = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; + $item->title = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; + $item->content = '
' . $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; + $item->id = $release->find('a',0)->getAttribute('href'); + $item->uri = $release->find('a',0)->getAttribute('href'); + $this->items[] = $item; + } + } + + public function getName(){ + return (!empty($this->request) ? $this->request .' - ' : '') .'Bandcamp Tag'; + } + + public function getURI(){ + return 'http://bandcamp.com'; + } + + public function getCacheDuration(){ + return 600; // 10 minutes + } +} From 589f1dbbad45c3aa4cc71d4507aa08151e856047 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:37:16 +0100 Subject: [PATCH 022/139] Small update on README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a6dd6da..bfacaab 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Requirements * php 5.3 * [PHP Simple HTML DOM Parser](http://simplehtmldom.sourceforge.net). (Put `simple_html_dom.php` in `vendor/simplehtmldom/`). - * Ssl lib activated in PHP config + * TLS lib activated in PHP config for some bridges. Author From 9c8a9d1d1dde160d7311645bea7b2d77f558f0b2 Mon Sep 17 00:00:00 2001 From: Sebastien SAUVAGE Date: Sun, 9 Feb 2014 15:59:16 +0100 Subject: [PATCH 023/139] Updated README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index bfacaab..8c1673e 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Patch/contributors : * [Astalaseven](https://github.com/Astalaseven) * [qwertygc](https://github.com/qwertygc) * [Djuuu](https://github.com/Djuuu) + * [Anadrark](https://github.com/Anadrark]) Licence === From e9902e2391a2dfe644e15ff9be6c6d0e3b329c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20SAUVAGE?= Date: Wed, 14 May 2014 12:39:12 +0200 Subject: [PATCH 024/139] Updated from Mitsukarenai --- bridges/YoutubeBridge.php | 62 ++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 8d73925..c7d7111 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -4,8 +4,10 @@ * Returns the newest videos * * @name Youtube Bridge -* @description Returns the newest videos +* @description Returns the newest videos by username or playlist * @use1(u="username") +* @use2(p="playlist id") +* @use3(s="search keyword",pa="page") */ class YoutubeBridge extends BridgeAbstract{ @@ -16,20 +18,52 @@ class YoutubeBridge extends BridgeAbstract{ if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; $html = file_get_html('https://www.youtube.com/user/'.urlencode($this->request).'/videos') or $this->returnError('Could not request Youtube.', 404); + + foreach($html->find('li.channels-content-item') as $element) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $item->thumbnailUri = 'https:'.$element->find('img',0)->src; + $item->title = trim($element->find('h3',0)->plaintext); + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + } + } + else if (isset($param['p'])) { /* playlist mode */ + $this->request = $param['p']; + $html = file_get_html('https://www.youtube.com/playlist?list='.urlencode($this->request).'') or $this->returnError('Could not request Youtube.', 404); + + foreach($html->find('li.playlist-video-item') as $element) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $item->thumbnailUri = 'https:'.$element->find('img',0)->src; + $item->title = trim($element->find('h3',0)->plaintext); + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + } + $this->request = 'Playlist '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext).', by '.$html->find('h1', 0)->plaintext; + } + else if (isset($param['s'])) { /* search mode */ + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $html = file_get_html('https://www.youtube.com/results?search_query='.urlencode($this->request).'&page='.$page.'&filters=video&search_sort=video_date_uploaded') or $this->returnError('Could not request Youtube.', 404); + + foreach($html->find('li.context-data-item') as $element) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $checkthumb = $element->find('img', 0)->getAttribute('data-thumb'); + if($checkthumb !== FALSE) + $item->thumbnailUri = $checkthumb; + else + $item->thumbnailUri = ''.$element->find('img',0)->src; + $item->title = trim($element->find('h3',0)->plaintext); + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + } + $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } else { - $this->returnError('You must specify a Youtbe username (?u=...).', 400); - } - - - foreach($html->find('li.channels-content-item') as $element) { - $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; - $item->thumbnailUri = 'https:'.$element->find('img',0)->src; - $item->title = trim($element->find('h3',0)->plaintext); - $item->content = '
' . $item->title . ''; - $this->items[] = $item; - } + $this->returnError('You must either specify a Youtube username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400); + } + } public function getName(){ @@ -41,6 +75,6 @@ class YoutubeBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 21600; // 6 hours + return 10800; // 3 hours } } From e81a19a04ba460bb4bac68136ceb9a4cef1f0427 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 14 May 2014 14:27:57 +0200 Subject: [PATCH 025/139] [pull] date fallback for ATOM format --- formats/AtomFormat.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index cdd0a9a..c9a1e0d 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -53,6 +53,10 @@ EOD; - : RFC look with xhtml, keep this in spite of ? */ +// #### TEMPORARY FIX ### +$feedTimestamp = date(DATE_ATOM, time()); +// ################ + /* Data are prepared, now let's begin the "MAGIE !!!" */ $toReturn = ''; $toReturn .= <<{$title} http{$https}://{$httpHost}{$httpInfo}/ - + {$feedTimestamp} {$entries} From 1e40d6f492486658748dc12b7824d750525b780c Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 14 May 2014 14:34:06 +0200 Subject: [PATCH 026/139] [pull] Fix Youtube, Twitter --- bridges/TwitterBridge.php | 10 +++++----- bridges/YoutubeBridge.php | 18 +++++++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index fcc3967..cf85cb6 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -26,15 +26,15 @@ class TwitterBridge extends BridgeAbstract{ $this->returnError('You must specify a keyword (?q=...) or a Twitter username (?u=...).', 400); } - foreach($html->find('div.tweet') as $tweet) { + foreach($html->find('div.js-stream-tweet') as $tweet) { $item = new \Item(); - $item->username = trim(substr($tweet->find('span.username', 0)->plaintext, 1)); // extract username and sanitize + $item->username = $tweet->getAttribute('data-screen-name'); // extract username and sanitize $item->fullname = $tweet->getAttribute('data-name'); // extract fullname (pseudonym) $item->avatar = $tweet->find('img', 0)->src; // get avatar link $item->id = $tweet->getAttribute('data-tweet-id'); // get TweetID - $item->uri = 'https://twitter.com'.$tweet->find('a.details', 0)->getAttribute('href'); // get tweet link - $item->timestamp = $tweet->find('span._timestamp', 0)->getAttribute('data-time'); // extract tweet timestamp - $item->content = str_replace('href="/', 'href="https://twitter.com/', strip_tags($tweet->find('p.tweet-text', 0)->innertext, '')); // extract tweet text + $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); // get tweet link + $item->timestamp = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); // extract tweet timestamp + $item->content = str_replace('href="/', 'href="https://twitter.com/', strip_tags($tweet->find('p.js-tweet-text', 0)->innertext, '')); // extract tweet text $item->title = $item->fullname . ' (@'. $item->username . ') | ' . $item->content; $this->items[] = $item; } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index c7d7111..974d169 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -8,6 +8,10 @@ * @use1(u="username") * @use2(p="playlist id") * @use3(s="search keyword",pa="page") +* +* WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php: +* change: define('MAX_FILE_SIZE', 600000); +* into: define('MAX_FILE_SIZE', 900000); (or more) */ class YoutubeBridge extends BridgeAbstract{ @@ -32,21 +36,21 @@ class YoutubeBridge extends BridgeAbstract{ $this->request = $param['p']; $html = file_get_html('https://www.youtube.com/playlist?list='.urlencode($this->request).'') or $this->returnError('Could not request Youtube.', 404); - foreach($html->find('li.playlist-video-item') as $element) { + foreach($html->find('tr.pl-video') as $element) { $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; - $item->thumbnailUri = 'https:'.$element->find('img',0)->src; - $item->title = trim($element->find('h3',0)->plaintext); + $item->uri = 'https://www.youtube.com'.$element->find('.pl-video-title a',0)->href; + $item->thumbnailUri = 'https:'.str_replace('/default.','/mqdefault.',$element->find('.pl-video-thumbnail img',0)->src); + $item->title = trim($element->find('.pl-video-title a',0)->plaintext); $item->content = '
' . $item->title . ''; $this->items[] = $item; } - $this->request = 'Playlist '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext).', by '.$html->find('h1', 0)->plaintext; + $this->request = 'Playlist '.trim(str_replace(' - YouTube', '', $html->find('title', 0)->plaintext)).', by '.$html->find('h1', 0)->plaintext; } else if (isset($param['s'])) { /* search mode */ $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = file_get_html('https://www.youtube.com/results?search_query='.urlencode($this->request).'&page='.$page.'&filters=video&search_sort=video_date_uploaded') or $this->returnError('Could not request Youtube.', 404); + $html = file_get_html('https://www.youtube.com/results?search_query='.urlencode($this->request).'&&page='.$page.'&filters=video&search_sort=video_date_uploaded') or $this->returnError('Could not request Youtube.', 404); - foreach($html->find('li.context-data-item') as $element) { + foreach($html->find('li.yt-lockup') as $element) { $item = new \Item(); $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; $checkthumb = $element->find('img', 0)->getAttribute('data-thumb'); From 13da0112f234d5d139276278f213a3feb3b4b5f2 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 14 May 2014 21:34:07 +0200 Subject: [PATCH 027/139] [pull/beta] UA spoofing --- index.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/index.php b/index.php index a95fcbb..e9754b0 100644 --- a/index.php +++ b/index.php @@ -15,6 +15,10 @@ date_default_timezone_set('UTC'); error_reporting(0); //ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only. +// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites +ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)'); +// ------- + try{ require_once __DIR__ . '/lib/RssBridge.php'; @@ -32,8 +36,7 @@ try{ $format = $_REQUEST['format']; unset($_REQUEST['format']); - // FIXME : necessary ? - // ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0'); + $cache = Cache::create('FileCache'); @@ -135,4 +138,4 @@ $formats = Format::searchInformation(); RSS-Bridge alpha 0.1 - \ No newline at end of file + From e504573fea67972d91ea8b96f91911bfb40ca362 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 21 May 2014 17:21:53 +0200 Subject: [PATCH 028/139] updt readme --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8c1673e..ad6194c 100644 --- a/README.md +++ b/README.md @@ -48,8 +48,7 @@ Minecraft hashtag (#Minecraft) search on Twitter, in ATOM format (as displayed b Requirements === - * php 5.3 - * [PHP Simple HTML DOM Parser](http://simplehtmldom.sourceforge.net). (Put `simple_html_dom.php` in `vendor/simplehtmldom/`). + * PHP 5.3 * TLS lib activated in PHP config for some bridges. @@ -72,6 +71,8 @@ Licence === Code is public domain. +Included `PHP Simple HTML DOM Parser` is under the [MIT License](http://opensource.org/licenses/MIT) + Technical notes === From 7bee7773623afcb04bc312072c7ce10b863583eb Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 21 May 2014 17:25:59 +0200 Subject: [PATCH 029/139] add simple_html_dom --- vendor/simplehtmldom/simple_html_dom.php | 1742 ++++++++++++++++++++++ 1 file changed, 1742 insertions(+) create mode 100644 vendor/simplehtmldom/simple_html_dom.php diff --git a/vendor/simplehtmldom/simple_html_dom.php b/vendor/simplehtmldom/simple_html_dom.php new file mode 100644 index 0000000..b5d3089 --- /dev/null +++ b/vendor/simplehtmldom/simple_html_dom.php @@ -0,0 +1,1742 @@ +size is the "real" number of bytes the dom was created from. + * but for most purposes, it's a really good estimation. + * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags closed is great for malformed html, but it CAN lead to parsing errors. + * Allow the user to tell us how much they trust the html. + * Paperg add the text and plaintext to the selectors for the find syntax. plaintext implies text in the innertext of a node. text implies that the tag is a text node. + * This allows for us to find tags based on the text they contain. + * Create find_ancestor_tag to see if a tag is - at any level - inside of another specific tag. + * Paperg: added parse_charset so that we know about the character set of the source document. + * NOTE: If the user's system has a routine called get_last_retrieve_url_contents_content_type availalbe, we will assume it's returning the content-type header from the + * last transfer or curl_exec, and we will parse that and use it in preference to any other method of charset detection. + * + * Found infinite loop in the case of broken html in restore_noise. Rewrote to protect from that. + * PaperG (John Schlick) Added get_display_size for "IMG" tags. + * + * Licensed under The MIT License + * Redistributions of files must retain the above copyright notice. + * + * @author S.C. Chen + * @author John Schlick + * @author Rus Carroll + * @version 1.5 ($Rev: 208 $) + * @package PlaceLocalInclude + * @subpackage simple_html_dom + */ + +/** + * All of the Defines for the classes below. + * @author S.C. Chen + */ +define('HDOM_TYPE_ELEMENT', 1); +define('HDOM_TYPE_COMMENT', 2); +define('HDOM_TYPE_TEXT', 3); +define('HDOM_TYPE_ENDTAG', 4); +define('HDOM_TYPE_ROOT', 5); +define('HDOM_TYPE_UNKNOWN', 6); +define('HDOM_QUOTE_DOUBLE', 0); +define('HDOM_QUOTE_SINGLE', 1); +define('HDOM_QUOTE_NO', 3); +define('HDOM_INFO_BEGIN', 0); +define('HDOM_INFO_END', 1); +define('HDOM_INFO_QUOTE', 2); +define('HDOM_INFO_SPACE', 3); +define('HDOM_INFO_TEXT', 4); +define('HDOM_INFO_INNER', 5); +define('HDOM_INFO_OUTER', 6); +define('HDOM_INFO_ENDSPACE',7); +define('DEFAULT_TARGET_CHARSET', 'UTF-8'); +define('DEFAULT_BR_TEXT', "\r\n"); +define('DEFAULT_SPAN_TEXT', " "); +define('MAX_FILE_SIZE', 10000000); +// helper functions +// ----------------------------------------------------------------------------- +// get html dom from file +// $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. +function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +{ + // We DO force the tags to be terminated. + $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); + // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done. + $contents = file_get_contents($url, $use_include_path, $context, $offset); + // Paperg - use our own mechanism for getting the contents as we want to control the timeout. + //$contents = retrieve_url_contents($url); + if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) + { + return false; + } + // The second parameter can force the selectors to all be lowercase. + $dom->load($contents, $lowercase, $stripRN); + return $dom; +} + +// get html dom from string +function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +{ + $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); + if (empty($str) || strlen($str) > MAX_FILE_SIZE) + { + $dom->clear(); + return false; + } + $dom->load($str, $lowercase, $stripRN); + return $dom; +} + +// dump html dom tree +function dump_html_tree($node, $show_attr=true, $deep=0) +{ + $node->dump($node); +} + + +/** + * simple html dom node + * PaperG - added ability for "find" routine to lowercase the value of the selector. + * PaperG - added $tag_start to track the start position of the tag in the total byte index + * + * @package PlaceLocalInclude + */ +class simple_html_dom_node +{ + public $nodetype = HDOM_TYPE_TEXT; + public $tag = 'text'; + public $attr = array(); + public $children = array(); + public $nodes = array(); + public $parent = null; + // The "info" array - see HDOM_INFO_... for what each element contains. + public $_ = array(); + public $tag_start = 0; + private $dom = null; + + function __construct($dom) + { + $this->dom = $dom; + $dom->nodes[] = $this; + } + + function __destruct() + { + $this->clear(); + } + + function __toString() + { + return $this->outertext(); + } + + // clean up memory due to php5 circular references memory leak... + function clear() + { + $this->dom = null; + $this->nodes = null; + $this->parent = null; + $this->children = null; + } + + // dump node's tree + function dump($show_attr=true, $deep=0) + { + $lead = str_repeat(' ', $deep); + + echo $lead.$this->tag; + if ($show_attr && count($this->attr)>0) + { + echo '('; + foreach ($this->attr as $k=>$v) + echo "[$k]=>\"".$this->$k.'", '; + echo ')'; + } + echo "\n"; + + if ($this->nodes) + { + foreach ($this->nodes as $c) + { + $c->dump($show_attr, $deep+1); + } + } + } + + + // Debugging function to dump a single dom node with a bunch of information about it. + function dump_node($echo=true) + { + + $string = $this->tag; + if (count($this->attr)>0) + { + $string .= '('; + foreach ($this->attr as $k=>$v) + { + $string .= "[$k]=>\"".$this->$k.'", '; + } + $string .= ')'; + } + if (count($this->_)>0) + { + $string .= ' $_ ('; + foreach ($this->_ as $k=>$v) + { + if (is_array($v)) + { + $string .= "[$k]=>("; + foreach ($v as $k2=>$v2) + { + $string .= "[$k2]=>\"".$v2.'", '; + } + $string .= ")"; + } else { + $string .= "[$k]=>\"".$v.'", '; + } + } + $string .= ")"; + } + + if (isset($this->text)) + { + $string .= " text: (" . $this->text . ")"; + } + + $string .= " HDOM_INNER_INFO: '"; + if (isset($node->_[HDOM_INFO_INNER])) + { + $string .= $node->_[HDOM_INFO_INNER] . "'"; + } + else + { + $string .= ' NULL '; + } + + $string .= " children: " . count($this->children); + $string .= " nodes: " . count($this->nodes); + $string .= " tag_start: " . $this->tag_start; + $string .= "\n"; + + if ($echo) + { + echo $string; + return; + } + else + { + return $string; + } + } + + // returns the parent of node + // If a node is passed in, it will reset the parent of the current node to that one. + function parent($parent=null) + { + // I am SURE that this doesn't work properly. + // It fails to unset the current node from it's current parents nodes or children list first. + if ($parent !== null) + { + $this->parent = $parent; + $this->parent->nodes[] = $this; + $this->parent->children[] = $this; + } + + return $this->parent; + } + + // verify that node has children + function has_child() + { + return !empty($this->children); + } + + // returns children of node + function children($idx=-1) + { + if ($idx===-1) + { + return $this->children; + } + if (isset($this->children[$idx])) + { + return $this->children[$idx]; + } + return null; + } + + // returns the first child of node + function first_child() + { + if (count($this->children)>0) + { + return $this->children[0]; + } + return null; + } + + // returns the last child of node + function last_child() + { + if (($count=count($this->children))>0) + { + return $this->children[$count-1]; + } + return null; + } + + // returns the next sibling of node + function next_sibling() + { + if ($this->parent===null) + { + return null; + } + + $idx = 0; + $count = count($this->parent->children); + while ($idx<$count && $this!==$this->parent->children[$idx]) + { + ++$idx; + } + if (++$idx>=$count) + { + return null; + } + return $this->parent->children[$idx]; + } + + // returns the previous sibling of node + function prev_sibling() + { + if ($this->parent===null) return null; + $idx = 0; + $count = count($this->parent->children); + while ($idx<$count && $this!==$this->parent->children[$idx]) + ++$idx; + if (--$idx<0) return null; + return $this->parent->children[$idx]; + } + + // function to locate a specific ancestor tag in the path to the root. + function find_ancestor_tag($tag) + { + global $debug_object; + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } + + // Start by including ourselves in the comparison. + $returnDom = $this; + + while (!is_null($returnDom)) + { + if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); } + + if ($returnDom->tag == $tag) + { + break; + } + $returnDom = $returnDom->parent; + } + return $returnDom; + } + + // get dom node's inner html + function innertext() + { + if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER]; + if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + + $ret = ''; + foreach ($this->nodes as $n) + $ret .= $n->outertext(); + return $ret; + } + + // get dom node's outer text (with tag) + function outertext() + { + global $debug_object; + if (is_object($debug_object)) + { + $text = ''; + if ($this->tag == 'text') + { + if (!empty($this->text)) + { + $text = " with text: " . $this->text; + } + } + $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); + } + + if ($this->tag==='root') return $this->innertext(); + + // trigger callback + if ($this->dom && $this->dom->callback!==null) + { + call_user_func_array($this->dom->callback, array($this)); + } + + if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER]; + if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + + // render begin tag + if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) + { + $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup(); + } else { + $ret = ""; + } + + // render inner text + if (isset($this->_[HDOM_INFO_INNER])) + { + // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added. + if ($this->tag != "br") + { + $ret .= $this->_[HDOM_INFO_INNER]; + } + } else { + if ($this->nodes) + { + foreach ($this->nodes as $n) + { + $ret .= $this->convert_text($n->outertext()); + } + } + } + + // render end tag + if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0) + $ret .= 'tag.'>'; + return $ret; + } + + // get dom node's plain text + function text() + { + if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER]; + switch ($this->nodetype) + { + case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + case HDOM_TYPE_COMMENT: return ''; + case HDOM_TYPE_UNKNOWN: return ''; + } + if (strcasecmp($this->tag, 'script')===0) return ''; + if (strcasecmp($this->tag, 'style')===0) return ''; + + $ret = ''; + // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL. + // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening. + // WHY is this happening? + if (!is_null($this->nodes)) + { + foreach ($this->nodes as $n) + { + $ret .= $this->convert_text($n->text()); + } + + // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all. + if ($this->tag == "span") + { + $ret .= $this->dom->default_span_text; + } + + + } + return $ret; + } + + function xmltext() + { + $ret = $this->innertext(); + $ret = str_ireplace('', '', $ret); + return $ret; + } + + // build node's text with tag + function makeup() + { + // text, comment, unknown + if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]); + + $ret = '<'.$this->tag; + $i = -1; + + foreach ($this->attr as $key=>$val) + { + ++$i; + + // skip removed attribute + if ($val===null || $val===false) + continue; + + $ret .= $this->_[HDOM_INFO_SPACE][$i][0]; + //no value attr: nowrap, checked selected... + if ($val===true) + $ret .= $key; + else { + switch ($this->_[HDOM_INFO_QUOTE][$i]) + { + case HDOM_QUOTE_DOUBLE: $quote = '"'; break; + case HDOM_QUOTE_SINGLE: $quote = '\''; break; + default: $quote = ''; + } + $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote; + } + } + $ret = $this->dom->restore_noise($ret); + return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>'; + } + + // find elements by css selector + //PaperG - added ability for find to lowercase the value of the selector. + function find($selector, $idx=null, $lowercase=false) + { + $selectors = $this->parse_selector($selector); + if (($count=count($selectors))===0) return array(); + $found_keys = array(); + + // find each selector + for ($c=0; $c<$count; ++$c) + { + // The change on the below line was documented on the sourceforge code tracker id 2788009 + // used to be: if (($levle=count($selectors[0]))===0) return array(); + if (($levle=count($selectors[$c]))===0) return array(); + if (!isset($this->_[HDOM_INFO_BEGIN])) return array(); + + $head = array($this->_[HDOM_INFO_BEGIN]=>1); + + // handle descendant selectors, no recursive! + for ($l=0; $l<$levle; ++$l) + { + $ret = array(); + foreach ($head as $k=>$v) + { + $n = ($k===-1) ? $this->dom->root : $this->dom->nodes[$k]; + //PaperG - Pass this optional parameter on to the seek function. + $n->seek($selectors[$c][$l], $ret, $lowercase); + } + $head = $ret; + } + + foreach ($head as $k=>$v) + { + if (!isset($found_keys[$k])) + { + $found_keys[$k] = 1; + } + } + } + + // sort keys + ksort($found_keys); + + $found = array(); + foreach ($found_keys as $k=>$v) + $found[] = $this->dom->nodes[$k]; + + // return nth-element or array + if (is_null($idx)) return $found; + else if ($idx<0) $idx = count($found) + $idx; + return (isset($found[$idx])) ? $found[$idx] : null; + } + + // seek for given conditions + // PaperG - added parameter to allow for case insensitive testing of the value of a selector. + protected function seek($selector, &$ret, $lowercase=false) + { + global $debug_object; + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } + + list($tag, $key, $val, $exp, $no_key) = $selector; + + // xpath index + if ($tag && $key && is_numeric($key)) + { + $count = 0; + foreach ($this->children as $c) + { + if ($tag==='*' || $tag===$c->tag) { + if (++$count==$key) { + $ret[$c->_[HDOM_INFO_BEGIN]] = 1; + return; + } + } + } + return; + } + + $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0; + if ($end==0) { + $parent = $this->parent; + while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) { + $end -= 1; + $parent = $parent->parent; + } + $end += $parent->_[HDOM_INFO_END]; + } + + for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) { + $node = $this->dom->nodes[$i]; + + $pass = true; + + if ($tag==='*' && !$key) { + if (in_array($node, $this->children, true)) + $ret[$i] = 1; + continue; + } + + // compare tag + if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;} + // compare key + if ($pass && $key) { + if ($no_key) { + if (isset($node->attr[$key])) $pass=false; + } else { + if (($key != "plaintext") && !isset($node->attr[$key])) $pass=false; + } + } + // compare value + if ($pass && $key && $val && $val!=='*') { + // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right? + if ($key == "plaintext") { + // $node->plaintext actually returns $node->text(); + $nodeKeyValue = $node->text(); + } else { + // this is a normal search, we want the value of that attribute of the tag. + $nodeKeyValue = $node->attr[$key]; + } + if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} + + //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. + if ($lowercase) { + $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue)); + } else { + $check = $this->match($exp, $val, $nodeKeyValue); + } + if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));} + + // handle multiple class + if (!$check && strcasecmp($key, 'class')===0) { + foreach (explode(' ',$node->attr[$key]) as $k) { + // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form. + if (!empty($k)) { + if ($lowercase) { + $check = $this->match($exp, strtolower($val), strtolower($k)); + } else { + $check = $this->match($exp, $val, $k); + } + if ($check) break; + } + } + } + if (!$check) $pass = false; + } + if ($pass) $ret[$i] = 1; + unset($node); + } + // It's passed by reference so this is actually what this function returns. + if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);} + } + + protected function match($exp, $pattern, $value) { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + switch ($exp) { + case '=': + return ($value===$pattern); + case '!=': + return ($value!==$pattern); + case '^=': + return preg_match("/^".preg_quote($pattern,'/')."/", $value); + case '$=': + return preg_match("/".preg_quote($pattern,'/')."$/", $value); + case '*=': + if ($pattern[0]=='/') { + return preg_match($pattern, $value); + } + return preg_match("/".$pattern."/i", $value); + } + return false; + } + + protected function parse_selector($selector_string) { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + // pattern of CSS selectors, modified from mootools + // Paperg: Add the colon to the attrbute, so that it properly finds like google does. + // Note: if you try to look at this attribute, yo MUST use getAttribute since $dom->x:y will fail the php syntax check. +// Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured. +// This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. +// farther study is required to determine of this should be documented or removed. +// $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; + $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; + preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); + if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);} + + $selectors = array(); + $result = array(); + //print_r($matches); + + foreach ($matches as $m) { + $m[0] = trim($m[0]); + if ($m[0]==='' || $m[0]==='/' || $m[0]==='//') continue; + // for browser generated xpath + if ($m[1]==='tbody') continue; + + list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false); + if (!empty($m[2])) {$key='id'; $val=$m[2];} + if (!empty($m[3])) {$key='class'; $val=$m[3];} + if (!empty($m[4])) {$key=$m[4];} + if (!empty($m[5])) {$exp=$m[5];} + if (!empty($m[6])) {$val=$m[6];} + + // convert to lowercase + if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);} + //elements that do NOT have the specified attribute + if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;} + + $result[] = array($tag, $key, $val, $exp, $no_key); + if (trim($m[7])===',') { + $selectors[] = $result; + $result = array(); + } + } + if (count($result)>0) + $selectors[] = $result; + return $selectors; + } + + function __get($name) + { + if (isset($this->attr[$name])) + { + return $this->convert_text($this->attr[$name]); + } + switch ($name) + { + case 'outertext': return $this->outertext(); + case 'innertext': return $this->innertext(); + case 'plaintext': return $this->text(); + case 'xmltext': return $this->xmltext(); + default: return array_key_exists($name, $this->attr); + } + } + + function __set($name, $value) + { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + switch ($name) + { + case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; + case 'innertext': + if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; + return $this->_[HDOM_INFO_INNER] = $value; + } + if (!isset($this->attr[$name])) + { + $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); + $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; + } + $this->attr[$name] = $value; + } + + function __isset($name) + { + switch ($name) + { + case 'outertext': return true; + case 'innertext': return true; + case 'plaintext': return true; + } + //no value attr: nowrap, checked selected... + return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]); + } + + function __unset($name) { + if (isset($this->attr[$name])) + unset($this->attr[$name]); + } + + // PaperG - Function to convert the text from one character set to another if the two sets are not the same. + function convert_text($text) + { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + $converted_text = $text; + + $sourceCharset = ""; + $targetCharset = ""; + + if ($this->dom) + { + $sourceCharset = strtoupper($this->dom->_charset); + $targetCharset = strtoupper($this->dom->_target_charset); + } + if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} + + if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) + { + // Check if the reported encoding could have been incorrect and the text is actually already UTF-8 + if ((strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text))) + { + $converted_text = $text; + } + else + { + $converted_text = iconv($sourceCharset, $targetCharset, $text); + } + } + + // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output. + if ($targetCharset == 'UTF-8') + { + if (substr($converted_text, 0, 3) == "\xef\xbb\xbf") + { + $converted_text = substr($converted_text, 3); + } + if (substr($converted_text, -3) == "\xef\xbb\xbf") + { + $converted_text = substr($converted_text, 0, -3); + } + } + + return $converted_text; + } + + /** + * Returns true if $string is valid UTF-8 and false otherwise. + * + * @param mixed $str String to be tested + * @return boolean + */ + static function is_utf8($str) + { + $c=0; $b=0; + $bits=0; + $len=strlen($str); + for($i=0; $i<$len; $i++) + { + $c=ord($str[$i]); + if($c > 128) + { + if(($c >= 254)) return false; + elseif($c >= 252) $bits=6; + elseif($c >= 248) $bits=5; + elseif($c >= 240) $bits=4; + elseif($c >= 224) $bits=3; + elseif($c >= 192) $bits=2; + else return false; + if(($i+$bits) > $len) return false; + while($bits > 1) + { + $i++; + $b=ord($str[$i]); + if($b < 128 || $b > 191) return false; + $bits--; + } + } + } + return true; + } + /* + function is_utf8($string) + { + //this is buggy + return (utf8_encode(utf8_decode($string)) == $string); + } + */ + + /** + * Function to try a few tricks to determine the displayed size of an img on the page. + * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types. + * + * @author John Schlick + * @version April 19 2012 + * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out. + */ + function get_display_size() + { + global $debug_object; + + $width = -1; + $height = -1; + + if ($this->tag !== 'img') + { + return false; + } + + // See if there is aheight or width attribute in the tag itself. + if (isset($this->attr['width'])) + { + $width = $this->attr['width']; + } + + if (isset($this->attr['height'])) + { + $height = $this->attr['height']; + } + + // Now look for an inline style. + if (isset($this->attr['style'])) + { + // Thanks to user gnarf from stackoverflow for this regular expression. + $attributes = array(); + preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); + foreach ($matches as $match) { + $attributes[$match[1]] = $match[2]; + } + + // If there is a width in the style attributes: + if (isset($attributes['width']) && $width == -1) + { + // check that the last two characters are px (pixels) + if (strtolower(substr($attributes['width'], -2)) == 'px') + { + $proposed_width = substr($attributes['width'], 0, -2); + // Now make sure that it's an integer and not something stupid. + if (filter_var($proposed_width, FILTER_VALIDATE_INT)) + { + $width = $proposed_width; + } + } + } + + // If there is a width in the style attributes: + if (isset($attributes['height']) && $height == -1) + { + // check that the last two characters are px (pixels) + if (strtolower(substr($attributes['height'], -2)) == 'px') + { + $proposed_height = substr($attributes['height'], 0, -2); + // Now make sure that it's an integer and not something stupid. + if (filter_var($proposed_height, FILTER_VALIDATE_INT)) + { + $height = $proposed_height; + } + } + } + + } + + // Future enhancement: + // Look in the tag to see if there is a class or id specified that has a height or width attribute to it. + + // Far future enhancement + // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width + // Note that in this case, the class or id will have the img subselector for it to apply to the image. + + // ridiculously far future development + // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page. + + $result = array('height' => $height, + 'width' => $width); + return $result; + } + + // camel naming conventions + function getAllAttributes() {return $this->attr;} + function getAttribute($name) {return $this->__get($name);} + function setAttribute($name, $value) {$this->__set($name, $value);} + function hasAttribute($name) {return $this->__isset($name);} + function removeAttribute($name) {$this->__set($name, null);} + function getElementById($id) {return $this->find("#$id", 0);} + function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);} + function getElementByTagName($name) {return $this->find($name, 0);} + function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);} + function parentNode() {return $this->parent();} + function childNodes($idx=-1) {return $this->children($idx);} + function firstChild() {return $this->first_child();} + function lastChild() {return $this->last_child();} + function nextSibling() {return $this->next_sibling();} + function previousSibling() {return $this->prev_sibling();} + function hasChildNodes() {return $this->has_child();} + function nodeName() {return $this->tag;} + function appendChild($node) {$node->parent($this); return $node;} + +} + +/** + * simple html dom parser + * Paperg - in the find routine: allow us to specify that we want case insensitive testing of the value of the selector. + * Paperg - change $size from protected to public so we can easily access it + * Paperg - added ForceTagsClosed in the constructor which tells us whether we trust the html or not. Default is to NOT trust it. + * + * @package PlaceLocalInclude + */ +class simple_html_dom +{ + public $root = null; + public $nodes = array(); + public $callback = null; + public $lowercase = false; + // Used to keep track of how large the text was when we started. + public $original_size; + public $size; + protected $pos; + protected $doc; + protected $char; + protected $cursor; + protected $parent; + protected $noise = array(); + protected $token_blank = " \t\r\n"; + protected $token_equal = ' =/>'; + protected $token_slash = " />\r\n\t"; + protected $token_attr = ' >'; + // Note that this is referenced by a child node, and so it needs to be public for that node to see this information. + public $_charset = ''; + public $_target_charset = ''; + protected $default_br_text = ""; + public $default_span_text = ""; + + // use isset instead of in_array, performance boost about 30%... + protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1); + protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1); + // Known sourceforge issue #2977341 + // B tags that are not closed cause us to return everything to the end of the document. + protected $optional_closing_tags = array( + 'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1), + 'th'=>array('th'=>1), + 'td'=>array('td'=>1), + 'li'=>array('li'=>1), + 'dt'=>array('dt'=>1, 'dd'=>1), + 'dd'=>array('dd'=>1, 'dt'=>1), + 'dl'=>array('dd'=>1, 'dt'=>1), + 'p'=>array('p'=>1), + 'nobr'=>array('nobr'=>1), + 'b'=>array('b'=>1), + 'option'=>array('option'=>1), + ); + + function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) + { + if ($str) + { + if (preg_match("/^http:\/\//i",$str) || is_file($str)) + { + $this->load_file($str); + } + else + { + $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); + } + } + // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html. + if (!$forceTagsClosed) { + $this->optional_closing_array=array(); + } + $this->_target_charset = $target_charset; + } + + function __destruct() + { + $this->clear(); + } + + // load html from string + function load($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) + { + global $debug_object; + + // prepare + $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); + // strip out cdata + $this->remove_noise("''is", true); + // strip out comments + $this->remove_noise("''is"); + // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 + // Script tags removal now preceeds style tag removal. + // strip out @si', '', $text); + return $text; + } + $html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 20) { + $item = new \Item(); + $item->title = FS_StripCDATA($element->find('title', 0)->innertext); + $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FS_ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Futura Sciences'; + } + + public function getURI(){ + return 'http://www.futura-sciences.com/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} From ee19e5c6b53015354682b9178ab6a326164de81c Mon Sep 17 00:00:00 2001 From: Qwerty Date: Fri, 30 May 2014 12:11:02 +0200 Subject: [PATCH 054/139] Create OpenTheoryBridge.php --- bridges/OpenTheoryBridge.php | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 bridges/OpenTheoryBridge.php diff --git a/bridges/OpenTheoryBridge.php b/bridges/OpenTheoryBridge.php new file mode 100644 index 0000000..adf4718 --- /dev/null +++ b/bridges/OpenTheoryBridge.php @@ -0,0 +1,58 @@ +', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = file_get_html('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'OpenTheory'; + } + + public function getURI(){ + return 'http://open1theory.com/feed'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} From e816b2aa805caef02bf8d2ab63ca5cca2ba671bd Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 30 May 2014 14:07:34 +0200 Subject: [PATCH 055/139] Fix post limit for FS, GuruMed, OpenTheory --- bridges/FSBridge.php | 4 ++-- bridges/GuruMedBridge.php | 6 +++--- bridges/OpenTheoryBridge.php | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php index 54876ba..1011b16 100644 --- a/bridges/FSBridge.php +++ b/bridges/FSBridge.php @@ -4,7 +4,7 @@ * Returns the 5 newest posts from http://www.futura-sciences.com (full text) * * @name Futurasciences -* @description Returns the 20 newest posts from FS (full text) +* @description Returns the 5 newest posts from FS (full text) *@maintainer qwertygc */ class FSBridge extends BridgeAbstract{ @@ -30,7 +30,7 @@ class FSBridge extends BridgeAbstract{ $limit = 0; foreach($html->find('item') as $element) { - if($limit < 20) { + if($limit < 5) { $item = new \Item(); $item->title = FS_StripCDATA($element->find('title', 0)->innertext); $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php index 4fbd01c..1f44ec7 100644 --- a/bridges/GuruMedBridge.php +++ b/bridges/GuruMedBridge.php @@ -1,10 +1,10 @@ find('item') as $element) { - if($limit < 10) { + if($limit < 5) { $item = new \Item(); $item->title = GurumedStripCDATA($element->find('title', 0)->innertext); $item->uri = GurumedStripCDATA($element->find('guid', 0)->plaintext); diff --git a/bridges/OpenTheoryBridge.php b/bridges/OpenTheoryBridge.php index adf4718..f3f7cdb 100644 --- a/bridges/OpenTheoryBridge.php +++ b/bridges/OpenTheoryBridge.php @@ -1,10 +1,10 @@ find('item') as $element) { - if($limit < 10) { + if($limit < 5) { $item = new \Item(); $item->title = StripCDATA($element->find('title', 0)->innertext); $item->uri = StripCDATA($element->find('guid', 0)->plaintext); From 237886feb6092e6bd4c6b5adebc84b690a37340e Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 30 May 2014 17:53:48 +0200 Subject: [PATCH 056/139] Add CoinDesk, update Maliki --- bridges/CoinDeskBridge.php | 56 ++++++++++++++++++++++++++++++++++++++ bridges/MalikiBridge.php | 32 +++++++++++++++++----- 2 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 bridges/CoinDeskBridge.php diff --git a/bridges/CoinDeskBridge.php b/bridges/CoinDeskBridge.php new file mode 100644 index 0000000..6dc5a09 --- /dev/null +++ b/bridges/CoinDeskBridge.php @@ -0,0 +1,56 @@ +', '', $string); + return $string; + } + function CoinDeskExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.single-content', 0)->innertext; + $text = strip_tags($text, '

'); + return $text; + } + $html = file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = CoinDeskStripCDATA($element->find('title', 0)->innertext); + $item->author = $element->find('author', 0)->plaintext; + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = CoinDeskExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'CoinDesk'; + } + + public function getURI(){ + return 'http://www.coindesk.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/MalikiBridge.php b/bridges/MalikiBridge.php index 55b09ab..29e4cfa 100644 --- a/bridges/MalikiBridge.php +++ b/bridges/MalikiBridge.php @@ -1,27 +1,45 @@ returnError('Could not request Maliki.', 404); $count=0; + $latest=1; $latest_title=""; + $latest = $html->find('div.conteneur_page a', 1)->href; + $latest_title = $html->find('div.conteneur_page img', 0)->title; + + function MalikiExtractContent($url) { + $html2 = file_get_html($url); + $text = 'http://www.maliki.com/'.$html2->find('img', 0)->src; + $text = '
'.$html2->find('div.imageetnews', 0)->plaintext; + return $text; + } + + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$latest; + $item->title = $latest_title; + $item->timestamp = time(); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + + foreach($html->find('div.boite_strip') as $element) { - if(!empty($element->find('a',0)->href) and $count < 20) { + if(!empty($element->find('a',0)->href) and $count < 3) { $item = new \Item(); $item->uri = 'http://www.maliki.com/'.$element->find('a',0)->href; - $item->thumbnailUri = 'http://www.maliki.com/'.$element->find('img',0)->src; $item->title = $element->find('img',0)->title; $item->timestamp = strtotime(str_replace('/', '-', $element->find('span.stylepetit', 0)->innertext)); - $item->content = '
'; + $item->content = MalikiExtractContent($item->uri); $this->items[] = $item; $count++; } @@ -37,6 +55,6 @@ class MalikiBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 86400; // 24 hours + return 86400*6; // 6 days } } From 6b2742b06753e012159a6a7ecdb53927dfb364e8 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Sat, 31 May 2014 11:46:54 +0200 Subject: [PATCH 057/139] fix PlanetLibre --- bridges/PlanetLibreBridge.php | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php index 073ec28..5ada535 100644 --- a/bridges/PlanetLibreBridge.php +++ b/bridges/PlanetLibreBridge.php @@ -4,31 +4,27 @@ * Returns the 5 newest posts from PlanetLibre (full text) * * @name PlanetLibre -* @homepage http://www.www.planet-libre.org +* @homepage http://www.planet-libre.org * @description Returns the 5 newest posts from PlanetLibre (full text) * @maintainer pit-fgfjiudghdf * @update 2014-05-26 */ class PlanetLibreBridge extends BridgeAbstract{ public function collectData(array $param){ - function PlanetLibreStripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } + function PlanetLibreExtractContent($url) { $html2 = file_get_html($url); - $text = $html2->find('div[class=post-text]', 0)->innertext; + $text = $html2->find('div[class="post-text"]', 0)->innertext; return $text; } - $html = file_get_html('http://www.planet-libre.org/rss10.php') or $this->returnError('Could not request PlanetLibre.', 404); + $html = file_get_html('http://www.planet-libre.org/') or $this->returnError('Could not request PlanetLibre.', 404); $limit = 0; - foreach($html->find('item') as $element) { + foreach($html->find('div.post') as $element) { if($limit < 5) { $item = new \Item(); - $item->title = PlanetLibreStripCDATA($element->find('title', 0)->innertext); - $item->uri = PlanetLibreStripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->title = $element->find('h1', 0)->plaintext; + $item->uri = $element->find('a', 0)->href; + $item->timestamp = strtotime(str_replace('/', '-', $element->find('div[class="post-date"]', 0)->plaintext)); $item->content = PlanetLibreExtractContent($item->uri); $this->items[] = $item; $limit++; From cd6e00b3263f61f76bafd02c8b45ff1c77173ee0 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Sat, 31 May 2014 18:58:56 +0200 Subject: [PATCH 058/139] Create DauphineLibereBridge.php --- bridges/DauphineLibereBridge.php | 55 ++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 bridges/DauphineLibereBridge.php diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php new file mode 100644 index 0000000..ad96130 --- /dev/null +++ b/bridges/DauphineLibereBridge.php @@ -0,0 +1,55 @@ +find('div.column', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + if (isset($param['u'])) { /* user timeline mode */ + $this->request = $param['u']; + $html = file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404); + } + else { + $html = file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404); + } + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = $element->find('title', 0)->innertext; + $item->uri = $element->find('guid', 0)->plaintext; + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Dauphine Bridge'; + } + + public function getURI(){ + return 'http://ledauphine.com/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + // return 0; // 2 hours + } +} From 4cd5b0aec322cb7927defffe82c486d3e9581cb9 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Sat, 7 Jun 2014 20:32:17 +0200 Subject: [PATCH 059/139] fix readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a0e95f7..1526fcb 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Requirements === * PHP 5.3 - * TLS lib activated in PHP config for some bridges. + * `openssl` extension enabled in PHP config (`php.ini`) Enabling/Disabling bridges === @@ -75,7 +75,7 @@ Patch/contributors : * [Djuuu](https://github.com/Djuuu) * [Anadrark](https://github.com/Anadrark]) -Licence +License === Code is [Public Domain](UNLICENSE). From 2f4b325a7d8abcd20ce7cd42bd1bc3d0e0ef3eaa Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 20 Jun 2014 16:41:51 +0200 Subject: [PATCH 060/139] [beta] extract timestamp from Youtube API --- bridges/YoutubeBridge.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 061eda8..76ffc59 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -21,6 +21,15 @@ class YoutubeBridge extends BridgeAbstract{ private $request; public function collectData(array $param){ + + function getPublishDate($id) { + # relies on Youtube API; deprecated + $json = json_decode(file_get_contents("https://gdata.youtube.com/feeds/api/videos/$id?v=2&alt=json"), TRUE) or return time(); + $timestamp = strtotime($json['entry']['published']['$t']); + return $timestamp; + } + + $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; @@ -31,6 +40,8 @@ class YoutubeBridge extends BridgeAbstract{ $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; $item->thumbnailUri = 'https:'.$element->find('img',0)->src; $item->title = trim($element->find('h3',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + $item->timestamp = getPublishDate($item->id); $item->content = '
' . $item->title . ''; $this->items[] = $item; } @@ -44,6 +55,8 @@ class YoutubeBridge extends BridgeAbstract{ $item->uri = 'https://www.youtube.com'.$element->find('.pl-video-title a',0)->href; $item->thumbnailUri = 'https:'.str_replace('/default.','/mqdefault.',$element->find('.pl-video-thumbnail img',0)->src); $item->title = trim($element->find('.pl-video-title a',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + $item->timestamp = getPublishDate($item->id); $item->content = '
' . $item->title . ''; $this->items[] = $item; } @@ -62,6 +75,8 @@ class YoutubeBridge extends BridgeAbstract{ else $item->thumbnailUri = ''.$element->find('img',0)->src; $item->title = trim($element->find('h3',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + $item->timestamp = getPublishDate($item->id); $item->content = '
' . $item->title . ''; $this->items[] = $item; } From dcf554d8d86ecd842a57e1d1d7c6d7bc692eeb72 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 20 Jun 2014 17:00:36 +0200 Subject: [PATCH 061/139] Youtube: fix, indent, limits --- bridges/YoutubeBridge.php | 154 ++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 73 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 76ffc59..69803d5 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -2,12 +2,12 @@ /** * RssBridgeYoutube * Returns the newest videos -* 2014-05-25 * * @name Youtube Bridge * @homepage https://www.youtube.com/ -* @description Returns the newest videos by username or playlist +* @description Returns the 10 newest videos by username/playlist or search * @maintainer mitsukarenai +* @update 2014-06-20 * @use1(u="username") * @use2(p="playlist id") * @use3(s="search keyword",pa="page") @@ -18,85 +18,93 @@ */ class YoutubeBridge extends BridgeAbstract{ - private $request; + private $request; - public function collectData(array $param){ + public function collectData(array $param){ - function getPublishDate($id) { - # relies on Youtube API; deprecated - $json = json_decode(file_get_contents("https://gdata.youtube.com/feeds/api/videos/$id?v=2&alt=json"), TRUE) or return time(); - $timestamp = strtotime($json['entry']['published']['$t']); - return $timestamp; - } + function getPublishDate($id) { + // relies on Youtube API; deprecated + $json = json_decode(file_get_contents("https://gdata.youtube.com/feeds/api/videos/$id?v=2&alt=json"), TRUE) or $this->returnError('Youtube API is down', 404); + $timestamp = strtotime($json['entry']['published']['$t']); + return $timestamp; + } - $html = ''; - if (isset($param['u'])) { /* user timeline mode */ - $this->request = $param['u']; - $html = file_get_html('https://www.youtube.com/user/'.urlencode($this->request).'/videos') or $this->returnError('Could not request Youtube.', 404); + $html = ''; + $limit = 10; + $count = 0; - foreach($html->find('li.channels-content-item') as $element) { - $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; - $item->thumbnailUri = 'https:'.$element->find('img',0)->src; - $item->title = trim($element->find('h3',0)->plaintext); - $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); - $item->timestamp = getPublishDate($item->id); - $item->content = '
' . $item->title . ''; - $this->items[] = $item; - } - } - else if (isset($param['p'])) { /* playlist mode */ - $this->request = $param['p']; - $html = file_get_html('https://www.youtube.com/playlist?list='.urlencode($this->request).'') or $this->returnError('Could not request Youtube.', 404); + if (isset($param['u'])) { /* user timeline mode */ + $this->request = $param['u']; + $html = file_get_html('https://www.youtube.com/user/'.urlencode($this->request).'/videos') or $this->returnError('Could not request Youtube.', 404); - foreach($html->find('tr.pl-video') as $element) { - $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('.pl-video-title a',0)->href; - $item->thumbnailUri = 'https:'.str_replace('/default.','/mqdefault.',$element->find('.pl-video-thumbnail img',0)->src); - $item->title = trim($element->find('.pl-video-title a',0)->plaintext); - $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); - $item->timestamp = getPublishDate($item->id); - $item->content = '
' . $item->title . ''; - $this->items[] = $item; - } - $this->request = 'Playlist '.trim(str_replace(' - YouTube', '', $html->find('title', 0)->plaintext)).', by '.$html->find('h1', 0)->plaintext; - } - else if (isset($param['s'])) { /* search mode */ - $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = file_get_html('https://www.youtube.com/results?search_query='.urlencode($this->request).'&&page='.$page.'&filters=video&search_sort=video_date_uploaded') or $this->returnError('Could not request Youtube.', 404); + foreach($html->find('li.channels-content-item') as $element) { + if($count < $limit) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $item->thumbnailUri = 'https:'.$element->find('img',0)->src; + $item->title = trim($element->find('h3',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + $item->timestamp = getPublishDate($item->id); + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + $count++; + } + } + } - foreach($html->find('li.yt-lockup') as $element) { - $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; - $checkthumb = $element->find('img', 0)->getAttribute('data-thumb'); - if($checkthumb !== FALSE) - $item->thumbnailUri = $checkthumb; - else - $item->thumbnailUri = ''.$element->find('img',0)->src; - $item->title = trim($element->find('h3',0)->plaintext); - $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); - $item->timestamp = getPublishDate($item->id); - $item->content = '
' . $item->title . ''; - $this->items[] = $item; - } - $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); - } - else { - $this->returnError('You must either specify a Youtube username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400); + else if (isset($param['p'])) { /* playlist mode */ + $this->request = $param['p']; + $html = file_get_html('https://www.youtube.com/playlist?list='.urlencode($this->request).'') or $this->returnError('Could not request Youtube.', 404); + + foreach($html->find('tr.pl-video') as $element) { + if($count < $limit) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('.pl-video-title a',0)->href; + $item->thumbnailUri = 'https:'.str_replace('/default.','/mqdefault.',$element->find('.pl-video-thumbnail img',0)->src); + $item->title = trim($element->find('.pl-video-title a',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + $item->timestamp = getPublishDate($item->id); + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + $count++; + } + $this->request = 'Playlist '.trim(str_replace(' - YouTube', '', $html->find('title', 0)->plaintext)).', by '.$html->find('h1', 0)->plaintext; + } + + else if (isset($param['s'])) { /* search mode */ + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $html = file_get_html('https://www.youtube.com/results?search_query='.urlencode($this->request).'&&page='.$page.'&filters=video&search_sort=video_date_uploaded') or $this->returnError('Could not request Youtube.', 404); + + foreach($html->find('li.yt-lockup') as $element) { + $item = new \Item(); + $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $checkthumb = $element->find('img', 0)->getAttribute('data-thumb'); + if($checkthumb !== FALSE) + $item->thumbnailUri = $checkthumb; + else + $item->thumbnailUri = ''.$element->find('img',0)->src; + $item->title = trim($element->find('h3',0)->plaintext); + $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); + //$item->timestamp = getPublishDate($item->id); /* better not use it here */ + $item->content = '
' . $item->title . ''; + $this->items[] = $item; + } + $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } + else + $this->returnError('You must either specify a Youtube username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400); + } + + public function getName(){ + return (!empty($this->request) ? $this->request .' - ' : '') .'Youtube Bridge'; } - - } - public function getName(){ - return (!empty($this->request) ? $this->request .' - ' : '') .'Youtube Bridge'; - } + public function getURI(){ + return 'https://www.youtube.com/'; + } - public function getURI(){ - return 'https://www.youtube.com/'; - } - - public function getCacheDuration(){ - return 10800; // 3 hours - } + public function getCacheDuration(){ + return 10800; // 3 hours + } } From 29aa605c44d36096b3d563980c2773fae588b700 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 20 Jun 2014 17:04:27 +0200 Subject: [PATCH 062/139] =?UTF-8?q?missing=20bracket=20=F0=9F=98=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bridges/YoutubeBridge.php | 1 + 1 file changed, 1 insertion(+) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 69803d5..685a4c3 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -71,6 +71,7 @@ class YoutubeBridge extends BridgeAbstract{ } $this->request = 'Playlist '.trim(str_replace(' - YouTube', '', $html->find('title', 0)->plaintext)).', by '.$html->find('h1', 0)->plaintext; } + } else if (isset($param['s'])) { /* search mode */ $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); From 59c931d16f16dfaf6345c602e5a93ec5a56f880f Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 30 Jun 2014 19:08:59 +0200 Subject: [PATCH 063/139] Create LeMotDuJourBridge.php --- bridges/LeMotDuJourBridge.php | 52 +++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bridges/LeMotDuJourBridge.php diff --git a/bridges/LeMotDuJourBridge.php b/bridges/LeMotDuJourBridge.php new file mode 100644 index 0000000..9149449 --- /dev/null +++ b/bridges/LeMotDuJourBridge.php @@ -0,0 +1,52 @@ +', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.single-contenu', 0)->innertext; + return $text; + } + $html = file_get_html('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeMotDuJour Bridge'; + } + + public function getURI(){ + return 'http://lemotdujour.com/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + // return 0; // 2 hours + } +} From f9168cac715e47ee1e87e19ff6c46611801173f9 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Sat, 5 Jul 2014 13:41:58 +0200 Subject: [PATCH 064/139] Create SegfaultMintBridge.php --- bridges/SegfaultMintBridge.php | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 bridges/SegfaultMintBridge.php diff --git a/bridges/SegfaultMintBridge.php b/bridges/SegfaultMintBridge.php new file mode 100644 index 0000000..48d9aaf --- /dev/null +++ b/bridges/SegfaultMintBridge.php @@ -0,0 +1,58 @@ +', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.post-bodycopy', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = file_get_html('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Segfault Mint'; + } + + public function getURI(){ + return 'http://segfault.linuxmint.com/feed/'; + } + + public function getCacheDuration(){ + // return 3600; // 1 hour + return 0; // 1 hour + } +} From 365b440697c0917bfa125ccd2bf5d354e8bad5e6 Mon Sep 17 00:00:00 2001 From: Qwerty Date: Mon, 7 Jul 2014 11:07:17 +0200 Subject: [PATCH 065/139] Create MemoLinuxBridge.php --- bridges/MemoLinuxBridge.php | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 bridges/MemoLinuxBridge.php diff --git a/bridges/MemoLinuxBridge.php b/bridges/MemoLinuxBridge.php new file mode 100644 index 0000000..eec0a8a --- /dev/null +++ b/bridges/MemoLinuxBridge.php @@ -0,0 +1,56 @@ +', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('div.post-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = file_get_html('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'MemoLinux'; + } + + public function getURI(){ + return 'http://memo-linux.com/feed/'; + } + + public function getCacheDuration(){ + // return 3600; // 1 hour + return 0; // 1 hour + } +} From 75773f62f80be5dacafcedee7d4fac123ce7818d Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Tue, 8 Jul 2014 16:17:50 +0200 Subject: [PATCH 066/139] fix Numerama: afterscript text --- bridges/NumeramaBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 3f3f0fd..8007075 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -22,7 +22,7 @@ class NumeramaBridge extends BridgeAbstract{ $html2 = file_get_html($url); $text = $html2->find('h2.intro', 0)->innertext; $text = $text.$html2->find('div.content', 0)->innertext; - $text = preg_replace('/@si', '', $text); $text = preg_replace('@]*?>.*?@si', '', $text); + $text = preg_replace("/returnError('Could not request MemoLinux.', 404); @@ -50,7 +49,6 @@ class MemoLinuxBridge extends BridgeAbstract{ } public function getCacheDuration(){ - // return 3600; // 1 hour - return 0; // 1 hour + return 3600*12; // 12 hours } } diff --git a/bridges/SegfaultMintBridge.php b/bridges/SegfaultMintBridge.php index 48d9aaf..bb89c45 100644 --- a/bridges/SegfaultMintBridge.php +++ b/bridges/SegfaultMintBridge.php @@ -1,11 +1,10 @@ find('item') as $element) { - if($limit < 10) { + if($limit < 5) { $item = new \Item(); $item->title = StripCDATA($element->find('title', 0)->innertext); $item->uri = StripCDATA($element->find('guid', 0)->plaintext); @@ -52,7 +51,6 @@ class SegfaultMintBridge extends BridgeAbstract{ } public function getCacheDuration(){ - // return 3600; // 1 hour - return 0; // 1 hour + return 3600*24; // 24 hours } } From d4de199501363f397b56cd48f76a74a9e7a74819 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Tue, 8 Jul 2014 17:06:49 +0200 Subject: [PATCH 068/139] fix issue #64 --- bridges/TuxboardBridge.php | 95 +++++++++++++------------------------- 1 file changed, 31 insertions(+), 64 deletions(-) diff --git a/bridges/TuxboardBridge.php b/bridges/TuxboardBridge.php index 4696d4f..f038fbd 100644 --- a/bridges/TuxboardBridge.php +++ b/bridges/TuxboardBridge.php @@ -1,79 +1,46 @@ returnError('Could not request Tuxboard.', 404); - foreach($html->find('div.posts') as $element) { - $a = $element->find("h2 a", 0); - $category = $element->find("div#category", 0); - $catTxt = $category->innertext; - $posFinDate = strpos(" -", $catTxt); - $list = explode(" ", trim(substr($catTxt, $posFinDate))); - $jour = $list[0]; - $mois = 1; - $annee = $list[2]; + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } - switch (strtolower($list[1])) - { - case "janvier" : - $mois = 1; - break; - case "février" : - case "fevrier" : - $mois = 2; - break; - case "mars" : - $mois = 3; - break; - case "avril" : - $mois = 4; - break; - case "mai" : - $mois = 5; - break; - case "juin" : - $mois = 6; - break; - case "juillet" : - $mois = 7; - break; - case "aout" : - case "août" : - $mois = 8; - break; - case "septembre" : - $mois = 9; - break; - case "octobre" : - $mois = 10; - break; - case "novembre" : - $mois = 11; - break; - case "decembre" : - case "décembre" : - $mois = 12; - break; - } + function ExtractContent($url) { + $html2 = file_get_html($url); + $text = $html2->find('article#page', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } - $item = new Item(); + $html = file_get_html('http://www.tuxboard.com/feed/atom/') or $this->returnError('Could not request Tuxboard.', 404); + $limit = 0; - $item->uri = $a->href; - $item->title = $a->innertext; - $item->content = trim($element->find("div.clear", 0)->innertext); - $item->timestamp = mktime(0, 0, 0, $mois, $jour, $annee); - - $this->items[] = $item; - } + foreach($html->find('entry') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + + } public function getName(){ @@ -89,7 +56,7 @@ class TuxboardBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 14600; // 4 hours + return 3600; // 1 hour } } ?> From ede0046d4fe1015a6b52934a24a0d4ac48c5f4ff Mon Sep 17 00:00:00 2001 From: Mitsu Date: Tue, 8 Jul 2014 17:36:44 +0200 Subject: [PATCH 069/139] README: add bridge guidelines --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1526fcb..cd42035 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,18 @@ Including `PHP Simple HTML DOM Parser` under the [MIT License](http://opensource Technical notes === * There is a cache so that source services won't ban you even if you hammer the rss-bridge with requests. Each bridge has a different duration for the cache. The `cache` subdirectory will be automatically created. You can purge it whenever you want. - * To implement a new rss-bridge, create a new class in `bridges` subdirectory. Look at existing bridges for examples. For items you generate in `$this->items`, only `uri` and `title` are mandatory in each item. `timestamp` and `content` are optional but recommended. Any additional key will be ignored by ATOM feed (but outputed to json). + * To implement a new rss-bridge, create a new class in `bridges` subdirectory. Look at existing bridges for examples and the guidelines below. For items you generate in `$this->items`, only `uri` and `title` are mandatory in each item. `timestamp` and `content` are optional but recommended. Any additional key will be ignored by ATOM feed (but outputed to json). + +### Bridge guidelines + + * metatags: `@name` {Name of service}, `@homepage` {URL to homepage}, `@description`, `@update` {YYYY-MM-DD}, `@maintainer` {Github username or nickname} + * scripts (eg. Javascript) must be stripped out. Make good use of `strip_tags()` and `preg_replace()` + * bridge must present data within 8 seconds (adjust iterators accordingly) + * cache timeout must be fine-tuned so that each refresh can provide 1 or 2 new elements on busy periods + * `

link to a iframe (could be a video): '.$element->src.'

'; + } + + $text = strip_tags($text, '

'); + return $text; + } + + $rssFeed = file_get_html('http://www.gizmodo.fr/feed') or $this->returnError('Could not request http://www.gizmodo.fr/feed', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 15) { + $item = new \Item(); + $item->title = $element->find('title', 0)->innertext; + $item->uri = $element->find('guid', 0)->plaintext; + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = GizmodoFRExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'GizmodoFR'; + } + + public function getURI(){ + return 'http://www.gizmodo.fr/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 8007075..0c58197 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,6 +1,6 @@ Date: Mon, 14 Jul 2014 12:41:09 -0500 Subject: [PATCH 071/139] Developpez.com (FR) Bridge --- bridges/DeveloppezDotComBridge.php | 57 ++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 bridges/DeveloppezDotComBridge.php diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php new file mode 100644 index 0000000..c32a0d6 --- /dev/null +++ b/bridges/DeveloppezDotComBridge.php @@ -0,0 +1,57 @@ +', '', $string); + return $string; + } + + function DeveloppezDotComExtractContent($url) { + $articleHTMLContent = file_get_html($url); + $text = $text.$articleHTMLContent->find('div.content', 0)->innertext; + $text = strip_tags($text, '



  • '); + return $text; + } + + $rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 15) { + $item = new \Item(); + $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); + $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = DeveloppezDotComExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'DeveloppezDotCom'; + } + + public function getURI(){ + return 'http://www.developpez.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} From ee45f0a82972cf2fabe04a638272a35af11d87e4 Mon Sep 17 00:00:00 2001 From: Paul Vayssiere Date: Mon, 14 Jul 2014 13:12:52 -0500 Subject: [PATCH 072/139] Le Journal du Geek Bridge --- bridges/GizmodoFRBridge.php | 2 +- bridges/LeJournalDuGeekBridge.php | 69 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 bridges/LeJournalDuGeekBridge.php diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php index bb4bac1..b2e8a21 100644 --- a/bridges/GizmodoFRBridge.php +++ b/bridges/GizmodoFRBridge.php @@ -19,7 +19,7 @@ class GizmodoFRBridge extends BridgeAbstract{ $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { - $text = $text.'

    link to a iframe (could be a video): '.$element->src.'

    '; + $text = $text.'

    link to a iframe (could be a video): '.$element->src.'


    '; } $text = strip_tags($text, '

    '); diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php new file mode 100644 index 0000000..6620b76 --- /dev/null +++ b/bridges/LeJournalDuGeekBridge.php @@ -0,0 +1,69 @@ +', '', $string); + return $string; + } + + function LeJournalDuGeekExtractContent($url) { + $articleHTMLContent = file_get_html($url); + $text = $text.$articleHTMLContent->find('div.post-content', 0)->innertext; + foreach($articleHTMLContent->find('a.more') as $element) { + if ($element->innertext == "Source") { + $text = $text.'

    Source : '.$element->href.'

    '; + break; + } + } + foreach($articleHTMLContent->find('iframe') as $element) { + if (preg_match("/youtube/i", $element->src)) { + $text = $text.'// An IFRAME to Youtube was included in the article: '.$element->src.'
    '; + } + } + + $text = strip_tags($text, '



    • '); + return $text; + } + + $rssFeed = file_get_html('http://www.journaldugeek.com/rss') or $this->returnError('Could not request http://www.journaldugeek.com/rss', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 15) { + $item = new \Item(); + $item->title = LeJournalDuGeekStripCDATA($element->find('title', 0)->innertext); + $item->uri = LeJournalDuGeekStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = LeJournalDuGeekExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeJournalDuGeek'; + } + + public function getURI(){ + return 'http://www.journaldugeek.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} From 94a6b42a2bd7fc3ca7e5dea40a417c86d03217c2 Mon Sep 17 00:00:00 2001 From: Paul Vayssiere Date: Mon, 14 Jul 2014 13:39:41 -0500 Subject: [PATCH 073/139] Add the funny text that completes the title --- bridges/NextInpactBridge.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index ad1d785..ac1ec0f 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -10,7 +10,7 @@ * @maintainer qwertygc */ class NextInpactBridge extends BridgeAbstract{ - + public function collectData(array $param){ function StripCDATA($string) { @@ -20,7 +20,8 @@ class NextInpactBridge extends BridgeAbstract{ } function ExtractContent($url) { $html2 = file_get_html($url); - $text = $html2->find('div[itemprop=articleBody]', 0)->innertext; + $text = '

      '.$html2->find('div#actu_entete > h2', 0)->innertext.'



      '; + $text = $text.$html2->find('div[itemprop=articleBody]', 0)->innertext; return $text; } $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404); @@ -37,7 +38,7 @@ class NextInpactBridge extends BridgeAbstract{ $limit++; } } - + } public function getName(){ From 2dd8f789fc5225263d1c3ff36630afe3149d55ad Mon Sep 17 00:00:00 2001 From: Paul Vayssiere Date: Tue, 15 Jul 2014 19:31:54 -0500 Subject: [PATCH 074/139] microsoft quotes and UTF8 encoding fixed for Developpez.com --- bridges/DeveloppezDotComBridge.php | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index c32a0d6..bc868ae 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -19,23 +19,41 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return $string; } + function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly encoded, here was the trick: http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + { + $search = array(chr(145), + chr(146), + chr(147), + chr(148), + chr(151)); + + $replace = array("'", + "'", + '"', + '"', + '-'); + + return str_replace($search, $replace, $string); + } + function DeveloppezDotComExtractContent($url) { $articleHTMLContent = file_get_html($url); - $text = $text.$articleHTMLContent->find('div.content', 0)->innertext; - $text = strip_tags($text, '



      • '); - return $text; + $text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); + $text = utf8_encode($text); + return trim($text); } $rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); $limit = 0; foreach($rssFeed->find('item') as $element) { - if($limit < 15) { + if($limit < 2) { $item = new \Item(); $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = DeveloppezDotComExtractContent($item->uri); + $content = DeveloppezDotComExtractContent($item->uri); + $item->content = strlen($content) ? $content : $element->description;//In case of it is a tutorial, we just keep the original description $this->items[] = $item; $limit++; } From aab11fbd94ee67b876b3b0b4bd092abbe76f43d1 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 16 Jul 2014 16:32:44 +0200 Subject: [PATCH 075/139] Youtube: fix issue #71 --- bridges/YoutubeBridge.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 685a4c3..d21cc20 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -24,7 +24,7 @@ class YoutubeBridge extends BridgeAbstract{ function getPublishDate($id) { // relies on Youtube API; deprecated - $json = json_decode(file_get_contents("https://gdata.youtube.com/feeds/api/videos/$id?v=2&alt=json"), TRUE) or $this->returnError('Youtube API is down', 404); + $json = json_decode(file_get_contents("https://gdata.youtube.com/feeds/api/videos/$id?v=2&alt=json"), TRUE); $timestamp = strtotime($json['entry']['published']['$t']); return $timestamp; } @@ -41,10 +41,11 @@ class YoutubeBridge extends BridgeAbstract{ foreach($html->find('li.channels-content-item') as $element) { if($count < $limit) { $item = new \Item(); - $item->uri = 'https://www.youtube.com'.$element->find('a',0)->href; + $videoquery = parse_url($element->find('a',0)->href, PHP_URL_QUERY); parse_str($videoquery, $videoquery); + $item->id = $videoquery['v']; + $item->uri = 'https://www.youtube.com/watch?v='.$item->id; $item->thumbnailUri = 'https:'.$element->find('img',0)->src; $item->title = trim($element->find('h3',0)->plaintext); - $item->id = str_replace('/watch?v=', '', $element->find('a',0)->href); $item->timestamp = getPublishDate($item->id); $item->content = '
        ' . $item->title . ''; $this->items[] = $item; From b80764282a136d9ff36aab356138129ddd14871e Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 16 Jul 2014 17:09:11 +0200 Subject: [PATCH 076/139] JournalDuGeek: restrict to 5 posts --- bridges/LeJournalDuGeekBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index 6620b76..0755901 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -6,7 +6,7 @@ * * @name journaldugeek.com (FR) * @homepage http://www.journaldugeek.com/ -* @description Returns the 15 newest posts from LeJournalDuGeek (full text). +* @description Returns the 5 newest posts from LeJournalDuGeek (full text). * @maintainer polopollo */ class LeJournalDuGeekBridge extends BridgeAbstract{ @@ -42,7 +42,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ $limit = 0; foreach($rssFeed->find('item') as $element) { - if($limit < 15) { + if($limit < 5) { $item = new \Item(); $item->title = LeJournalDuGeekStripCDATA($element->find('title', 0)->innertext); $item->uri = LeJournalDuGeekStripCDATA($element->find('guid', 0)->plaintext); From 136be273cec37e2b685ee919b55f069f103f8422 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 16 Jul 2014 18:06:15 +0200 Subject: [PATCH 077/139] Numerama: add lists to output --- bridges/NumeramaBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 0c58197..a7aef4b 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -22,7 +22,7 @@ class NumeramaBridge extends BridgeAbstract{ $html2 = file_get_html($url); $text = $html2->find('h2.intro', 0)->innertext; $text = $text.$html2->find('div.content', 0)->innertext; - $text = strip_tags($text, '

        '); + $text = strip_tags($text, '

            '); return $text; } $html = file_get_html('http://www.numerama.com/rss/news.rss') or $this->returnError('Could not request Numerama.', 404); From fd7b39eaea5fc9f9d38db5ff3a58940362002732 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Fri, 18 Jul 2014 13:19:02 +0200 Subject: [PATCH 078/139] Create WhydBridge.php --- bridges/WhydBridge.php | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 bridges/WhydBridge.php diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php new file mode 100644 index 0000000..1e6bcd6 --- /dev/null +++ b/bridges/WhydBridge.php @@ -0,0 +1,54 @@ +request = $param['u']; + $html = file_get_html('http://www.whyd.com/u/'.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; + } + else + { + $this->returnError('You must specify username', 400); + } + + for($i = 0; $i < 10; $i++) { + $track = $html->find('div.post', $i); + $item = new \Item(); + $item->name = $track->find('h2', 0)->plaintext; + $item->title = $track->find('h2', 0)->plaintext; + $item->content = $track->find('a.thumb',0) . '
            ' . $track->find('h2', 0)->plaintext; + $item->id = 'http://www.whyd.com' . $track->find('a.no-ajaxy',0)->getAttribute('href'); + $item->uri = 'http://www.whyd.com' . $track->find('a.no-ajaxy',0)->getAttribute('href'); + $this->items[] = $item; + } + } + public function getName(){ + return (!empty($this->name) ? $this->name .' - ' : '') .'Soundcloud Bridge'; + } + + public function getURI(){ + return 'https://www.soundcloud.com/'; + } + + public function getCacheDuration(){ + return 1; // 10 minutes + } +} From ecd81dfed0a61eeda9566b72baf372efcd89f0a1 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Fri, 18 Jul 2014 13:31:40 +0200 Subject: [PATCH 079/139] Change WhydBridge.php --- bridges/WhydBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index 1e6bcd6..c728cd2 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -41,7 +41,7 @@ class WhydBridge extends BridgeAbstract{ } } public function getName(){ - return (!empty($this->name) ? $this->name .' - ' : '') .'Soundcloud Bridge'; + return (!empty($this->name) ? $this->name .' - ' : '') .'Whyd Bridge'; } public function getURI(){ From d5ae7373198d584c542414a16f7a4a14ffd9585f Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Fri, 18 Jul 2014 13:33:54 +0200 Subject: [PATCH 080/139] WhydBridge.php --- bridges/WhydBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index c728cd2..6f98f57 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -45,7 +45,7 @@ class WhydBridge extends BridgeAbstract{ } public function getURI(){ - return 'https://www.soundcloud.com/'; + return 'http://www.whyd.com/'; } public function getCacheDuration(){ From 3668475526cbc4950b9ccc6a08d4b21c1b591d13 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 18 Jul 2014 14:06:39 +0200 Subject: [PATCH 081/139] Whyd: tightening input, use username if provided --- bridges/WhydBridge.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index 6f98f57..78bc303 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -8,7 +8,7 @@ * @description Returns 10 newest music from user profile * @maintainer kranack * @update 2014-07-18 -* @use1(u="username") +* @use1(u="username/id") * */ class WhydBridge extends BridgeAbstract{ @@ -21,12 +21,16 @@ class WhydBridge extends BridgeAbstract{ if (isset($param['u'])) { $this->request = $param['u']; - $html = file_get_html('http://www.whyd.com/u/'.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) // is input the userid ? + $html = file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnError('No results for this query.', 404); + else // input may be the username + $html = file_get_html('http://www.whyd.com/'.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; } else { - $this->returnError('You must specify username', 400); + $this->returnError('You must specify username or id', 400); } for($i = 0; $i < 10; $i++) { From 98d551f0b2de50401065fe794795ba063f74793a Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Fri, 18 Jul 2014 15:07:51 +0200 Subject: [PATCH 082/139] Change WhydBridge.php --- bridges/WhydBridge.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index 6f98f57..6b1633e 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -21,7 +21,17 @@ class WhydBridge extends BridgeAbstract{ if (isset($param['u'])) { $this->request = $param['u']; - $html = file_get_html('http://www.whyd.com/u/'.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) { // is input the userid ? + $html = file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnError('No results for this query.', 404); + } else { // input may be the username + $html = file_get_html('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + for ($j = 0; $j < 5; $j++) { + if (strtolower($html->find('div.user', $j)->find('a',0)->plaintext) == strtolower($this->request)) { + $html = file_get_html('http://www.whyd.com' . $html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnError('No results for this query', 404); + break; + } + } + } $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; } else From ce9a5e2489e8339c411027848778d5c60cd694f0 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Fri, 18 Jul 2014 15:10:18 +0200 Subject: [PATCH 083/139] Change WhydBridge.php --- bridges/WhydBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index 6b1633e..18a6a76 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -59,6 +59,6 @@ class WhydBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 1; // 10 minutes + return 600; // 10 minutes } } From efca3781179c262fdec2f0ccd5e7fcda3b50eee7 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Fri, 18 Jul 2014 15:41:33 +0200 Subject: [PATCH 084/139] Whyd: take pull #73 --- bridges/WhydBridge.php | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index 78bc303..2e25873 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -8,7 +8,7 @@ * @description Returns 10 newest music from user profile * @maintainer kranack * @update 2014-07-18 -* @use1(u="username/id") +* @use1(u="username") * */ class WhydBridge extends BridgeAbstract{ @@ -21,16 +21,22 @@ class WhydBridge extends BridgeAbstract{ if (isset($param['u'])) { $this->request = $param['u']; - if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) // is input the userid ? + if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) { // is input the userid ? $html = file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnError('No results for this query.', 404); - else // input may be the username - $html = file_get_html('http://www.whyd.com/'.urlencode($this->request)) or $this->returnError('No results for this query.', 404); - + } else { // input may be the username + $html = file_get_html('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnError('No results for this query.', 404); + for ($j = 0; $j < 5; $j++) { + if (strtolower($html->find('div.user', $j)->find('a',0)->plaintext) == strtolower($this->request)) { + $html = file_get_html('http://www.whyd.com' . $html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnError('No results for this query', 404); + break; + } + } + } $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; } else { - $this->returnError('You must specify username or id', 400); + $this->returnError('You must specify username', 400); } for($i = 0; $i < 10; $i++) { @@ -53,6 +59,7 @@ class WhydBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 1; // 10 minutes + return 600; // 10 minutes } } + From 9156552f91c0c0edb1c076db43109be1f9d52f53 Mon Sep 17 00:00:00 2001 From: 16mhz Date: Fri, 18 Jul 2014 18:21:07 +0200 Subject: [PATCH 085/139] Add Humble Store Discount Bridge --- bridges/HumbleStoreDiscountBridge.php | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 bridges/HumbleStoreDiscountBridge.php diff --git a/bridges/HumbleStoreDiscountBridge.php b/bridges/HumbleStoreDiscountBridge.php new file mode 100644 index 0000000..4bcea23 --- /dev/null +++ b/bridges/HumbleStoreDiscountBridge.php @@ -0,0 +1,59 @@ +returnError('Could not request the Humble Store.', 404); + $string = json_decode($result, true); + $items = $string['results']; + $store_link = 'https://www.humblebundle.com/store/p/'; + $limit = 0; + + foreach ($items as $key => $value) { + if ($limit < 10) { + $new_price = $value['current_price'][0] . ' ' . $value['current_price'][1]; + $full_price = $value['full_price'][0] . ' ' . $value['full_price'][1]; + $product_name = $value['human_name']; + $product_uri = $store_link . $value['machine_name']; + $platforms = str_replace('\'', '', implode("','", $value['platforms'])); + $delivery_methods = str_replace('\'', '', implode("','", $value['delivery_methods'])); + + $content = '' . $product_name . '
            Current price: ' . $new_price . '
            Full price: ' . $full_price + . '
            Delivery methods: ' . $delivery_methods . '
            Platforms: ' + . $platforms . '
            ' . $value['description']; + + $item = new \Item(); + $item->title = $product_name . ' - ' . $new_price; + $item->uri = $product_uri; + $item->content = $content; + $this->items[] = $item; + $limit++; + } + } + } + + public function getName(){ + return 'HumbleStoreDiscount'; + } + + public function getURI(){ + return 'https://www.humblebundle.com/store'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} + From 92f1d580269848f07f34846b63337681613610b3 Mon Sep 17 00:00:00 2001 From: Paul Vayssiere Date: Fri, 18 Jul 2014 12:52:55 -0500 Subject: [PATCH 086/139] oops, it was a bit too short... --- bridges/DeveloppezDotComBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index bc868ae..985b68c 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -47,7 +47,7 @@ class DeveloppezDotComBridge extends BridgeAbstract{ $limit = 0; foreach($rssFeed->find('item') as $element) { - if($limit < 2) { + if($limit < 10) { $item = new \Item(); $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); From d22e2c11059750b722a2d0667472c88dfac1f586 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Sat, 19 Jul 2014 03:34:35 +0200 Subject: [PATCH 087/139] TwitterExtended [beta] results filtering --- bridges/TwitterBridgeExtended.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bridges/TwitterBridgeExtended.php b/bridges/TwitterBridgeExtended.php index 12a48b2..3912a19 100644 --- a/bridges/TwitterBridgeExtended.php +++ b/bridges/TwitterBridgeExtended.php @@ -8,15 +8,20 @@ * @homepage https://twitter.com/ * @description (same as Twitter Bridge, but with avatar, replies and RTs) * @maintainer mitsukarenai -* @use1(q="keyword or hashtag") +* @use1(q="keyword or hashtag",e="exclude words (commasep)") * @use2(u="username") */ class TwitterBridgeExtended extends BridgeAbstract{ public function collectData(array $param){ - $html = ''; + $html = ''; $exclude_request = ''; if (isset($param['q'])) { /* keyword search mode */ - $html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($param['q']).'+include:retweets&src=typd') or $this->returnError('No results for this query.', 404); + if(!empty($param['e'])) { + $exclude = explode(',', $param['e']); + foreach($exclude as $exclude_pattern) + $exclude_request .= " -$exclude_pattern"; + } + $html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($param['q']).urlencode($exclude_request)'+include:retweets&src=typd') or $this->returnError('No results for this query.', 404); } elseif (isset($param['u'])) { /* user timeline mode */ $html = file_get_html('http://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnError('Requested username can\'t be found.', 404); From 035dfbeed886707083e9ab228dcc93dc387aac88 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Sat, 19 Jul 2014 03:41:34 +0200 Subject: [PATCH 088/139] =?UTF-8?q?=C3=87a=20m'apprendra=20=C3=A0=20coder?= =?UTF-8?q?=20=C3=A0=203h40..?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bridges/TwitterBridgeExtended.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/TwitterBridgeExtended.php b/bridges/TwitterBridgeExtended.php index 3912a19..6ce73c3 100644 --- a/bridges/TwitterBridgeExtended.php +++ b/bridges/TwitterBridgeExtended.php @@ -21,7 +21,7 @@ class TwitterBridgeExtended extends BridgeAbstract{ foreach($exclude as $exclude_pattern) $exclude_request .= " -$exclude_pattern"; } - $html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($param['q']).urlencode($exclude_request)'+include:retweets&src=typd') or $this->returnError('No results for this query.', 404); + $html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($param['q']).urlencode($exclude_request).'+include:retweets&src=typd') or $this->returnError('No results for this query.', 404); } elseif (isset($param['u'])) { /* user timeline mode */ $html = file_get_html('http://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnError('Requested username can\'t be found.', 404); From aba7caf3d8c1b73ff75bc8e681a515a596e58b73 Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 01:41:44 +0200 Subject: [PATCH 089/139] ignore some ide --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 0c05f18..2359a6d 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,11 @@ UpgradeLog*.htm App_Data/*.mdf App_Data/*.ldf +################# +## Other ide stuff +################# +.idea/* + ############# ## Windows detritus ############# From f570a9cc8319552f34a9c5baafad277b958f0d6c Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 10:40:00 +0200 Subject: [PATCH 090/139] google plus bridge --- bridges/GooglePlusPostBridge.php | 63 ++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 bridges/GooglePlusPostBridge.php diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php new file mode 100644 index 0000000..8e03ac7 --- /dev/null +++ b/bridges/GooglePlusPostBridge.php @@ -0,0 +1,63 @@ +returnError('You must specify a username (?username=...).', 400); + } + + $this->request = $param['username']; + $html = file_get_html('https://plus.google.com/' . urlencode($this->request) . '/posts') or $this->returnError('No results for this query.', 404); + + var_dump($html); + foreach($html->find('div.Yp.yt.Xa') as $post) + { + $item = new \Item(); + $item->content = $post->find('dib.Al.pf')->innerHTML; + $item->username = $item->fullname = $post->find('header.lea h3 a', 0)->innertext; + $item->id = $post->getAttribute('id'); + $item->title = $item->fullname = $post->find('header.lea', 0)->innertext; + $item->avatar = $post->find('.ys a.ob.Jk img', 0)->src; + $item->uri = $post->find('a.o-U-s.FI.Rg')->href; + $item->timestamp = $post->find('a.o-U-s.FI.Rg')->title; // 5 juin 2014 23:20:41 + $this->items[] = $item; + } + +// // extract plaintext +// $item->content_simple = str_replace('href="/', 'href="https://twitter.com/', html_entity_decode(strip_tags($tweet->find('p.js-tweet-text', 0)->innertext, '
            '))); +// +// // generate the title +// $item->title = $item->fullname . ' (@'. $item->username . ') | ' . $item->content_simple; +// // put out +// $this->items[] = $item; +// } + } + + public function getName() + { + return 'Google Plus Post Bridge'; + } + + public function getURI() + { + return 'http://plus.google.com/'; + } + + public function getCacheDuration() + { + return 600; // 10 minutes + } +} From 4568ef238b81de7fd1e27d65c7ef2a4b33cfa4c7 Mon Sep 17 00:00:00 2001 From: jvb Date: Sun, 20 Jul 2014 11:59:46 +0200 Subject: [PATCH 091/139] ... --- bridges/GooglePlusPostBridge.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 8e03ac7..84ebd35 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -22,7 +22,16 @@ class GooglePlusPostBridge extends BridgeAbstract $this->request = $param['username']; $html = file_get_html('https://plus.google.com/' . urlencode($this->request) . '/posts') or $this->returnError('No results for this query.', 404); - var_dump($html); + //var_dump($html); + //exit(); + $item = new \Item(); + $dsd = array(); + foreach (get_object_vars($html) as $k => $v) + { + $dsd[ $k ] = array_keys(get_object_vars($v)); + } + $item->content = var_export($dsd, true); + $this->items[] = $item; foreach($html->find('div.Yp.yt.Xa') as $post) { $item = new \Item(); @@ -34,6 +43,7 @@ class GooglePlusPostBridge extends BridgeAbstract $item->uri = $post->find('a.o-U-s.FI.Rg')->href; $item->timestamp = $post->find('a.o-U-s.FI.Rg')->title; // 5 juin 2014 23:20:41 $this->items[] = $item; + break; } // // extract plaintext From c02e4cae7c3fb03ad21bab094665b644f6cec36e Mon Sep 17 00:00:00 2001 From: jvb Date: Sun, 20 Jul 2014 12:38:45 +0200 Subject: [PATCH 092/139] try to debug stuff --- bridges/GooglePlusPostBridge.php | 13 +- posts | 2568 ++++++++++++++++++++++++++++++ 2 files changed, 2578 insertions(+), 3 deletions(-) create mode 100644 posts diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 84ebd35..f07b578 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -6,7 +6,7 @@ * * @name Google Plus Post Bridge * @homepage http://plus.google.com/ - * @description Returns user public post (without using their API). + * @description Returns user public post (using their API because without you need to parse javascript ...). * @maintainer Grummfy * @use1(username="usernameOrId") */ @@ -20,7 +20,8 @@ class GooglePlusPostBridge extends BridgeAbstract } $this->request = $param['username']; - $html = file_get_html('https://plus.google.com/' . urlencode($this->request) . '/posts') or $this->returnError('No results for this query.', 404); + //$html = file_get_html('https://plus.google.com/' . urlencode($this->request) . '/posts') or $this->returnError('No results for this query.', 404); + $html = str_get_html(__DIR__ . '/../posts') or $this->returnError('No results for this query.', 404); //var_dump($html); //exit(); @@ -31,6 +32,12 @@ class GooglePlusPostBridge extends BridgeAbstract $dsd[ $k ] = array_keys(get_object_vars($v)); } $item->content = var_export($dsd, true); + $this->items[] = $item; + $item = new \Item(); + $item->content = var_export((($html->find('div.Dge.fOa'))), true); + $this->items[] = $item; + $item = new \Item(); + $item->content = var_export($html->find('div', 0), true) . $html->dump_node(); $this->items[] = $item; foreach($html->find('div.Yp.yt.Xa') as $post) { @@ -68,6 +75,6 @@ class GooglePlusPostBridge extends BridgeAbstract public function getCacheDuration() { - return 600; // 10 minutes + return 1; // 600; // 10 minutes } } diff --git a/posts b/posts new file mode 100644 index 0000000..677bf52 --- /dev/null +++ b/posts @@ -0,0 +1,2568 @@ +Jonathan Van Belle - Google+
            Profiel

            Profile

            Omslagfoto
            Jonathan Van Belle
            Heeft gewerkt bij Belgium
            24.419 weergaven
            OverPosts

            Stream

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            2 problèmes de moins grâce à #legoland
             ·  Vertalen
            3
            1
            Profielfoto van Christophe DefraineProfielfoto van Quentin Forestier
             
            Jen voudrais 4 pour mon bureau !  merci !
             ·  Vertalen
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            Un secret d'arrière boutique dévoilé !
             ·  Vertalen
            1
            Profielfoto van sebi jack
             
            Je m'en doutais
            Voeg een reactie toe…
             
            Si tu aimes le #jdr et veux gagner un exemplaire de #Vivere !
             ·  Vertalen
             
            Geeksleague 78 en ligne :


            Au programme

            3′ : La rubrique de l’invité, Vivere
            50′ : Coup de cœur, Le donjon de crépuscule
            51′ : The tiping of the dead (jeu vidéo)
            54′ : Coup coeur (Méga Lonnnnnnngggggg) : The darkness (comics)
            60′ : Le débat : les réseaux sociaux
            1h15 ‘ : Coup de gueule de Méo, la neutralité du web et contre coup de gueule de Marius
            1h18 ‘ : Le défi de la semaine 2048
            1h20′ Plaid et couverture, Resident evil
            1h34′ : La minute du colloc
            1h35′ : Le super Dragon Quizz Point
            1h50′ Coup de cœur, dernier tome de la Triologie du vide
             ·  Vertalen
            1
            Voeg een reactie toe…

            Jonathan Van Belle

            Actu jeux/zines (sorties/souscriptions)  - 
             
            On va parler de vivere dans le geeksleague de ce soir. Un #jdr (ou plutôt #jdra ) space-fantasy financer via #ulule  
             ·  Vertalen
             
            Ce vendredi c’est le live du podcast Geeksleague 78

            Ce vendredi 21 Mars c’est Geeksleague 78 !

            Ce vendredi nous recevrons le créateur du jeu de rôle : http://vivere-jdr.fr qui a récemment réussi avec brio sont Ulule.
             ·  Vertalen
            2
            Profielfoto van Eric NieudanProfielfoto van Quentin ForestierProfielfoto van Jonathan Van BelleProfielfoto van Arjuna Khan
            10 reacties
             ·  Vertalen
            Voeg een reactie toe…
             
             
            If you have 5 minutes, you can take part in this small (25) question survey about cloud computing and security ! 

            https://www.surveymonkey.com/s/situation_awareness_in_cloud_computing_environment 
             ·  Vertalen
            1
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            Dans un petit pays exotique lointain, le parti au pouvoir avait décidé de ne prendre aucun risque. Heureusement, le vote était principalement électronique et utilisait des disquettes.

            Les machines de vote étaient surveillées et difficiles d'accès. Certes, avec un minimum de complicité, il aurait été possible de les modifier.

            Mais le parti au pouvoir décida d'utiliser un stratagème plus simple et impliquant moins de complices : les disquettes qui serait utilisées furent artificiellement usées en les soumettant au rayonnement d'un électro-aimant. Coût : un seul et unique complice chez le transporteur.

            Lors du comptage des votes, plusieurs disquettes rendirent l'âme et affichèrent des résultats farfelus ou se révélèrent illisibles. Ce qui était bien l'effet escompté. Certes, une majorité fonctionnaient encore mais la somme totale des votes n'était plus vérifiable.

            Selon la procédure en place, des experts furent appelés à la rescousse pour décoder les disquettes. Des experts peu nombreux et parfaitement anonymes dont personne ne savait rien. Or, la société de consultance fournissant les experts était dirigée par un proche du parti au pouvoir. Quelques mois auparavant, il avait donc désigné le responsable du groupe d'experts qui connaissait parfaitement sa mission : il serait chargé d'agréger les chiffres, sur son ordinateur personnel, que les membres de son équipe lui transmettrait. Avant d'annoncer des résultats qui se rapprocheraient autant que possible de la réalité pour qu'ils soient plausible mais qui élimineraient toute concurrence.

            La majorité des experts n'étaient pas au courant et travaillèrent donc réellement à déchiffrer des disquettes corrompues. Certaines furent déclarées illisibles. Mais le chef de notre groupe d'experts expliquait alors à l'ingénieur qu'il transférait la disquette vers un collègue qui pourrait avoir plus de chance.

            Au final, les disquettes furent subtilement détruite pour empêcher tout recompte éventuel. Le chef de notre groupe d'experts annonça les résultats prévu et, même au sein de son équipe, personne ne se douta de rien.

            Le parti au pouvoir confirma sa main-mise sur le pays et personne n'y trouva rien à redire. L'opposition fut muselée.

            Heureusement, ce genre de choses n'arrivent que dans de lointains pays exotiques…
             ·  Vertalen
            4
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            The European Parliament just voted to end roaming surcharges, safeguard the open internet, and make it easier for you to get the most from your mobile.

            Bravo to Pilar del Castillo and all her colleagues!

            http://europa.eu/!NN89YF

            #‎ConnectedContinent‬
             ·  Vertalen
            1
            Voeg een reactie toe…
             
            http://hackathonegovwallonia.net Un hackaton sur la mobilité pour cette année ...
             ·  Vertalen
            1
            Profielfoto van Jonathan Van BelleProfielfoto van Benjamin Monjoie
            2 reacties
             
            Malheureusement non, pas le temps. Malheureusement ....
             ·  Vertalen
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            C'est avec une certaine fébrilité que je vous annonce la sortie officielle de Vivere, le jdr de Space-Fantasy.

            Ce jdr a fait l'objet d'une campagne de financement via la plateforme Ulule l'année dernière.
            Maintenant financé et imprimé, il est disponible au prix de 25€ (hors frais de port).
            Ce dernier peut être directement commendé en m'envoyant un message, mais je préfère les mails. ;)

            Les versions électroniques devraient suivre très très vite, ainsi que le premier supplément pdf.

            Un kit de démonstration est disponible sur le site officiel du jeu, et la page facebook annonce l'activité du jeu, les évènements auxquels je participe ainsi que l'avancement du projet. 
            http://vivere-jdr.fr/

            Pour les belges du coin : je serai au Made In Asia la semaine prochaine ;)
            Passez me voir !
             ·  Vertalen
            1
            Voeg een reactie toe…
            Verhaal
            Introductie
            Short bio
            Algemene informatie
            Geslacht
            Man
            Werk
            Beroep
            IT worker
            Werkervaring
            • Belgium
            \ No newline at end of file From 0113c61911f6bbdf764ab84699da4b3e2c915bfa Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 16:28:51 +0200 Subject: [PATCH 093/139] bridge with google plus --- bridges/GooglePlusPostBridge.php | 139 +- posts | 2568 ------------------------------ 2 files changed, 98 insertions(+), 2609 deletions(-) delete mode 100644 posts diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index f07b578..b6fad07 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -1,17 +1,35 @@ MAX_FILE_SIZE) +// { +// $dom->clear(); +// return false; +// } +// $dom->load_file($str, $lowercase, $stripRN); +// return $dom; +//} + /** - * GooglePlusPostBridge + * Google Plus Post Bridge + * Freely inspired by tweeter bridge * 2014-07-20 * * @name Google Plus Post Bridge * @homepage http://plus.google.com/ - * @description Returns user public post (using their API because without you need to parse javascript ...). + * @description Returns user public post (without API). * @maintainer Grummfy * @use1(username="usernameOrId") */ class GooglePlusPostBridge extends BridgeAbstract { + protected $_title; + protected $_url; + + const GOOGLE_PLUS_BASE_URL = 'https://plus.google.com/'; + public function collectData(array $param) { if (!isset($param['username'])) @@ -20,57 +38,96 @@ class GooglePlusPostBridge extends BridgeAbstract } $this->request = $param['username']; - //$html = file_get_html('https://plus.google.com/' . urlencode($this->request) . '/posts') or $this->returnError('No results for this query.', 404); - $html = str_get_html(__DIR__ . '/../posts') or $this->returnError('No results for this query.', 404); + // get content parsed +// $html = file_get_html(__DIR__ . '/../posts2.html' + $html = file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' + // force language + , false, stream_context_create(array('http'=> array( + 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" + ))) + ) OR $this->returnError('No results for this query.', 404); - //var_dump($html); - //exit(); - $item = new \Item(); - $dsd = array(); - foreach (get_object_vars($html) as $k => $v) - { - $dsd[ $k ] = array_keys(get_object_vars($v)); - } - $item->content = var_export($dsd, true); - $this->items[] = $item; - $item = new \Item(); - $item->content = var_export((($html->find('div.Dge.fOa'))), true); - $this->items[] = $item; - $item = new \Item(); - $item->content = var_export($html->find('div', 0), true) . $html->dump_node(); - $this->items[] = $item; - foreach($html->find('div.Yp.yt.Xa') as $post) - { - $item = new \Item(); - $item->content = $post->find('dib.Al.pf')->innerHTML; - $item->username = $item->fullname = $post->find('header.lea h3 a', 0)->innertext; - $item->id = $post->getAttribute('id'); - $item->title = $item->fullname = $post->find('header.lea', 0)->innertext; - $item->avatar = $post->find('.ys a.ob.Jk img', 0)->src; - $item->uri = $post->find('a.o-U-s.FI.Rg')->href; - $item->timestamp = $post->find('a.o-U-s.FI.Rg')->title; // 5 juin 2014 23:20:41 - $this->items[] = $item; - break; - } + // get title, url, ... there is a lot of intresting stuff in meta + $this->_title = $html->find('meta[property]', 0)->getAttribute('content'); + $this->_url = $html->find('meta[itemprop=url]', 0)->getAttribute('content'); -// // extract plaintext -// $item->content_simple = str_replace('href="/', 'href="https://twitter.com/', html_entity_decode(strip_tags($tweet->find('p.js-tweet-text', 0)->innertext, ''))); -// -// // generate the title -// $item->title = $item->fullname . ' (@'. $item->username . ') | ' . $item->content_simple; -// // put out +// foreach ($html->find('meta') as $e) +// { +// $item = new \Item(); +// $item->content = var_export($e->attr, true); // $this->items[] = $item; // } + + // div[jsmodel=XNmfOc] + foreach($html->find('div.yt') as $post) + { + $item = new \Item(); +// $item->content = $post->find('div.Al', 0)->innertext; + $item->username = $item->fullname = $post->find('header.lea h3 a', 0)->innertext; + $item->id = $post->getAttribute('id'); +// $item->title = $item->fullname = $post->find('header.lea', 0)->plaintext; + $item->avatar = $post->find('div.ys img', 0)->src; +// var_dump((($post->find('a.o-U-s', 0)->getAllAttributes()))); + $item->uri = $post->find('a.o-U-s', 0)->href; + $item->timestamp = strtotime($post->find('a.o-U-s', 0)->plaintext); + $this->items[] = $item; + + // hashtag to treat : https://plus.google.com/explore/tag + $hashtags = array(); + foreach($post->find('a.d-s') as $hashtag) + { + $hashtags[ trim($hashtag->plaintext) ] = self::GOOGLE_PLUS_BASE_URL . $hashtag->href; + } + + $item->content = ''; + + // avatar display + $item->content .= ''; + + $content = $post->find('div.Al', 0); + + // alter link +// $content = $content->innertext; +// $content = str_replace('href="./', 'href="' . self::GOOGLE_PLUS_BASE_URL, $content); +// $content = str_replace('href="photos', 'href="' . self::GOOGLE_PLUS_BASE_URL . 'photos', $content); + // XXX ugly but I don't have any idea how to do a better stuff, str_replace on link doesn't work as expected and ask too many checks + foreach($content->find('a') as $link) + { + $hasHttp = strpos($link->href, 'http'); + $hasDoubleSlash = strpos($link->href, '//'); + + if ((!$hasHttp && !$hasDoubleSlash) + || (false !== $hasHttp && strpos($link->href, 'http') != 0) + || (false === $hasHttp && false !== $hasDoubleSlash && $hasDoubleSlash != 0)) + { + // skipp bad link, for some hashtag or other stuff + if (strpos($link->href, '/') == 0) + { + $link->href = substr($link->href, 1); + } + $link->href = self::GOOGLE_PLUS_BASE_URL . $link->href; + } + } + $content = $content->innertext; + + $item->content .= '
            ' . $content . '
            '; + + // extract plaintext + $item->content_simple = $post->find('div.Al', 0)->plaintext; + } + +// $html->save(__DIR__ . '/../posts2.html'); } public function getName() { - return 'Google Plus Post Bridge'; + return $this->_title ?: 'Google Plus Post Bridge'; } public function getURI() { - return 'http://plus.google.com/'; + return $this->_url ?: 'http://plus.google.com/'; } public function getCacheDuration() diff --git a/posts b/posts deleted file mode 100644 index 677bf52..0000000 --- a/posts +++ /dev/null @@ -1,2568 +0,0 @@ -Jonathan Van Belle - Google+
            Profiel

            Profile

            Omslagfoto
            Jonathan Van Belle
            Heeft gewerkt bij Belgium
            24.419 weergaven
            OverPosts

            Stream

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            2 problèmes de moins grâce à #legoland
             ·  Vertalen
            3
            1
            Profielfoto van Christophe DefraineProfielfoto van Quentin Forestier
             
            Jen voudrais 4 pour mon bureau !  merci !
             ·  Vertalen
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            Un secret d'arrière boutique dévoilé !
             ·  Vertalen
            1
            Profielfoto van sebi jack
             
            Je m'en doutais
            Voeg een reactie toe…
             
            Si tu aimes le #jdr et veux gagner un exemplaire de #Vivere !
             ·  Vertalen
             
            Geeksleague 78 en ligne :


            Au programme

            3′ : La rubrique de l’invité, Vivere
            50′ : Coup de cœur, Le donjon de crépuscule
            51′ : The tiping of the dead (jeu vidéo)
            54′ : Coup coeur (Méga Lonnnnnnngggggg) : The darkness (comics)
            60′ : Le débat : les réseaux sociaux
            1h15 ‘ : Coup de gueule de Méo, la neutralité du web et contre coup de gueule de Marius
            1h18 ‘ : Le défi de la semaine 2048
            1h20′ Plaid et couverture, Resident evil
            1h34′ : La minute du colloc
            1h35′ : Le super Dragon Quizz Point
            1h50′ Coup de cœur, dernier tome de la Triologie du vide
             ·  Vertalen
            1
            Voeg een reactie toe…

            Jonathan Van Belle

            Actu jeux/zines (sorties/souscriptions)  - 
             
            On va parler de vivere dans le geeksleague de ce soir. Un #jdr (ou plutôt #jdra ) space-fantasy financer via #ulule  
             ·  Vertalen
             
            Ce vendredi c’est le live du podcast Geeksleague 78

            Ce vendredi 21 Mars c’est Geeksleague 78 !

            Ce vendredi nous recevrons le créateur du jeu de rôle : http://vivere-jdr.fr qui a récemment réussi avec brio sont Ulule.
             ·  Vertalen
            2
            Profielfoto van Eric NieudanProfielfoto van Quentin ForestierProfielfoto van Jonathan Van BelleProfielfoto van Arjuna Khan
            10 reacties
             ·  Vertalen
            Voeg een reactie toe…
             
             
            If you have 5 minutes, you can take part in this small (25) question survey about cloud computing and security ! 

            https://www.surveymonkey.com/s/situation_awareness_in_cloud_computing_environment 
             ·  Vertalen
            1
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            Dans un petit pays exotique lointain, le parti au pouvoir avait décidé de ne prendre aucun risque. Heureusement, le vote était principalement électronique et utilisait des disquettes.

            Les machines de vote étaient surveillées et difficiles d'accès. Certes, avec un minimum de complicité, il aurait été possible de les modifier.

            Mais le parti au pouvoir décida d'utiliser un stratagème plus simple et impliquant moins de complices : les disquettes qui serait utilisées furent artificiellement usées en les soumettant au rayonnement d'un électro-aimant. Coût : un seul et unique complice chez le transporteur.

            Lors du comptage des votes, plusieurs disquettes rendirent l'âme et affichèrent des résultats farfelus ou se révélèrent illisibles. Ce qui était bien l'effet escompté. Certes, une majorité fonctionnaient encore mais la somme totale des votes n'était plus vérifiable.

            Selon la procédure en place, des experts furent appelés à la rescousse pour décoder les disquettes. Des experts peu nombreux et parfaitement anonymes dont personne ne savait rien. Or, la société de consultance fournissant les experts était dirigée par un proche du parti au pouvoir. Quelques mois auparavant, il avait donc désigné le responsable du groupe d'experts qui connaissait parfaitement sa mission : il serait chargé d'agréger les chiffres, sur son ordinateur personnel, que les membres de son équipe lui transmettrait. Avant d'annoncer des résultats qui se rapprocheraient autant que possible de la réalité pour qu'ils soient plausible mais qui élimineraient toute concurrence.

            La majorité des experts n'étaient pas au courant et travaillèrent donc réellement à déchiffrer des disquettes corrompues. Certaines furent déclarées illisibles. Mais le chef de notre groupe d'experts expliquait alors à l'ingénieur qu'il transférait la disquette vers un collègue qui pourrait avoir plus de chance.

            Au final, les disquettes furent subtilement détruite pour empêcher tout recompte éventuel. Le chef de notre groupe d'experts annonça les résultats prévu et, même au sein de son équipe, personne ne se douta de rien.

            Le parti au pouvoir confirma sa main-mise sur le pays et personne n'y trouva rien à redire. L'opposition fut muselée.

            Heureusement, ce genre de choses n'arrivent que dans de lointains pays exotiques…
             ·  Vertalen
            4
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            The European Parliament just voted to end roaming surcharges, safeguard the open internet, and make it easier for you to get the most from your mobile.

            Bravo to Pilar del Castillo and all her colleagues!

            http://europa.eu/!NN89YF

            #‎ConnectedContinent‬
             ·  Vertalen
            1
            Voeg een reactie toe…
             
            http://hackathonegovwallonia.net Un hackaton sur la mobilité pour cette année ...
             ·  Vertalen
            1
            Profielfoto van Jonathan Van BelleProfielfoto van Benjamin Monjoie
            2 reacties
             
            Malheureusement non, pas le temps. Malheureusement ....
             ·  Vertalen
            Voeg een reactie toe…

            Jonathan Van Belle

            Openbaar gedeeld  - 
             
             
            C'est avec une certaine fébrilité que je vous annonce la sortie officielle de Vivere, le jdr de Space-Fantasy.

            Ce jdr a fait l'objet d'une campagne de financement via la plateforme Ulule l'année dernière.
            Maintenant financé et imprimé, il est disponible au prix de 25€ (hors frais de port).
            Ce dernier peut être directement commendé en m'envoyant un message, mais je préfère les mails. ;)

            Les versions électroniques devraient suivre très très vite, ainsi que le premier supplément pdf.

            Un kit de démonstration est disponible sur le site officiel du jeu, et la page facebook annonce l'activité du jeu, les évènements auxquels je participe ainsi que l'avancement du projet. 
            http://vivere-jdr.fr/

            Pour les belges du coin : je serai au Made In Asia la semaine prochaine ;)
            Passez me voir !
             ·  Vertalen
            1
            Voeg een reactie toe…
            Verhaal
            Introductie
            Short bio
            Algemene informatie
            Geslacht
            Man
            Werk
            Beroep
            IT worker
            Werkervaring
            • Belgium
            \ No newline at end of file From 2ed55b143e33053e085f330a225c220684c87399 Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 16:32:45 +0200 Subject: [PATCH 094/139] remove useless --- bridges/GooglePlusPostBridge.php | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index b6fad07..05cffec 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -1,17 +1,5 @@ MAX_FILE_SIZE) -// { -// $dom->clear(); -// return false; -// } -// $dom->load_file($str, $lowercase, $stripRN); -// return $dom; -//} - /** * Google Plus Post Bridge * Freely inspired by tweeter bridge From 785cf75d6017f622ac99be8cc9ddaa6a3df8f064 Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 16:47:02 +0200 Subject: [PATCH 095/139] a bit of cleaning for reading --- index.php | 97 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/index.php b/index.php index 0ca1538..9f4bbcd 100644 --- a/index.php +++ b/index.php @@ -144,49 +144,66 @@ $formats = Format::searchInformation();

            RSS-Bridge

            ·Reconnecting the Web·

            + $bridgeInformations): + ?> + +
            +

            '.$bridgeInformations['name'].'' : $bridgeInformations['name'] ?>

            +

            + +

            - $bridgeInformations): ?> - -
            -

            '.$bridgeInformations['name'].'' : $bridgeInformations['name'] ?>

            -

            - -

            + 0 ): ?> +
              + $anUse): ?> +
            1. +
              + + + $argDescription) + { + $idArg = 'arg-' . $bridgeReference . '-' . $anUseNum . '-' . $argName; + echo '' + } - 0 ): ?> -
                - $anUse): ?> -
              1. - - - - $argDescription): ?> - - - - $infos ): ?> - - -
              2. - - -
              - -
              - - - $infos ): ?> - - -
              - - '.$bridgeInformations['maintainer'].'' : '' ?> -
            - + foreach( $formats as $name => $infos ) + { + if ( isset($infos['name']) ) + { + echo getHelperButtonFormat($name, $infos['name']); + } + } + ?> + + + +
          + +
          + + + $infos ) + { + if( isset($infos['name']) ) + { + echo getHelperButtonFormat($name, $infos['name']); + } + } + ?> +
          + + '.$bridgeInformations['maintainer'].'' : '' ?> + + From 42f6ee09e5b943a689556749281b0ed0d5fce2b9 Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 16:48:30 +0200 Subject: [PATCH 096/139] oups --- index.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.php b/index.php index 9f4bbcd..a1d03bc 100644 --- a/index.php +++ b/index.php @@ -166,7 +166,7 @@ $formats = Format::searchInformation(); foreach($anUse as $argName => $argDescription) { $idArg = 'arg-' . $bridgeReference . '-' . $anUseNum . '-' . $argName; - echo '' + echo ''; } foreach( $formats as $name => $infos ) From c297869a3cd0863396581cc17fdcce41d0d58eea Mon Sep 17 00:00:00 2001 From: Grummfy Date: Sun, 20 Jul 2014 16:49:53 +0200 Subject: [PATCH 097/139] - --- index.php | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/index.php b/index.php index a1d03bc..1f71ed5 100644 --- a/index.php +++ b/index.php @@ -147,8 +147,9 @@ $formats = Format::searchInformation(); $bridgeInformations): + + if(BridgeWhitelist($whitelist_selection, $bridgeReference)): ?> -

          '.$bridgeInformations['name'].'' : $bridgeInformations['name'] ?>

          @@ -198,10 +199,11 @@ $formats = Format::searchInformation(); '.$bridgeInformations['maintainer'].'' : '' ?>

          - +