From 3c0d13c1bb72a87f0d671692e205f45f04d94eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:06:35 +0200 Subject: [PATCH] bridges: use BridgeAbstract::getSimpleHTMLDOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of BridgeAbstract::file_get_html Signed-off-by: Pierre Mazière --- bridges/ABCTabsBridge.php | 4 +- bridges/AcrimedBridge.php | 2 +- bridges/AllocineFRBridge.php | 8 +-- bridges/AnimeUltimeBridge.php | 4 +- bridges/BandcampBridge.php | 2 +- bridges/BastaBridge.php | 8 +-- bridges/BlaguesDeMerdeBridge.php | 4 +- bridges/BooruprojectBridge.php | 14 ++--- bridges/CADBridge.php | 7 ++- bridges/CNETBridge.php | 4 +- bridges/CoinDeskBridge.php | 56 ++++++++++++++++++++ bridges/CollegeDeFranceBridge.php | 2 +- bridges/CommonDreamsBridge.php | 4 +- bridges/CopieDoubleBridge.php | 8 +-- bridges/CourrierInternationalBridge.php | 34 ++++++------ bridges/CpasbienBridge.php | 8 +-- bridges/CryptomeBridge.php | 2 +- bridges/DailymotionBridge.php | 12 ++--- bridges/DanbooruBridge.php | 14 ++--- bridges/DansTonChatBridge.php | 2 +- bridges/DauphineLibereBridge.php | 8 +-- bridges/DeveloppezDotComBridge.php | 6 +-- bridges/DilbertBridge.php | 2 +- bridges/DollbooruBridge.php | 14 ++--- bridges/DuckDuckGoBridge.php | 2 +- bridges/EZTVBridge.php | 4 +- bridges/EliteDangerousGalnetBridge.php | 4 +- bridges/ElsevierBridge.php | 6 +-- bridges/FSBridge.php | 56 ++++++++++++++++++++ bridges/FacebookBridge.php | 6 +-- bridges/FierPandaBridge.php | 2 +- bridges/FlickrExploreBridge.php | 2 +- bridges/FlickrTagBridge.php | 8 +-- bridges/FootitoBridge.php | 16 +++--- bridges/FourchanBridge.php | 4 +- bridges/FrandroidBridge.php | 69 +++++++++++++++++++++++++ bridges/FuturaSciencesBridge.php | 4 +- bridges/GBAtempBridge.php | 6 +-- bridges/GelbooruBridge.php | 14 ++--- bridges/GiphyBridge.php | 22 ++++---- bridges/GithubIssueBridge.php | 2 +- bridges/GitlabCommitsBridge.php | 2 +- 
bridges/GizmodoFRBridge.php | 4 +- bridges/GooglePlusPostBridge.php | 4 +- bridges/GoogleSearchBridge.php | 4 +- bridges/GuruMedBridge.php | 2 +- bridges/HDWallpapersBridge.php | 2 +- bridges/HentaiHavenBridge.php | 2 +- bridges/IdenticaBridge.php | 2 +- bridges/InstagramBridge.php | 18 +++---- bridges/JapanExpoBridge.php | 4 +- bridges/KonachanBridge.php | 14 ++--- bridges/KoreusBridge.php | 4 +- bridges/LeBonCoinBridge.php | 24 ++++----- bridges/LeJournalDuGeekBridge.php | 4 +- bridges/LeMondeInformatiqueBridge.php | 4 +- bridges/LeMotDuJourBridge.php | 55 ++++++++++++++++++++ bridges/LesJoiesDuCodeBridge.php | 16 +++--- bridges/LichessBridge.php | 4 +- bridges/LinkedInCompany.php | 2 +- bridges/LolibooruBridge.php | 14 ++--- bridges/MalikiBridge.php | 60 +++++++++++++++++++++ bridges/MemoLinuxBridge.php | 58 +++++++++++++++++++++ bridges/MilbooruBridge.php | 14 ++--- bridges/MondeDiploBridge.php | 4 +- bridges/MsnMondeBridge.php | 4 +- bridges/MspabooruBridge.php | 14 ++--- bridges/NakedSecurityBridge.php | 6 +-- bridges/NasaApodBridge.php | 4 +- bridges/NeuviemeArtBridge.php | 4 +- bridges/NextInpactBridge.php | 4 +- bridges/NiceMatinBridge.php | 8 +-- bridges/NovelUpdatesBridge.php | 2 +- bridges/NumeramaBridge.php | 1 - bridges/OpenClassroomsBridge.php | 4 +- bridges/OpenTheoryBridge.php | 56 ++++++++++++++++++++ bridges/ParuVenduImmoBridge.php | 18 +++---- bridges/PickyWallpapersBridge.php | 2 +- bridges/PinterestBridge.php | 36 ++++++------- bridges/PlanetLibreBridge.php | 4 +- bridges/ProjectMGameBridge.php | 2 +- bridges/RTBFBridge.php | 2 +- bridges/RaymondBridge.php | 53 +++++++++++++++++++ bridges/Releases3DSBridge.php | 2 +- bridges/ReporterreBridge.php | 4 +- bridges/Rue89Bridge.php | 2 +- bridges/Rule34Bridge.php | 14 ++--- bridges/Rule34pahealBridge.php | 14 ++--- bridges/SafebooruBridge.php | 14 ++--- bridges/SakugabooruBridge.php | 14 ++--- bridges/ScilogsBridge.php | 54 +++++++++++++++++++ bridges/ScmbBridge.php | 14 ++--- 
bridges/ScoopItBridge.php | 6 +-- bridges/SegfaultMintBridge.php | 55 ++++++++++++++++++++ bridges/Sexactu.php | 14 ++--- bridges/SiliconBridge.php | 4 +- bridges/StripeAPIChangeLogBridge.php | 2 +- bridges/SuperbWallpapersBridge.php | 4 +- bridges/T411Bridge.php | 4 +- bridges/TagBoardBridge.php | 6 +-- bridges/TbibBridge.php | 14 ++--- bridges/TheCodingLoveBridge.php | 18 +++---- bridges/ThePirateBayBridge.php | 4 +- bridges/TuxboardBridge.php | 64 +++++++++++++++++++++++ bridges/TwitterBridge.php | 20 +++---- bridges/UnsplashBridge.php | 2 +- bridges/ViadeoCompany.php | 2 +- bridges/VineBridge.php | 2 +- bridges/WallpaperStopBridge.php | 2 +- bridges/WhydBridge.php | 8 +-- bridges/WikipediaDEBridge.php | 48 +++++++++++++++++ bridges/WikipediaENBridge.php | 44 ++++++++++++++++ bridges/WikipediaEOBridge.php | 44 ++++++++++++++++ bridges/WikipediaFRBridge.php | 46 +++++++++++++++++ bridges/WordPressBridge.php | 8 +-- bridges/WorldOfTanks.php | 6 +-- bridges/XbooruBridge.php | 14 ++--- bridges/YandereBridge.php | 14 ++--- bridges/YoutubeBridge.php | 14 ++--- bridges/ZatazBridge.php | 4 +- bridges/ZoneTelechargementBridge.php | 2 +- 121 files changed, 1212 insertions(+), 396 deletions(-) create mode 100644 bridges/CoinDeskBridge.php create mode 100644 bridges/FSBridge.php create mode 100644 bridges/FrandroidBridge.php create mode 100644 bridges/LeMotDuJourBridge.php create mode 100644 bridges/MalikiBridge.php create mode 100644 bridges/MemoLinuxBridge.php create mode 100644 bridges/OpenTheoryBridge.php create mode 100644 bridges/RaymondBridge.php create mode 100644 bridges/ScilogsBridge.php create mode 100644 bridges/SegfaultMintBridge.php create mode 100644 bridges/TuxboardBridge.php create mode 100644 bridges/WikipediaDEBridge.php create mode 100644 bridges/WikipediaENBridge.php create mode 100644 bridges/WikipediaEOBridge.php create mode 100644 bridges/WikipediaFRBridge.php diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php index 0c9bc68..4fd2487 
100644 --- a/bridges/ABCTabsBridge.php +++ b/bridges/ABCTabsBridge.php @@ -15,9 +15,9 @@ class ABCTabsBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); $table = $html->find('table#myTable', 0)->children(1); - + foreach ($table->find('tr') as $tab) { $item = new \Item(); diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 85e93fe..fce0ce1 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -29,7 +29,7 @@ class AcrimedBridge extends RssExpander{ $item->title = trim($newsItem->title); $item->timestamp = strtotime($dc->date); - $articlePage = $this->file_get_html($newsItem->link); + $articlePage = $this->getSimpleHTMLDOM($newsItem->link); $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index ef01746..baaf4a1 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -9,7 +9,7 @@ class AllocineFRBridge extends BridgeAbstract{ $this->description = "Bridge for allocine.fr"; $this->update = '2016-08-17'; - $this->parameters[] = + $this->parameters[] = '[ { "name" : "category", @@ -18,7 +18,7 @@ class AllocineFRBridge extends BridgeAbstract{ "required" : true, "exampleValue" : "Faux Raccord", "title" : "Select your category", - "values" : + "values" : [ { "name" : "Faux Raccord", @@ -64,12 +64,12 @@ class AllocineFRBridge extends BridgeAbstract{ // Update bridge name to match selection $this->name .= ' : ' . 
$category; - $html = $this->file_get_html($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); foreach($html->find('figure.media-meta-fig') as $element) { $item = new Item(); - + $title = $element->find('div.titlebar h3.title a', 0); $content = trim($element->innertext); $figCaption = strpos($content, $category); diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index 0f305e3..7eae61c 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -67,7 +67,7 @@ class AnimeUltimeBridge extends BridgeAbstract { //Retrive page contents $website = 'http://www.anime-ultime.net/'; $url = $website.'history-0-1/'.$requestFilter; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); //Relases are sorted by day : process each day individually foreach ($html->find('div.history', 0)->find('h3') as $daySection) { @@ -110,7 +110,7 @@ class AnimeUltimeBridge extends BridgeAbstract { $item->content = $item_description; $this->items[] = $item; $processedOK++; - + //Stop processing once limit is reached if ($processedOK >= 10) return; diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index 1e6c2dc..71bf36f 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -26,7 +26,7 @@ class BandcampBridge extends BridgeAbstract{ $html = ''; if (isset($param['tag'])) { $this->request = $param['tag']; - $html = $this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); 
} else { $this->returnClientError('You must specify tag (/tag/...)'); diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php index 35c9ae4..6af218f 100644 --- a/bridges/BastaBridge.php +++ b/bridges/BastaBridge.php @@ -7,14 +7,14 @@ class BastaBridge extends BridgeAbstract{ $this->description = "Returns the newest articles."; $this->update = '2016-08-17'; } - + public function collectData(array $param){ // Replaces all relative image URLs by absolute URLs. Relative URLs always start with 'local/'! function ReplaceImageUrl($content){ return preg_replace('/src=["\']{1}([^"\']+)/ims', 'src=\'http://www.bastamag.net/$1\'', $content); } - - $html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); + + $html = $this->getSimpleHTMLDOM('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); $limit = 0; foreach($html->find('item') as $element) { @@ -34,4 +34,4 @@ class BastaBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/BlaguesDeMerdeBridge.php b/bridges/BlaguesDeMerdeBridge.php index 7fb5d45..be36c0c 100644 --- a/bridges/BlaguesDeMerdeBridge.php +++ b/bridges/BlaguesDeMerdeBridge.php @@ -12,8 +12,8 @@ class BlaguesDeMerdeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); - + $html = $this->getSimpleHTMLDOM('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); + foreach($html->find('article.joke_contener') as $element) { $item = new Item(); $temp = $element->find('a'); diff --git a/bridges/BooruprojectBridge.php b/bridges/BooruprojectBridge.php index 17c1982..b1423db 100644 --- a/bridges/BooruprojectBridge.php +++ b/bridges/BooruprojectBridge.php @@ -32,29 +32,29 @@ class BooruprojectBridge 
extends BridgeAbstract{ public function collectData(array $param){ $page = 0; $tags = ''; - if (!empty($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (!empty($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 20; } - if (!empty($param['t'])) { - $tags = '&tags='.urlencode($param['t']); + if (!empty($param['t'])) { + $tags = '&tags='.urlencode($param['t']); } if (empty($param['i'])) { $this->returnServerError('Please enter a ***.booru.org instance.'); } - $html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); + $html = $this->getSimpleHTMLDOM("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://'.$param['i'].'.booru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); $item->timestamp = time(); $item->tags = $element->find('img', 0)->getAttribute('title'); $item->title = 'Booruproject '.$param['i'].' | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php index 17aa807..d62041a 100644 --- a/bridges/CADBridge.php +++ b/bridges/CADBridge.php @@ -9,7 +9,7 @@ class CADBridge extends BridgeAbstract{ } private function CADExtractContent($url) { - $html3 = $this->file_get_html($url); + $html3 = $this->getSimpleHTMLDOM($url); // The request might fail due to missing https support or wrong URL if($html3 == false) @@ -27,7 +27,6 @@ class CADBridge extends BridgeAbstract{ default: return 'Daily comic not released yet'; } - $img = implode ($url2[0]); $html3->clear(); unset ($html3); @@ -45,7 +44,7 @@ class CADBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); + $html = $this->getSimpleHTMLDOM('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); $limit = 0; foreach($html->find('item') as $element) { @@ -67,4 +66,4 @@ class CADBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 6fb4d93..7be650e 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -51,7 +51,7 @@ class CNETBridge extends BridgeAbstract { $this->topicName = $param['topic']; $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? 
'' : 'topics/'.$this->topicName.'/'); - $html = $this->file_get_html($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); + $html = $this->getSimpleHTMLDOM($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); $limit = 0; foreach($html->find('div.assetBody') as $element) { @@ -64,7 +64,7 @@ class CNETBridge extends BridgeAbstract { if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '
name = "CoinDesk"; + $this->uri = "http://www.coindesk.com/"; + $this->description = "Returns the 5 newest posts from CoinDesk (full text)"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + + function CoinDeskStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function CoinDeskExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-content', 0)->innertext; + $text = strip_tags($text, '

'); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = CoinDeskStripCDATA($element->find('title', 0)->innertext); + $item->author = $element->find('author', 0)->plaintext; + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = CoinDeskExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'CoinDesk'; + } + + public function getURI(){ + return 'http://www.coindesk.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php index e809d6b..1f79c62 100644 --- a/bridges/CollegeDeFranceBridge.php +++ b/bridges/CollegeDeFranceBridge.php @@ -33,7 +33,7 @@ class CollegeDeFranceBridge extends BridgeAbstract{ * * */ - $html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); + $html = $this->getSimpleHTMLDOM('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); foreach($html->find('a[data-target]') as $element) { $item = new \Item(); $item->title = $element->find('.title', 0)->plaintext; diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index 1177866..9de9b6d 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -10,7 +10,7 @@ class CommonDreamsBridge extends BridgeAbstract{ } private function CommonDreamsExtractContent($url) { - $html3 = $this->file_get_html($url); + 
$html3 = $this->getSimpleHTMLDOM($url); $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; $html3->clear(); unset ($html3); @@ -25,7 +25,7 @@ class CommonDreamsBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); + $html = $this->getSimpleHTMLDOM('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); $limit = 0; foreach($html->find('item') as $element) { if($limit < 4) { diff --git a/bridges/CopieDoubleBridge.php b/bridges/CopieDoubleBridge.php index cd3b4f6..cae9650 100644 --- a/bridges/CopieDoubleBridge.php +++ b/bridges/CopieDoubleBridge.php @@ -13,9 +13,9 @@ class CopieDoubleBridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); + $html = $this->getSimpleHTMLDOM('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); $table = $html->find('table table', 2); - + foreach($table->find('tr') as $element) { $td = $element->find('td', 0); @@ -23,7 +23,7 @@ class CopieDoubleBridge extends BridgeAbstract{ if($td->class == "couleur_1") { $item = new Item(); - + $title = $td->innertext; $pos = strpos($title, "find("a", 0); $item->uri = "http://www.copie-double.com" . 
$a->href; - + $content = str_replace('src="/', 'src="http://www.copie-double.com/',$element->find("td", 0)->innertext); $content = str_replace('href="/', 'href="http://www.copie-double.com/',$content); $item->content = $content; diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index ab1a2c3..74ae314 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -12,49 +12,49 @@ class CourrierInternationalBridge extends BridgeAbstract{ } public function collectData(array $param){ - + $html = ''; - $html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); - + $html = $this->getSimpleHTMLDOM('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); + + - $element = $html->find("article"); - $article_count = 1; + $article_count = 1; foreach($element as $article) { - + $item = new \Item(); - + $item->uri = $article->parent->getAttribute("href"); if(strpos($item->uri, "http") === FALSE) { $item->uri = "http://courrierinternational.fr/".$item->uri; } - - $page = $this->file_get_html($item->uri); + + $page = $this->getSimpleHTMLDOM($item->uri); $cleaner = new HTMLSanitizer(); - + $item->content = $cleaner->sanitize($page->find("div.article-text")[0]); $item->title = strip_tags($article->find(".title")[0]); $dateTime = date_parse($page->find("time")[0]); $item->timestamp = mktime( - $dateTime['hour'], - $dateTime['minute'], - $dateTime['second'], - $dateTime['month'], - $dateTime['day'], + $dateTime['hour'], + $dateTime['minute'], + $dateTime['second'], + $dateTime['month'], + $dateTime['day'], $dateTime['year'] ); - + $this->items[] = $item; $article_count ++; if($article_count > 5) break; - + } diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index 93ae21a..801968c 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -15,7 +15,7 @@ function content_get_html($contents, 
$maxLen=-1, $lowercase = true, $forceTagsCl } class CpasbienBridge extends HttpCachingBridgeAbstract{ - + private $request; public function loadMetadatas() { @@ -44,7 +44,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $html = ''; if (isset($param['q'])) { /* keyword search mode */ $this->request = str_replace(" ","-",trim($param['q'])); - $html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a keyword (?q=...).'); @@ -53,7 +53,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ foreach ($html->find('#gauche',0)->find('div') as $episode) { if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - + $htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href'))); $item = new \Item(); @@ -65,7 +65,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $item->content = $textefiche->text(); } else { - $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); + $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); } $item->id = $episode->find('a', 0)->getAttribute('href'); diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index 68c8876..4947de3 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -28,7 +28,7 @@ class CryptomeBridge extends BridgeAbstract{ // If you want HTTPS access instead, uncomment the following line: //$link = 'https://secure.netsolhost.com/cryptome.org/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Cryptome.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Cryptome.'); if (!empty($param['n'])) { /* 
number of documents */ $num = min(max(1, $param['n']+0), $num); } diff --git a/bridges/DailymotionBridge.php b/bridges/DailymotionBridge.php index c782284..fccaec7 100644 --- a/bridges/DailymotionBridge.php +++ b/bridges/DailymotionBridge.php @@ -47,14 +47,14 @@ class DailymotionBridge extends BridgeAbstract{ function getMetadata($id) { $metadata=array(); - $html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); + $html2 = $this->getSimpleHTMLDOM('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); $metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content'); $metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') ); $metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content'); $metadata['uri'] = $html2->find('meta[property=og:url]', 0)->getAttribute('content'); return $metadata; - } + } $html = ''; @@ -63,15 +63,15 @@ class DailymotionBridge extends BridgeAbstract{ if (isset($param['u'])) { // user timeline mode $this->request = $param['u']; - $html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['p'])) { // playlist mode $this->request = strtok($param['p'], '_'); - $html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['s'])) { // search mode - 
$this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); } else { $this->returnClientError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)'); diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php index e819193..45ad029 100644 --- a/bridges/DanbooruBridge.php +++ b/bridges/DanbooruBridge.php @@ -25,23 +25,23 @@ class DanbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); + $html = $this->getSimpleHTMLDOM("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); foreach($html->find('div[id=posts] article') as $element) { $item = new \Item(); $item->uri = 'http://donmai.us'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); $item->timestamp = time(); $thumbnailUri = 
'http://donmai.us'.$element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Danbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 748190f..41ee8ec 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -15,7 +15,7 @@ class DansTonChatBridge extends BridgeAbstract{ $html = ''; $link = 'http://danstonchat.com/latest.html'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DansTonChat.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DansTonChat.'); foreach($html->find('div.item') as $element) { $item = new \Item(); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index a91ade9..95bfe39 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -82,7 +82,7 @@ class DauphineLibereBridge extends BridgeAbstract { } private function ExtractContent($url, $context) { - $html2 = $this->file_get_html($url, false, $context); + $html2 = $this->getSimpleHTMLDOM($url,false,$context); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; @@ -102,10 +102,10 @@ class DauphineLibereBridge extends BridgeAbstract { if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/'.$this->request.'/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } else { - $html = $this->file_get_html('http://www.ledauphine.com/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } $limit = 0; @@ 
-126,4 +126,4 @@ class DauphineLibereBridge extends BridgeAbstract { return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 5fd7866..bde3f9a 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -15,7 +15,7 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return $string; } - // F***ing quotes from Microsoft Word badly encoded, here was the trick: + // F***ing quotes from Microsoft Word badly encoded, here was the trick: // http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php private function convert_smart_quotes($string) { @@ -35,14 +35,14 @@ class DeveloppezDotComBridge extends BridgeAbstract{ } private function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); $text = utf8_encode($text); return trim($text); } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php index d94ce9a..7e50e8d 100644 --- a/bridges/DilbertBridge.php +++ b/bridges/DilbertBridge.php @@ -13,7 +13,7 @@ class DilbertBridge extends BridgeAbstract { public function collectData(array $param) { - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request Dilbert: '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not 
request Dilbert: '.$this->getURI()); foreach ($html->find('section.comic-item') as $element) { diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php index 76d8a40..7205585 100644 --- a/bridges/DollbooruBridge.php +++ b/bridges/DollbooruBridge.php @@ -26,25 +26,25 @@ class DollbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); + $html = $this->getSimpleHTMLDOM("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); foreach($html->find('div[class=shm-image-list] a') as $element) { $item = new \Item(); $item->uri = 'http://dollbooru.org'.$element->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://dollbooru.org'.$element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Dollbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php index f09b869..5398643 100644 --- a/bridges/DuckDuckGoBridge.php +++ b/bridges/DuckDuckGoBridge.php @@ -22,7 +22,7 @@ class DuckDuckGoBridge extends BridgeAbstract{ $html = ''; $link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DuckDuckGo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DuckDuckGo.'); foreach($html->find('div.results_links') as $element) { $item = new \Item(); diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 07a3469..551c8d9 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -40,11 +40,11 @@ class EZTVBridge extends BridgeAbstract{ $this->returnClientError('You must provide a list of ID (?i=showID1,showID2,...)'); // Loop on show ids - $showList = explode(",",$param['i']); + $showList = explode(",",$param['i']); foreach($showList as $showID){ // Get show page - $html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); + $html = $this->getSimpleHTMLDOM('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); // Loop on each element that look like an episode entry... 
foreach($html->find('.forum_header_border') as $element) { diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php index e519fc5..d1b4995 100644 --- a/bridges/EliteDangerousGalnetBridge.php +++ b/bridges/EliteDangerousGalnetBridge.php @@ -13,7 +13,7 @@ class EliteDangerousGalnetBridge extends BridgeAbstract public function collectData(array $param) { - $html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); foreach($html->find('div.article') as $element) { $item = new Item(); @@ -40,4 +40,4 @@ class EliteDangerousGalnetBridge extends BridgeAbstract { return 3600 * 2; // 2 hours } -} \ No newline at end of file +} diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 31ccecc..ebee2a2 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -33,7 +33,7 @@ class ElsevierBridge extends BridgeAbstract{ $time = $article->find('.article-info', 0); if($time){ $timestring = trim($time->plaintext); - /* + /* The format depends on the age of an article: - Available online 29 July 2016 - July 2016 @@ -63,7 +63,7 @@ class ElsevierBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'http://www.journals.elsevier.com/' . $param['j'] . 
'/recent-articles/'; - $html = file_get_html($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); foreach($html->find('.pod-listing') as $article){ $item = new \Item(); @@ -80,4 +80,4 @@ class ElsevierBridge extends BridgeAbstract{ return 43200; // 12h } } -?> \ No newline at end of file +?> diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php new file mode 100644 index 0000000..cd212c3 --- /dev/null +++ b/bridges/FSBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Futurasciences"; + $this->uri = "http://www.futura-sciences.com"; + $this->description = "Returns the 5 newest posts from FS (full text)"; + $this->update = "03/11/2015"; + + } + + public function collectData(array $param){ + + function FS_StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function FS_ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.fiche-actualite', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = FS_StripCDATA($element->find('title', 0)->innertext); + $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FS_ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Futura Sciences'; + } + + public function getURI(){ + return 'http://www.futura-sciences.com/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git 
a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 37eae8d..a61aaba 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -109,9 +109,9 @@ class FacebookBridge extends BridgeAbstract{ if (is_null($html)) { if (isset($param['u'])) { if (!strpos($param['u'], "/")) { - $html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } else { - $html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } } else { $this->returnClientError('You must specify a Facebook username.'); @@ -155,7 +155,7 @@ class FacebookBridge extends BridgeAbstract{ $this->name = $author; foreach($element->children() as $post) { - + $item = new \Item(); if (count($post->find('abbr')) > 0) { diff --git a/bridges/FierPandaBridge.php b/bridges/FierPandaBridge.php index 9b610fa..22fd142 100644 --- a/bridges/FierPandaBridge.php +++ b/bridges/FierPandaBridge.php @@ -14,7 +14,7 @@ Class FierPandaBridge extends BridgeAbstract{ public function collectData(array $param){ $link = 'http://www.fier-panda.fr/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Fier Panda.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Fier Panda.'); foreach($html->find('div.container-content article') as $element) { $item = new \Item(); diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index 37b619a..f4443f7 100644 --- a/bridges/FlickrExploreBridge.php +++ 
b/bridges/FlickrExploreBridge.php @@ -12,7 +12,7 @@ class FlickrExploreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); foreach($html->find('.photo-list-photo-view') as $element) { // Get the styles diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php index 957efbb..6797d62 100644 --- a/bridges/FlickrTagBridge.php +++ b/bridges/FlickrTagBridge.php @@ -27,16 +27,16 @@ class FlickrTagBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); } - + else { $this->returnClientError('You must specify a keyword or a Flickr username.'); } diff --git a/bridges/FootitoBridge.php 
b/bridges/FootitoBridge.php index e4030a5..35f382c 100644 --- a/bridges/FootitoBridge.php +++ b/bridges/FootitoBridge.php @@ -12,11 +12,11 @@ class FootitoBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); - + $html = $this->getSimpleHTMLDOM('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); + foreach($html->find('div.post') as $element) { $item = new Item(); - + $content = trim($element->innertext); $content = str_replace("content = $content; - + $title = $element->find('.contenu .texte ', 0)->plaintext; $item->title = $title; - + $info = $element->find('div.infos', 0); - + $item->timestamp = strtotime($info->find('time', 0)->datetime); $item->author = $info->find('a.auteur', 0)->plaintext; - + $this->items[] = $item; } } diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index 6c58761..b0daed2 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -32,7 +32,7 @@ class FourchanBridge extends BridgeAbstract{ $this->returnClientError('You must specify the thread URL.'); $url = 'https://boards.4chan.org'.$thread['path'].''; - $html = $this->file_get_html($url) or $this->returnServerError("Could not request 4chan, thread not found"); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request 4chan, thread not found"); foreach($html->find('div.postContainer') as $element) { $item = new \Item(); @@ -59,7 +59,7 @@ class FourchanBridge extends BridgeAbstract{ } $this->items = array_reverse($this->items); } - + public function getCacheDuration(){ return 300; // 5min } diff --git a/bridges/FrandroidBridge.php b/bridges/FrandroidBridge.php new file mode 100644 index 0000000..82bd62f --- /dev/null +++ b/bridges/FrandroidBridge.php @@ -0,0 +1,69 @@ +maintainer = "Daiyousei"; + $this->name = "Frandroid"; + $this->uri = 
"http://www.frandroid.com/"; + $this->description = "Returns the RSS feed from Frandroid (full text articles)"; + $this->update = "2015-03-05"; + + } + + public function collectData(array $param) + { + + function FrandroidStripCDATA($string) + { + $string = str_replace('', '', $string); + return $string; + } + function FrandroidExtractContent($url) + { + $html2 = $this->getSimpleHTMLDOM($url); + $html3 = $html2->find('div.post-content', 0); + $html3->find('div.no-sidebar-ad-top', 0)->outertext = ''; + $ret = $html3->find('div.shortcode-container'); + foreach ($ret as $value) { + $value->outertext = ''; + } + + $html3->find('div#hrr-link', 0)->outertext = ''; + $text = $html3->innertext; + $text = strip_tags($text, '

      '); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Frandroid?format=xml') or $this->returnError('Could not request Frandroid.', 404); + $limit = 0; + + foreach ($html->find('item') as $element) { + if ($limit < 5) { + $item = new \Item(); + $item->title = FrandroidStripCDATA($element->find('title', 0)->innertext); + $item->uri = FrandroidStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FrandroidExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() + { + return 'Frandroid'; + } + + public function getURI() + { + return 'http://www.frandroid.com/'; + } + + public function getCacheDuration() + { + return 300; // 5min + } +} diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 24b5f93..5150562 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -172,13 +172,13 @@ class FuturaSciencesBridge extends BridgeAbstract { $this->returnClientError('Invalid "feed" parameter.'.$url); $url = $this->getURI().'rss/'.$param['feed'].'.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); $limit = 0; foreach($html->find('item') as $element) { if ($limit < 10) { $article_url = str_replace('#xtor=RSS-8', '', StripCDATA($element->find('guid', 0)->plaintext)); - $article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); + $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); $contents = $article->find('div.content', 0)->innertext; foreach (array( diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 
cc59203..20cd0cc 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -74,7 +74,7 @@ class GBAtempBridge extends BridgeAbstract { } private function fetch_post_content($uri, $site_url) { - $html = $this->file_get_html($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); $content = $html->find('div.messageContent', 0)->innertext; return $this->cleanup_post_content($content, $site_url); } @@ -91,7 +91,7 @@ class GBAtempBridge extends BridgeAbstract { } else $this->returnClientError('The provided type filter is invalid. Expecting N, R, T, or F.'); } else $this->returnClientError('Please provide a type filter. Expecting N, R, T, or F.'); - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request GBAtemp.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request GBAtemp.'); if ($typeFilter == 'N') { foreach ($html->find('li[class=news_item full]') as $newsItem) { @@ -106,7 +106,7 @@ class GBAtempBridge extends BridgeAbstract { foreach ($html->find('li.portal_review') as $reviewItem) { $url = $this->uri.$reviewItem->find('a', 0)->href; $title = $reviewItem->find('span.review_title', 0)->plaintext; - $content = $this->file_get_html($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $content = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); $author = $content->find('a.username', 0)->plaintext; $time = intval($this->ExtractFromDelimiters($content->find('abbr.DateTime', 0)->outertext, 'data-time="', '"')); $intro = '

      '.($content->find('div#review_intro', 0)->plaintext).'

      '; diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php index 2d77512..cc2faa0 100644 --- a/bridges/GelbooruBridge.php +++ b/bridges/GelbooruBridge.php @@ -26,27 +26,27 @@ class GelbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 63; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); + $html = $this->getSimpleHTMLDOM("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://gelbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Gelbooru | '.$item->postid; $item->content = '

      Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php index 5605fd1..497c1af 100644 --- a/bridges/GiphyBridge.php +++ b/bridges/GiphyBridge.php @@ -30,10 +30,10 @@ class GiphyBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; $base_url = 'http://giphy.com'; if (isset($param['s'])) { /* keyword search mode */ - $html = $this->file_get_html($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a search worf (?s=...).'); @@ -43,22 +43,22 @@ class GiphyBridge extends BridgeAbstract{ if (isset($param['n'])) { $max = (integer) $param['n']; } - + $limit = 0; $kw = urlencode($param['s']); foreach($html->find('div.hoverable-gif') as $entry) { if($limit < $max) { - $node = $entry->first_child(); - $href = $node->getAttribute('href'); - - $html2 = $this->file_get_html($base_url . $href) or $this->returnServerError('No results for this query.'); + $node = $entry->first_child(); + $href = $node->getAttribute('href'); + + $html2 = $this->getSimpleHTMLDOM($base_url . 
$href) or $this->returnServerError('No results for this query.'); $figure = $html2->getElementByTagName('figure'); $img = $figure->firstChild(); $caption = $figure->lastChild(); - + $item = new \Item(); $item->id = $img->getAttribute('data-gif_id'); - $item->uri = $img->getAttribute('data-bitly_gif_url'); + $item->uri = $img->getAttribute('data-bitly_gif_url'); $item->username = 'Giphy - '.ucfirst($kw); $title = $caption->innertext(); $title = preg_replace('/\s+/', ' ',$title); @@ -74,8 +74,8 @@ class GiphyBridge extends BridgeAbstract{ '' .'' .''; - - $this->items[] = $item; + + $this->items[] = $item; $limit++; } } diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index febbf34..6d22823 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -34,7 +34,7 @@ class GithubIssueBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'https://github.com/'.$param['u'].'/'.$param['p'].'/issues/'.$param['i']; - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Github Issue '.$param['i'].' 
in project '.$param['u'].'/'.$param['p']); foreach($html->find('.js-comment-container') as $comment){ diff --git a/bridges/GitlabCommitsBridge.php b/bridges/GitlabCommitsBridge.php index 85c6332..16862d2 100644 --- a/bridges/GitlabCommitsBridge.php +++ b/bridges/GitlabCommitsBridge.php @@ -44,7 +44,7 @@ class GitlabCommitsBridge extends BridgeAbstract{ $uri.='master'; } - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Gitlab Commits of project '.$param['uri'].'/'.$param['u'].'/'.$param['p']); diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php index ce0d44e..0d900f9 100644 --- a/bridges/GizmodoFRBridge.php +++ b/bridges/GizmodoFRBridge.php @@ -14,7 +14,7 @@ class GizmodoFRBridge extends BridgeAbstract{ public function collectData(array $param){ function GizmodoFRExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; @@ -26,7 +26,7 @@ class GizmodoFRBridge extends BridgeAbstract{ return $text; } - $rssFeed = $this->file_get_html('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 104a13e..9f1ffdb 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -33,8 +33,8 @@ class GooglePlusPostBridge extends BridgeAbstract $this->request = $param['username']; // get content parsed -// $html = $this->file_get_html(__DIR__ . 
'/../posts2.html' - $html = $this->file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' +// $html = $this->getSimpleHTMLDOM(__DIR__ . '/../posts2.html' + $html = $this->getSimpleHTMLDOM(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' // force language , false, stream_context_create(array('http'=> array( 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 0c68a29..e7db5a0 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -35,7 +35,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); } else{ $this->returnClientError('You must specify a keyword (?q=...).'); @@ -45,7 +45,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if( !is_null($emIsRes) ){ foreach($emIsRes->find('li[class=g]') as $element) { $item = new Item(); - + // Extract direct URL from google href (eg. /url?q=...) 
$t = $element->find('a[href]',0)->href; $item->uri = ''.$t; diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php index 51c3e8d..f6585ae 100644 --- a/bridges/GuruMedBridge.php +++ b/bridges/GuruMedBridge.php @@ -16,7 +16,7 @@ class GuruMedBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); + $html = $this->getSimpleHTMLDOM('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php index c6cc6b8..dc4a3bf 100644 --- a/bridges/HDWallpapersBridge.php +++ b/bridges/HDWallpapersBridge.php @@ -48,7 +48,7 @@ class HDWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$category.'/page/'.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches); diff --git a/bridges/HentaiHavenBridge.php b/bridges/HentaiHavenBridge.php index 5f41a1b..25934a0 100644 --- a/bridges/HentaiHavenBridge.php +++ b/bridges/HentaiHavenBridge.php @@ -12,7 +12,7 @@ class HentaiHavenBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); + $html = $this->getSimpleHTMLDOM('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); foreach($html->find('div.zoe-grid') as $element) { $item = new \Item(); $item->uri = $element->find('div.brick-content h3 a', 0)->href; diff --git a/bridges/IdenticaBridge.php b/bridges/IdenticaBridge.php 
index a2c7d90..23f54a9 100644 --- a/bridges/IdenticaBridge.php +++ b/bridges/IdenticaBridge.php @@ -25,7 +25,7 @@ class IdenticaBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify an Identica username (?u=...).'); diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index e8f53a6..0e4359f 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -25,35 +25,35 @@ class InstagramBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); + $html = $this->getSimpleHTMLDOM('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); } else { $this->returnClientError('You must specify a Instagram username (?u=...).'); } - + $innertext = null; - + foreach($html->find('script') as $script) { if ('' === $script->innertext) { continue; } - + $pos = strpos(trim($script->innertext), 'window._sharedData'); if (0 !== $pos) { continue; } - + $innertext = $script->innertext; break; } $json = trim(substr($innertext, $pos+18), ' =;'); $data = json_decode($json); - - - + + + $userMedia = $data->entry_data->ProfilePage[0]->user->media->nodes; foreach($userMedia as $media) @@ -70,7 +70,7 @@ class InstagramBridge extends BridgeAbstract{ } $item->timestamp = $media->date; $this->items[] = $item; - + } } diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index 
0b26211..5b9d103 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -59,7 +59,7 @@ class JapanExpoBridge extends BridgeAbstract{ }; $link = 'http://www.japan-expo-paris.com/fr/actualites'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request JapanExpo: '.$link); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request JapanExpo: '.$link); $fullcontent = (!empty($param['mode']) && $param['mode'] == 'full'); $count = 0; @@ -73,7 +73,7 @@ class JapanExpoBridge extends BridgeAbstract{ if ($fullcontent) { if ($count < 5) { - $article_html = $this->file_get_html($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + $article_html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request JapanExpo: '.$url); $header = $article_html->find('header.pageHeadBox', 0); $timestamp = strtotime($header->find('time', 0)->datetime); $title_html = $header->find('div.section', 0)->next_sibling(); diff --git a/bridges/KonachanBridge.php b/bridges/KonachanBridge.php index 064a1fd..9bb08a5 100644 --- a/bridges/KonachanBridge.php +++ b/bridges/KonachanBridge.php @@ -25,18 +25,18 @@ class KonachanBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); + $html = $this->getSimpleHTMLDOM("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = 
preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class KonachanBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Konachan | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index f532d05..4c516c6 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -16,14 +16,14 @@ class KoreusBridge extends BridgeAbstract{ } private function KoreusExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('p[class=itemText]', 0)->innertext; $text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text)); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php index 6072030..2fb6b4c 100755 --- a/bridges/LeBonCoinBridge.php +++ b/bridges/LeBonCoinBridge.php @@ -134,10 +134,10 @@ class LeBonCoinBridge extends BridgeAbstract{ "type" : "list", "values" : [ { "name" : "---- Select ----", "value" : "" }, - + { "name" : "", "value" : "" }, { "name" : "EMPLOI", "value" : "_emploi_" }, - + { "name" : "", "value" : "" }, { "name" : "VEHICULES", "value" : "_vehicules_" }, { "name" : "    Voitures", "value" : "voitures" }, @@ -149,14 +149,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Equipement Caravaning", "value" : "equipement_caravaning" }, { "name" : "    Nautisme", "value" : "nautisme" }, { "name" : "    Equipement Nautisme", "value" : "equipement_nautisme" }, - + { "name" : "", "value" : "" }, { "name" : "IMMOBILIER", "value" : "_immobilier_" }, { "name" : "    Ventes immobilieres", "value" : "ventes_immobilieres" }, { "name" : "    Locations", "value" : "locations" }, { "name" : "    Colocations", "value" : "colocations" }, { "name" : 
"    Bureaux & Commerces", "value" : "bureaux_commerces" }, - + { "name" : "", "value" : "" }, { "name" : "VACANCES", "value" : "_vacances_" }, { "name" : "    Locations gites", "value" : "locations_gites" }, @@ -164,14 +164,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Campings", "value" : "campings" }, { "name" : "    Hôtels", "value" : "hotels" }, { "name" : "    Hébergements insolites", "value" : "hebergements_insolites" }, - + { "name" : "", "value" : "" }, { "name" : "MULTIMEDIA", "value" : "_multimedia_" }, { "name" : "    Informatique", "value" : "informatique" }, { "name" : "    Consoles & Jeux vidéo", "value" : "consoles_jeux_video" }, { "name" : "    Image & Son", "value" : "image_son" }, { "name" : "    Téléphonie", "value" : "telephonie" }, - + { "name" : "", "value" : "" }, { "name" : "LOISIRS", "value" : "_loisirs_" }, { "name" : "    DVD / Films", "value" : "dvd_films" }, @@ -184,7 +184,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Collection", "value" : "collection" }, { "name" : "    Jeux & Jouets", "value" : "jeux_jouets" }, { "name" : "    Vins & Gastronomie", "value" : "vins_gastronomie" }, - + { "name" : "", "value" : "" }, { "name" : "MATERIEL PROFESSIONNEL", "value" : "_materiel_professionnel_" }, { "name" : "    Materiel Agricole", "value" : "materiel_agricole" }, @@ -196,7 +196,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Fournitures de Bureau", "value" : "fournitures_de_bureau" }, { "name" : "    Commerces & Marchés", "value" : "commerces_marches" }, { "name" : "    Matériel médical", "value" : "materiel_medical" }, - + { "name" : "", "value" : "" }, { "name" : "SERVICES", "value" : "_services_" }, { "name" : "    Prestations de services", "value" : "prestations_de_services" }, @@ -204,7 +204,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Evénements", "value" : "evenements" }, { "name" : "    Cours particuliers", "value" : "cours_particuliers" }, { "name" 
: "    Covoiturage", "value" : "covoiturage" }, - + { "name" : "", "value" : "" }, { "name" : "MAISON", "value" : "_maison_" }, { "name" : "    Ameublement", "value" : "ameublement" }, @@ -220,7 +220,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Montres & Bijoux", "value" : "montres_bijoux" }, { "name" : "    Equipement bébé", "value" : "equipement_bebe" }, { "name" : "    Vêtements bébé", "value" : "vetements_bebe" }, - + { "name" : "", "value" : "" }, { "name" : "AUTRES", "value" : "autres" } ] @@ -239,7 +239,7 @@ class LeBonCoinBridge extends BridgeAbstract{ else { $link = 'http://www.leboncoin.fr/' . $param['c'] . '/offres/' . $param['r'] . '/?f=a&th=1&q=' . urlencode($param['k']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LeBonCoin.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LeBonCoin.'); $list = $html->find('.tabsContent', 0); if($list === NULL) { @@ -275,4 +275,4 @@ class LeBonCoinBridge extends BridgeAbstract{ $this->items[] = $item; } } -} \ No newline at end of file +} diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index 24c2685..afbdad3 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -16,7 +16,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } private function LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.post-content', 0)->innertext; foreach($articleHTMLContent->find('a.more') as $element) { @@ -38,7 +38,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); + $rssFeed = 
$this->getSimpleHTMLDOM('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 89914b5..d97d1c6 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -34,7 +34,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { } $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -44,7 +44,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); $article_content = CleanArticle($article_html->find('div#article', 0)->innertext); $article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext; diff --git a/bridges/LeMotDuJourBridge.php b/bridges/LeMotDuJourBridge.php new file mode 100644 index 0000000..f9f1229 --- /dev/null +++ b/bridges/LeMotDuJourBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "LeMotDuJour Bridge"; + $this->uri = "http://www.lemotdujour.com/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) 
{ + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-contenu', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeMotDuJour Bridge'; + } + + public function getURI(){ + return 'http://lemotdujour.com/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + // return 0; // 2 hours + } +} diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index ddde42d..7511cda 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -12,14 +12,14 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); - + $html = $this->getSimpleHTMLDOM('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); + foreach($html->find('div.blog-post') as $element) { $item = new Item(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); $url = $temp->href; - + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg @@ -29,21 +29,21 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ $image->src = $img_src; } $content = $temp->innertext; - + $auteur = 
$temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 8145cf7..7501d51 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -13,7 +13,7 @@ class LichessBridge extends BridgeAbstract public function collectData(array $param) { - $xml_feed = $this->file_get_html('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); + $xml_feed = $this->getSimpleHTMLDOM('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); $posts_loaded = 0; foreach($xml_feed->find('entry') as $entry) @@ -37,7 +37,7 @@ class LichessBridge extends BridgeAbstract private function retrieve_lichess_post($blog_post_uri) { - $blog_post_html = $this->file_get_html($blog_post_uri); + $blog_post_html = $this->getSimpleHTMLDOM($blog_post_uri); $blog_post_div = $blog_post_html->find('#lichess_blog', 0); $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; diff --git a/bridges/LinkedInCompany.php b/bridges/LinkedInCompany.php index 1f94301..950524a 100644 --- a/bridges/LinkedInCompany.php +++ b/bridges/LinkedInCompany.php @@ -22,7 +22,7 @@ class LinkedInCompany extends BridgeAbstract{ $html = ''; $link = 'https://www.linkedin.com/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LinkedIn.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LinkedIn.'); foreach($html->find('//*[@id="my-feed-post"]/li') as $element) { $title = $element->find('span.share-body', 0)->innertext; diff --git a/bridges/LolibooruBridge.php b/bridges/LolibooruBridge.php index 3f4cae1..b10c39d 
100644 --- a/bridges/LolibooruBridge.php +++ b/bridges/LolibooruBridge.php @@ -26,18 +26,18 @@ class LolibooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); + $html = $this->getSimpleHTMLDOM("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -46,7 +46,7 @@ class LolibooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Lolibooru | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/MalikiBridge.php b/bridges/MalikiBridge.php new file mode 100644 index 0000000..66c8081 --- /dev/null +++ b/bridges/MalikiBridge.php @@ -0,0 +1,60 @@ +maintainer = "mitsukarenai"; + $this->name = "Maliki"; + $this->uri = "http://www.maliki.com/"; + $this->description = "Returns Maliki's newest strips"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM('http://www.maliki.com/') or $this->returnError('Could not request Maliki.', 404); + $count=0; + $latest=1; $latest_title=""; + $latest = $html->find('div.conteneur_page a', 1)->href; + $latest_title = $html->find('div.conteneur_page img', 0)->title; + + function MalikiExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = 'http://www.maliki.com/'.$html2->find('img', 0)->src; + $text = '
      '.$html2->find('div.imageetnews', 0)->plaintext; + return $text; + } + + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$latest; + $item->title = $latest_title; + $item->timestamp = time(); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + + + foreach($html->find('div.boite_strip') as $element) { + if(!empty($element->find('a',0)->href) and $count < 3) { + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$element->find('a',0)->href; + $item->title = $element->find('img',0)->title; + $item->timestamp = strtotime(str_replace('/', '-', $element->find('span.stylepetit', 0)->innertext)); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + $count++; + } + } + } + + public function getName(){ + return 'Maliki'; + } + + public function getURI(){ + return 'http://www.maliki.com/'; + } + + public function getCacheDuration(){ + return 86400*6; // 6 days + } +} diff --git a/bridges/MemoLinuxBridge.php b/bridges/MemoLinuxBridge.php new file mode 100644 index 0000000..1c41f14 --- /dev/null +++ b/bridges/MemoLinuxBridge.php @@ -0,0 +1,58 @@ +maintainer = "qwertygc"; + $this->name = "MemoLinux"; + $this->uri = "http://memo-linux.com/"; + $this->description = "Returns the 10 newest posts from MemoLinux (full text)"; + $this->update = "2015-01-30"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + $text = preg_replace('@]*?>.*?

@si', '', $text); + $text = preg_replace("/getSimpleHTMLDOM('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + } + + public function getName(){ + return 'MemoLinux'; + } + + public function getURI(){ + return 'http://memo-linux.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/MilbooruBridge.php b/bridges/MilbooruBridge.php index 3ead724..7e5fdaa 100644 --- a/bridges/MilbooruBridge.php +++ b/bridges/MilbooruBridge.php @@ -25,25 +25,25 @@ class MilbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); + $html = $this->getSimpleHTMLDOM("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) { $item = new \Item(); $item->uri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); + 
$item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('img', 0)->src; $item->tags = $element->find('a', 0)->getAttribute('data-tags'); $item->title = 'Milbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index d60621c..3cc0d81 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -9,8 +9,8 @@ class MondeDiploBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index f4419fe..e28c0e3 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -10,13 +10,13 @@ class MsnMondeBridge extends BridgeAbstract{ } private function MsnMondeExtractContent($url, &$item) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $item->content = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; $item->timestamp = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime); } public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MsnMonde.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MsnMonde.'); $limit = 0; foreach($html->find('.smalla') as $article) { if($limit < 10) { diff --git a/bridges/MspabooruBridge.php b/bridges/MspabooruBridge.php index a67d566..2e465fd 100644 --- a/bridges/MspabooruBridge.php +++ b/bridges/MspabooruBridge.php @@ -26,27 +26,27 @@ class MspabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = 
(int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); + $html = $this->getSimpleHTMLDOM("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://mspabooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Mspabooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 01e46c1..568e857 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -36,7 +36,7 @@ class NakedSecurityBridge extends BridgeAbstract { } $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -44,7 +44,7 @@ class NakedSecurityBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); @@ -68,4 +68,4 @@ class NakedSecurityBridge extends BridgeAbstract { } } } -} \ No newline at end of file +} diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8bf7d9b..0e0e5f8 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -13,7 +13,7 @@ class NasaApodBridge extends BridgeAbstract{ public function collectData(array $param) { - $html = $this->file_get_html('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); $list = explode("
", $html->find('b', 0)->innertext); for($i = 0; $i < 3;$i++) @@ -25,7 +25,7 @@ class NasaApodBridge extends BridgeAbstract{ $uri = 'http://apod.nasa.gov/apod/'.$uri_page; $item->uri = $uri; - $picture_html = $this->file_get_html($uri); + $picture_html = $this->getSimpleHTMLDOM($uri); $picture_html_string = $picture_html->innertext; //Extract image and explanation diff --git a/bridges/NeuviemeArtBridge.php b/bridges/NeuviemeArtBridge.php index 96375ec..b8a0ce8 100644 --- a/bridges/NeuviemeArtBridge.php +++ b/bridges/NeuviemeArtBridge.php @@ -20,7 +20,7 @@ class NeuviemeArtBridge extends BridgeAbstract { } $feedUrl = 'http://www.9emeart.fr/9emeart.rss'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -28,7 +28,7 @@ class NeuviemeArtBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 5b121f7..db74a24 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -16,7 +16,7 @@ class NextInpactBridge extends BridgeAbstract { } private function ExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; @@ -27,7 +27,7 @@ class NextInpactBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); + $html = $this->getSimpleHTMLDOM('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index 5960ffc..aaf006c 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -10,21 +10,21 @@ class NiceMatinBridge extends BridgeAbstract{ } private function NiceMatinExtractContent($url) { - $html = $this->file_get_html($url); + $html = $this->getSimpleHTMLDOM($url); if(!$html) $this->returnServerError('Could not acquire content from url: ' . $url . '!'); - + $content = $html->find('article', 0); if(!$content) $this->returnServerError('Could not find \'section\'!'); - + $text = preg_replace('#(.*?)#is', '', $content->innertext); $text = strip_tags($text, '

'); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); + $html = $this->getSimpleHTMLDOM('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php index f7dedd1..2a5b960 100644 --- a/bridges/NovelUpdatesBridge.php +++ b/bridges/NovelUpdatesBridge.php @@ -26,7 +26,7 @@ class NovelUpdatesBridge extends BridgeAbstract{ if(strpos($thread['path'], 'series/') === FALSE) $this->returnClientError('You must specify the novel URL.'); $url = 'http://www.novelupdates.com'.$thread['path'].''; - $fullhtml = $this->file_get_html($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); + $fullhtml = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); $this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext; // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 // forcefully removes tbody diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index cc949c2..f5671b7 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -42,7 +42,6 @@ class NumeramaBridge extends BridgeAbstract{ $limit++; } } - } public function getCacheDuration() { diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php index 4a0c3e0..4c093ab 100644 --- a/bridges/OpenClassroomsBridge.php +++ b/bridges/OpenClassroomsBridge.php @@ -64,11 +64,11 @@ class OpenClassroomsBridge extends BridgeAbstract{ { $this->returnServerError('Error: You must chose a category.'); } - + $html = ''; $link = 'https://openclassrooms.com/courses?categories='.$param['u'].'&title=&sort=updatedAt+desc'; - $html = 
$this->file_get_html($link) or $this->returnServerError('Could not request OpenClassrooms.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request OpenClassrooms.'); foreach($html->find('.courseListItem') as $element) { $item = new \Item(); diff --git a/bridges/OpenTheoryBridge.php b/bridges/OpenTheoryBridge.php new file mode 100644 index 0000000..cccfaf3 --- /dev/null +++ b/bridges/OpenTheoryBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Opentheory"; + $this->uri = "http://open1theory.com"; + $this->description = "Returns the 5 newest posts from OpenTheory (full text)"; + $this->update = "02-08-2014"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'OpenTheory'; + } + + public function getURI(){ + return 'http://open1theory.com/feed'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git a/bridges/ParuVenduImmoBridge.php b/bridges/ParuVenduImmoBridge.php index 97285a5..65947df 100644 --- a/bridges/ParuVenduImmoBridge.php +++ b/bridges/ParuVenduImmoBridge.php @@ -47,7 +47,7 @@ class 
ParuVenduImmoBridge extends BridgeAbstract $appartment = '&tbApp=1&tbDup=1&tbChb=1&tbLof=1&tbAtl=1&tbPla=1'; $maison = '&tbMai=1&tbVil=1&tbCha=1&tbPro=1&tbHot=1&tbMou=1&tbFer=1'; $link = $this->uri.'/immobilier/annonceimmofo/liste/listeAnnonces?tt=1'.$appartment.$maison; - + if (isset($param['minarea'])) { $this->request .= ' '.$param['minarea'].' m2'; $link .= '&sur0='.urlencode($param['minarea']); @@ -56,39 +56,39 @@ class ParuVenduImmoBridge extends BridgeAbstract if (isset($param['maxprice'])) { $link .= '&px1='.urlencode($param['maxprice']); } - + if (isset($param['pa'])) { $link .= '&pa='.urlencode($param['pa']); } - + if (isset($param['lo'])) { $this->request .= ' In: '.$param['lo']; $link .= '&lo='.urlencode($param['lo']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request paruvendu.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request paruvendu.'); foreach($html->find('div.annonce a') as $element) { - + if (!$element->title) { continue; } - + $img =''; foreach($element->find('span.img img') as $img) { if ($img->original) { $img = ''; } } - + $desc = $element->find('span.desc')[0]->innertext; $desc = str_replace("voir l'annonce", '', $desc); - + $price = $element->find('span.price')[0]->innertext; list($href) = explode('#', $element->href); - + $item = new \Item(); $item->uri = $this->uri.$href; $item->title = $element->title; diff --git a/bridges/PickyWallpapersBridge.php b/bridges/PickyWallpapersBridge.php index 2e44cb2..a4feffe 100644 --- a/bridges/PickyWallpapersBridge.php +++ b/bridges/PickyWallpapersBridge.php @@ -55,7 +55,7 @@ class PickyWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->resolution.'/'.$this->category.'/'.(!empty($this->subcategory)?$this->subcategory.'/':'').'page-'.$page.'/'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = 
$this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches); diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 70b4105..1012ea2 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -38,7 +38,7 @@ class PinterestBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; if (isset($param['u']) || isset($param['b'])) { - + if (empty($param['u'])) { $this->returnClientError('You must specify a Pinterest username (?u=...).'); @@ -48,33 +48,33 @@ class PinterestBridge extends BridgeAbstract{ { $this->returnClientError('You must specify a Pinterest board for this username (?b=...).'); } - + $this->username = $param['u']; $this->board = $param['b']; - $html = $this->file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); } else if (isset($param['q'])) { $this->query = $param['q']; - $html = $this->file_get_html($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); } - + else { $this->returnClientError('You must specify a Pinterest username and a board name (?u=...&b=...).'); } - - + + foreach($html->find('div.pinWrapper') as $div) { $a = $div->find('a.pinImageWrapper',0); - + $img = $a->find('img', 0); - + $item = new \Item(); $item->uri = $this->getURI().$a->getAttribute('href'); $item->content = ''; - - + + if (isset($this->query)) { $avatar = $div->find('div.creditImg', 0)->find('img', 0); @@ 
-84,25 +84,25 @@ class PinterestBridge extends BridgeAbstract{ $username = $div->find('div.creditName', 0); $board = $div->find('div.creditTitle', 0); - - $item->username =$username->innertext; + + $item->username =$username->innertext; $item->fullname = $board->innertext; $item->avatar = $avatar; - + $item->content .= '
'.$item->username.''; $item->content .= '
'.$item->fullname; } - + $item->title = $img->getAttribute('alt'); - + //$item->timestamp = $media->created_time; $this->items[] = $item; - + } } public function getName(){ - + if (isset($this->query)) { return $this->query .' - Pinterest'; diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php index fc536cd..28a7a68 100644 --- a/bridges/PlanetLibreBridge.php +++ b/bridges/PlanetLibreBridge.php @@ -10,13 +10,13 @@ class PlanetLibreBridge extends BridgeAbstract{ } private function PlanetLibreExtractContent($url){ - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('div[class="post-text"]', 0)->innertext; return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); + $html = $this->getSimpleHTMLDOM('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); $limit = 0; foreach($html->find('div.post') as $element) { if($limit < 5) { diff --git a/bridges/ProjectMGameBridge.php b/bridges/ProjectMGameBridge.php index 865b38d..080029d 100644 --- a/bridges/ProjectMGameBridge.php +++ b/bridges/ProjectMGameBridge.php @@ -14,7 +14,7 @@ class ProjectMGameBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); + $html = $this->getSimpleHTMLDOM('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); foreach($html->find('article') as $article) { $item = new \Item(); diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php index 433792e..490ed7a 100644 --- a/bridges/RTBFBridge.php +++ b/bridges/RTBFBridge.php @@ -25,7 +25,7 @@ class RTBFBridge extends BridgeAbstract { $count = 0; if (isset($param['c'])) { - $html = 
$this->file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); + $html = $this->getSimpleHTMLDOM('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); foreach($html->find('section[id!=widget-ml-avoiraussi-] .rtbf-media-grid article') as $element) { if($count < $limit) { diff --git a/bridges/RaymondBridge.php b/bridges/RaymondBridge.php new file mode 100644 index 0000000..d255052 --- /dev/null +++ b/bridges/RaymondBridge.php @@ -0,0 +1,53 @@ +maintainer = "pit-fgfjiudghdf"; + $this->name = "Raymond"; + $this->uri = "http://www.raymond.cc"; + $this->description = "Returns the 3 newest posts from Raymond.cc (full text)"; + $this->update = "2014-05-26"; + + } + + public function collectData(array $param){ + function raymondStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function raymondExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('/class="ad".*/', '', $text); + $text = strip_tags($text, '

'); + $text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404); + $limit = 0; + foreach($html->find('item') as $element) { + if($limit < 3) { + $item = new \Item(); + $item->title = raymondStripCDATA($element->find('title', 0)->innertext); + $item->uri = raymondStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = raymondExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + public function getName(){ + return 'raymond'; + } + public function getURI(){ + return 'http://www.raymond.cc/blog'; + } + public function getCacheDuration(){ + return 3600*12; // 12 hour + } +} + diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index 8031582..2a563f6 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -66,7 +66,7 @@ class Releases3DSBridge extends BridgeAbstract { //Retrieve cover art and short desc from IGN? 
$ignResult = false; $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = ''; $ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name); - if ($ignResult = $this->file_get_html($ignSearchUrl)) { + if ($ignResult = $this->getSimpleHTMLDOM($ignSearchUrl)) { $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 8c1f553..e93cec4 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -10,7 +10,7 @@ class ReporterreBridge extends BridgeAbstract{ } private function ExtractContentReporterre($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); foreach($html2->find('div[style=text-align:justify]') as $e) { $text = $e->outertext; @@ -27,7 +27,7 @@ class ReporterreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); + $html = $this->getSimpleHTMLDOM('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/Rue89Bridge.php b/bridges/Rue89Bridge.php index 54c86d4..5dd813c 100644 --- a/bridges/Rue89Bridge.php +++ b/bridges/Rue89Bridge.php @@ -22,7 +22,7 @@ class Rue89Bridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); + $html = $this->getSimpleHTMLDOM('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); $limit = 0; foreach($html->find('item') as $element) { 
diff --git a/bridges/Rule34Bridge.php b/bridges/Rule34Bridge.php index b868d91..b0d5f88 100644 --- a/bridges/Rule34Bridge.php +++ b/bridges/Rule34Bridge.php @@ -25,27 +25,27 @@ class Rule34Bridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); + $html = $this->getSimpleHTMLDOM("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://rule34.xxx/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Rule34 | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/Rule34pahealBridge.php b/bridges/Rule34pahealBridge.php index 77b0319..42014df 100644 --- a/bridges/Rule34pahealBridge.php +++ b/bridges/Rule34pahealBridge.php @@ -26,25 +26,25 @@ class Rule34pahealBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); + $html = $this->getSimpleHTMLDOM("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); foreach($html->find('div[class=shm-image-list] div[class=shm-thumb]') as $element) { $item = new \Item(); $item->uri = 'http://rule34.paheal.net'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Rule34paheal | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SafebooruBridge.php b/bridges/SafebooruBridge.php index cd0f1a3..b4b45f5 100644 --- a/bridges/SafebooruBridge.php +++ b/bridges/SafebooruBridge.php @@ -26,27 +26,27 @@ class SafebooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 40; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); + $html = $this->getSimpleHTMLDOM("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://safebooru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Safebooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SakugabooruBridge.php b/bridges/SakugabooruBridge.php index 8e7e3aa..aea7133 100644 --- a/bridges/SakugabooruBridge.php +++ b/bridges/SakugabooruBridge.php @@ -25,18 +25,18 @@ class SakugabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); + $html = $this->getSimpleHTMLDOM("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class SakugabooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Sakugabooru | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/ScilogsBridge.php b/bridges/ScilogsBridge.php new file mode 100644 index 0000000..7186be4 --- /dev/null +++ b/bridges/ScilogsBridge.php @@ -0,0 +1,54 @@ +maintainer = "qwertygc"; + $this->name = "Scilogs Bridge"; + $this->uri = "http://www.scilogs.fr/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function ScilogsStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ScilogsExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entrybody', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.scilogs.fr/?wpmu-feed=posts') or $this->returnError('Could not request Scilogs.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = ScilogsStripCDATA($element->find('title', 0)->innertext); + $item->uri = ScilogsStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ScilogsExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Scilogs Bridge'; + } + + public function getURI(){ + return 'http://scilogs.fr/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + } +} diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php index 864d39c..e2f631f 100644 --- a/bridges/ScmbBridge.php +++ b/bridges/ScmbBridge.php @@ -10,20 +10,20 @@ class ScmbBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - + public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); - + $html = 
$this->getSimpleHTMLDOM('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); + foreach($html->find('article') as $article) { $item = new \Item(); $item->uri = 'http://secouchermoinsbete.fr'.$article->find('p.summary a',0)->href; $item->title = $article->find('header h1 a',0)->innertext; - + $article->find('span.read-more',0)->outertext=''; // remove text "En savoir plus" from anecdote content $content = $article->find('p.summary a',0)->innertext; $content =substr($content,0,strlen($content)-17); // remove superfluous spaces at the end - + // get publication date $str_date = $article->find('time',0)->datetime; list($date, $time) = explode(' ', $str_date); @@ -31,8 +31,8 @@ class ScmbBridge extends BridgeAbstract{ list($h, $i) = explode(':', $time); $timestamp = mktime($h,$i,0,$m,$d,$y); $item->timestamp = $timestamp; - - + + $item->content = $content; $this->items[] = $item; } diff --git a/bridges/ScoopItBridge.php b/bridges/ScoopItBridge.php index ba3fc0b..d7b4ed3 100644 --- a/bridges/ScoopItBridge.php +++ b/bridges/ScoopItBridge.php @@ -24,9 +24,9 @@ class ScoopItBridge extends BridgeAbstract{ if ($param['u'] != '') { $this->request = $param['u']; $link = 'http://scoop.it/search?q=' .urlencode($this->request); - - $html = $this->file_get_html($link) or $this->returnServerError('Could not request ScoopIt. for : ' . $link); - + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request ScoopIt. for : ' . 
$link); + foreach($html->find('div.post-view') as $element) { $item = new Item(); $item->uri = $element->find('a', 0)->href; diff --git a/bridges/SegfaultMintBridge.php b/bridges/SegfaultMintBridge.php new file mode 100644 index 0000000..b2ede5d --- /dev/null +++ b/bridges/SegfaultMintBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "SegfaultMint"; + $this->uri = "http://segfault.linuxmint.com/"; + $this->description = "Returns the 5 newest posts from SegfaultMint (full text)"; + $this->update = "2014-07-05"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.post-bodycopy', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Segfault Mint'; + } + + public function getURI(){ + return 'http://segfault.linuxmint.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*24; // 24 hours + } +} diff --git a/bridges/Sexactu.php b/bridges/Sexactu.php index 9bb82c9..2972ede 100644 --- a/bridges/Sexactu.php +++ b/bridges/Sexactu.php @@ -15,10 +15,10 @@ class Sexactu extends BridgeAbstract{ $find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'novembre', 'décembre'); $replace = 
array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'); - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); foreach($html->find('.content-holder') as $contentHolder) { - // only use first list as second one only contains pages numbers + // only use first list as second one only contains pages numbers $articles = $contentHolder->find('ul', 0); foreach($articles->find('li') as $element) { // if you ask about that method_exists, there seems to be a bug in simple html dom @@ -38,7 +38,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $dateText = $titleTimestamp->innertext; $dateText = substr($dateText, strpos($dateText,',')+1); $dateText = str_replace($find, $replace, strtolower($dateText)); - $date = strtotime($dateText); + $date = strtotime($dateText); $item->timestamp = $date; $item->author = "Maïa Mazaurette"; @@ -50,9 +50,9 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $item->content = $elementText->innertext; $this->items[] = $item; } - + } - + } } } @@ -64,7 +64,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', public function getCacheDuration(){ return 7200; // 2h hours } - + private function correctCase($str) { $sentences=explode('.', mb_strtolower($str, "UTF-8")); $str=""; @@ -73,7 +73,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', { //upper case first char $sentence=ucfirst(trim($sentence)); - + //append sentence to output $str=$str.$sep.$sentence; $sep=". 
"; diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index df582bb..e1bf84a 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -20,7 +20,7 @@ class SiliconBridge extends BridgeAbstract { } $feedUrl = 'http://www.silicon.fr/feed'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -30,7 +30,7 @@ class SiliconBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); //Build article contents from corresponding elements $thumbnailUri = $element->find('enclosure', 0)->url; diff --git a/bridges/StripeAPIChangeLogBridge.php b/bridges/StripeAPIChangeLogBridge.php index 0fb1d85..334955f 100644 --- a/bridges/StripeAPIChangeLogBridge.php +++ b/bridges/StripeAPIChangeLogBridge.php @@ -16,7 +16,7 @@ class StripeAPIChangeLogBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://stripe.com/docs/upgrades') + $html = $this->getSimpleHTMLDOM('https://stripe.com/docs/upgrades') or $this->returnServerError('No results for Stripe API Changelog'); diff --git a/bridges/SuperbWallpapersBridge.php b/bridges/SuperbWallpapersBridge.php index 36928be..eb54dea 100644 --- a/bridges/SuperbWallpapersBridge.php +++ b/bridges/SuperbWallpapersBridge.php @@ -47,12 +47,12 @@ class SuperbWallpapersBridge extends BridgeAbstract { // Get last page number $link = 
$baseUri.'/'.$this->category.'/9999.html'; - $html = $this->file_get_html($link); + $html = $this->getSimpleHTMLDOM($link); $lastpage = min($html->find('.paging .cpage', 0)->innertext(), ceil($max/36)); for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'/'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); foreach($html->find('.wpl .i a') as $element) { $thumbnail = $element->find('img', 0); diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 22f02c8..83f7a30 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -36,7 +36,7 @@ class T411Bridge extends BridgeAbstract { //Retrieve torrent listing from search results, which does not contain torrent description $url = $this->uri.'torrents/search/?'.$param['search'].'&order=added&type=desc'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request t411: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request t411: '.$url); $results = $html->find('table.results', 0); if (is_null($results)) $this->returnServerError('No results from t411: '.$url); @@ -57,7 +57,7 @@ class T411Bridge extends BridgeAbstract { $item_date = strtotime($element->find('dd', 0)->plaintext); //Retrieve full description from torrent page - if ($item_html = $this->file_get_html($item_uri)) { + if ($item_html = $this->getSimpleHTMLDOM($item_uri)) { //Retrieve data from page contents $item_desc = $item_html->find('div.description', 0); diff --git a/bridges/TagBoardBridge.php b/bridges/TagBoardBridge.php index 7382a52..f3bd4c9 100644 --- a/bridges/TagBoardBridge.php +++ b/bridges/TagBoardBridge.php @@ -23,8 +23,8 @@ class TagBoardBridge extends BridgeAbstract{ $html = ''; $this->request = $param['u']; $link = 'https://post-cache.tagboard.com/search/' .$this->request; - 
- $html = $this->file_get_html($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); $parsed_json = json_decode($html); foreach($parsed_json->{'posts'} as $element) { @@ -49,4 +49,4 @@ class TagBoardBridge extends BridgeAbstract{ return 21600; // 6 hours } } - + diff --git a/bridges/TbibBridge.php b/bridges/TbibBridge.php index aeacdd6..59fa3e3 100644 --- a/bridges/TbibBridge.php +++ b/bridges/TbibBridge.php @@ -25,27 +25,27 @@ class TbibBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); + $html = $this->getSimpleHTMLDOM("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://tbib.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Tbib | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index b8ca7c5..f5d4b7f 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -12,17 +12,17 @@ class TheCodingLoveBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); - + $html = $this->getSimpleHTMLDOM('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); + foreach($html->find('div.post') as $element) { $item = new Item(); $temp = $element->find('h3 a', 0); - + $titre = $temp->innertext; $url = $temp->href; - + $temp = $element->find('div.bodytype', 0); - + // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); foreach($images as $image){ @@ -33,18 +33,18 @@ class TheCodingLoveBridge extends BridgeAbstract{ $auteur = $temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/ThePirateBayBridge.php b/bridges/ThePirateBayBridge.php index 35b1391..4f5650d 100644 --- a/bridges/ThePirateBayBridge.php +++ b/bridges/ThePirateBayBridge.php @@ -58,9 +58,9 @@ class ThePirateBayBridge extends BridgeAbstract{ if (!isset($param['q'])) $this->returnClientError('You must specify keywords (?q=...)'); - $keywordsList = explode(";",$param['q']); + $keywordsList = explode(";",$param['q']); foreach($keywordsList as $keywords){ - $html = $this->file_get_html('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); + $html = 
$this->getSimpleHTMLDOM('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); if ($html->find('table#searchResult', 0) == FALSE) $this->returnServerError('No result for query '.$keywords); diff --git a/bridges/TuxboardBridge.php b/bridges/TuxboardBridge.php new file mode 100644 index 0000000..301e9b6 --- /dev/null +++ b/bridges/TuxboardBridge.php @@ -0,0 +1,64 @@ +maintainer = "superbaillot.net"; + $this->name = "Tuxboard"; + $this->uri = "http://www.tuxboard.com/"; + $this->description = "Tuxboard"; + $this->update = "2014-07-08"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('article#page', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + + $html = $this->getSimpleHTMLDOM('http://www.tuxboard.com/feed/atom/') or $this->returnError('Could not request Tuxboard.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + + + } + + public function getName(){ + return 'Tuxboard'; + } + + public function getURI(){ + return 'http://www.tuxboard.com'; + } + + public function getDescription(){ + return 'Tuxboard via rss-bridge'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + } +} +?> diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 24970ca..8510806 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -9,7 +9,7 @@ class TwitterBridge extends BridgeAbstract{ 
$this->description = "Returns tweets by keyword/hashtag or user name"; $this->update = '2016-08-17'; - $this->parameters["global"] = + $this->parameters["global"] = '[ { "name" : "Hide profile pictures", @@ -48,12 +48,12 @@ class TwitterBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; if (isset($param['q'])) { /* keyword search mode */ - $html = $this->file_get_html('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ - $html = $this->file_get_html('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify a keyword (?q=...) or a Twitter username (?u=...).'); @@ -68,20 +68,20 @@ class TwitterBridge extends BridgeAbstract{ // extract username and sanitize $item->username = $tweet->getAttribute('data-screen-name'); // extract fullname (pseudonym) - $item->fullname = $tweet->getAttribute('data-name'); + $item->fullname = $tweet->getAttribute('data-name'); // get author $item->author = $item->fullname . ' (@' . $item->username . 
')'; // get avatar link - $item->avatar = $tweet->find('img', 0)->src; + $item->avatar = $tweet->find('img', 0)->src; // get TweetID $item->id = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); + // get tweet link + $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); // extract tweet timestamp $item->timestamp = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); // generate the title - $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); - + $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); + // processing content links foreach($tweet->find('a') as $link) { if($link->hasAttribute('data-expanded-url') ) { diff --git a/bridges/UnsplashBridge.php b/bridges/UnsplashBridge.php index adf7544..492de7b 100644 --- a/bridges/UnsplashBridge.php +++ b/bridges/UnsplashBridge.php @@ -44,7 +44,7 @@ class UnsplashBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/grid?page='.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/=(\d+)$/', $html->find('.pagination > a[!class]', -1)->href, $matches); diff --git a/bridges/ViadeoCompany.php b/bridges/ViadeoCompany.php index fb4badb..2eef3b6 100644 --- a/bridges/ViadeoCompany.php +++ b/bridges/ViadeoCompany.php @@ -22,7 +22,7 @@ class ViadeoCompany extends BridgeAbstract{ $html = ''; $link = 'http://www.viadeo.com/fr/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Viadeo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Viadeo.'); foreach($html->find('//*[@id="company-newsfeed"]/ul/li') as $element) { $title 
= $element->find('p', 0)->innertext; diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php index b871444..8cfb552 100644 --- a/bridges/VineBridge.php +++ b/bridges/VineBridge.php @@ -24,7 +24,7 @@ class VineBridge extends BridgeAbstract { $html = ''; $uri = 'http://vine.co/u/'.$param['u'].'?mode=list'; - $html = $this->file_get_html($uri) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for this query.'); foreach($html->find('.post') as $element) { $a = $element->find('a', 0); diff --git a/bridges/WallpaperStopBridge.php b/bridges/WallpaperStopBridge.php index 1888333..36ba674 100644 --- a/bridges/WallpaperStopBridge.php +++ b/bridges/WallpaperStopBridge.php @@ -56,7 +56,7 @@ class WallpaperStopBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'-wallpaper/'.(!empty($this->subcategory)?$this->subcategory.'-wallpaper/':'').'desktop-wallpaper-'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/-(\d+)\.html$/', $html->find('.pagination > .last', 0)->href, $matches); diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index b7739e4..c32ca8f 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -28,18 +28,18 @@ class WhydBridge extends BridgeAbstract{ { $this->request = $param['u']; if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) { // is input the userid ? 
- $html = $this->file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); } else { // input may be the username - $html = $this->file_get_html('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); for ($j = 0; $j < 5; $j++) { if (strtolower($html->find('div.user', $j)->find('a',0)->plaintext) == strtolower($this->request)) { - $html = $this->file_get_html('http://www.whyd.com' . $html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com' . 
$html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); break; } } } $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; - } + } else { $this->returnClientError('You must specify username'); diff --git a/bridges/WikipediaDEBridge.php b/bridges/WikipediaDEBridge.php new file mode 100644 index 0000000..9ccd0ca --- /dev/null +++ b/bridges/WikipediaDEBridge.php @@ -0,0 +1,48 @@ +maintainer = "cnlpete"; + $this->name = "Wikipedia DE Today's Featured Article..."; + $this->uri = "https://de.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2015-11-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://de.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://de.wikipedia.org'; + $link = '/wiki/Wikipedia:Hauptseite'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia DE.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + $element->find('div', -1)->outertext = ''; + + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + + $html2 = $this->getSimpleHTMLDOM($item->uri) or $this->returnError('Could not request Wikipedia DE '.$item->title.'.', 404); + $element2 = $html2->find('div[id=mw-content-text]', 0); + $item->content = str_replace('href="/', 'href="'.$host.'/', $element2->innertext); + + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia DE "Today\'s Featured Article"'; + } + + public function getURI(){ + return 'https://de.wikipedia.org/wiki/Wikipedia:Hauptseite'; + } + + public function getCacheDuration(){ + return 3600*8; // 8 hours + } +} diff --git a/bridges/WikipediaENBridge.php b/bridges/WikipediaENBridge.php new file mode 100644 index 0000000..ac8ab29 --- 
/dev/null +++ b/bridges/WikipediaENBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EN 'Today's Featured Article...'"; + $this->uri = "https://en.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://en.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://en.wikipedia.org'; + $link = '/wiki/Main_Page'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EN.', 404); + + $element = $html->find('div[id=mp-tfa]', 0); + // Clean the bottom of the featured article + $element->find('div', -1)->outertext = ''; + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EN "Today\'s Featued Article"'; + } + + public function getURI(){ + return 'https://en.wikipedia.org/wiki/Main_Page'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WikipediaEOBridge.php b/bridges/WikipediaEOBridge.php new file mode 100644 index 0000000..a90c503 --- /dev/null +++ b/bridges/WikipediaEOBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EO 'Artikolo de la semajno'"; + $this->uri = "https://eo.wikipedia.org/"; + $this->description = "Returns the highlighted eo.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://eo.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://eo.wikipedia.org'; + $link = '/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + + $html = 
$this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EO.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + // Link to article + $link = $element->find('p', -2)->find('a', 0); + $item = new \Item(); + $item->uri = $host.$link->href; + $item->title = $link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EO "Artikolo de la semajno"'; + } + + public function getURI(){ + return 'https://eo.wikipedia.org/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php new file mode 100644 index 0000000..8ee65cd --- /dev/null +++ b/bridges/WikipediaFRBridge.php @@ -0,0 +1,46 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia FR 'Lumière sur...'"; + $this->uri = "https://fr.wikipedia.org/"; + $this->description = "Returns the highlighted fr.wikipedia.org article."; + $this->update = "2016-06-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://fr.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://fr.wikipedia.org'; + $link = '/wiki/Wikip%C3%A9dia:Accueil_principal'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia FR.', 404); + + $element = $html->find('div[id=mf-lumieresur]', 0); + # Use the "Lire la suite" link to dependably get the title of the article + # usually it's a child of a li.BA element (Bon article) + # occasionally it's a li.AdQ (Article de qualité) + $lirelasuite_link = $element->find('.BA > i > a, .AdQ > i > a', 0); + $item = new \Item(); + $item->uri = $host.$lirelasuite_link->href; + $item->title = $lirelasuite_link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + 
$this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia FR "Lumière sur..."'; + } + + public function getURI(){ + return 'https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index f848101..ef728e5 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -33,7 +33,7 @@ class WordPressBridge extends BridgeAbstract { return WORDPRESS_TYPE_ATOM; // Make ATOM default } - // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') + // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') private function ReplaceLinkTagsWithUrlTags($element){ // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) $element_text = $element->outertext; @@ -64,7 +64,7 @@ class WordPressBridge extends BridgeAbstract { } $this->url = $this->url.'/feed/atom'; - $html = $this->file_get_html($this->url) or $this->returnServerError("Could not request {$this->url}."); + $html = $this->getSimpleHTMLDOM($this->url) or $this->returnServerError("Could not request {$this->url}."); // Notice: We requested an ATOM feed, however some sites return RSS feeds instead! 
$type = $this->DetectContentType($html); @@ -97,7 +97,7 @@ class WordPressBridge extends BridgeAbstract { $item->timestamp = strtotime($article->find('updated', 0)->innertext); } - $article_html = $this->file_get_html($item->uri); + $article_html = $this->getSimpleHTMLDOM($item->uri); // Attempt to find most common content div if(empty($item->content)){ @@ -126,7 +126,7 @@ class WordPressBridge extends BridgeAbstract { $this->items[] = $item; $i++; } - } + } } else { $this->returnServerError("Sorry, {$this->url} doesn't seem to be a Wordpress blog."); } diff --git a/bridges/WorldOfTanks.php b/bridges/WorldOfTanks.php index 77fa7b1..fcfc57b 100644 --- a/bridges/WorldOfTanks.php +++ b/bridges/WorldOfTanks.php @@ -70,15 +70,15 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{ } else { $this->uri = WORLD_OF_TANKS.$this->lang.NEWS.'pc-browser/'.$param['category']."/"; } - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); $this->message("loaded HTML from ".$this->getURI()); - // customize name + // customize name $this->name = $html->find('title', 0)->innertext; foreach($html->find('.b-imgblock_ico') as $infoLink) { $this->parseLine($infoLink); } } - + private function parseLine($infoLink) { $item = new Item(); $item->uri = WORLD_OF_TANKS.$infoLink->href; diff --git a/bridges/XbooruBridge.php b/bridges/XbooruBridge.php index 691b46c..4becacc 100644 --- a/bridges/XbooruBridge.php +++ b/bridges/XbooruBridge.php @@ -25,27 +25,27 @@ class XbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = 
urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); + $html = $this->getSimpleHTMLDOM("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://xbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Xbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/YandereBridge.php b/bridges/YandereBridge.php index 223c941..39afb25 100644 --- a/bridges/YandereBridge.php +++ b/bridges/YandereBridge.php @@ -25,18 +25,18 @@ class YandereBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); + $html = $this->getSimpleHTMLDOM("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class YandereBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Yandere | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 02ded71..52be476 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -1,6 +1,6 @@ file_get_html($this->uri."watch?v=$vid"); + $html = $this->getSimpleHTMLDOM($this->uri."watch?v=$vid"); $author = $html->innertext; $author = substr($author, strpos($author, '"author=') + 8); $author = substr($author, 0, strpos($author, '\u0026')); @@ -138,9 +138,9 @@ class YoutubeBridge extends BridgeAbstract { $url_listing = $this->uri.'channel/'.urlencode($this->request).'/videos'; } if (!empty($url_feed) && !empty($url_listing)) { - if ($xml = $this->file_get_html($url_feed)) { + if ($xml = $this->getSimpleHTMLDOM($url_feed)) { $this->ytBridgeParseXmlFeed($xml); - } else if ($html = $this->file_get_html($url_listing)) { + } else if ($html = $this->getSimpleHTMLDOM($url_listing)) { $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); } else $this->returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); } @@ -148,15 +148,15 @@ class YoutubeBridge extends BridgeAbstract { else if (isset($param['p'])) { /* playlist mode */ $this->request = $param['p']; $url_listing = $this->uri.'playlist?list='.urlencode($this->request); - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); $this->request = 'Playlist: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } else if (isset($param['s'])) { /* search mode */ - $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); $url_listing = $this->uri.'results?search_query='.urlencode($this->request).'&page='.$page.'&filters=video&search_sort=video_date_uploaded'; - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3'); $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index 0c381dc..d0a53b2 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -12,7 +12,7 @@ class ZatazBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request ' . $this->uri); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request ' . 
$this->uri); $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); foreach ($recent_posts as $article) { @@ -24,7 +24,7 @@ class ZatazBridge extends BridgeAbstract { } private function getDetails($uri) { - $html = $this->file_get_html($uri) or exit; + $html = $this->getSimpleHTMLDOM($uri) or exit; $item = new \Item(); diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index d6e6714..7d4b0be 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -31,7 +31,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { $category = '/'.$param['category'].'/'; $url = $this->getURI().$category.'rss.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); foreach($html->find('item') as $element) { $item = new \Item();