From 3dd63ab6fd99a74f2d84a9fd1eefdde039510b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:11:28 +0200 Subject: [PATCH 01/14] enable proxy usage for individual bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- index.php | 6 +++++- lib/Bridge.php | 3 ++- lib/HTMLUtils.php | 22 +++++++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/index.php b/index.php index 9440625..b26741c 100644 --- a/index.php +++ b/index.php @@ -100,7 +100,7 @@ try{ // whitelist control if(!Bridge::isWhitelisted($whitelist_selection, $bridge)) { throw new \HttpException('This bridge is not whitelisted', 401); - die; + die; } $cache = Cache::create('FileCache'); @@ -111,6 +111,10 @@ try{ } else { $bridge->setCache($cache); // just add disable cache to your query to disable caching } + if(isset($_REQUEST['_p'])){ + $bridge->useProxy=true; + } + $bridge->setDatas($_REQUEST); $bridge->loadMetadatas(); $bridge->setDatas($_REQUEST); // Data transformation diff --git a/lib/Bridge.php b/lib/Bridge.php index a8732e0..2af6c15 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -21,6 +21,7 @@ abstract class BridgeAbstract implements BridgeInterface{ public $uri = ""; public $description = 'No description provided'; public $maintainer = 'No maintainer'; + public $useProxy = false; public $parameters = array(); /** @@ -111,7 +112,7 @@ abstract class BridgeAbstract implements BridgeInterface{ ), ); - if(defined('PROXY_URL')) { + if(defined('PROXY_URL') && $this->useProxy) { $contextOptions['http']['proxy'] = PROXY_URL; $contextOptions['http']['request_fulluri'] = true; diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index ade0ca4..7547dfc 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -28,6 +28,12 @@ CARD; $card .= HTMLUtils::getFormHeader($bridgeName); if ($isActive){ + if(defined('PROXY_URL')){ + $idArg = 'arg-' . urlencode($bridgeName) . 
'-' . urlencode('proxy') . '-' . urlencode('_p'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; + } + $card .= HTMLUtils::getHelperButtonsFormat($formats); } else { $card .= 'Inactive'; @@ -40,13 +46,13 @@ CARD; if($hasGlobalParameter) $globalParameters = json_decode($bridgeElement->parameters['global'], true); - + foreach($bridgeElement->parameters as $parameterName => $parameter){ $parameter = json_decode($parameter, true); if(!is_numeric($parameterName) && $parameterName == 'global') continue; - + if($hasGlobalParameter) $parameter = array_merge($parameter, $globalParameters); @@ -82,7 +88,7 @@ CARD; $card .= '
' . PHP_EOL; } else if($inputEntry['type'] == 'list') { $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; + } + $card .= HTMLUtils::getHelperButtonsFormat($formats); } else { $card .= 'Inactive'; } - + $card .= '' . PHP_EOL; } @@ -161,7 +173,7 @@ class HTMLSanitizer { $element->outertext = ''; } else { foreach($element->getAllAttributes() as $attributeName => $attribute) { - if(!in_array($attributeName, $this->keptAttributes)) + if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName); } } From 09d4e21bdcc88d060d47d02218efb34d03622bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:18:30 +0200 Subject: [PATCH 02/14] possibility to hide proxy address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- index.php | 2 ++ lib/HTMLUtils.php | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/index.php b/index.php index b26741c..b21cd4c 100644 --- a/index.php +++ b/index.php @@ -11,6 +11,8 @@ TODO : */ //define('PROXY_URL', 'tcp://192.168.0.0:28'); +// keep PROXY_NAME empty to display PROXY_URL instead +define('PROXY_NAME','Hidden Proxy Name'); date_default_timezone_set('UTC'); error_reporting(0); diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 7547dfc..7cf1ec3 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -31,7 +31,7 @@ CARD; if(defined('PROXY_URL')){ $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); @@ -109,7 +109,7 @@ CARD; if(defined('PROXY_URL')){ $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); From d71674d2ba1fa613f42cd1c168bdbba4601505d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:49:43 +0200 Subject: [PATCH 03/14] proxy settings must be the service provider decision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Therefore, the provider decides whether the service uses a proxy, and whether users are allowed to disable it on a per-bridge basis. Signed-off-by: Pierre Mazière --- index.php | 8 ++++++-- lib/Bridge.php | 2 +- lib/HTMLUtils.php | 16 ++++++++-------- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/index.php b/index.php index b21cd4c..778a4b2 100644 --- a/index.php +++ b/index.php @@ -11,6 +11,8 @@ TODO : */ //define('PROXY_URL', 'tcp://192.168.0.0:28'); +// Set to true if you allow users to disable proxy usage for specific bridges +define('PROXY_BYBRIDGE',false); // keep PROXY_NAME empty to display PROXY_URL instead define('PROXY_NAME','Hidden Proxy Name'); @@ -113,8 +115,10 @@ try{ } else { $bridge->setCache($cache); // just add disable cache to your query to disable caching } - if(isset($_REQUEST['_p'])){ - $bridge->useProxy=true; + if(defined('PROXY_URL') && PROXY_BYBRIDGE && + isset($_REQUEST['_noproxy']) + ){ + $bridge->useProxy=false; } $bridge->setDatas($_REQUEST); $bridge->loadMetadatas(); diff --git a/lib/Bridge.php b/lib/Bridge.php index 2af6c15..664fc93 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -21,7 +21,7 @@ abstract class BridgeAbstract implements BridgeInterface{ public $uri = ""; public $description = 'No description provided'; public $maintainer = 'No maintainer'; - public $useProxy = false; + public $useProxy = true; public $parameters = array(); /** diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 7cf1ec3..1ab7799 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -28,10 +28,10 @@ CARD; $card .= 
HTMLUtils::getFormHeader($bridgeName); if ($isActive){ - if(defined('PROXY_URL')){ - $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); - $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxyoff') . '-' . urlencode('_noproxy'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); @@ -106,10 +106,10 @@ CARD; } if ($isActive){ - if(defined('PROXY_URL')){ - $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); - $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxyoff') . '-' . urlencode('_noproxy'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); From f43bbda83e9328832915f017f7a8cf83fcb2f50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:05:01 +0200 Subject: [PATCH 04/14] core: split and rename BridgeAbstract::file_get_html MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to have one method to get the data, potentially using the proxy if defined, and one method to get the Simple DOM HTML object from these data, with a more informative name Signed-off-by: Pierre Mazière --- lib/Bridge.php | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 664fc93..2daca2a 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -105,7 +105,7 @@ abstract class BridgeAbstract implements BridgeInterface{ return $this; } - protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ + protected function getContents($url,$use_include_path=false,$context=null,$offset=0,$maxlen=null){ $contextOptions = array( 'http' => array( 'user_agent'=>ini_get('user_agent') @@ -125,9 +125,22 @@ abstract class BridgeAbstract implements BridgeInterface{ }; } } - return file_get_html($url,$use_include_path,$context,$offset,$maxLen, - $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText, - $defaultSpanText); + + if(is_null($maxlen)){ + $content=@file_get_contents($url, $use_include_path, $context, $offset); + }else{ + $content=@file_get_contents($url, $use_include_path, $context, $offset,$maxlen); + } + + if($content===false){ + $this->message('Cant\'t download '.$url ); + } + return $content; + } + + protected function getSimpleHTMLDOM($url, $use_include_path = false, $context=null, $offset = 0, 
$maxLen=null, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ + $content=$this->getContents($url,$use_include_path,$context,$offset,$maxLen); + return str_get_html($content,$lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText,$defaultSpanText); } } From 3c0d13c1bb72a87f0d671692e205f45f04d94eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:06:35 +0200 Subject: [PATCH 05/14] bridges: use BridgeAbstract::getSimpleHTMLDOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of BridgeAbstract::file_get_html Signed-off-by: Pierre Mazière --- bridges/ABCTabsBridge.php | 4 +- bridges/AcrimedBridge.php | 2 +- bridges/AllocineFRBridge.php | 8 +-- bridges/AnimeUltimeBridge.php | 4 +- bridges/BandcampBridge.php | 2 +- bridges/BastaBridge.php | 8 +-- bridges/BlaguesDeMerdeBridge.php | 4 +- bridges/BooruprojectBridge.php | 14 ++--- bridges/CADBridge.php | 7 ++- bridges/CNETBridge.php | 4 +- bridges/CoinDeskBridge.php | 56 ++++++++++++++++++++ bridges/CollegeDeFranceBridge.php | 2 +- bridges/CommonDreamsBridge.php | 4 +- bridges/CopieDoubleBridge.php | 8 +-- bridges/CourrierInternationalBridge.php | 34 ++++++------ bridges/CpasbienBridge.php | 8 +-- bridges/CryptomeBridge.php | 2 +- bridges/DailymotionBridge.php | 12 ++--- bridges/DanbooruBridge.php | 14 ++--- bridges/DansTonChatBridge.php | 2 +- bridges/DauphineLibereBridge.php | 8 +-- bridges/DeveloppezDotComBridge.php | 6 +-- bridges/DilbertBridge.php | 2 +- bridges/DollbooruBridge.php | 14 ++--- bridges/DuckDuckGoBridge.php | 2 +- bridges/EZTVBridge.php | 4 +- bridges/EliteDangerousGalnetBridge.php | 4 +- bridges/ElsevierBridge.php | 6 +-- bridges/FSBridge.php | 56 ++++++++++++++++++++ bridges/FacebookBridge.php | 6 +-- bridges/FierPandaBridge.php | 2 +- bridges/FlickrExploreBridge.php | 2 +- 
bridges/FlickrTagBridge.php | 8 +-- bridges/FootitoBridge.php | 16 +++--- bridges/FourchanBridge.php | 4 +- bridges/FrandroidBridge.php | 69 +++++++++++++++++++++++++ bridges/FuturaSciencesBridge.php | 4 +- bridges/GBAtempBridge.php | 6 +-- bridges/GelbooruBridge.php | 14 ++--- bridges/GiphyBridge.php | 22 ++++---- bridges/GithubIssueBridge.php | 2 +- bridges/GitlabCommitsBridge.php | 2 +- bridges/GizmodoFRBridge.php | 4 +- bridges/GooglePlusPostBridge.php | 4 +- bridges/GoogleSearchBridge.php | 4 +- bridges/GuruMedBridge.php | 2 +- bridges/HDWallpapersBridge.php | 2 +- bridges/HentaiHavenBridge.php | 2 +- bridges/IdenticaBridge.php | 2 +- bridges/InstagramBridge.php | 18 +++---- bridges/JapanExpoBridge.php | 4 +- bridges/KonachanBridge.php | 14 ++--- bridges/KoreusBridge.php | 4 +- bridges/LeBonCoinBridge.php | 24 ++++----- bridges/LeJournalDuGeekBridge.php | 4 +- bridges/LeMondeInformatiqueBridge.php | 4 +- bridges/LeMotDuJourBridge.php | 55 ++++++++++++++++++++ bridges/LesJoiesDuCodeBridge.php | 16 +++--- bridges/LichessBridge.php | 4 +- bridges/LinkedInCompany.php | 2 +- bridges/LolibooruBridge.php | 14 ++--- bridges/MalikiBridge.php | 60 +++++++++++++++++++++ bridges/MemoLinuxBridge.php | 58 +++++++++++++++++++++ bridges/MilbooruBridge.php | 14 ++--- bridges/MondeDiploBridge.php | 4 +- bridges/MsnMondeBridge.php | 4 +- bridges/MspabooruBridge.php | 14 ++--- bridges/NakedSecurityBridge.php | 6 +-- bridges/NasaApodBridge.php | 4 +- bridges/NeuviemeArtBridge.php | 4 +- bridges/NextInpactBridge.php | 4 +- bridges/NiceMatinBridge.php | 8 +-- bridges/NovelUpdatesBridge.php | 2 +- bridges/NumeramaBridge.php | 1 - bridges/OpenClassroomsBridge.php | 4 +- bridges/OpenTheoryBridge.php | 56 ++++++++++++++++++++ bridges/ParuVenduImmoBridge.php | 18 +++---- bridges/PickyWallpapersBridge.php | 2 +- bridges/PinterestBridge.php | 36 ++++++------- bridges/PlanetLibreBridge.php | 4 +- bridges/ProjectMGameBridge.php | 2 +- bridges/RTBFBridge.php | 2 +- bridges/RaymondBridge.php | 
53 +++++++++++++++++++ bridges/Releases3DSBridge.php | 2 +- bridges/ReporterreBridge.php | 4 +- bridges/Rue89Bridge.php | 2 +- bridges/Rule34Bridge.php | 14 ++--- bridges/Rule34pahealBridge.php | 14 ++--- bridges/SafebooruBridge.php | 14 ++--- bridges/SakugabooruBridge.php | 14 ++--- bridges/ScilogsBridge.php | 54 +++++++++++++++++++ bridges/ScmbBridge.php | 14 ++--- bridges/ScoopItBridge.php | 6 +-- bridges/SegfaultMintBridge.php | 55 ++++++++++++++++++++ bridges/Sexactu.php | 14 ++--- bridges/SiliconBridge.php | 4 +- bridges/StripeAPIChangeLogBridge.php | 2 +- bridges/SuperbWallpapersBridge.php | 4 +- bridges/T411Bridge.php | 4 +- bridges/TagBoardBridge.php | 6 +-- bridges/TbibBridge.php | 14 ++--- bridges/TheCodingLoveBridge.php | 18 +++---- bridges/ThePirateBayBridge.php | 4 +- bridges/TuxboardBridge.php | 64 +++++++++++++++++++++++ bridges/TwitterBridge.php | 20 +++---- bridges/UnsplashBridge.php | 2 +- bridges/ViadeoCompany.php | 2 +- bridges/VineBridge.php | 2 +- bridges/WallpaperStopBridge.php | 2 +- bridges/WhydBridge.php | 8 +-- bridges/WikipediaDEBridge.php | 48 +++++++++++++++++ bridges/WikipediaENBridge.php | 44 ++++++++++++++++ bridges/WikipediaEOBridge.php | 44 ++++++++++++++++ bridges/WikipediaFRBridge.php | 46 +++++++++++++++++ bridges/WordPressBridge.php | 8 +-- bridges/WorldOfTanks.php | 6 +-- bridges/XbooruBridge.php | 14 ++--- bridges/YandereBridge.php | 14 ++--- bridges/YoutubeBridge.php | 14 ++--- bridges/ZatazBridge.php | 4 +- bridges/ZoneTelechargementBridge.php | 2 +- 121 files changed, 1212 insertions(+), 396 deletions(-) create mode 100644 bridges/CoinDeskBridge.php create mode 100644 bridges/FSBridge.php create mode 100644 bridges/FrandroidBridge.php create mode 100644 bridges/LeMotDuJourBridge.php create mode 100644 bridges/MalikiBridge.php create mode 100644 bridges/MemoLinuxBridge.php create mode 100644 bridges/OpenTheoryBridge.php create mode 100644 bridges/RaymondBridge.php create mode 100644 bridges/ScilogsBridge.php create mode 
100644 bridges/SegfaultMintBridge.php create mode 100644 bridges/TuxboardBridge.php create mode 100644 bridges/WikipediaDEBridge.php create mode 100644 bridges/WikipediaENBridge.php create mode 100644 bridges/WikipediaEOBridge.php create mode 100644 bridges/WikipediaFRBridge.php diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php index 0c9bc68..4fd2487 100644 --- a/bridges/ABCTabsBridge.php +++ b/bridges/ABCTabsBridge.php @@ -15,9 +15,9 @@ class ABCTabsBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); $table = $html->find('table#myTable', 0)->children(1); - + foreach ($table->find('tr') as $tab) { $item = new \Item(); diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 85e93fe..fce0ce1 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -29,7 +29,7 @@ class AcrimedBridge extends RssExpander{ $item->title = trim($newsItem->title); $item->timestamp = strtotime($dc->date); - $articlePage = $this->file_get_html($newsItem->link); + $articlePage = $this->getSimpleHTMLDOM($newsItem->link); $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index ef01746..baaf4a1 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -9,7 +9,7 @@ class AllocineFRBridge extends BridgeAbstract{ $this->description = "Bridge for allocine.fr"; $this->update = '2016-08-17'; - $this->parameters[] = + $this->parameters[] = '[ { "name" : "category", @@ -18,7 +18,7 @@ class AllocineFRBridge extends BridgeAbstract{ 
"required" : true, "exampleValue" : "Faux Raccord", "title" : "Select your category", - "values" : + "values" : [ { "name" : "Faux Raccord", @@ -64,12 +64,12 @@ class AllocineFRBridge extends BridgeAbstract{ // Update bridge name to match selection $this->name .= ' : ' . $category; - $html = $this->file_get_html($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); foreach($html->find('figure.media-meta-fig') as $element) { $item = new Item(); - + $title = $element->find('div.titlebar h3.title a', 0); $content = trim($element->innertext); $figCaption = strpos($content, $category); diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index 0f305e3..7eae61c 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -67,7 +67,7 @@ class AnimeUltimeBridge extends BridgeAbstract { //Retrive page contents $website = 'http://www.anime-ultime.net/'; $url = $website.'history-0-1/'.$requestFilter; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); //Relases are sorted by day : process each day individually foreach ($html->find('div.history', 0)->find('h3') as $daySection) { @@ -110,7 +110,7 @@ class AnimeUltimeBridge extends BridgeAbstract { $item->content = $item_description; $this->items[] = $item; $processedOK++; - + //Stop processing once limit is reached if ($processedOK >= 10) return; diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index 1e6c2dc..71bf36f 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -26,7 +26,7 @@ class BandcampBridge extends BridgeAbstract{ $html = ''; if (isset($param['tag'])) { $this->request = $param['tag']; - $html = 
$this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify tag (/tag/...)'); diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php index 35c9ae4..6af218f 100644 --- a/bridges/BastaBridge.php +++ b/bridges/BastaBridge.php @@ -7,14 +7,14 @@ class BastaBridge extends BridgeAbstract{ $this->description = "Returns the newest articles."; $this->update = '2016-08-17'; } - + public function collectData(array $param){ // Replaces all relative image URLs by absolute URLs. Relative URLs always start with 'local/'! function ReplaceImageUrl($content){ return preg_replace('/src=["\']{1}([^"\']+)/ims', 'src=\'http://www.bastamag.net/$1\'', $content); } - - $html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); + + $html = $this->getSimpleHTMLDOM('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); $limit = 0; foreach($html->find('item') as $element) { @@ -34,4 +34,4 @@ class BastaBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/BlaguesDeMerdeBridge.php b/bridges/BlaguesDeMerdeBridge.php index 7fb5d45..be36c0c 100644 --- a/bridges/BlaguesDeMerdeBridge.php +++ b/bridges/BlaguesDeMerdeBridge.php @@ -12,8 +12,8 @@ class BlaguesDeMerdeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); - + $html = $this->getSimpleHTMLDOM('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); + 
foreach($html->find('article.joke_contener') as $element) { $item = new Item(); $temp = $element->find('a'); diff --git a/bridges/BooruprojectBridge.php b/bridges/BooruprojectBridge.php index 17c1982..b1423db 100644 --- a/bridges/BooruprojectBridge.php +++ b/bridges/BooruprojectBridge.php @@ -32,29 +32,29 @@ class BooruprojectBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0; $tags = ''; - if (!empty($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (!empty($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 20; } - if (!empty($param['t'])) { - $tags = '&tags='.urlencode($param['t']); + if (!empty($param['t'])) { + $tags = '&tags='.urlencode($param['t']); } if (empty($param['i'])) { $this->returnServerError('Please enter a ***.booru.org instance.'); } - $html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); + $html = $this->getSimpleHTMLDOM("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://'.$param['i'].'.booru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); $item->timestamp = time(); $item->tags = $element->find('img', 0)->getAttribute('title'); $item->title = 'Booruproject '.$param['i'].' | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php index 17aa807..d62041a 100644 --- a/bridges/CADBridge.php +++ b/bridges/CADBridge.php @@ -9,7 +9,7 @@ class CADBridge extends BridgeAbstract{ } private function CADExtractContent($url) { - $html3 = $this->file_get_html($url); + $html3 = $this->getSimpleHTMLDOM($url); // The request might fail due to missing https support or wrong URL if($html3 == false) @@ -27,7 +27,6 @@ class CADBridge extends BridgeAbstract{ default: return 'Daily comic not released yet'; } - $img = implode ($url2[0]); $html3->clear(); unset ($html3); @@ -45,7 +44,7 @@ class CADBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); + $html = $this->getSimpleHTMLDOM('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); $limit = 0; foreach($html->find('item') as $element) { @@ -67,4 +66,4 @@ class CADBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 6fb4d93..7be650e 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -51,7 +51,7 @@ class CNETBridge extends BridgeAbstract { $this->topicName = $param['topic']; $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? 
'' : 'topics/'.$this->topicName.'/'); - $html = $this->file_get_html($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); + $html = $this->getSimpleHTMLDOM($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); $limit = 0; foreach($html->find('div.assetBody') as $element) { @@ -64,7 +64,7 @@ class CNETBridge extends BridgeAbstract { if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '
name = "CoinDesk"; + $this->uri = "http://www.coindesk.com/"; + $this->description = "Returns the 5 newest posts from CoinDesk (full text)"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + + function CoinDeskStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function CoinDeskExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-content', 0)->innertext; + $text = strip_tags($text, '

'); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = CoinDeskStripCDATA($element->find('title', 0)->innertext); + $item->author = $element->find('author', 0)->plaintext; + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = CoinDeskExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'CoinDesk'; + } + + public function getURI(){ + return 'http://www.coindesk.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php index e809d6b..1f79c62 100644 --- a/bridges/CollegeDeFranceBridge.php +++ b/bridges/CollegeDeFranceBridge.php @@ -33,7 +33,7 @@ class CollegeDeFranceBridge extends BridgeAbstract{ * * */ - $html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); + $html = $this->getSimpleHTMLDOM('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); foreach($html->find('a[data-target]') as $element) { $item = new \Item(); $item->title = $element->find('.title', 0)->plaintext; diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index 1177866..9de9b6d 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -10,7 +10,7 @@ class CommonDreamsBridge extends BridgeAbstract{ } private function CommonDreamsExtractContent($url) { - $html3 = $this->file_get_html($url); + 
$html3 = $this->getSimpleHTMLDOM($url); $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; $html3->clear(); unset ($html3); @@ -25,7 +25,7 @@ class CommonDreamsBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); + $html = $this->getSimpleHTMLDOM('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); $limit = 0; foreach($html->find('item') as $element) { if($limit < 4) { diff --git a/bridges/CopieDoubleBridge.php b/bridges/CopieDoubleBridge.php index cd3b4f6..cae9650 100644 --- a/bridges/CopieDoubleBridge.php +++ b/bridges/CopieDoubleBridge.php @@ -13,9 +13,9 @@ class CopieDoubleBridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); + $html = $this->getSimpleHTMLDOM('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); $table = $html->find('table table', 2); - + foreach($table->find('tr') as $element) { $td = $element->find('td', 0); @@ -23,7 +23,7 @@ class CopieDoubleBridge extends BridgeAbstract{ if($td->class == "couleur_1") { $item = new Item(); - + $title = $td->innertext; $pos = strpos($title, "find("a", 0); $item->uri = "http://www.copie-double.com" . 
$a->href; - + $content = str_replace('src="/', 'src="http://www.copie-double.com/',$element->find("td", 0)->innertext); $content = str_replace('href="/', 'href="http://www.copie-double.com/',$content); $item->content = $content; diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index ab1a2c3..74ae314 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -12,49 +12,49 @@ class CourrierInternationalBridge extends BridgeAbstract{ } public function collectData(array $param){ - + $html = ''; - $html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); - + $html = $this->getSimpleHTMLDOM('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); + + - $element = $html->find("article"); - $article_count = 1; + $article_count = 1; foreach($element as $article) { - + $item = new \Item(); - + $item->uri = $article->parent->getAttribute("href"); if(strpos($item->uri, "http") === FALSE) { $item->uri = "http://courrierinternational.fr/".$item->uri; } - - $page = $this->file_get_html($item->uri); + + $page = $this->getSimpleHTMLDOM($item->uri); $cleaner = new HTMLSanitizer(); - + $item->content = $cleaner->sanitize($page->find("div.article-text")[0]); $item->title = strip_tags($article->find(".title")[0]); $dateTime = date_parse($page->find("time")[0]); $item->timestamp = mktime( - $dateTime['hour'], - $dateTime['minute'], - $dateTime['second'], - $dateTime['month'], - $dateTime['day'], + $dateTime['hour'], + $dateTime['minute'], + $dateTime['second'], + $dateTime['month'], + $dateTime['day'], $dateTime['year'] ); - + $this->items[] = $item; $article_count ++; if($article_count > 5) break; - + } diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index 93ae21a..801968c 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -15,7 +15,7 @@ function content_get_html($contents, 
$maxLen=-1, $lowercase = true, $forceTagsCl } class CpasbienBridge extends HttpCachingBridgeAbstract{ - + private $request; public function loadMetadatas() { @@ -44,7 +44,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $html = ''; if (isset($param['q'])) { /* keyword search mode */ $this->request = str_replace(" ","-",trim($param['q'])); - $html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a keyword (?q=...).'); @@ -53,7 +53,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ foreach ($html->find('#gauche',0)->find('div') as $episode) { if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - + $htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href'))); $item = new \Item(); @@ -65,7 +65,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $item->content = $textefiche->text(); } else { - $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); + $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); } $item->id = $episode->find('a', 0)->getAttribute('href'); diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index 68c8876..4947de3 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -28,7 +28,7 @@ class CryptomeBridge extends BridgeAbstract{ // If you want HTTPS access instead, uncomment the following line: //$link = 'https://secure.netsolhost.com/cryptome.org/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Cryptome.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Cryptome.'); if (!empty($param['n'])) { /* 
number of documents */ $num = min(max(1, $param['n']+0), $num); } diff --git a/bridges/DailymotionBridge.php b/bridges/DailymotionBridge.php index c782284..fccaec7 100644 --- a/bridges/DailymotionBridge.php +++ b/bridges/DailymotionBridge.php @@ -47,14 +47,14 @@ class DailymotionBridge extends BridgeAbstract{ function getMetadata($id) { $metadata=array(); - $html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); + $html2 = $this->getSimpleHTMLDOM('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); $metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content'); $metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') ); $metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content'); $metadata['uri'] = $html2->find('meta[property=og:url]', 0)->getAttribute('content'); return $metadata; - } + } $html = ''; @@ -63,15 +63,15 @@ class DailymotionBridge extends BridgeAbstract{ if (isset($param['u'])) { // user timeline mode $this->request = $param['u']; - $html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['p'])) { // playlist mode $this->request = strtok($param['p'], '_'); - $html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['s'])) { // search mode - 
$this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); } else { $this->returnClientError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)'); diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php index e819193..45ad029 100644 --- a/bridges/DanbooruBridge.php +++ b/bridges/DanbooruBridge.php @@ -25,23 +25,23 @@ class DanbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); + $html = $this->getSimpleHTMLDOM("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); foreach($html->find('div[id=posts] article') as $element) { $item = new \Item(); $item->uri = 'http://donmai.us'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); $item->timestamp = time(); $thumbnailUri = 
'http://donmai.us'.$element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Danbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 748190f..41ee8ec 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -15,7 +15,7 @@ class DansTonChatBridge extends BridgeAbstract{ $html = ''; $link = 'http://danstonchat.com/latest.html'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DansTonChat.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DansTonChat.'); foreach($html->find('div.item') as $element) { $item = new \Item(); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index a91ade9..95bfe39 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -82,7 +82,7 @@ class DauphineLibereBridge extends BridgeAbstract { } private function ExtractContent($url, $context) { - $html2 = $this->file_get_html($url, false, $context); + $html2 = $this->getSimpleHTMLDOM($url,false,$context); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; @@ -102,10 +102,10 @@ class DauphineLibereBridge extends BridgeAbstract { if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/'.$this->request.'/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } else { - $html = $this->file_get_html('http://www.ledauphine.com/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } $limit = 0; @@ 
-126,4 +126,4 @@ class DauphineLibereBridge extends BridgeAbstract { return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 5fd7866..bde3f9a 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -15,7 +15,7 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return $string; } - // F***ing quotes from Microsoft Word badly encoded, here was the trick: + // F***ing quotes from Microsoft Word badly encoded, here was the trick: // http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php private function convert_smart_quotes($string) { @@ -35,14 +35,14 @@ class DeveloppezDotComBridge extends BridgeAbstract{ } private function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); $text = utf8_encode($text); return trim($text); } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php index d94ce9a..7e50e8d 100644 --- a/bridges/DilbertBridge.php +++ b/bridges/DilbertBridge.php @@ -13,7 +13,7 @@ class DilbertBridge extends BridgeAbstract { public function collectData(array $param) { - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request Dilbert: '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not 
request Dilbert: '.$this->getURI()); foreach ($html->find('section.comic-item') as $element) { diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php index 76d8a40..7205585 100644 --- a/bridges/DollbooruBridge.php +++ b/bridges/DollbooruBridge.php @@ -26,25 +26,25 @@ class DollbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); + $html = $this->getSimpleHTMLDOM("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); foreach($html->find('div[class=shm-image-list] a') as $element) { $item = new \Item(); $item->uri = 'http://dollbooru.org'.$element->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://dollbooru.org'.$element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Dollbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php index f09b869..5398643 100644 --- a/bridges/DuckDuckGoBridge.php +++ b/bridges/DuckDuckGoBridge.php @@ -22,7 +22,7 @@ class DuckDuckGoBridge extends BridgeAbstract{ $html = ''; $link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DuckDuckGo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DuckDuckGo.'); foreach($html->find('div.results_links') as $element) { $item = new \Item(); diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 07a3469..551c8d9 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -40,11 +40,11 @@ class EZTVBridge extends BridgeAbstract{ $this->returnClientError('You must provide a list of ID (?i=showID1,showID2,...)'); // Loop on show ids - $showList = explode(",",$param['i']); + $showList = explode(",",$param['i']); foreach($showList as $showID){ // Get show page - $html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); + $html = $this->getSimpleHTMLDOM('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); // Loop on each element that look like an episode entry... 
foreach($html->find('.forum_header_border') as $element) { diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php index e519fc5..d1b4995 100644 --- a/bridges/EliteDangerousGalnetBridge.php +++ b/bridges/EliteDangerousGalnetBridge.php @@ -13,7 +13,7 @@ class EliteDangerousGalnetBridge extends BridgeAbstract public function collectData(array $param) { - $html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); foreach($html->find('div.article') as $element) { $item = new Item(); @@ -40,4 +40,4 @@ class EliteDangerousGalnetBridge extends BridgeAbstract { return 3600 * 2; // 2 hours } -} \ No newline at end of file +} diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 31ccecc..ebee2a2 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -33,7 +33,7 @@ class ElsevierBridge extends BridgeAbstract{ $time = $article->find('.article-info', 0); if($time){ $timestring = trim($time->plaintext); - /* + /* The format depends on the age of an article: - Available online 29 July 2016 - July 2016 @@ -63,7 +63,7 @@ class ElsevierBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'http://www.journals.elsevier.com/' . $param['j'] . 
'/recent-articles/'; - $html = file_get_html($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); foreach($html->find('.pod-listing') as $article){ $item = new \Item(); @@ -80,4 +80,4 @@ class ElsevierBridge extends BridgeAbstract{ return 43200; // 12h } } -?> \ No newline at end of file +?> diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php new file mode 100644 index 0000000..cd212c3 --- /dev/null +++ b/bridges/FSBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Futurasciences"; + $this->uri = "http://www.futura-sciences.com"; + $this->description = "Returns the 5 newest posts from FS (full text)"; + $this->update = "03/11/2015"; + + } + + public function collectData(array $param){ + + function FS_StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function FS_ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.fiche-actualite', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = FS_StripCDATA($element->find('title', 0)->innertext); + $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FS_ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Futura Sciences'; + } + + public function getURI(){ + return 'http://www.futura-sciences.com/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git 
a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 37eae8d..a61aaba 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -109,9 +109,9 @@ class FacebookBridge extends BridgeAbstract{ if (is_null($html)) { if (isset($param['u'])) { if (!strpos($param['u'], "/")) { - $html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } else { - $html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } } else { $this->returnClientError('You must specify a Facebook username.'); @@ -155,7 +155,7 @@ class FacebookBridge extends BridgeAbstract{ $this->name = $author; foreach($element->children() as $post) { - + $item = new \Item(); if (count($post->find('abbr')) > 0) { diff --git a/bridges/FierPandaBridge.php b/bridges/FierPandaBridge.php index 9b610fa..22fd142 100644 --- a/bridges/FierPandaBridge.php +++ b/bridges/FierPandaBridge.php @@ -14,7 +14,7 @@ Class FierPandaBridge extends BridgeAbstract{ public function collectData(array $param){ $link = 'http://www.fier-panda.fr/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Fier Panda.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Fier Panda.'); foreach($html->find('div.container-content article') as $element) { $item = new \Item(); diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index 37b619a..f4443f7 100644 --- a/bridges/FlickrExploreBridge.php +++ 
b/bridges/FlickrExploreBridge.php @@ -12,7 +12,7 @@ class FlickrExploreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); foreach($html->find('.photo-list-photo-view') as $element) { // Get the styles diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php index 957efbb..6797d62 100644 --- a/bridges/FlickrTagBridge.php +++ b/bridges/FlickrTagBridge.php @@ -27,16 +27,16 @@ class FlickrTagBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); } - + else { $this->returnClientError('You must specify a keyword or a Flickr username.'); } diff --git a/bridges/FootitoBridge.php 
b/bridges/FootitoBridge.php index e4030a5..35f382c 100644 --- a/bridges/FootitoBridge.php +++ b/bridges/FootitoBridge.php @@ -12,11 +12,11 @@ class FootitoBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); - + $html = $this->getSimpleHTMLDOM('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); + foreach($html->find('div.post') as $element) { $item = new Item(); - + $content = trim($element->innertext); $content = str_replace("content = $content; - + $title = $element->find('.contenu .texte ', 0)->plaintext; $item->title = $title; - + $info = $element->find('div.infos', 0); - + $item->timestamp = strtotime($info->find('time', 0)->datetime); $item->author = $info->find('a.auteur', 0)->plaintext; - + $this->items[] = $item; } } diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index 6c58761..b0daed2 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -32,7 +32,7 @@ class FourchanBridge extends BridgeAbstract{ $this->returnClientError('You must specify the thread URL.'); $url = 'https://boards.4chan.org'.$thread['path'].''; - $html = $this->file_get_html($url) or $this->returnServerError("Could not request 4chan, thread not found"); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request 4chan, thread not found"); foreach($html->find('div.postContainer') as $element) { $item = new \Item(); @@ -59,7 +59,7 @@ class FourchanBridge extends BridgeAbstract{ } $this->items = array_reverse($this->items); } - + public function getCacheDuration(){ return 300; // 5min } diff --git a/bridges/FrandroidBridge.php b/bridges/FrandroidBridge.php new file mode 100644 index 0000000..82bd62f --- /dev/null +++ b/bridges/FrandroidBridge.php @@ -0,0 +1,69 @@ +maintainer = "Daiyousei"; + $this->name = "Frandroid"; + $this->uri = 
"http://www.frandroid.com/"; + $this->description = "Returns the RSS feed from Frandroid (full text articles)"; + $this->update = "2015-03-05"; + + } + + public function collectData(array $param) + { + + function FrandroidStripCDATA($string) + { + $string = str_replace('', '', $string); + return $string; + } + function FrandroidExtractContent($url) + { + $html2 = $this->getSimpleHTMLDOM($url); + $html3 = $html2->find('div.post-content', 0); + $html3->find('div.no-sidebar-ad-top', 0)->outertext = ''; + $ret = $html3->find('div.shortcode-container'); + foreach ($ret as $value) { + $value->outertext = ''; + } + + $html3->find('div#hrr-link', 0)->outertext = ''; + $text = $html3->innertext; + $text = strip_tags($text, '

      '); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Frandroid?format=xml') or $this->returnError('Could not request Frandroid.', 404); + $limit = 0; + + foreach ($html->find('item') as $element) { + if ($limit < 5) { + $item = new \Item(); + $item->title = FrandroidStripCDATA($element->find('title', 0)->innertext); + $item->uri = FrandroidStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FrandroidExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() + { + return 'Frandroid'; + } + + public function getURI() + { + return 'http://www.frandroid.com/'; + } + + public function getCacheDuration() + { + return 300; // 5min + } +} diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 24b5f93..5150562 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -172,13 +172,13 @@ class FuturaSciencesBridge extends BridgeAbstract { $this->returnClientError('Invalid "feed" parameter.'.$url); $url = $this->getURI().'rss/'.$param['feed'].'.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); $limit = 0; foreach($html->find('item') as $element) { if ($limit < 10) { $article_url = str_replace('#xtor=RSS-8', '', StripCDATA($element->find('guid', 0)->plaintext)); - $article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); + $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); $contents = $article->find('div.content', 0)->innertext; foreach (array( diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 
cc59203..20cd0cc 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -74,7 +74,7 @@ class GBAtempBridge extends BridgeAbstract { } private function fetch_post_content($uri, $site_url) { - $html = $this->file_get_html($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); $content = $html->find('div.messageContent', 0)->innertext; return $this->cleanup_post_content($content, $site_url); } @@ -91,7 +91,7 @@ class GBAtempBridge extends BridgeAbstract { } else $this->returnClientError('The provided type filter is invalid. Expecting N, R, T, or F.'); } else $this->returnClientError('Please provide a type filter. Expecting N, R, T, or F.'); - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request GBAtemp.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request GBAtemp.'); if ($typeFilter == 'N') { foreach ($html->find('li[class=news_item full]') as $newsItem) { @@ -106,7 +106,7 @@ class GBAtempBridge extends BridgeAbstract { foreach ($html->find('li.portal_review') as $reviewItem) { $url = $this->uri.$reviewItem->find('a', 0)->href; $title = $reviewItem->find('span.review_title', 0)->plaintext; - $content = $this->file_get_html($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $content = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); $author = $content->find('a.username', 0)->plaintext; $time = intval($this->ExtractFromDelimiters($content->find('abbr.DateTime', 0)->outertext, 'data-time="', '"')); $intro = '

      '.($content->find('div#review_intro', 0)->plaintext).'

      '; diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php index 2d77512..cc2faa0 100644 --- a/bridges/GelbooruBridge.php +++ b/bridges/GelbooruBridge.php @@ -26,27 +26,27 @@ class GelbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 63; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); + $html = $this->getSimpleHTMLDOM("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://gelbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Gelbooru | '.$item->postid; $item->content = '

      Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php index 5605fd1..497c1af 100644 --- a/bridges/GiphyBridge.php +++ b/bridges/GiphyBridge.php @@ -30,10 +30,10 @@ class GiphyBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; $base_url = 'http://giphy.com'; if (isset($param['s'])) { /* keyword search mode */ - $html = $this->file_get_html($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a search worf (?s=...).'); @@ -43,22 +43,22 @@ class GiphyBridge extends BridgeAbstract{ if (isset($param['n'])) { $max = (integer) $param['n']; } - + $limit = 0; $kw = urlencode($param['s']); foreach($html->find('div.hoverable-gif') as $entry) { if($limit < $max) { - $node = $entry->first_child(); - $href = $node->getAttribute('href'); - - $html2 = $this->file_get_html($base_url . $href) or $this->returnServerError('No results for this query.'); + $node = $entry->first_child(); + $href = $node->getAttribute('href'); + + $html2 = $this->getSimpleHTMLDOM($base_url . 
$href) or $this->returnServerError('No results for this query.'); $figure = $html2->getElementByTagName('figure'); $img = $figure->firstChild(); $caption = $figure->lastChild(); - + $item = new \Item(); $item->id = $img->getAttribute('data-gif_id'); - $item->uri = $img->getAttribute('data-bitly_gif_url'); + $item->uri = $img->getAttribute('data-bitly_gif_url'); $item->username = 'Giphy - '.ucfirst($kw); $title = $caption->innertext(); $title = preg_replace('/\s+/', ' ',$title); @@ -74,8 +74,8 @@ class GiphyBridge extends BridgeAbstract{ '' .'' .''; - - $this->items[] = $item; + + $this->items[] = $item; $limit++; } } diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index febbf34..6d22823 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -34,7 +34,7 @@ class GithubIssueBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'https://github.com/'.$param['u'].'/'.$param['p'].'/issues/'.$param['i']; - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Github Issue '.$param['i'].' 
in project '.$param['u'].'/'.$param['p']); foreach($html->find('.js-comment-container') as $comment){ diff --git a/bridges/GitlabCommitsBridge.php b/bridges/GitlabCommitsBridge.php index 85c6332..16862d2 100644 --- a/bridges/GitlabCommitsBridge.php +++ b/bridges/GitlabCommitsBridge.php @@ -44,7 +44,7 @@ class GitlabCommitsBridge extends BridgeAbstract{ $uri.='master'; } - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Gitlab Commits of project '.$param['uri'].'/'.$param['u'].'/'.$param['p']); diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php index ce0d44e..0d900f9 100644 --- a/bridges/GizmodoFRBridge.php +++ b/bridges/GizmodoFRBridge.php @@ -14,7 +14,7 @@ class GizmodoFRBridge extends BridgeAbstract{ public function collectData(array $param){ function GizmodoFRExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; @@ -26,7 +26,7 @@ class GizmodoFRBridge extends BridgeAbstract{ return $text; } - $rssFeed = $this->file_get_html('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 104a13e..9f1ffdb 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -33,8 +33,8 @@ class GooglePlusPostBridge extends BridgeAbstract $this->request = $param['username']; // get content parsed -// $html = $this->file_get_html(__DIR__ . 
'/../posts2.html' - $html = $this->file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' +// $html = $this->getSimpleHTMLDOM(__DIR__ . '/../posts2.html' + $html = $this->getSimpleHTMLDOM(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' // force language , false, stream_context_create(array('http'=> array( 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 0c68a29..e7db5a0 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -35,7 +35,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); } else{ $this->returnClientError('You must specify a keyword (?q=...).'); @@ -45,7 +45,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if( !is_null($emIsRes) ){ foreach($emIsRes->find('li[class=g]') as $element) { $item = new Item(); - + // Extract direct URL from google href (eg. /url?q=...) 
$t = $element->find('a[href]',0)->href; $item->uri = ''.$t; diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php index 51c3e8d..f6585ae 100644 --- a/bridges/GuruMedBridge.php +++ b/bridges/GuruMedBridge.php @@ -16,7 +16,7 @@ class GuruMedBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); + $html = $this->getSimpleHTMLDOM('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php index c6cc6b8..dc4a3bf 100644 --- a/bridges/HDWallpapersBridge.php +++ b/bridges/HDWallpapersBridge.php @@ -48,7 +48,7 @@ class HDWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$category.'/page/'.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches); diff --git a/bridges/HentaiHavenBridge.php b/bridges/HentaiHavenBridge.php index 5f41a1b..25934a0 100644 --- a/bridges/HentaiHavenBridge.php +++ b/bridges/HentaiHavenBridge.php @@ -12,7 +12,7 @@ class HentaiHavenBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); + $html = $this->getSimpleHTMLDOM('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); foreach($html->find('div.zoe-grid') as $element) { $item = new \Item(); $item->uri = $element->find('div.brick-content h3 a', 0)->href; diff --git a/bridges/IdenticaBridge.php b/bridges/IdenticaBridge.php 
index a2c7d90..23f54a9 100644 --- a/bridges/IdenticaBridge.php +++ b/bridges/IdenticaBridge.php @@ -25,7 +25,7 @@ class IdenticaBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify an Identica username (?u=...).'); diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index e8f53a6..0e4359f 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -25,35 +25,35 @@ class InstagramBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); + $html = $this->getSimpleHTMLDOM('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); } else { $this->returnClientError('You must specify a Instagram username (?u=...).'); } - + $innertext = null; - + foreach($html->find('script') as $script) { if ('' === $script->innertext) { continue; } - + $pos = strpos(trim($script->innertext), 'window._sharedData'); if (0 !== $pos) { continue; } - + $innertext = $script->innertext; break; } $json = trim(substr($innertext, $pos+18), ' =;'); $data = json_decode($json); - - - + + + $userMedia = $data->entry_data->ProfilePage[0]->user->media->nodes; foreach($userMedia as $media) @@ -70,7 +70,7 @@ class InstagramBridge extends BridgeAbstract{ } $item->timestamp = $media->date; $this->items[] = $item; - + } } diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index 
0b26211..5b9d103 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -59,7 +59,7 @@ class JapanExpoBridge extends BridgeAbstract{ }; $link = 'http://www.japan-expo-paris.com/fr/actualites'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request JapanExpo: '.$link); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request JapanExpo: '.$link); $fullcontent = (!empty($param['mode']) && $param['mode'] == 'full'); $count = 0; @@ -73,7 +73,7 @@ class JapanExpoBridge extends BridgeAbstract{ if ($fullcontent) { if ($count < 5) { - $article_html = $this->file_get_html($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + $article_html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request JapanExpo: '.$url); $header = $article_html->find('header.pageHeadBox', 0); $timestamp = strtotime($header->find('time', 0)->datetime); $title_html = $header->find('div.section', 0)->next_sibling(); diff --git a/bridges/KonachanBridge.php b/bridges/KonachanBridge.php index 064a1fd..9bb08a5 100644 --- a/bridges/KonachanBridge.php +++ b/bridges/KonachanBridge.php @@ -25,18 +25,18 @@ class KonachanBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); + $html = $this->getSimpleHTMLDOM("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = 
preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class KonachanBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Konachan | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index f532d05..4c516c6 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -16,14 +16,14 @@ class KoreusBridge extends BridgeAbstract{ } private function KoreusExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('p[class=itemText]', 0)->innertext; $text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text)); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php index 6072030..2fb6b4c 100755 --- a/bridges/LeBonCoinBridge.php +++ b/bridges/LeBonCoinBridge.php @@ -134,10 +134,10 @@ class LeBonCoinBridge extends BridgeAbstract{ "type" : "list", "values" : [ { "name" : "---- Select ----", "value" : "" }, - + { "name" : "", "value" : "" }, { "name" : "EMPLOI", "value" : "_emploi_" }, - + { "name" : "", "value" : "" }, { "name" : "VEHICULES", "value" : "_vehicules_" }, { "name" : "    Voitures", "value" : "voitures" }, @@ -149,14 +149,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Equipement Caravaning", "value" : "equipement_caravaning" }, { "name" : "    Nautisme", "value" : "nautisme" }, { "name" : "    Equipement Nautisme", "value" : "equipement_nautisme" }, - + { "name" : "", "value" : "" }, { "name" : "IMMOBILIER", "value" : "_immobilier_" }, { "name" : "    Ventes immobilieres", "value" : "ventes_immobilieres" }, { "name" : "    Locations", "value" : "locations" }, { "name" : "    Colocations", "value" : "colocations" }, { "name" :
"    Bureaux & Commerces", "value" : "bureaux_commerces" }, - + { "name" : "", "value" : "" }, { "name" : "VACANCES", "value" : "_vacances_" }, { "name" : "    Locations gites", "value" : "locations_gites" }, @@ -164,14 +164,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Campings", "value" : "campings" }, { "name" : "    Hôtels", "value" : "hotels" }, { "name" : "    Hébergements insolites", "value" : "hebergements_insolites" }, - + { "name" : "", "value" : "" }, { "name" : "MULTIMEDIA", "value" : "_multimedia_" }, { "name" : "    Informatique", "value" : "informatique" }, { "name" : "    Consoles & Jeux vidéo", "value" : "consoles_jeux_video" }, { "name" : "    Image & Son", "value" : "image_son" }, { "name" : "    Téléphonie", "value" : "telephonie" }, - + { "name" : "", "value" : "" }, { "name" : "LOISIRS", "value" : "_loisirs_" }, { "name" : "    DVD / Films", "value" : "dvd_films" }, @@ -184,7 +184,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Collection", "value" : "collection" }, { "name" : "    Jeux & Jouets", "value" : "jeux_jouets" }, { "name" : "    Vins & Gastronomie", "value" : "vins_gastronomie" }, - + { "name" : "", "value" : "" }, { "name" : "MATERIEL PROFESSIONNEL", "value" : "_materiel_professionnel_" }, { "name" : "    Materiel Agricole", "value" : "materiel_agricole" }, @@ -196,7 +196,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Fournitures de Bureau", "value" : "fournitures_de_bureau" }, { "name" : "    Commerces & Marchés", "value" : "commerces_marches" }, { "name" : "    Matériel médical", "value" : "materiel_medical" }, - + { "name" : "", "value" : "" }, { "name" : "SERVICES", "value" : "_services_" }, { "name" : "    Prestations de services", "value" : "prestations_de_services" }, @@ -204,7 +204,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Evénements", "value" : "evenements" }, { "name" : "    Cours particuliers", "value" : "cours_particuliers" }, { "name" 
: "    Covoiturage", "value" : "covoiturage" }, - + { "name" : "", "value" : "" }, { "name" : "MAISON", "value" : "_maison_" }, { "name" : "    Ameublement", "value" : "ameublement" }, @@ -220,7 +220,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Montres & Bijoux", "value" : "montres_bijoux" }, { "name" : "    Equipement bébé", "value" : "equipement_bebe" }, { "name" : "    Vêtements bébé", "value" : "vetements_bebe" }, - + { "name" : "", "value" : "" }, { "name" : "AUTRES", "value" : "autres" } ] @@ -239,7 +239,7 @@ class LeBonCoinBridge extends BridgeAbstract{ else { $link = 'http://www.leboncoin.fr/' . $param['c'] . '/offres/' . $param['r'] . '/?f=a&th=1&q=' . urlencode($param['k']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LeBonCoin.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LeBonCoin.'); $list = $html->find('.tabsContent', 0); if($list === NULL) { @@ -275,4 +275,4 @@ class LeBonCoinBridge extends BridgeAbstract{ $this->items[] = $item; } } -} \ No newline at end of file +} diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index 24c2685..afbdad3 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -16,7 +16,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } private function LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.post-content', 0)->innertext; foreach($articleHTMLContent->find('a.more') as $element) { @@ -38,7 +38,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); + $rssFeed =
$this->getSimpleHTMLDOM('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 89914b5..d97d1c6 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -34,7 +34,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { } $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -44,7 +44,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); $article_content = CleanArticle($article_html->find('div#article', 0)->innertext); $article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext; diff --git a/bridges/LeMotDuJourBridge.php b/bridges/LeMotDuJourBridge.php new file mode 100644 index 0000000..f9f1229 --- /dev/null +++ b/bridges/LeMotDuJourBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "LeMotDuJour Bridge"; + $this->uri = "http://www.lemotdujour.com/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) 
{ + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-contenu', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeMotDuJour Bridge'; + } + + public function getURI(){ + return 'http://lemotdujour.com/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + // return 0; // 2 hours + } +} diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index ddde42d..7511cda 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -12,14 +12,14 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); - + $html = $this->getSimpleHTMLDOM('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); + foreach($html->find('div.blog-post') as $element) { $item = new Item(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); $url = $temp->href; - + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg @@ -29,21 +29,21 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ $image->src = $img_src; } $content = $temp->innertext; - + $auteur = 
$temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 8145cf7..7501d51 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -13,7 +13,7 @@ class LichessBridge extends BridgeAbstract public function collectData(array $param) { - $xml_feed = $this->file_get_html('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); + $xml_feed = $this->getSimpleHTMLDOM('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); $posts_loaded = 0; foreach($xml_feed->find('entry') as $entry) @@ -37,7 +37,7 @@ class LichessBridge extends BridgeAbstract private function retrieve_lichess_post($blog_post_uri) { - $blog_post_html = $this->file_get_html($blog_post_uri); + $blog_post_html = $this->getSimpleHTMLDOM($blog_post_uri); $blog_post_div = $blog_post_html->find('#lichess_blog', 0); $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; diff --git a/bridges/LinkedInCompany.php b/bridges/LinkedInCompany.php index 1f94301..950524a 100644 --- a/bridges/LinkedInCompany.php +++ b/bridges/LinkedInCompany.php @@ -22,7 +22,7 @@ class LinkedInCompany extends BridgeAbstract{ $html = ''; $link = 'https://www.linkedin.com/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LinkedIn.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LinkedIn.'); foreach($html->find('//*[@id="my-feed-post"]/li') as $element) { $title = $element->find('span.share-body', 0)->innertext; diff --git a/bridges/LolibooruBridge.php b/bridges/LolibooruBridge.php index 3f4cae1..b10c39d 
100644 --- a/bridges/LolibooruBridge.php +++ b/bridges/LolibooruBridge.php @@ -26,18 +26,18 @@ class LolibooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); + $html = $this->getSimpleHTMLDOM("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -46,7 +46,7 @@ class LolibooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Lolibooru | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/MalikiBridge.php b/bridges/MalikiBridge.php new file mode 100644 index 0000000..66c8081 --- /dev/null +++ b/bridges/MalikiBridge.php @@ -0,0 +1,60 @@ +maintainer = "mitsukarenai"; + $this->name = "Maliki"; + $this->uri = "http://www.maliki.com/"; + $this->description = "Returns Maliki's newest strips"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM('http://www.maliki.com/') or $this->returnError('Could not request Maliki.', 404); + $count=0; + $latest=1; $latest_title=""; + $latest = $html->find('div.conteneur_page a', 1)->href; + $latest_title = $html->find('div.conteneur_page img', 0)->title; + + function MalikiExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = 'http://www.maliki.com/'.$html2->find('img', 0)->src; + $text = '
      '.$html2->find('div.imageetnews', 0)->plaintext; + return $text; + } + + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$latest; + $item->title = $latest_title; + $item->timestamp = time(); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + + + foreach($html->find('div.boite_strip') as $element) { + if(!empty($element->find('a',0)->href) and $count < 3) { + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$element->find('a',0)->href; + $item->title = $element->find('img',0)->title; + $item->timestamp = strtotime(str_replace('/', '-', $element->find('span.stylepetit', 0)->innertext)); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + $count++; + } + } + } + + public function getName(){ + return 'Maliki'; + } + + public function getURI(){ + return 'http://www.maliki.com/'; + } + + public function getCacheDuration(){ + return 86400*6; // 6 days + } +} diff --git a/bridges/MemoLinuxBridge.php b/bridges/MemoLinuxBridge.php new file mode 100644 index 0000000..1c41f14 --- /dev/null +++ b/bridges/MemoLinuxBridge.php @@ -0,0 +1,58 @@ +maintainer = "qwertygc"; + $this->name = "MemoLinux"; + $this->uri = "http://memo-linux.com/"; + $this->description = "Returns the 10 newest posts from MemoLinux (full text)"; + $this->update = "2015-01-30"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + $text = preg_replace('@]*?>.*?

@si', '', $text); + $text = preg_replace("/getSimpleHTMLDOM('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + } + + public function getName(){ + return 'MemoLinux'; + } + + public function getURI(){ + return 'http://memo-linux.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/MilbooruBridge.php b/bridges/MilbooruBridge.php index 3ead724..7e5fdaa 100644 --- a/bridges/MilbooruBridge.php +++ b/bridges/MilbooruBridge.php @@ -25,25 +25,25 @@ class MilbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); + $html = $this->getSimpleHTMLDOM("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) { $item = new \Item(); $item->uri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); + 
$item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('img', 0)->src; $item->tags = $element->find('a', 0)->getAttribute('data-tags'); $item->title = 'Milbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index d60621c..3cc0d81 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -9,8 +9,8 @@ class MondeDiploBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index f4419fe..e28c0e3 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -10,13 +10,13 @@ class MsnMondeBridge extends BridgeAbstract{ } private function MsnMondeExtractContent($url, &$item) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $item->content = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; $item->timestamp = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime); } public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MsnMonde.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MsnMonde.'); $limit = 0; foreach($html->find('.smalla') as $article) { if($limit < 10) { diff --git a/bridges/MspabooruBridge.php b/bridges/MspabooruBridge.php index a67d566..2e465fd 100644 --- a/bridges/MspabooruBridge.php +++ b/bridges/MspabooruBridge.php @@ -26,27 +26,27 @@ class MspabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page =
(int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); + $html = $this->getSimpleHTMLDOM("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://mspabooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Mspabooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 01e46c1..568e857 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -36,7 +36,7 @@ class NakedSecurityBridge extends BridgeAbstract { } $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -44,7 +44,7 @@ class NakedSecurityBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); @@ -68,4 +68,4 @@ class NakedSecurityBridge extends BridgeAbstract { } } } -} \ No newline at end of file +} diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8bf7d9b..0e0e5f8 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -13,7 +13,7 @@ class NasaApodBridge extends BridgeAbstract{ public function collectData(array $param) { - $html = $this->file_get_html('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); $list = explode("
", $html->find('b', 0)->innertext); for($i = 0; $i < 3;$i++) @@ -25,7 +25,7 @@ class NasaApodBridge extends BridgeAbstract{ $uri = 'http://apod.nasa.gov/apod/'.$uri_page; $item->uri = $uri; - $picture_html = $this->file_get_html($uri); + $picture_html = $this->getSimpleHTMLDOM($uri); $picture_html_string = $picture_html->innertext; //Extract image and explanation diff --git a/bridges/NeuviemeArtBridge.php b/bridges/NeuviemeArtBridge.php index 96375ec..b8a0ce8 100644 --- a/bridges/NeuviemeArtBridge.php +++ b/bridges/NeuviemeArtBridge.php @@ -20,7 +20,7 @@ class NeuviemeArtBridge extends BridgeAbstract { } $feedUrl = 'http://www.9emeart.fr/9emeart.rss'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -28,7 +28,7 @@ class NeuviemeArtBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 5b121f7..db74a24 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -16,7 +16,7 @@ class NextInpactBridge extends BridgeAbstract { } private function ExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; @@ -27,7 +27,7 @@ class NextInpactBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); + $html = $this->getSimpleHTMLDOM('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index 5960ffc..aaf006c 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -10,21 +10,21 @@ class NiceMatinBridge extends BridgeAbstract{ } private function NiceMatinExtractContent($url) { - $html = $this->file_get_html($url); + $html = $this->getSimpleHTMLDOM($url); if(!$html) $this->returnServerError('Could not acquire content from url: ' . $url . '!'); - + $content = $html->find('article', 0); if(!$content) $this->returnServerError('Could not find \'section\'!'); - + $text = preg_replace('#(.*?)#is', '', $content->innertext); $text = strip_tags($text, '

'); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); + $html = $this->getSimpleHTMLDOM('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php index f7dedd1..2a5b960 100644 --- a/bridges/NovelUpdatesBridge.php +++ b/bridges/NovelUpdatesBridge.php @@ -26,7 +26,7 @@ class NovelUpdatesBridge extends BridgeAbstract{ if(strpos($thread['path'], 'series/') === FALSE) $this->returnClientError('You must specify the novel URL.'); $url = 'http://www.novelupdates.com'.$thread['path'].''; - $fullhtml = $this->file_get_html($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); + $fullhtml = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); $this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext; // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 // forcefully removes tbody diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index cc949c2..f5671b7 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -42,7 +42,6 @@ class NumeramaBridge extends BridgeAbstract{ $limit++; } } - } public function getCacheDuration() { diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php index 4a0c3e0..4c093ab 100644 --- a/bridges/OpenClassroomsBridge.php +++ b/bridges/OpenClassroomsBridge.php @@ -64,11 +64,11 @@ class OpenClassroomsBridge extends BridgeAbstract{ { $this->returnServerError('Error: You must chose a category.'); } - + $html = ''; $link = 'https://openclassrooms.com/courses?categories='.$param['u'].'&title=&sort=updatedAt+desc'; - $html = 
$this->file_get_html($link) or $this->returnServerError('Could not request OpenClassrooms.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request OpenClassrooms.'); foreach($html->find('.courseListItem') as $element) { $item = new \Item(); diff --git a/bridges/OpenTheoryBridge.php b/bridges/OpenTheoryBridge.php new file mode 100644 index 0000000..cccfaf3 --- /dev/null +++ b/bridges/OpenTheoryBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Opentheory"; + $this->uri = "http://open1theory.com"; + $this->description = "Returns the 5 newest posts from OpenTheory (full text)"; + $this->update = "02-08-2014"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'OpenTheory'; + } + + public function getURI(){ + return 'http://open1theory.com/feed'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git a/bridges/ParuVenduImmoBridge.php b/bridges/ParuVenduImmoBridge.php index 97285a5..65947df 100644 --- a/bridges/ParuVenduImmoBridge.php +++ b/bridges/ParuVenduImmoBridge.php @@ -47,7 +47,7 @@ class 
ParuVenduImmoBridge extends BridgeAbstract $appartment = '&tbApp=1&tbDup=1&tbChb=1&tbLof=1&tbAtl=1&tbPla=1'; $maison = '&tbMai=1&tbVil=1&tbCha=1&tbPro=1&tbHot=1&tbMou=1&tbFer=1'; $link = $this->uri.'/immobilier/annonceimmofo/liste/listeAnnonces?tt=1'.$appartment.$maison; - + if (isset($param['minarea'])) { $this->request .= ' '.$param['minarea'].' m2'; $link .= '&sur0='.urlencode($param['minarea']); @@ -56,39 +56,39 @@ class ParuVenduImmoBridge extends BridgeAbstract if (isset($param['maxprice'])) { $link .= '&px1='.urlencode($param['maxprice']); } - + if (isset($param['pa'])) { $link .= '&pa='.urlencode($param['pa']); } - + if (isset($param['lo'])) { $this->request .= ' In: '.$param['lo']; $link .= '&lo='.urlencode($param['lo']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request paruvendu.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request paruvendu.'); foreach($html->find('div.annonce a') as $element) { - + if (!$element->title) { continue; } - + $img =''; foreach($element->find('span.img img') as $img) { if ($img->original) { $img = ''; } } - + $desc = $element->find('span.desc')[0]->innertext; $desc = str_replace("voir l'annonce", '', $desc); - + $price = $element->find('span.price')[0]->innertext; list($href) = explode('#', $element->href); - + $item = new \Item(); $item->uri = $this->uri.$href; $item->title = $element->title; diff --git a/bridges/PickyWallpapersBridge.php b/bridges/PickyWallpapersBridge.php index 2e44cb2..a4feffe 100644 --- a/bridges/PickyWallpapersBridge.php +++ b/bridges/PickyWallpapersBridge.php @@ -55,7 +55,7 @@ class PickyWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->resolution.'/'.$this->category.'/'.(!empty($this->subcategory)?$this->subcategory.'/':'').'page-'.$page.'/'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = 
$this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches); diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 70b4105..1012ea2 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -38,7 +38,7 @@ class PinterestBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; if (isset($param['u']) || isset($param['b'])) { - + if (empty($param['u'])) { $this->returnClientError('You must specify a Pinterest username (?u=...).'); @@ -48,33 +48,33 @@ class PinterestBridge extends BridgeAbstract{ { $this->returnClientError('You must specify a Pinterest board for this username (?b=...).'); } - + $this->username = $param['u']; $this->board = $param['b']; - $html = $this->file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); } else if (isset($param['q'])) { $this->query = $param['q']; - $html = $this->file_get_html($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); } - + else { $this->returnClientError('You must specify a Pinterest username and a board name (?u=...&b=...).'); } - - + + foreach($html->find('div.pinWrapper') as $div) { $a = $div->find('a.pinImageWrapper',0); - + $img = $a->find('img', 0); - + $item = new \Item(); $item->uri = $this->getURI().$a->getAttribute('href'); $item->content = ''; - - + + if (isset($this->query)) { $avatar = $div->find('div.creditImg', 0)->find('img', 0); @@ 
-84,25 +84,25 @@ class PinterestBridge extends BridgeAbstract{ $username = $div->find('div.creditName', 0); $board = $div->find('div.creditTitle', 0); - - $item->username =$username->innertext; + + $item->username =$username->innertext; $item->fullname = $board->innertext; $item->avatar = $avatar; - + $item->content .= '
'.$item->username.''; $item->content .= '
'.$item->fullname; } - + $item->title = $img->getAttribute('alt'); - + //$item->timestamp = $media->created_time; $this->items[] = $item; - + } } public function getName(){ - + if (isset($this->query)) { return $this->query .' - Pinterest'; diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php index fc536cd..28a7a68 100644 --- a/bridges/PlanetLibreBridge.php +++ b/bridges/PlanetLibreBridge.php @@ -10,13 +10,13 @@ class PlanetLibreBridge extends BridgeAbstract{ } private function PlanetLibreExtractContent($url){ - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('div[class="post-text"]', 0)->innertext; return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); + $html = $this->getSimpleHTMLDOM('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); $limit = 0; foreach($html->find('div.post') as $element) { if($limit < 5) { diff --git a/bridges/ProjectMGameBridge.php b/bridges/ProjectMGameBridge.php index 865b38d..080029d 100644 --- a/bridges/ProjectMGameBridge.php +++ b/bridges/ProjectMGameBridge.php @@ -14,7 +14,7 @@ class ProjectMGameBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); + $html = $this->getSimpleHTMLDOM('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); foreach($html->find('article') as $article) { $item = new \Item(); diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php index 433792e..490ed7a 100644 --- a/bridges/RTBFBridge.php +++ b/bridges/RTBFBridge.php @@ -25,7 +25,7 @@ class RTBFBridge extends BridgeAbstract { $count = 0; if (isset($param['c'])) { - $html = 
$this->file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); + $html = $this->getSimpleHTMLDOM('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); foreach($html->find('section[id!=widget-ml-avoiraussi-] .rtbf-media-grid article') as $element) { if($count < $limit) { diff --git a/bridges/RaymondBridge.php b/bridges/RaymondBridge.php new file mode 100644 index 0000000..d255052 --- /dev/null +++ b/bridges/RaymondBridge.php @@ -0,0 +1,53 @@ +maintainer = "pit-fgfjiudghdf"; + $this->name = "Raymond"; + $this->uri = "http://www.raymond.cc"; + $this->description = "Returns the 3 newest posts from Raymond.cc (full text)"; + $this->update = "2014-05-26"; + + } + + public function collectData(array $param){ + function raymondStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function raymondExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('/class="ad".*/', '', $text); + $text = strip_tags($text, '

'); + $text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404); + $limit = 0; + foreach($html->find('item') as $element) { + if($limit < 3) { + $item = new \Item(); + $item->title = raymondStripCDATA($element->find('title', 0)->innertext); + $item->uri = raymondStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = raymondExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + public function getName(){ + return 'raymond'; + } + public function getURI(){ + return 'http://www.raymond.cc/blog'; + } + public function getCacheDuration(){ + return 3600*12; // 12 hour + } +} + diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index 8031582..2a563f6 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -66,7 +66,7 @@ class Releases3DSBridge extends BridgeAbstract { //Retrieve cover art and short desc from IGN? 
$ignResult = false; $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = ''; $ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name); - if ($ignResult = $this->file_get_html($ignSearchUrl)) { + if ($ignResult = $this->getSimpleHTMLDOM($ignSearchUrl)) { $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 8c1f553..e93cec4 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -10,7 +10,7 @@ class ReporterreBridge extends BridgeAbstract{ } private function ExtractContentReporterre($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); foreach($html2->find('div[style=text-align:justify]') as $e) { $text = $e->outertext; @@ -27,7 +27,7 @@ class ReporterreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); + $html = $this->getSimpleHTMLDOM('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/Rue89Bridge.php b/bridges/Rue89Bridge.php index 54c86d4..5dd813c 100644 --- a/bridges/Rue89Bridge.php +++ b/bridges/Rue89Bridge.php @@ -22,7 +22,7 @@ class Rue89Bridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); + $html = $this->getSimpleHTMLDOM('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); $limit = 0; foreach($html->find('item') as $element) { 
diff --git a/bridges/Rule34Bridge.php b/bridges/Rule34Bridge.php index b868d91..b0d5f88 100644 --- a/bridges/Rule34Bridge.php +++ b/bridges/Rule34Bridge.php @@ -25,27 +25,27 @@ class Rule34Bridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); + $html = $this->getSimpleHTMLDOM("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://rule34.xxx/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Rule34 | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/Rule34pahealBridge.php b/bridges/Rule34pahealBridge.php index 77b0319..42014df 100644 --- a/bridges/Rule34pahealBridge.php +++ b/bridges/Rule34pahealBridge.php @@ -26,25 +26,25 @@ class Rule34pahealBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); + $html = $this->getSimpleHTMLDOM("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); foreach($html->find('div[class=shm-image-list] div[class=shm-thumb]') as $element) { $item = new \Item(); $item->uri = 'http://rule34.paheal.net'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Rule34paheal | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SafebooruBridge.php b/bridges/SafebooruBridge.php index cd0f1a3..b4b45f5 100644 --- a/bridges/SafebooruBridge.php +++ b/bridges/SafebooruBridge.php @@ -26,27 +26,27 @@ class SafebooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 40; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); + $html = $this->getSimpleHTMLDOM("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://safebooru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Safebooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SakugabooruBridge.php b/bridges/SakugabooruBridge.php index 8e7e3aa..aea7133 100644 --- a/bridges/SakugabooruBridge.php +++ b/bridges/SakugabooruBridge.php @@ -25,18 +25,18 @@ class SakugabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); + $html = $this->getSimpleHTMLDOM("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class SakugabooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Sakugabooru | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/ScilogsBridge.php b/bridges/ScilogsBridge.php new file mode 100644 index 0000000..7186be4 --- /dev/null +++ b/bridges/ScilogsBridge.php @@ -0,0 +1,54 @@ +maintainer = "qwertygc"; + $this->name = "Scilogs Bridge"; + $this->uri = "http://www.scilogs.fr/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function ScilogsStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ScilogsExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entrybody', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.scilogs.fr/?wpmu-feed=posts') or $this->returnError('Could not request Scilogs.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = ScilogsStripCDATA($element->find('title', 0)->innertext); + $item->uri = ScilogsStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ScilogsExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Scilogs Bridge'; + } + + public function getURI(){ + return 'http://scilogs.fr/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + } +} diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php index 864d39c..e2f631f 100644 --- a/bridges/ScmbBridge.php +++ b/bridges/ScmbBridge.php @@ -10,20 +10,20 @@ class ScmbBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - + public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); - + $html = 
$this->getSimpleHTMLDOM('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); + foreach($html->find('article') as $article) { $item = new \Item(); $item->uri = 'http://secouchermoinsbete.fr'.$article->find('p.summary a',0)->href; $item->title = $article->find('header h1 a',0)->innertext; - + $article->find('span.read-more',0)->outertext=''; // remove text "En savoir plus" from anecdote content $content = $article->find('p.summary a',0)->innertext; $content =substr($content,0,strlen($content)-17); // remove superfluous spaces at the end - + // get publication date $str_date = $article->find('time',0)->datetime; list($date, $time) = explode(' ', $str_date); @@ -31,8 +31,8 @@ class ScmbBridge extends BridgeAbstract{ list($h, $i) = explode(':', $time); $timestamp = mktime($h,$i,0,$m,$d,$y); $item->timestamp = $timestamp; - - + + $item->content = $content; $this->items[] = $item; } diff --git a/bridges/ScoopItBridge.php b/bridges/ScoopItBridge.php index ba3fc0b..d7b4ed3 100644 --- a/bridges/ScoopItBridge.php +++ b/bridges/ScoopItBridge.php @@ -24,9 +24,9 @@ class ScoopItBridge extends BridgeAbstract{ if ($param['u'] != '') { $this->request = $param['u']; $link = 'http://scoop.it/search?q=' .urlencode($this->request); - - $html = $this->file_get_html($link) or $this->returnServerError('Could not request ScoopIt. for : ' . $link); - + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request ScoopIt. for : ' . 
$link); + foreach($html->find('div.post-view') as $element) { $item = new Item(); $item->uri = $element->find('a', 0)->href; diff --git a/bridges/SegfaultMintBridge.php b/bridges/SegfaultMintBridge.php new file mode 100644 index 0000000..b2ede5d --- /dev/null +++ b/bridges/SegfaultMintBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "SegfaultMint"; + $this->uri = "http://segfault.linuxmint.com/"; + $this->description = "Returns the 5 newest posts from SegfaultMint (full text)"; + $this->update = "2014-07-05"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.post-bodycopy', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Segfault Mint'; + } + + public function getURI(){ + return 'http://segfault.linuxmint.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*24; // 24 hours + } +} diff --git a/bridges/Sexactu.php b/bridges/Sexactu.php index 9bb82c9..2972ede 100644 --- a/bridges/Sexactu.php +++ b/bridges/Sexactu.php @@ -15,10 +15,10 @@ class Sexactu extends BridgeAbstract{ $find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'novembre', 'décembre'); $replace = 
array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'); - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); foreach($html->find('.content-holder') as $contentHolder) { - // only use first list as second one only contains pages numbers + // only use first list as second one only contains pages numbers $articles = $contentHolder->find('ul', 0); foreach($articles->find('li') as $element) { // if you ask about that method_exists, there seems to be a bug in simple html dom @@ -38,7 +38,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $dateText = $titleTimestamp->innertext; $dateText = substr($dateText, strpos($dateText,',')+1); $dateText = str_replace($find, $replace, strtolower($dateText)); - $date = strtotime($dateText); + $date = strtotime($dateText); $item->timestamp = $date; $item->author = "Maïa Mazaurette"; @@ -50,9 +50,9 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $item->content = $elementText->innertext; $this->items[] = $item; } - + } - + } } } @@ -64,7 +64,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', public function getCacheDuration(){ return 7200; // 2h hours } - + private function correctCase($str) { $sentences=explode('.', mb_strtolower($str, "UTF-8")); $str=""; @@ -73,7 +73,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', { //upper case first char $sentence=ucfirst(trim($sentence)); - + //append sentence to output $str=$str.$sep.$sentence; $sep=". 
"; diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index df582bb..e1bf84a 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -20,7 +20,7 @@ class SiliconBridge extends BridgeAbstract { } $feedUrl = 'http://www.silicon.fr/feed'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -30,7 +30,7 @@ class SiliconBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); //Build article contents from corresponding elements $thumbnailUri = $element->find('enclosure', 0)->url; diff --git a/bridges/StripeAPIChangeLogBridge.php b/bridges/StripeAPIChangeLogBridge.php index 0fb1d85..334955f 100644 --- a/bridges/StripeAPIChangeLogBridge.php +++ b/bridges/StripeAPIChangeLogBridge.php @@ -16,7 +16,7 @@ class StripeAPIChangeLogBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://stripe.com/docs/upgrades') + $html = $this->getSimpleHTMLDOM('https://stripe.com/docs/upgrades') or $this->returnServerError('No results for Stripe API Changelog'); diff --git a/bridges/SuperbWallpapersBridge.php b/bridges/SuperbWallpapersBridge.php index 36928be..eb54dea 100644 --- a/bridges/SuperbWallpapersBridge.php +++ b/bridges/SuperbWallpapersBridge.php @@ -47,12 +47,12 @@ class SuperbWallpapersBridge extends BridgeAbstract { // Get last page number $link = 
$baseUri.'/'.$this->category.'/9999.html'; - $html = $this->file_get_html($link); + $html = $this->getSimpleHTMLDOM($link); $lastpage = min($html->find('.paging .cpage', 0)->innertext(), ceil($max/36)); for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'/'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); foreach($html->find('.wpl .i a') as $element) { $thumbnail = $element->find('img', 0); diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 22f02c8..83f7a30 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -36,7 +36,7 @@ class T411Bridge extends BridgeAbstract { //Retrieve torrent listing from search results, which does not contain torrent description $url = $this->uri.'torrents/search/?'.$param['search'].'&order=added&type=desc'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request t411: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request t411: '.$url); $results = $html->find('table.results', 0); if (is_null($results)) $this->returnServerError('No results from t411: '.$url); @@ -57,7 +57,7 @@ class T411Bridge extends BridgeAbstract { $item_date = strtotime($element->find('dd', 0)->plaintext); //Retrieve full description from torrent page - if ($item_html = $this->file_get_html($item_uri)) { + if ($item_html = $this->getSimpleHTMLDOM($item_uri)) { //Retrieve data from page contents $item_desc = $item_html->find('div.description', 0); diff --git a/bridges/TagBoardBridge.php b/bridges/TagBoardBridge.php index 7382a52..f3bd4c9 100644 --- a/bridges/TagBoardBridge.php +++ b/bridges/TagBoardBridge.php @@ -23,8 +23,8 @@ class TagBoardBridge extends BridgeAbstract{ $html = ''; $this->request = $param['u']; $link = 'https://post-cache.tagboard.com/search/' .$this->request; - 
- $html = $this->file_get_html($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); $parsed_json = json_decode($html); foreach($parsed_json->{'posts'} as $element) { @@ -49,4 +49,4 @@ class TagBoardBridge extends BridgeAbstract{ return 21600; // 6 hours } } - + diff --git a/bridges/TbibBridge.php b/bridges/TbibBridge.php index aeacdd6..59fa3e3 100644 --- a/bridges/TbibBridge.php +++ b/bridges/TbibBridge.php @@ -25,27 +25,27 @@ class TbibBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); + $html = $this->getSimpleHTMLDOM("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://tbib.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Tbib | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index b8ca7c5..f5d4b7f 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -12,17 +12,17 @@ class TheCodingLoveBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); - + $html = $this->getSimpleHTMLDOM('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); + foreach($html->find('div.post') as $element) { $item = new Item(); $temp = $element->find('h3 a', 0); - + $titre = $temp->innertext; $url = $temp->href; - + $temp = $element->find('div.bodytype', 0); - + // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); foreach($images as $image){ @@ -33,18 +33,18 @@ class TheCodingLoveBridge extends BridgeAbstract{ $auteur = $temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/ThePirateBayBridge.php b/bridges/ThePirateBayBridge.php index 35b1391..4f5650d 100644 --- a/bridges/ThePirateBayBridge.php +++ b/bridges/ThePirateBayBridge.php @@ -58,9 +58,9 @@ class ThePirateBayBridge extends BridgeAbstract{ if (!isset($param['q'])) $this->returnClientError('You must specify keywords (?q=...)'); - $keywordsList = explode(";",$param['q']); + $keywordsList = explode(";",$param['q']); foreach($keywordsList as $keywords){ - $html = $this->file_get_html('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); + $html = 
$this->getSimpleHTMLDOM('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); if ($html->find('table#searchResult', 0) == FALSE) $this->returnServerError('No result for query '.$keywords); diff --git a/bridges/TuxboardBridge.php b/bridges/TuxboardBridge.php new file mode 100644 index 0000000..301e9b6 --- /dev/null +++ b/bridges/TuxboardBridge.php @@ -0,0 +1,64 @@ +maintainer = "superbaillot.net"; + $this->name = "Tuxboard"; + $this->uri = "http://www.tuxboard.com/"; + $this->description = "Tuxboard"; + $this->update = "2014-07-08"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('article#page', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + + $html = $this->getSimpleHTMLDOM('http://www.tuxboard.com/feed/atom/') or $this->returnError('Could not request Tuxboard.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + + + } + + public function getName(){ + return 'Tuxboard'; + } + + public function getURI(){ + return 'http://www.tuxboard.com'; + } + + public function getDescription(){ + return 'Tuxboard via rss-bridge'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + } +} +?> diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 24970ca..8510806 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -9,7 +9,7 @@ class TwitterBridge extends BridgeAbstract{ 
$this->description = "Returns tweets by keyword/hashtag or user name"; $this->update = '2016-08-17'; - $this->parameters["global"] = + $this->parameters["global"] = '[ { "name" : "Hide profile pictures", @@ -48,12 +48,12 @@ class TwitterBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; if (isset($param['q'])) { /* keyword search mode */ - $html = $this->file_get_html('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ - $html = $this->file_get_html('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify a keyword (?q=...) or a Twitter username (?u=...).'); @@ -68,20 +68,20 @@ class TwitterBridge extends BridgeAbstract{ // extract username and sanitize $item->username = $tweet->getAttribute('data-screen-name'); // extract fullname (pseudonym) - $item->fullname = $tweet->getAttribute('data-name'); + $item->fullname = $tweet->getAttribute('data-name'); // get author $item->author = $item->fullname . ' (@' . $item->username . 
')'; // get avatar link - $item->avatar = $tweet->find('img', 0)->src; + $item->avatar = $tweet->find('img', 0)->src; // get TweetID $item->id = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); + // get tweet link + $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); // extract tweet timestamp $item->timestamp = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); // generate the title - $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); - + $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); + // processing content links foreach($tweet->find('a') as $link) { if($link->hasAttribute('data-expanded-url') ) { diff --git a/bridges/UnsplashBridge.php b/bridges/UnsplashBridge.php index adf7544..492de7b 100644 --- a/bridges/UnsplashBridge.php +++ b/bridges/UnsplashBridge.php @@ -44,7 +44,7 @@ class UnsplashBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/grid?page='.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/=(\d+)$/', $html->find('.pagination > a[!class]', -1)->href, $matches); diff --git a/bridges/ViadeoCompany.php b/bridges/ViadeoCompany.php index fb4badb..2eef3b6 100644 --- a/bridges/ViadeoCompany.php +++ b/bridges/ViadeoCompany.php @@ -22,7 +22,7 @@ class ViadeoCompany extends BridgeAbstract{ $html = ''; $link = 'http://www.viadeo.com/fr/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Viadeo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Viadeo.'); foreach($html->find('//*[@id="company-newsfeed"]/ul/li') as $element) { $title 
= $element->find('p', 0)->innertext; diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php index b871444..8cfb552 100644 --- a/bridges/VineBridge.php +++ b/bridges/VineBridge.php @@ -24,7 +24,7 @@ class VineBridge extends BridgeAbstract { $html = ''; $uri = 'http://vine.co/u/'.$param['u'].'?mode=list'; - $html = $this->file_get_html($uri) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for this query.'); foreach($html->find('.post') as $element) { $a = $element->find('a', 0); diff --git a/bridges/WallpaperStopBridge.php b/bridges/WallpaperStopBridge.php index 1888333..36ba674 100644 --- a/bridges/WallpaperStopBridge.php +++ b/bridges/WallpaperStopBridge.php @@ -56,7 +56,7 @@ class WallpaperStopBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'-wallpaper/'.(!empty($this->subcategory)?$this->subcategory.'-wallpaper/':'').'desktop-wallpaper-'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/-(\d+)\.html$/', $html->find('.pagination > .last', 0)->href, $matches); diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index b7739e4..c32ca8f 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -28,18 +28,18 @@ class WhydBridge extends BridgeAbstract{ { $this->request = $param['u']; if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) { // is input the userid ? 
- $html = $this->file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); } else { // input may be the username - $html = $this->file_get_html('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); for ($j = 0; $j < 5; $j++) { if (strtolower($html->find('div.user', $j)->find('a',0)->plaintext) == strtolower($this->request)) { - $html = $this->file_get_html('http://www.whyd.com' . $html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com' . 
$html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); break; } } } $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; - } + } else { $this->returnClientError('You must specify username'); diff --git a/bridges/WikipediaDEBridge.php b/bridges/WikipediaDEBridge.php new file mode 100644 index 0000000..9ccd0ca --- /dev/null +++ b/bridges/WikipediaDEBridge.php @@ -0,0 +1,48 @@ +maintainer = "cnlpete"; + $this->name = "Wikipedia DE Today's Featured Article..."; + $this->uri = "https://de.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2015-11-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://de.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://de.wikipedia.org'; + $link = '/wiki/Wikipedia:Hauptseite'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia DE.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + $element->find('div', -1)->outertext = ''; + + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + + $html2 = $this->getSimpleHTMLDOM($item->uri) or $this->returnError('Could not request Wikipedia DE '.$item->title.'.', 404); + $element2 = $html2->find('div[id=mw-content-text]', 0); + $item->content = str_replace('href="/', 'href="'.$host.'/', $element2->innertext); + + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia DE "Today\'s Featured Article"'; + } + + public function getURI(){ + return 'https://de.wikipedia.org/wiki/Wikipedia:Hauptseite'; + } + + public function getCacheDuration(){ + return 3600*8; // 8 hours + } +} diff --git a/bridges/WikipediaENBridge.php b/bridges/WikipediaENBridge.php new file mode 100644 index 0000000..ac8ab29 --- 
/dev/null +++ b/bridges/WikipediaENBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EN 'Today's Featured Article...'"; + $this->uri = "https://en.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://en.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://en.wikipedia.org'; + $link = '/wiki/Main_Page'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EN.', 404); + + $element = $html->find('div[id=mp-tfa]', 0); + // Clean the bottom of the featured article + $element->find('div', -1)->outertext = ''; + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EN "Today\'s Featued Article"'; + } + + public function getURI(){ + return 'https://en.wikipedia.org/wiki/Main_Page'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WikipediaEOBridge.php b/bridges/WikipediaEOBridge.php new file mode 100644 index 0000000..a90c503 --- /dev/null +++ b/bridges/WikipediaEOBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EO 'Artikolo de la semajno'"; + $this->uri = "https://eo.wikipedia.org/"; + $this->description = "Returns the highlighted eo.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://eo.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://eo.wikipedia.org'; + $link = '/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + + $html = 
$this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EO.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + // Link to article + $link = $element->find('p', -2)->find('a', 0); + $item = new \Item(); + $item->uri = $host.$link->href; + $item->title = $link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EO "Artikolo de la semajno"'; + } + + public function getURI(){ + return 'https://eo.wikipedia.org/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php new file mode 100644 index 0000000..8ee65cd --- /dev/null +++ b/bridges/WikipediaFRBridge.php @@ -0,0 +1,46 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia FR 'Lumière sur...'"; + $this->uri = "https://fr.wikipedia.org/"; + $this->description = "Returns the highlighted fr.wikipedia.org article."; + $this->update = "2016-06-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://fr.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://fr.wikipedia.org'; + $link = '/wiki/Wikip%C3%A9dia:Accueil_principal'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia FR.', 404); + + $element = $html->find('div[id=mf-lumieresur]', 0); + # Use the "Lire la suite" link to dependably get the title of the article + # usually it's a child of a li.BA element (Bon article) + # occasionally it's a li.AdQ (Article de qualité) + $lirelasuite_link = $element->find('.BA > i > a, .AdQ > i > a', 0); + $item = new \Item(); + $item->uri = $host.$lirelasuite_link->href; + $item->title = $lirelasuite_link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + 
$this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia FR "Lumière sur..."'; + } + + public function getURI(){ + return 'https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index f848101..ef728e5 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -33,7 +33,7 @@ class WordPressBridge extends BridgeAbstract { return WORDPRESS_TYPE_ATOM; // Make ATOM default } - // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') + // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') private function ReplaceLinkTagsWithUrlTags($element){ // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) $element_text = $element->outertext; @@ -64,7 +64,7 @@ class WordPressBridge extends BridgeAbstract { } $this->url = $this->url.'/feed/atom'; - $html = $this->file_get_html($this->url) or $this->returnServerError("Could not request {$this->url}."); + $html = $this->getSimpleHTMLDOM($this->url) or $this->returnServerError("Could not request {$this->url}."); // Notice: We requested an ATOM feed, however some sites return RSS feeds instead! 
$type = $this->DetectContentType($html); @@ -97,7 +97,7 @@ class WordPressBridge extends BridgeAbstract { $item->timestamp = strtotime($article->find('updated', 0)->innertext); } - $article_html = $this->file_get_html($item->uri); + $article_html = $this->getSimpleHTMLDOM($item->uri); // Attempt to find most common content div if(empty($item->content)){ @@ -126,7 +126,7 @@ class WordPressBridge extends BridgeAbstract { $this->items[] = $item; $i++; } - } + } } else { $this->returnServerError("Sorry, {$this->url} doesn't seem to be a Wordpress blog."); } diff --git a/bridges/WorldOfTanks.php b/bridges/WorldOfTanks.php index 77fa7b1..fcfc57b 100644 --- a/bridges/WorldOfTanks.php +++ b/bridges/WorldOfTanks.php @@ -70,15 +70,15 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{ } else { $this->uri = WORLD_OF_TANKS.$this->lang.NEWS.'pc-browser/'.$param['category']."/"; } - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); $this->message("loaded HTML from ".$this->getURI()); - // customize name + // customize name $this->name = $html->find('title', 0)->innertext; foreach($html->find('.b-imgblock_ico') as $infoLink) { $this->parseLine($infoLink); } } - + private function parseLine($infoLink) { $item = new Item(); $item->uri = WORLD_OF_TANKS.$infoLink->href; diff --git a/bridges/XbooruBridge.php b/bridges/XbooruBridge.php index 691b46c..4becacc 100644 --- a/bridges/XbooruBridge.php +++ b/bridges/XbooruBridge.php @@ -25,27 +25,27 @@ class XbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = 
urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); + $html = $this->getSimpleHTMLDOM("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://xbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Xbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/YandereBridge.php b/bridges/YandereBridge.php index 223c941..39afb25 100644 --- a/bridges/YandereBridge.php +++ b/bridges/YandereBridge.php @@ -25,18 +25,18 @@ class YandereBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); + $html = $this->getSimpleHTMLDOM("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class YandereBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Yandere | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 02ded71..52be476 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -1,6 +1,6 @@ file_get_html($this->uri."watch?v=$vid"); + $html = $this->getSimpleHTMLDOM($this->uri."watch?v=$vid"); $author = $html->innertext; $author = substr($author, strpos($author, '"author=') + 8); $author = substr($author, 0, strpos($author, '\u0026')); @@ -138,9 +138,9 @@ class YoutubeBridge extends BridgeAbstract { $url_listing = $this->uri.'channel/'.urlencode($this->request).'/videos'; } if (!empty($url_feed) && !empty($url_listing)) { - if ($xml = $this->file_get_html($url_feed)) { + if ($xml = $this->getSimpleHTMLDOM($url_feed)) { $this->ytBridgeParseXmlFeed($xml); - } else if ($html = $this->file_get_html($url_listing)) { + } else if ($html = $this->getSimpleHTMLDOM($url_listing)) { $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); } else $this->returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); } @@ -148,15 +148,15 @@ class YoutubeBridge extends BridgeAbstract { else if (isset($param['p'])) { /* playlist mode */ $this->request = $param['p']; $url_listing = $this->uri.'playlist?list='.urlencode($this->request); - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); $this->request = 'Playlist: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } else if (isset($param['s'])) { /* search mode */ - $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); $url_listing = $this->uri.'results?search_query='.urlencode($this->request).'&page='.$page.'&filters=video&search_sort=video_date_uploaded'; - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3'); $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index 0c381dc..d0a53b2 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -12,7 +12,7 @@ class ZatazBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request ' . $this->uri); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request ' . 
$this->uri); $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); foreach ($recent_posts as $article) { @@ -24,7 +24,7 @@ class ZatazBridge extends BridgeAbstract { } private function getDetails($uri) { - $html = $this->file_get_html($uri) or exit; + $html = $this->getSimpleHTMLDOM($uri) or exit; $item = new \Item(); diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index d6e6714..7d4b0be 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -31,7 +31,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { $category = '/'.$param['category'].'/'; $url = $this->getURI().$category.'rss.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); foreach($html->find('item') as $element) { $item = new \Item(); From d74beb6c6a46f00190778be6ebd6f2a522444135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:07:30 +0200 Subject: [PATCH 06/14] core: move message() method to parent class BridgeAbstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 2daca2a..c5a8ae0 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -105,6 +105,18 @@ abstract class BridgeAbstract implements BridgeInterface{ return $this; } + public function message($text) { + if(!file_exists('DEBUG')){ + return; + } + $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); + $calling = $backtrace[2]; + $message = $calling["file"].":".$calling["line"] + ." class ".get_class($this)."->".$calling["function"] + ." 
- ".$text; + error_log($message); + } + protected function getContents($url,$use_include_path=false,$context=null,$offset=0,$maxlen=null){ $contextOptions = array( 'http' => array( @@ -233,15 +245,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // filename is NO GOOD // unlink($filename); } - - public function message($text) { - $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); - $calling = $backtrace[2]; - $message = $calling["file"].":".$calling["line"] - ." class ".get_class($this)."->".$calling["function"] - ." - ".$text; - error_log($message); - } + } class Bridge{ From fe58d23c17f3cfdb7d0e2f5ee4baa7c7bf4936cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:08:48 +0200 Subject: [PATCH 07/14] core: use proxy when defined in HttpCachingBridgeAbstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index c5a8ae0..f8592f3 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -185,6 +185,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // $this->message("loading cached file from ".$filename." for page at url ".$url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); + $content=file_get_contents($filename); } else { // $this->message("we have no local copy of ".$url." 
Downloading to ".$filename); $dir = substr($filename, 0, strrpos($filename, '/')); @@ -192,11 +193,14 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // $this->message("creating directories for ".$dir); mkdir($dir, 0777, true); } - $this->download_remote($url, $filename); + $content=$this->getContents($url); + if($content!==false){ + file_put_contents($filename,$content); + } } - return file_get_contents($filename); + return $content; } - + public function get_cached_time($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); // TODO build this from the variable given to Cache From 226484ba221a368ec033fecce074a8da4ce48ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:09:37 +0200 Subject: [PATCH 08/14] core: remove useless HttpCachingBridgeAbstract::download_remote() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index f8592f3..b2010ec 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -223,23 +223,6 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } } - public function download_remote($url , $save_path) { - $f = fopen( $save_path , 'w+'); - if($f) { - $handle = fopen($url , "rb"); - if($handle) { - while (!feof($handle)) { - $contents = fread($handle, 8192); - if($contents) { - fwrite($f , $contents); - } - } - fclose($handle); - } - fclose($f); - } - } - public function remove_from_cache($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); // TODO build this from the variable given to Cache From 86515a1560c77def9404eb484e088b19fca4da64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:26:07 +0200 Subject: [PATCH 09/14] core: use proxy when 
defined in RssExpander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index b2010ec..cca4b5a 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -77,7 +77,7 @@ abstract class BridgeAbstract implements BridgeInterface{ /** * Define default bridge name - */ + */ public function getName(){ return $this->name; } @@ -166,7 +166,7 @@ abstract class BridgeAbstract implements BridgeInterface{ * After all, rss-bridge is not respaw, isn't it ? */ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { - + /** * Maintain locally cached versions of pages to download to avoid multiple doiwnloads. * A file name is generated by replacing all "/" by "_", and the file is saved below this bridge cache @@ -213,7 +213,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { $this->get_cached($url); } return filectime($filename); - } + } private function refresh_in_cache($pageCacheDir, $filename) { $currentPath = $filename; @@ -267,7 +267,7 @@ class Bridge{ } $pathBridge = self::getDir() . $nameBridge . '.php'; - + if( !file_exists($pathBridge) ){ throw new \Exception('The bridge you looking for does not exist. 
It should be at path '.$pathBridge); } @@ -349,8 +349,11 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ } // $this->message("Loading from ".$param['url']); // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time - $rssContent = simplexml_load_file($name) or $this->returnServerError('Could not request '.$name); -// $this->message("loaded RSS from ".$param['url']); + $content=$this->getContents($name) or + $this->returnServerError('Could not request '.$name); + + $rssContent = simplexml_load_string($content); + // $this->message("loaded RSS from ".$param['url']); // TODO insert RSS format detection // we suppose for now, we have some RSS 2.0 $this->collect_RSS_2_0_data($rssContent); From 6e2c7ceaf43c9e3f504fc2778c6eef23d9d1e25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Tue, 9 Aug 2016 14:57:42 +0200 Subject: [PATCH 10/14] bridges: rename file_get_html to getSimpleHTMLDOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/ArstechnicaBridge.php | 4 ++-- bridges/BastaBridge.php | 2 +- bridges/CastorusBridge.php | 22 +++++++++++----------- bridges/EstCeQuonMetEnProdBridge.php | 4 ++-- bridges/NextgovBridge.php | 6 +++--- bridges/NumeramaBridge.php | 4 ++-- bridges/SensCritiqueBridge.php | 12 ++++++------ bridges/ShanaprojectBridge.php | 4 ++-- bridges/TheHackerNewsBridge.php | 6 +++--- bridges/WeLiveSecurityBridge.php | 4 ++-- bridges/WikipediaBridge.php | 4 ++-- bridges/ZDNetBridge.php | 4 ++-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 59f0fab..a0fcb54 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract { function ExtractContent($url) { #echo $url; - $html2 = file_get_html($url); + $html2 = getSimpleHTMLDOM($url); $text = 
$html2->find("section[id='article-guts']", 0); /*foreach ($text->find('