From 3dcdaa1595a80ff78222e622a544d469af51acc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?=
Date: Mon, 26 Sep 2016 00:50:26 +0200
Subject: [PATCH] [CourrierInternationalBridge] fix content parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pierre Mazière
---
 bridges/CourrierInternationalBridge.php | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php
index f91916f..010f3d8 100644
--- a/bridges/CourrierInternationalBridge.php
+++ b/bridges/CourrierInternationalBridge.php
@@ -25,14 +25,20 @@ class CourrierInternationalBridge extends BridgeAbstract{
                 $item['uri'] = self::URI.$item['uri'];
             }
 
-            $page = $this->getSimpleHTMLDOM($item['uri']);
+
+            $page = $this->getSimpleHTMLDOMCached($item['uri']);
 
             $cleaner = new HTMLSanitizer();
 
-            $item['content'] = $cleaner->sanitize($page->find("div.article-text")[0]);
-            $item['title'] = strip_tags($article->find(".title")[0]);
+            $content = $page->find('.article-text',0);
+            if(!$content){
+                $content = $page->find('.depeche-text',0);
+            }
 
-            $dateTime = date_parse($page->find("time")[0]);
+            $item['content'] = $cleaner->sanitize($content);
+            $item['title'] = strip_tags($article->find(".title",0));
+
+            $dateTime = date_parse($page->find("time",0));
             $item['timestamp'] = mktime(
                 $dateTime['hour'],