From b48a44c9793b872317fc9f806348b6c9d4b7fcec Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 19 Mar 2017 14:07:25 +0100 Subject: [PATCH] [UsbekEtRicaBridge] Add new bridge Adds a new bridge to fetch contents from https://usbeketrica.com/ Feeds are build from cards displayed on the front page This bridge provides two options: - limit: Defines how many articles are returned - fullarticle: Defines whether or not the full article is retured Requested via #457 --- bridges/UsbekEtRicaBridge.php | 110 ++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 bridges/UsbekEtRicaBridge.php diff --git a/bridges/UsbekEtRicaBridge.php b/bridges/UsbekEtRicaBridge.php new file mode 100644 index 0000000..27d847a --- /dev/null +++ b/bridges/UsbekEtRicaBridge.php @@ -0,0 +1,110 @@ + array( + 'name' => 'Number of articles to return', + 'type' => 'number', + 'required' => false, + 'title' => 'Specifies the maximum number of articles to return', + 'defaultValue' => -1 + ), + 'fullarticle' => array( + 'name' => 'Load full article', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Activate to load full articles', + ) + ) + ); + + public function collectData(){ + $limit = $this->getInput('limit'); + $fullarticle = $this->getInput('fullarticle'); + $html = getSimpleHTMLDOM($this->getURI()); + + $articles = $html->find('div.details'); + + foreach($articles as $article){ + $item = array(); + + $title = $article->find('div.card-title', 0); + if($title){ + $item['title'] = $title->plaintext; + } else { + // Sometimes we get rubbish, ignore. + continue; + } + + $author = $article->find('div.author span', 0); + if($author){ + $item['author'] = $author->plaintext; + } + + $uri = $article->find('a.read', 0)->href; + if(substr($uri, 0, 1) === 'h'){ // absolute uri + $item['uri'] = $uri; + } else { // relative uri + $item['uri'] = $this->getURI() . $uri; + } + + if($fullarticle){ + $content = $this->loadFullArticle($item['uri']); + } + + if($fullarticle && !is_null($content)){ + $item['content'] = $content; + } else { + $excerpt = $article->find('div.card-excerpt', 0); + if($excerpt){ + $item['content'] = $excerpt->plaintext; + } + } + + $image = $article->find('div.card-img img', 0); + if($image){ + $item['enclosures'] = array( + $image->src + ); + } + + $this->items[] = $item; + + if($limit > 0 && count($this->items) >= $limit){ + break; + } + } + } + + /** + * Loads the full article and returns the contents + * @param $uri The article URI + * @return The article content + */ + private function loadFullArticle($uri){ + $html = getSimpleHTMLDOMCached($uri); + + $content = $html->find('section.main', 0); + if($content){ + return $this->replaceUriInHtmlElement($content); + } + + return null; + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element){ + return str_replace('href="/', 'href="' . $this->getURI() . '/', $element->innertext); + } + +}