2014-02-04 17:54:18 +01:00
|
|
|
<?php
|
2017-02-11 16:16:56 +01:00
|
|
|
/**
 * Bridge exposing the "Sexactu" column of gqmagazine.fr as feed items.
 *
 * The listing page is scraped for article titles and links, then every linked
 * article is fetched (through the cached loader) to extract its body and its
 * publication date.
 */
class SexactuBridge extends BridgeAbstract {

    const MAINTAINER = 'Riduidel';
    const NAME = 'Sexactu';
    const AUTHOR = 'Maïa Mazaurette';
    const URI = 'http://www.gqmagazine.fr';
    const CACHE_TIMEOUT = 7200; // 2h
    const DESCRIPTION = 'Sexactu via rss-bridge';

    /**
     * Attributes whose domain-relative values are made absolute in article
     * bodies. Key: attribute name as found in the markup; value: attribute
     * name written back (so 'data-original' is emitted as 'src').
     */
    const REPLACED_ATTRIBUTES = array(
        'href' => 'href',
        'src' => 'src',
        'data-original' => 'src'
    );

    /**
     * @return string URI of the listing page that is scraped
     */
    public function getURI(){
        return self::URI . '/sexactu';
    }

    /**
     * Scrapes the listing page and appends one entry to $this->items per row
     * that carries a usable title link.
     *
     * Each entry always has 'author', 'title' and 'uri'. 'content' and
     * 'timestamp' are only set when the article page could be loaded and the
     * corresponding element was found, so a layout change on the article page
     * degrades the feed instead of aborting it.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError('Could not request ' . $this->getURI());

        $sexactu = $html->find('.container_sexactu', 0);
        if(!$sexactu) {
            returnServerError('Could not find the article list on ' . $this->getURI());
            // Safety net in case the error helper returns instead of aborting.
            return;
        }

        foreach($sexactu->find('.row') as $row) {
            // Rows without a title element are not articles
            // (sometimes we get rubbish), ignore them.
            $title = $row->find('.title', 0);
            if(!$title) {
                continue;
            }

            // A missing or valueless attribute does not yield a string.
            $uri = $title->{'data-href'};
            if(!is_string($uri) || $uri === '') {
                continue;
            }

            $item = array();
            $item['author'] = self::AUTHOR;
            $item['title'] = $title->plaintext;
            $item['uri'] = $this->resolveUri($uri);

            $article = $this->loadFullArticle($item['uri']);
            if($article) {
                $content = $article->find('.article_content', 0);
                if($content) {
                    $item['content'] = $this->replaceUriInHtmlElement($content);
                }

                $publicationDate = $article->find('time[itemprop=datePublished]', 0);
                if($publicationDate && is_string($publicationDate->datetime)) {
                    $timestamp = strtotime($publicationDate->datetime);
                    if($timestamp !== false) {
                        $item['timestamp'] = $timestamp;
                    }
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Turns a link found on the listing page into an absolute URI.
     *
     * @param string $uri The raw, non-empty reference
     * @return string The absolute URI
     */
    private function resolveUri($uri){
        // Already absolute.
        if(preg_match('#^https?://#i', $uri) === 1) {
            return $uri;
        }

        // Protocol-relative ("//host/path"): only the scheme is missing.
        if(strpos($uri, '//') === 0) {
            return parse_url(self::URI, PHP_URL_SCHEME) . ':' . $uri;
        }

        // Domain-relative.
        if(strpos($uri, '/') === 0) {
            return self::URI . $uri;
        }

        // Query- or fragment-only references apply to the listing page itself.
        if($uri[0] === '?' || $uri[0] === '#') {
            return $this->getURI() . $uri;
        }

        // Path-relative: resolved against the directory of the listing page,
        // which is the site root.
        return self::URI . '/' . $uri;
    }

    /**
     * Loads the full article page and returns its main element.
     *
     * @param $uri The article URI
     * @return The '#article' element, or null when the page could not be
     * loaded or does not contain such an element
     */
    private function loadFullArticle($uri){
        $html = getSimpleHTMLDOMCached($uri);
        if(!$html) {
            return null;
        }

        $content = $html->find('#article', 0);

        return $content ? $content : null;
    }

    /**
     * Replaces all domain-relative URIs with absolute ones.
     *
     * Only attribute values starting with a single '/' are rewritten;
     * protocol-relative values ("//host/path") are left untouched because
     * prefixing them with the site root would corrupt them.
     *
     * @param $element A simplehtmldom element
     * @return The $element->innertext with all URIs replaced, or an empty
     * string when no element is given
     */
    private function replaceUriInHtmlElement($element){
        if(!$element) {
            return '';
        }

        $returned = $element->innertext;
        foreach(self::REPLACED_ATTRIBUTES as $initial => $final) {
            $replaced = preg_replace(
                '~' . preg_quote($initial, '~') . '="/(?!/)~',
                $final . '="' . self::URI . '/',
                $returned
            );

            // preg_replace() yields null on failure; keep the previous text then.
            if($replaced !== null) {
                $returned = $replaced;
            }
        }

        return $returned;
    }
}
|