SexactuBridge.php 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. <?php
  /**
   * Bridge that builds a feed from the "Sexactu" listing page of gqmagazine.fr.
   *
   * The listing page is scraped for article rows; each linked article is then
   * downloaded (through the cached loader) to extract its content and its
   * publication date.
   */
  class SexactuBridge extends BridgeAbstract {

      const MAINTAINER = 'Riduidel';
      const NAME = 'Sexactu';
      const AUTHOR = 'Maïa Mazaurette';
      const URI = 'http://www.gqmagazine.fr';
      const CACHE_TIMEOUT = 7200; // 2h
      const DESCRIPTION = 'Sexactu via rss-bridge';

      // Maps an attribute found in the article markup to the attribute that
      // is emitted once its site-relative value has been made absolute.
      const REPLACED_ATTRIBUTES = array(
          'href' => 'href',
          'src' => 'src',
          'data-original' => 'src'
      );

      /**
       * @return string URI of the listing page this bridge scrapes
       */
      public function getURI(){
          return self::URI . '/sexactu';
      }

      /**
       * Scrapes the listing page and appends one entry to $this->items for
       * every row that carries a title with a usable 'data-href' attribute.
       *
       * Items whose full article cannot be loaded or parsed are still emitted,
       * only without 'content' and/or 'timestamp'.
       */
      public function collectData(){
          $html = getSimpleHTMLDOM($this->getURI())
              or returnServerError('Could not request ' . $this->getURI());

          $sexactu = $html->find('.container_sexactu', 0);
          if(!$sexactu) {
              // Without this guard a layout change would surface as a call to
              // find() on a non-object instead of a readable error.
              returnServerError('Could not find article list on ' . $this->getURI());
          }

          foreach($sexactu->find('.row') as $row) {
              // Rows without a title are not articles (e.g. the list that
              // only contains page numbers), so they are ignored.
              $title = $row->find('.title', 0);
              if(!$title) {
                  continue;
              }

              // The attribute name contains a dash, hence the brace syntax.
              $uri = $title->{'data-href'};
              if(!is_string($uri) || $uri === '') {
                  // Attribute missing or valueless: nothing to link to.
                  continue;
              }

              $item = array();
              $item['author'] = self::AUTHOR;
              $item['title'] = $title->plaintext;
              $item['uri'] = $this->makeAbsoluteUri($uri);

              // loadFullArticle() returns null when the article body is
              // missing, so every access below has to be guarded.
              $article = $this->loadFullArticle($item['uri']);
              if($article) {
                  $item['content'] = $this->replaceUriInHtmlElement(
                      $article->find('.article_content', 0)
                  );

                  $publicationDate = $article->find('time[itemprop=datePublished]', 0);
                  if($publicationDate && is_string($publicationDate->datetime)) {
                      $timestamp = strtotime($publicationDate->datetime);
                      // strtotime() yields false on unparsable input; do not
                      // publish that as a timestamp.
                      if($timestamp !== false) {
                          $item['timestamp'] = $timestamp;
                      }
                  }
              }

              $this->items[] = $item;
          }
      }

      /**
       * Turns a link taken from the listing page into an absolute URI.
       *
       * @param string $uri Absolute, domain-relative ('/...') or relative link
       * @return string The absolute URI
       */
      private function makeAbsoluteUri($uri){
          // Checking for a real scheme: testing only the first letter for 'h'
          // would also match relative paths that merely start with that letter.
          if(preg_match('#^https?://#i', $uri) === 1) {
              return $uri;
          }

          if(substr($uri, 0, 1) === '/') { // domain relative url
              return self::URI . $uri;
          }

          // Relative link: a separator is needed, otherwise the path is glued
          // directly onto '/sexactu'.
          return $this->getURI() . '/' . $uri;
      }

      /**
       * Loads the full article and returns the element holding its contents.
       *
       * @param string $uri The article URI
       * @return mixed The '#article' element, or null when the page could not
       *               be loaded or does not contain such an element
       */
      private function loadFullArticle($uri){
          $html = getSimpleHTMLDOMCached($uri);
          if(!$html) {
              return null;
          }

          $content = $html->find('#article', 0);
          return $content ? $content : null;
      }

      /**
       * Replaces all site-relative URIs with absolute ones.
       *
       * For every entry of REPLACED_ATTRIBUTES, values starting with a single
       * '/' are prefixed with the site URI and the attribute is renamed to its
       * mapped name. Protocol-relative values ('//host/...') already designate
       * a host and are left untouched.
       *
       * @param mixed $element A simplehtmldom element, or null if none was found
       * @return string The $element->innertext with all URIs replaced, or an
       *                empty string when there is no element
       */
      private function replaceUriInHtmlElement($element){
          if(!$element) {
              return '';
          }

          $returned = (string)$element->innertext;
          foreach(self::REPLACED_ATTRIBUTES as $initial => $final) {
              // (?!/) keeps '="//cdn.example/...' from being rewritten into
              // '="http://www.gqmagazine.fr//cdn.example/...'.
              $pattern = '#' . preg_quote($initial, '#') . '="/(?!/)#';
              $rewritten = preg_replace($pattern, $final . '="' . self::URI . '/', $returned);
              if($rewritten !== null) { // null signals a PCRE failure; keep the previous text
                  $returned = $rewritten;
              }
          }

          return $returned;
      }
  }