SexactuBridge.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. <?php
  /**
   * Bridge that turns the "Sexactu" listing page of gqmagazine.fr into feed items.
   *
   * collectData() downloads the page returned by getURI(), walks every
   * `.content-holder` element and creates one item (title, uri, timestamp,
   * author, content) for each `<li>` of the first `<ul>` found inside it.
   */
  class SexactuBridge extends BridgeAbstract {
      const MAINTAINER = 'Riduidel';
      const NAME = 'Sexactu';
      const URI = 'https://www.gqmagazine.fr';
      const CACHE_TIMEOUT = 7200; // 2h
      const DESCRIPTION = 'Sexactu via rss-bridge';

      /**
       * French month name => English month name, as understood by strtotime().
       *
       * Kept as an explicit key => value map (instead of two parallel lists) so
       * that a missing month can never silently shift the remaining translations.
       * Keys are lowercase because the date text is lowercased before lookup.
       */
      private static $frenchMonths = array(
          'janvier' => 'January',
          'février' => 'February',
          'mars' => 'March',
          'avril' => 'April',
          'mai' => 'May',
          'juin' => 'June',
          'juillet' => 'July',
          'août' => 'August',
          'septembre' => 'September',
          'octobre' => 'October',
          'novembre' => 'November',
          'décembre' => 'December'
      );

      /**
       * Fetches the listing page and appends one entry to $this->items per article.
       *
       * List entries whose markup lacks the expected title block are skipped
       * instead of aborting the whole feed.
       */
      public function collectData(){
          $html = getSimpleHTMLDOM($this->getURI())
              or returnServerError('Could not request ' . $this->getURI());

          foreach($html->find('.content-holder') as $contentHolder){
              // only use first list as second one only contains pages numbers
              $articles = $contentHolder->find('ul', 0);
              if(!is_object($articles)){
                  // holder without any list: nothing to collect here
                  continue;
              }

              foreach($articles->find('li') as $element){
                  // simple html dom can hand back entries that are not objects,
                  // see http://stackoverflow.com/a/10828479/15619
                  if(!is_object($element)){
                      continue;
                  }

                  $item = $this->buildItem($element);
                  if($item !== null){
                      $this->items[] = $item;
                  }
              }
          }
      }

      /**
       * Returns the address of the page scraped by collectData().
       *
       * @return string
       */
      public function getURI(){
          return self::URI . '/sexactu';
      }

      /**
       * Builds a feed item from one `<li>` element of the article list.
       *
       * @param object $element simple html dom node of the list entry
       * @return array|null the item, or null when the entry has no
       *                    `.title-holder` / `.article-title` / `h2 a` chain
       */
      private function buildItem($element){
          $titleBlock = $element->find('.title-holder', 0);
          if(!is_object($titleBlock)){
              return null;
          }

          $titleDetails = $titleBlock->find('.article-title', 0);
          if(!is_object($titleDetails)){
              return null;
          }

          $heading = $titleDetails->find('h2', 0);
          $titleData = is_object($heading) ? $heading->find('a', 0) : null;
          if(!is_object($titleData)){
              return null;
          }

          $item = array();
          $item['title'] = $this->correctCase(trim($titleData->innertext));
          $item['uri'] = self::URI . $titleData->href;

          // The date is optional: only set it when the node exists and parses.
          $titleTimestamp = $titleDetails->find('h4', 0);
          if(is_object($titleTimestamp)){
              $date = $this->parseFrenchDate($titleTimestamp->innertext);
              if($date !== false){
                  $item['timestamp'] = $date;
              }
          }

          $item['author'] = 'Maïa Mazaurette';

          $elementText = $element->find('.text-container', 0);
          if(is_object($elementText)){
              // don't forget to replace images server url with gq one
              foreach($elementText->find('img') as $image){
                  $image->src = self::URI . $image->src;
              }
              $item['content'] = $elementText->innertext;
          }

          return $item;
      }

      /**
       * Converts the French date text of an article into a Unix timestamp.
       *
       * Everything up to and including the first comma is discarded when a
       * comma is present; the remainder is lowercased, its French month name is
       * translated to English and the result is handed to strtotime().
       * Done by hand because the php intl extension may not be available.
       *
       * @param string $dateText raw text of the date node
       * @return int|false timestamp, or false when strtotime() cannot parse it
       */
      private function parseFrenchDate($dateText){
          $commaPosition = strpos($dateText, ',');
          if($commaPosition !== false){
              // without this guard a missing comma would chop the first character
              $dateText = substr($dateText, $commaPosition + 1);
          }

          // multibyte-aware lowercasing so that accented capitals match the map keys
          $dateText = mb_strtolower($dateText, 'UTF-8');

          return strtotime(strtr($dateText, self::$frenchMonths));
      }

      /**
       * Normalises the case of a title: everything lowercase, first letter of
       * each sentence uppercase.
       *
       * The text is split on '.', each piece is trimmed and capitalised, and the
       * pieces are joined back with '. '.
       *
       * @param string $str title text, UTF-8 encoded
       * @return string
       */
      private function correctCase($str){
          $sentences = explode('.', mb_strtolower($str, 'UTF-8'));
          $str = '';
          $sep = '';
          foreach($sentences as $sentence){
              $sentence = trim($sentence);
              // upper case first char (multibyte-aware, ucfirst() would leave
              // an accented first letter untouched)
              $sentence = mb_strtoupper(mb_substr($sentence, 0, 1, 'UTF-8'), 'UTF-8')
                  . mb_substr($sentence, 1, null, 'UTF-8');
              // append sentence to output
              $str = $str . $sep . $sentence;
              $sep = '. ';
          }
          return $str;
      }
  }