contents.php 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. <?php
  2. function getContents($url, $header = array(), $opts = array()){
  3. $ch = curl_init($url);
  4. curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  5. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  6. if(is_array($header) && count($header) !== 0)
  7. curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
  8. curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent'));
  9. curl_setopt($ch, CURLOPT_ENCODING, '');
  10. curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
  11. if(is_array($opts)) {
  12. foreach($opts as $key => $value) {
  13. curl_setopt($ch, $key, $value);
  14. }
  15. }
  16. if(defined('PROXY_URL') && !defined('NOPROXY')) {
  17. curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
  18. }
  19. // We always want the resonse header as part of the data!
  20. curl_setopt($ch, CURLOPT_HEADER, true);
  21. $data = curl_exec($ch);
  22. $curlError = curl_error($ch);
  23. $curlErrno = curl_errno($ch);
  24. if($data === false)
  25. debugMessage('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
  26. $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
  27. $header = substr($data, 0, $headerSize);
  28. $headers = parseResponseHeader($header);
  29. $finalHeader = end($headers);
  30. if(array_key_exists('http_code', $finalHeader)
  31. && strpos($finalHeader['http_code'], '200') === false
  32. && array_key_exists('Server', $finalHeader)
  33. && strpos($finalHeader['Server'], 'cloudflare') !== false) {
  34. returnServerError(<<< EOD
  35. The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!<br>
  36. If this error persists longer than a week, please consider opening an issue on GitHub!
  37. EOD
  38. );
  39. }
  40. curl_close($ch);
  41. return substr($data, $headerSize);
  42. }
  43. function getSimpleHTMLDOM($url,
  44. $header = array(),
  45. $opts = array(),
  46. $lowercase = true,
  47. $forceTagsClosed = true,
  48. $target_charset = DEFAULT_TARGET_CHARSET,
  49. $stripRN = true,
  50. $defaultBRText = DEFAULT_BR_TEXT,
  51. $defaultSpanText = DEFAULT_SPAN_TEXT){
  52. $content = getContents($url, $header, $opts);
  53. return str_get_html($content,
  54. $lowercase,
  55. $forceTagsClosed,
  56. $target_charset,
  57. $stripRN,
  58. $defaultBRText,
  59. $defaultSpanText);
  60. }
  61. /**
  62. * Maintain locally cached versions of pages to avoid multiple downloads.
  63. * @param url url to cache
  64. * @param duration duration of the cache file in seconds (default: 24h/86400s)
  65. * @return content of the file as string
  66. */
  67. function getSimpleHTMLDOMCached($url,
  68. $duration = 86400,
  69. $header = array(),
  70. $opts = array(),
  71. $lowercase = true,
  72. $forceTagsClosed = true,
  73. $target_charset = DEFAULT_TARGET_CHARSET,
  74. $stripRN = true,
  75. $defaultBRText = DEFAULT_BR_TEXT,
  76. $defaultSpanText = DEFAULT_SPAN_TEXT){
  77. debugMessage('Caching url ' . $url . ', duration ' . $duration);
  78. // Initialize cache
  79. $cache = Cache::create('FileCache');
  80. $cache->setPath(CACHE_DIR . '/pages');
  81. $cache->purgeCache(86400); // 24 hours (forced)
  82. $params = [$url];
  83. $cache->setParameters($params);
  84. // Determine if cached file is within duration
  85. $time = $cache->getTime();
  86. if($time !== false
  87. && (time() - $duration < $time)
  88. && (!defined('DEBUG') || DEBUG !== true)) { // Contents within duration
  89. $content = $cache->loadData();
  90. } else { // Content not within duration
  91. $content = getContents($url, $header, $opts);
  92. if($content !== false) {
  93. $cache->saveData($content);
  94. }
  95. }
  96. return str_get_html($content,
  97. $lowercase,
  98. $forceTagsClosed,
  99. $target_charset,
  100. $stripRN,
  101. $defaultBRText,
  102. $defaultSpanText);
  103. }
  104. /**
  105. * Parses the provided response header into an associative array
  106. *
  107. * Based on https://stackoverflow.com/a/18682872
  108. */
  109. function parseResponseHeader($header) {
  110. $headers = array();
  111. $requests = explode("\r\n\r\n", trim($header));
  112. foreach ($requests as $request) {
  113. $header = array();
  114. foreach (explode("\r\n", $request) as $i => $line) {
  115. if($i === 0) {
  116. $header['http_code'] = $line;
  117. } else {
  118. list ($key, $value) = explode(': ', $line);
  119. $header[$key] = $value;
  120. }
  121. }
  122. $headers[] = $header;
  123. }
  124. return $headers;
  125. }