contents.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. <?php
  /**
   * Download the body of a URL with cURL.
   *
   * Defaults applied before the caller's options: the response is returned
   * instead of printed, redirects are followed, the user agent is taken from
   * the `user_agent` ini setting, CURLOPT_ENCODING is set to an empty string
   * (which, per the PHP manual, advertises every encoding cURL supports) and
   * the transfer is limited to HTTP and HTTPS.
   *
   * @param string $url    Address to download.
   * @param array  $header Request header lines for CURLOPT_HTTPHEADER; used
   *                       only when it is a non-empty array.
   * @param array  $opts   Extra cURL options as option => value pairs. They
   *                       are applied after the defaults above and can
   *                       therefore override them.
   *
   * @return string|false Result of curl_exec(): the response body, or false
   *                      when the handle could not be created or the
   *                      transfer failed (a debug message is logged).
   */
  function getContents($url, $header = array(), $opts = array())
  {
      $ch = curl_init($url);

      // curl_init() can fail; without this guard every call below would
      // receive `false` instead of a handle.
      if ($ch === false) {
          debugMessage('Can\'t download ' . $url . ' cUrl error: unable to initialize cURL handle');

          return false;
      }

      curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

      if (is_array($header) && count($header) !== 0) {
          curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
      }

      curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent'));
      curl_setopt($ch, CURLOPT_ENCODING, '');
      curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

      // Caller-supplied options come after the defaults so they win on conflict.
      if (is_array($opts)) {
          foreach ($opts as $key => $value) {
              curl_setopt($ch, $key, $value);
          }
      }

      // The configured proxy is set last, so it takes precedence over any
      // CURLOPT_PROXY passed in $opts unless NOPROXY is defined.
      if (defined('PROXY_URL') && !defined('NOPROXY')) {
          curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
      }

      $content = curl_exec($ch);

      // Error details must be read before the handle is closed.
      $curlError = curl_error($ch);
      $curlErrno = curl_errno($ch);
      curl_close($ch);

      if ($content === false) {
          debugMessage('Can\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
      }

      return $content;
  }
  /**
   * Download a page and pass its body to str_get_html().
   *
   * The result of getContents() is handed over as-is, including `false`
   * when the download failed.
   *
   * @param string $url    Address to download.
   * @param array  $header Header lines forwarded to getContents().
   * @param array  $opts   cURL options forwarded to getContents().
   * @param mixed  $lowercase       Forwarded unchanged to str_get_html().
   * @param mixed  $forceTagsClosed Forwarded unchanged to str_get_html().
   * @param mixed  $target_charset  Forwarded unchanged to str_get_html().
   * @param mixed  $stripRN         Forwarded unchanged to str_get_html().
   * @param mixed  $defaultBRText   Forwarded unchanged to str_get_html().
   * @param mixed  $defaultSpanText Forwarded unchanged to str_get_html().
   *
   * @return mixed Whatever str_get_html() returns for the downloaded content.
   */
  function getSimpleHTMLDOM(
      $url,
      $header = array(),
      $opts = array(),
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ) {
      $html = getContents($url, $header, $opts);

      return str_get_html(
          $html,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }
  /**
   * Same as downloading a page and parsing it with str_get_html(), but keeps a
   * local copy of the page body in a file cache to avoid repeated downloads.
   *
   * The cached copy is used only when it exists, is younger than $duration
   * seconds and the DEBUG constant is not defined as `true`. Otherwise the
   * page is downloaded again and, if the download succeeded, written back to
   * the cache.
   *
   * NOTE(review): the cache is purged with a fixed 86400 on every call,
   * independent of $duration. Presumably that drops entries older than 24
   * hours, which would make longer durations ineffective. Confirm against
   * the Cache implementation.
   *
   * @param string $url      Address to download and cache.
   * @param int    $duration Maximum age of the cached copy in seconds
   *                         (default: 86400, i.e. 24 hours).
   * @param array  $header   Header lines forwarded to getContents().
   * @param array  $opts     cURL options forwarded to getContents().
   * @param mixed  $lowercase       Forwarded unchanged to str_get_html().
   * @param mixed  $forceTagsClosed Forwarded unchanged to str_get_html().
   * @param mixed  $target_charset  Forwarded unchanged to str_get_html().
   * @param mixed  $stripRN         Forwarded unchanged to str_get_html().
   * @param mixed  $defaultBRText   Forwarded unchanged to str_get_html().
   * @param mixed  $defaultSpanText Forwarded unchanged to str_get_html().
   *
   * @return mixed Whatever str_get_html() returns for the cached or freshly
   *               downloaded content.
   */
  function getSimpleHTMLDOMCached(
      $url,
      $duration = 86400,
      $header = array(),
      $opts = array(),
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ) {
      debugMessage('Caching url ' . $url . ', duration ' . $duration);

      // Set up the page cache; the URL alone identifies the entry.
      $pageCache = Cache::create('FileCache');
      $pageCache->setPath(CACHE_DIR . '/pages');
      $pageCache->purgeCache(86400); // 24 hours (forced)
      $cacheKey = [$url];
      $pageCache->setParameters($cacheKey);

      // Decide whether the stored copy may be used.
      $cachedAt = $pageCache->getTime();
      $isFresh = $cachedAt !== false && (time() - $duration < $cachedAt);
      $debugging = defined('DEBUG') && DEBUG === true;

      if (!$isFresh || $debugging) {
          // No usable copy (or debug mode): download, and store only on success.
          $html = getContents($url, $header, $opts);
          if ($html !== false) {
              $pageCache->saveData($html);
          }
      } else {
          $html = $pageCache->loadData();
      }

      return str_get_html(
          $html,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }