contents.php 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. <?php
  /**
   * Download the body of a URL with cURL.
   *
   * The request follows redirects, sends the `user_agent` ini setting as the
   * User-Agent header and is routed through PROXY_URL when that constant is
   * defined and NOPROXY is not.
   *
   * @param string $url    URL to download
   * @param array  $header Header lines passed to CURLOPT_HTTPHEADER
   * @param array  $opts   Extra cURL options (CURLOPT_* constant => value),
   *                       applied after the defaults so they can override them
   * @return string|false  Result of curl_exec(): the response body, or false
   *                       when the transfer fails or the handle cannot be
   *                       created (a body is only returned while
   *                       CURLOPT_RETURNTRANSFER is not overridden via $opts)
   */
  function getContents($url, $header = array(), $opts = array()){
      $ch = curl_init($url);

      // curl_init() returns false on error; passing false on to curl_setopt()
      // would raise a TypeError on PHP 8 instead of reporting a failed download.
      if($ch === false) {
          debugMessage('Can\'t download ' . $url . ' (unable to initialize cURL)');
          return false;
      }

      curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
      curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent'));
      // An empty string makes cURL advertise every encoding it supports.
      curl_setopt($ch, CURLOPT_ENCODING, '');

      // Caller-supplied options come after the defaults above.
      if(is_array($opts)) {
          foreach($opts as $key => $value) {
              curl_setopt($ch, $key, $value);
          }
      }

      // The proxy is set last, so it wins over any CURLOPT_PROXY in $opts.
      if(defined('PROXY_URL') && !defined('NOPROXY')) {
          curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
      }

      $content = curl_exec($ch);

      // The error text has to be read before the handle is closed.
      $error = ($content === false) ? curl_error($ch) : '';
      curl_close($ch);

      if($content === false) {
          $reason = ($error !== '') ? ' (' . $error . ')' : '';
          debugMessage('Can\'t download ' . $url . $reason);
      }

      return $content;
  }
  /**
   * Download a URL and hand the response to str_get_html().
   *
   * $url, $header and $opts are forwarded to getContents(); every remaining
   * argument is forwarded unchanged, in the same order, to str_get_html().
   *
   * @param string $url             URL to download
   * @param array  $header          Header lines for the request
   * @param array  $opts            Extra cURL options for the request
   * @param bool   $lowercase       Forwarded to str_get_html()
   * @param bool   $forceTagsClosed Forwarded to str_get_html()
   * @param string $target_charset  Forwarded to str_get_html()
   * @param bool   $stripRN         Forwarded to str_get_html()
   * @param string $defaultBRText   Forwarded to str_get_html()
   * @param string $defaultSpanText Forwarded to str_get_html()
   * @return mixed Whatever str_get_html() returns for the downloaded content
   */
  function getSimpleHTMLDOM($url,
      $header = array(),
      $opts = array(),
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT){
      // Fetch and parse in one expression; a failed download (false) is
      // passed straight through to the parser, exactly like any other result.
      return str_get_html(
          getContents($url, $header, $opts),
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }
  /**
   * Same as getSimpleHTMLDOM(), but keeps a local copy of the downloaded page
   * so that repeated requests for the same URL avoid multiple downloads.
   *
   * A stored copy is reused only when it is younger than $duration seconds and
   * DEBUG is not defined as true; otherwise the page is downloaded again and,
   * if the download did not fail, stored for later calls.
   *
   * NOTE(review): purgeCache(86400) runs on every call regardless of $duration;
   * presumably it drops entries older than 24 hours, which would make larger
   * durations ineffective - confirm against the FileCache implementation.
   *
   * @param string $url             URL to download / cache
   * @param int    $duration        Maximum age of a reusable copy in seconds
   *                                (default: 24h/86400s)
   * @param array  $header          Header lines for the request
   * @param array  $opts            Extra cURL options for the request
   * @param bool   $lowercase       Forwarded to str_get_html()
   * @param bool   $forceTagsClosed Forwarded to str_get_html()
   * @param string $target_charset  Forwarded to str_get_html()
   * @param bool   $stripRN         Forwarded to str_get_html()
   * @param string $defaultBRText   Forwarded to str_get_html()
   * @param string $defaultSpanText Forwarded to str_get_html()
   * @return mixed Whatever str_get_html() returns for the page content
   */
  function getSimpleHTMLDOMCached($url,
      $duration = 86400,
      $header = array(),
      $opts = array(),
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT){
      debugMessage('Caching url ' . $url . ', duration ' . $duration);

      // Set up the page cache, keyed by the URL alone
      $pageCache = Cache::create('FileCache');
      $pageCache->setPath(CACHE_DIR . '/pages');
      $pageCache->purgeCache(86400); // 24 hours (forced)
      $pageCache->setParameters([$url]);

      // Work out whether the stored copy may be used
      $storedAt = $pageCache->getTime();
      $isFresh = $storedAt !== false && (time() - $duration < $storedAt);
      $debugging = defined('DEBUG') && DEBUG === true;

      if(!$isFresh || $debugging) {
          // No usable copy: download again and remember successful results
          $html = getContents($url, $header, $opts);
          if($html !== false) {
              $pageCache->saveData($html);
          }
      } else {
          // Stored copy is within duration
          $html = $pageCache->loadData();
      }

      return str_get_html(
          $html,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }