contents.php 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. <?php
  2. function getContents($url,
  3. $use_include_path = false,
  4. $context = null,
  5. $offset = 0,
  6. $maxlen = null){
  7. $contextOptions = array(
  8. 'http' => array(
  9. 'user_agent' => ini_get('user_agent'),
  10. 'accept_encoding' => 'gzip'
  11. )
  12. );
  13. if(defined('PROXY_URL') && !defined('NOPROXY')){
  14. $contextOptions['http']['proxy'] = PROXY_URL;
  15. $contextOptions['http']['request_fulluri'] = true;
  16. if(is_null($context)){
  17. $context = stream_context_create($contextOptions);
  18. } else {
  19. $prevContext = $context;
  20. if(!stream_context_set_option($context, $contextOptions)){
  21. $context = $prevContext;
  22. }
  23. }
  24. }
  25. if(is_null($maxlen)){
  26. $content = file_get_contents($url, $use_include_path, $context, $offset);
  27. } else {
  28. $content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
  29. }
  30. if($content === false)
  31. debugMessage('Cant\'t download ' . $url);
  32. // handle compressed data
  33. foreach($http_response_header as $header){
  34. if(stristr($header, 'content-encoding')){
  35. switch(true){
  36. case stristr($header, 'gzip'):
  37. $content = gzinflate(substr($content, 10, -8));
  38. break;
  39. case stristr($header, 'compress'):
  40. //TODO
  41. case stristr($header, 'deflate'):
  42. //TODO
  43. case stristr($header, 'brotli'):
  44. //TODO
  45. returnServerError($header . '=> Not implemented yet');
  46. break;
  47. case stristr($header, 'identity'):
  48. break;
  49. default:
  50. returnServerError($header . '=> Unknown compression');
  51. }
  52. }
  53. }
  54. return $content;
  55. }
  56. function getSimpleHTMLDOM($url,
  57. $use_include_path = false,
  58. $context = null,
  59. $offset = 0,
  60. $maxLen = null,
  61. $lowercase = true,
  62. $forceTagsClosed = true,
  63. $target_charset = DEFAULT_TARGET_CHARSET,
  64. $stripRN = true,
  65. $defaultBRText = DEFAULT_BR_TEXT,
  66. $defaultSpanText = DEFAULT_SPAN_TEXT){
  67. $content = getContents($url, $use_include_path, $context, $offset, $maxLen);
  68. return str_get_html($content,
  69. $lowercase,
  70. $forceTagsClosed,
  71. $target_charset,
  72. $stripRN,
  73. $defaultBRText,
  74. $defaultSpanText);
  75. }
  76. /**
  77. * Maintain locally cached versions of pages to avoid multiple downloads.
  78. * @param url url to cache
  79. * @param duration duration of the cache file in seconds (default: 24h/86400s)
  80. * @return content of the file as string
  81. */
  82. function getSimpleHTMLDOMCached($url,
  83. $duration = 86400,
  84. $use_include_path = false,
  85. $context = null,
  86. $offset = 0,
  87. $maxLen = null,
  88. $lowercase = true,
  89. $forceTagsClosed = true,
  90. $target_charset = DEFAULT_TARGET_CHARSET,
  91. $stripRN = true,
  92. $defaultBRText = DEFAULT_BR_TEXT,
  93. $defaultSpanText = DEFAULT_SPAN_TEXT){
  94. debugMessage('Caching url ' . $url . ', duration ' . $duration);
  95. // Initialize cache
  96. $cache = Cache::create('FileCache');
  97. $cache->setPath(CACHE_DIR . '/pages');
  98. $cache->purgeCache(86400); // 24 hours (forced)
  99. $params = [$url];
  100. $cache->setParameters($params);
  101. // Determine if cached file is within duration
  102. $time = $cache->getTime();
  103. if($time !== false
  104. && (time() - $duration < $time)
  105. && (!defined('DEBUG') || DEBUG !== true)){ // Contents within duration
  106. $content = $cache->loadData();
  107. } else { // Content not within duration
  108. $content = getContents($url, $use_include_path, $context, $offset, $maxLen);
  109. if($content !== false){
  110. $cache->saveData($content);
  111. }
  112. }
  113. return str_get_html($content,
  114. $lowercase,
  115. $forceTagsClosed,
  116. $target_charset,
  117. $stripRN,
  118. $defaultBRText,
  119. $defaultSpanText);
  120. }