contents.php 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. <?php
  /**
   * Fetch the contents of a URL (or any path file_get_contents() accepts).
   *
   * When the constant PROXY_URL is defined and NOPROXY is not, the request is
   * sent through that proxy: the proxy options are merged into the supplied
   * stream context, or a new context is created when none was given.
   *
   * If the final HTTP response declares a Content-Encoding, gzip bodies are
   * decoded; other known encodings and unknown encodings are reported through
   * returnServerError().
   *
   * @param string $url URL or path to read
   * @param bool $use_include_path forwarded to file_get_contents()
   * @param resource|null $context optional stream context
   * @param int $offset forwarded to file_get_contents()
   * @param int|null $maxlen maximum number of bytes to read, null for no limit
   * @return string|false the (decoded) contents, or false on failure
   */
  function getContents($url
  , $use_include_path = false
  , $context = null
  , $offset = 0
  , $maxlen = null
  ){
      $contextOptions = array(
          'http' => array(
              'user_agent' => ini_get('user_agent')
          )
      );

      if(defined('PROXY_URL') && !defined('NOPROXY')){
          $contextOptions['http']['proxy'] = PROXY_URL;
          $contextOptions['http']['request_fulluri'] = true;

          if(is_null($context)){
              $context = stream_context_create($contextOptions);
          } elseif(function_exists('stream_context_set_options')){
              // PHP >= 8.3: the two-argument stream_context_set_option() is deprecated.
              // Best effort: on failure the caller's context is used as it is.
              stream_context_set_options($context, $contextOptions);
          } else {
              stream_context_set_option($context, $contextOptions);
          }
      }

      // $maxlen is only passed when set: older PHP versions treat an explicit
      // null as a length of 0.
      if(is_null($maxlen)){
          $content = @file_get_contents($url, $use_include_path, $context, $offset);
      } else {
          $content = @file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
      }

      if($content === false){
          debugMessage('Can\'t download ' . $url);
          // Nothing to decode; callers test for false.
          return false;
      }

      // $http_response_header only exists after a request through the HTTP
      // wrapper; local files and other wrappers never define it.
      if(!isset($http_response_header) || !is_array($http_response_header)){
          return $content;
      }

      // Find the Content-Encoding of the *final* response. When redirects are
      // followed, the header list holds every response in the chain, each one
      // starting with its own status line.
      $encoding = null;
      $encodingHeader = null;
      $headerName = 'content-encoding:';
      foreach($http_response_header as $header){
          if(stripos($header, 'HTTP/') === 0){
              $encoding = null;
              $encodingHeader = null;
              continue;
          }

          // Match on the header name only, not on any header mentioning it.
          if(stripos($header, $headerName) === 0){
              $encoding = strtolower(trim(substr($header, strlen($headerName))));
              $encodingHeader = $header;
          }
      }

      if($encoding === null){
          return $content;
      }

      // handle compressed data
      switch(true){
          case strpos($encoding, 'gzip') !== false:
              // gzdecode() parses the whole gzip header (including optional
              // fields) instead of assuming a fixed 10 byte header.
              if($content !== ''){
                  $content = gzdecode($content);
              }
              break;
          case strpos($encoding, 'compress') !== false:
              //TODO
          case strpos($encoding, 'deflate') !== false:
              //TODO
          case $encoding === 'br': // registered token for Brotli
          case strpos($encoding, 'brotli') !== false:
              //TODO
              returnServerError($encodingHeader . '=> Not implemented yet');
              break;
          case strpos($encoding, 'identity') !== false:
              break;
          default:
              returnServerError($encodingHeader . '=> Unknown compression');
      }

      return $content;
  }
  /**
   * Download a URL with getContents() and hand the result to str_get_html().
   *
   * The first five parameters are forwarded unchanged to getContents(); the
   * remaining ones are forwarded unchanged to str_get_html(), which is not
   * defined in this file (presumably the simple_html_dom parser - verify).
   *
   * @param string $url URL to download
   * @param bool $use_include_path forwarded to getContents()
   * @param resource|null $context forwarded to getContents()
   * @param int $offset forwarded to getContents()
   * @param int|null $maxLen forwarded to getContents()
   * @param bool $lowercase forwarded to str_get_html()
   * @param bool $forceTagsClosed forwarded to str_get_html()
   * @param string $target_charset forwarded to str_get_html()
   * @param bool $stripRN forwarded to str_get_html()
   * @param string $defaultBRText forwarded to str_get_html()
   * @param string $defaultSpanText forwarded to str_get_html()
   * @return mixed whatever str_get_html() returns for the downloaded contents
   */
  function getSimpleHTMLDOM(
      $url,
      $use_include_path = false,
      $context = null,
      $offset = 0,
      $maxLen = null,
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ){
      // Step 1: fetch the raw page.
      $html = getContents($url, $use_include_path, $context, $offset, $maxLen);

      // Step 2: let the parser build its result from the raw page.
      $dom = str_get_html(
          $html,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );

      return $dom;
  }
  /**
   * Same as getSimpleHTMLDOM(), but keeps a local copy of the downloaded page
   * in a 'FileCache' under CACHE_DIR/pages to avoid repeated downloads.
   *
   * The stored copy is used when it is younger than $duration seconds and
   * DEBUG is not defined as true; otherwise the page is downloaded again and,
   * if the download succeeded, stored. Regardless of $duration, the cache is
   * asked to purge with a fixed 86400 seconds (24 hours) on every call.
   *
   * NOTE(review): only the URL is passed to the cache as parameters, so
   * $offset and $maxLen do not appear to be part of the cache identity -
   * confirm against the Cache implementation.
   *
   * @param string $url URL to download and cache
   * @param int $duration maximum age of the cached copy in seconds (default: 24h/86400s)
   * @param bool $use_include_path forwarded to getContents()
   * @param resource|null $context forwarded to getContents()
   * @param int $offset forwarded to getContents()
   * @param int|null $maxLen forwarded to getContents()
   * @param bool $lowercase forwarded to str_get_html()
   * @param bool $forceTagsClosed forwarded to str_get_html()
   * @param string $target_charset forwarded to str_get_html()
   * @param bool $stripRN forwarded to str_get_html()
   * @param string $defaultBRText forwarded to str_get_html()
   * @param string $defaultSpanText forwarded to str_get_html()
   * @return mixed whatever str_get_html() returns for the page contents
   */
  function getSimpleHTMLDOMCached(
      $url,
      $duration = 86400,
      $use_include_path = false,
      $context = null,
      $offset = 0,
      $maxLen = null,
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ){
      debugMessage('Caching url ' . $url . ', duration ' . $duration);

      // Set up the page cache
      $pageCache = Cache::create('FileCache');
      $pageCache->setPath(CACHE_DIR . '/pages');
      $pageCache->purgeCache(86400); // 24 hours (forced)

      $cacheParams = [$url];
      $pageCache->setParameters($cacheParams);

      // Decide whether the stored copy may be used
      $cachedAt = $pageCache->getTime();
      $debugging = defined('DEBUG') && DEBUG === true;
      $isFresh = $cachedAt !== false && (time() - $duration < $cachedAt);

      if($isFresh && !$debugging){
          // Stored copy is recent enough
          $html = $pageCache->loadData();
      } else {
          // No usable copy: download, and keep the result only on success
          $html = getContents($url, $use_include_path, $context, $offset, $maxLen);

          if($html !== false){
              $pageCache->saveData($html);
          }
      }

      return str_get_html(
          $html,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }
  123. ?>