contents.php 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. <?php
  /**
   * Read the body of a URL (or any path file_get_contents() accepts) as a string.
   *
   * When the PROXY_URL constant is defined and NOPROXY is not, the request is
   * routed through that proxy: a new stream context is created if none was
   * supplied, otherwise the proxy options are added to the supplied context.
   *
   * If the response carries a Content-Encoding header, the body is decoded
   * (gzip) or a server error is reported through returnServerError() for
   * encodings that are not handled.
   *
   * @param string        $url              Location to read.
   * @param bool          $use_include_path Passed through to file_get_contents().
   * @param resource|null $context          Optional stream context.
   * @param int           $offset           Passed through to file_get_contents().
   * @param int|null      $maxlen           Maximum number of bytes to read, null for no limit.
   * @return string|false The (decoded) body, or false when the download or the
   *                      gzip decoding failed.
   */
  function getContents(
      $url,
      $use_include_path = false,
      $context = null,
      $offset = 0,
      $maxlen = null
  ) {
      $contextOptions = array(
          'http' => array(
              'user_agent' => ini_get('user_agent')
          )
      );

      if (defined('PROXY_URL') && !defined('NOPROXY')) {
          $contextOptions['http']['proxy'] = PROXY_URL;
          $contextOptions['http']['request_fulluri'] = true;

          if (is_null($context)) {
              $context = stream_context_create($contextOptions);
          } else {
              // Best effort: if the options cannot be applied, the caller's
              // context is simply used as it was.
              stream_context_set_option($context, $contextOptions);
          }
      }

      // $maxlen is only passed when set, so that a null limit never reaches
      // file_get_contents() as an explicit argument.
      if (is_null($maxlen)) {
          $content = @file_get_contents($url, $use_include_path, $context, $offset);
      } else {
          $content = @file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
      }

      if ($content === false) {
          debugMessage('Cant\'t download ' . $url);
          // Nothing to decode; do not feed `false` into the decompression code.
          return false;
      }

      // $http_response_header is only populated by the HTTP(S) stream wrapper.
      // For local files and other wrappers there are no headers to inspect.
      if (!isset($http_response_header) || !is_array($http_response_header)) {
          return $content;
      }

      // Find the Content-Encoding header of the *final* response. When
      // redirects are followed the array holds the headers of every hop, each
      // hop starting with its own "HTTP/..." status line.
      $headerName = 'content-encoding:';
      $encodingHeader = null;
      foreach ($http_response_header as $header) {
          if (strncasecmp($header, 'HTTP/', 5) === 0) {
              $encodingHeader = null;
              continue;
          }

          // Match the header *name* only, so that headers which merely mention
          // "Content-Encoding" in their value are not misread as an encoding.
          if (strncasecmp($header, $headerName, strlen($headerName)) === 0) {
              $encodingHeader = $header;
          }
      }

      if ($encodingHeader === null) {
          return $content;
      }

      $encoding = substr($encodingHeader, strlen($headerName));

      if (stristr($encoding, 'gzip') !== false) {
          // gzdecode() parses the real gzip header (including optional fields)
          // instead of assuming a fixed 10-byte header and 8-byte trailer.
          $content = gzdecode($content);
      } elseif (stristr($encoding, 'compress') !== false
          || stristr($encoding, 'deflate') !== false
          || stristr($encoding, 'brotli') !== false
      ) {
          //TODO: compress, deflate and brotli are not supported yet
          returnServerError($encodingHeader . '=> Not implemented yet');
      } elseif (stristr($encoding, 'identity') === false) {
          returnServerError($encodingHeader . '=> Unknown compression');
      }

      return $content;
  }
  /**
   * Fetch a document with getContents() and hand the result to str_get_html().
   *
   * The first five arguments are forwarded to getContents(); the remaining
   * ones are forwarded, unchanged and in the same order, to str_get_html().
   *
   * @param string        $url              Location to read.
   * @param bool          $use_include_path Forwarded to getContents().
   * @param resource|null $context          Forwarded to getContents().
   * @param int           $offset           Forwarded to getContents().
   * @param int|null      $maxLen           Forwarded to getContents().
   * @param mixed         $lowercase        Forwarded to str_get_html().
   * @param mixed         $forceTagsClosed  Forwarded to str_get_html().
   * @param mixed         $target_charset   Forwarded to str_get_html().
   * @param mixed         $stripRN          Forwarded to str_get_html().
   * @param mixed         $defaultBRText    Forwarded to str_get_html().
   * @param mixed         $defaultSpanText  Forwarded to str_get_html().
   * @return mixed Whatever str_get_html() returns for the fetched content.
   */
  function getSimpleHTMLDOM(
      $url,
      $use_include_path = false,
      $context = null,
      $offset = 0,
      $maxLen = null,
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ) {
      $markup = getContents($url, $use_include_path, $context, $offset, $maxLen);

      return str_get_html(
          $markup,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }
  /**
   * Same as fetching and parsing a page, but backed by a local file cache so
   * that a page is not downloaded again while its cache file is still fresh.
   *
   * The cache file lives in `../cache/pages/` relative to this file and is
   * named after the SHA-1 of the URL. A cache file whose filectime() is older
   * than $duration seconds is deleted and the page is downloaded again.
   *
   * NOTE(review): a cache hit touch()es the file; on most filesystems that
   * also refreshes the change time used for the expiry check, so frequently
   * read entries may never expire - confirm this is intended.
   *
   * @param string        $url              Url to cache.
   * @param int           $duration         Lifetime of the cache file in seconds (default: 24h/86400s).
   * @param bool          $use_include_path Forwarded to getContents().
   * @param resource|null $context          Forwarded to getContents().
   * @param int           $offset           Forwarded to getContents().
   * @param int|null      $maxLen           Forwarded to getContents().
   * @param mixed         $lowercase        Forwarded to str_get_html().
   * @param mixed         $forceTagsClosed  Forwarded to str_get_html().
   * @param mixed         $target_charset   Forwarded to str_get_html().
   * @param mixed         $stripRN          Forwarded to str_get_html().
   * @param mixed         $defaultBRText    Forwarded to str_get_html().
   * @param mixed         $defaultSpanText  Forwarded to str_get_html().
   * @return mixed Whatever str_get_html() returns for the cached or downloaded content.
   */
  function getSimpleHTMLDOMCached(
      $url,
      $duration = 86400,
      $use_include_path = false,
      $context = null,
      $offset = 0,
      $maxLen = null,
      $lowercase = true,
      $forceTagsClosed = true,
      $target_charset = DEFAULT_TARGET_CHARSET,
      $stripRN = true,
      $defaultBRText = DEFAULT_BR_TEXT,
      $defaultSpanText = DEFAULT_SPAN_TEXT
  ) {
      debugMessage('Caching url ' . $url . ', duration ' . $duration);

      $filepath = __DIR__ . '/../cache/pages/' . sha1($url) . '.cache';
      debugMessage('Cache file ' . $filepath);

      // Drop the cache file once it has outlived the requested duration.
      if (file_exists($filepath)) {
          $changedAt = filectime($filepath);
          if ($changedAt < time() - $duration) {
              unlink ($filepath);
              debugMessage('Cached file deleted: ' . $filepath);
          }
      }

      if (!file_exists($filepath)) {
          // Cache miss: download the page and store it for next time.
          debugMessage('Caching ' . $url . ' to ' . $filepath);

          $lastSlash = strrpos($filepath, '/');
          $dir = substr($filepath, 0, $lastSlash);
          if (!is_dir($dir)) {
              debugMessage('Creating directory ' . $dir);
              mkdir($dir, 0777, true);
          }

          $content = getContents($url, $use_include_path, $context, $offset, $maxLen);
          if ($content !== false) {
              // Failed downloads are never written to the cache.
              file_put_contents($filepath, $content);
          }
      } else {
          // Cache hit: mark the file as recently used and read it back.
          debugMessage('Loading cached file ' . $filepath);
          touch($filepath);
          $content = file_get_contents($filepath);
      }

      return str_get_html(
          $content,
          $lowercase,
          $forceTagsClosed,
          $target_charset,
          $stripRN,
          $defaultBRText,
          $defaultSpanText
      );
  }
  127. ?>