IPBBridge.php 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. <?php
  2. class IPBBridge extends FeedExpander {
  3. const NAME = 'IPB Bridge';
  4. const URI = 'https://www.invisionpower.com';
  5. const DESCRIPTION = 'Returns feeds for forums powered by IPB';
  6. const MAINTAINER = 'logmanoriginal';
  7. const PARAMETERS = array(
  8. array(
  9. 'uri' => array(
  10. 'name' => 'URI',
  11. 'type' => 'text',
  12. 'required' => true,
  13. 'title' => 'Insert forum, subforum or topic URI',
  14. 'exampleValue' => 'https://invisioncommunity.com/forums/forum/499-feedback-and-ideas/'
  15. ),
  16. 'limit' => array(
  17. 'name' => 'Limit',
  18. 'type' => 'number',
  19. 'required' => false,
  20. 'title' => 'Specifies the number of items to return on each request (-1: all)',
  21. 'defaultValue' => 10
  22. )
  23. )
  24. );
  25. const CACHE_TIMEOUT = 3600;
  26. // Constants for internal use
  27. const FORUM_TYPE_LIST_FILTER = '.cForumTopicTable';
  28. const FORUM_TYPE_TABLE_FILTER = '#forum_table';
  29. const TOPIC_TYPE_ARTICLE = 'article';
  30. const TOPIC_TYPE_DIV = 'div.post_block';
  31. public function getURI(){
  32. return $this->getInput('uri') ?: parent::getURI();
  33. }
  34. public function collectData(){
  35. // The URI cannot be the mainpage (or anything related)
  36. switch(parse_url($this->getInput('uri'), PHP_URL_PATH)) {
  37. case null:
  38. case '/index.php':
  39. returnClientError('Provided URI is invalid!');
  40. break;
  41. default:
  42. break;
  43. }
  44. // Sanitize the URI (because else it won't work)
  45. $uri = rtrim($this->getInput('uri'), '/'); // No trailing slashes!
  46. // Forums might provide feeds, though that's optional *facepalm*
  47. // Let's check if there is a valid feed available
  48. $headers = get_headers($uri . '.xml');
  49. if($headers[0] === 'HTTP/1.1 200 OK') { // Heureka! It's a valid feed!
  50. return $this->collectExpandableDatas($uri);
  51. }
  52. // No valid feed, so do it the hard way
  53. $html = getSimpleHTMLDOM($uri)
  54. or returnServerError('Could not request ' . $this->getInput('uri') . '!');
  55. $limit = $this->getInput('limit');
  56. // Determine if this is a topic or a forum
  57. switch(true) {
  58. case $this->isTopic($html):
  59. $this->collectTopic($html, $limit);
  60. break;
  61. case $this->isForum($html);
  62. $this->collectForum($html);
  63. break;
  64. default:
  65. returnClientError('Unknown type!');
  66. break;
  67. }
  68. }
  69. private function isForum($html){
  70. return !is_null($html->find('div[data-controller*=forums.front.forum.forumPage]', 0))
  71. || !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0));
  72. }
  73. private function isTopic($html){
  74. return !is_null($html->find('div[data-controller*=core.front.core.commentFeed]', 0))
  75. || !is_null($html->find(static::TOPIC_TYPE_DIV, 0));
  76. }
  77. private function collectForum($html){
  78. // There are multiple forum designs in use (depends on version?)
  79. // 1 - Uses an ordered list (based on https://invisioncommunity.com/forums)
  80. // 2 - Uses a table (based on https://onehallyu.com)
  81. switch(true) {
  82. case !is_null($html->find(static::FORUM_TYPE_LIST_FILTER, 0)):
  83. $this->collectForumList($html);
  84. break;
  85. case !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)):
  86. $this->collectForumTable($html);
  87. break;
  88. default:
  89. returnClientError('Unknown forum format!');
  90. break;
  91. }
  92. }
  93. private function collectForumList($html){
  94. foreach($html->find(static::FORUM_TYPE_LIST_FILTER, 0)->children() as $row) {
  95. // Columns: Title, Statistics, Last modified
  96. $item = array();
  97. $item['uri'] = $row->find('a', 0)->href;
  98. $item['title'] = $row->find('a', 0)->title;
  99. $item['author'] = $row->find('a', 1)->innertext;
  100. $item['timestamp'] = strtotime($row->find('time', 0)->getAttribute('datetime'));
  101. $this->items[] = $item;
  102. }
  103. }
  104. private function collectForumTable($html){
  105. foreach($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)->children() as $row) {
  106. // Columns: Icon, Content, Preview, Statistics, Last modified
  107. $item = array();
  108. // Skip header row
  109. if(!is_null($row->find('th', 0))) continue;
  110. $item['uri'] = $row->find('a', 0)->href;
  111. $item['title'] = $row->find('.title', 0)->plaintext;
  112. $item['timestamp'] = strtotime($row->find('[itemprop=dateCreated]', 0)->plaintext);
  113. $this->items[] = $item;
  114. }
  115. }
  116. private function collectTopic($html, $limit){
  117. // There are multiple topic designs in use (depends on version?)
  118. // 1 - Uses articles (based on https://invisioncommunity.com/forums)
  119. // 2 - Uses divs (based on https://onehallyu.com)
  120. switch(true) {
  121. case !is_null($html->find(static::TOPIC_TYPE_ARTICLE, 0)):
  122. $this->collectTopicHistory($html, $limit, 'collectTopicArticle');
  123. break;
  124. case !is_null($html->find(static::TOPIC_TYPE_DIV, 0)):
  125. $this->collectTopicHistory($html, $limit, 'collectTopicDiv');
  126. break;
  127. default:
  128. returnClientError('Unknown topic format!');
  129. break;
  130. }
  131. }
  132. private function collectTopicHistory($html, $limit, $callback){
  133. // Make sure the callback is valid!
  134. if(!method_exists($this, $callback))
  135. returnServerError('Unknown function (\'' . $callback . '\')!');
  136. $next = null; // Holds the URI of the next page
  137. while(true) {
  138. $next = $this->$callback($html, is_null($next));
  139. if(is_null($next) || ($limit > 0 && count($this->items) >= $limit)) {
  140. break;
  141. }
  142. $html = getSimpleHTMLDOMCached($next);
  143. }
  144. // We might have more items than specified, remove excess
  145. $this->items = array_slice($this->items, 0, $limit);
  146. }
  147. private function collectTopicArticle($html, $firstrun = true){
  148. $title = $html->find('h1.ipsType_pageTitle', 0)->plaintext;
  149. // Are we on last page?
  150. if($firstrun && !is_null($html->find('.ipsPagination', 0))) {
  151. $last = $html->find('.ipsPagination_last a', 0)->{'data-page'};
  152. $active = $html->find('.ipsPagination_active a', 0)->{'data-page'};
  153. if($active !== $last) {
  154. // Load last page into memory (cached)
  155. $html = getSimpleHTMLDOMCached($html->find('.ipsPagination_last a', 0)->href);
  156. }
  157. }
  158. foreach(array_reverse($html->find(static::TOPIC_TYPE_ARTICLE)) as $article) {
  159. $item = array();
  160. $item['uri'] = $article->find('time', 0)->parent()->href;
  161. $item['author'] = $article->find('aside a', 0)->plaintext;
  162. $item['title'] = $item['author'] . ' - ' . $title;
  163. $item['timestamp'] = strtotime($article->find('time', 0)->getAttribute('datetime'));
  164. $content = $article->find('[data-role=commentContent]', 0);
  165. $content = $this->scaleImages($content);
  166. $item['content'] = $this->fixContent($content);
  167. $item['enclosures'] = $this->findImages($article->find('[data-role=commentContent]', 0)) ?: null;
  168. $this->items[] = $item;
  169. }
  170. // Return whatever page comes next (previous, as we add in inverse order)
  171. // Do we have a previous page? (inactive means no)
  172. if(!is_null($html->find('li[class=ipsPagination_prev ipsPagination_inactive]', 0))) {
  173. return null; // No, or no more
  174. } elseif(!is_null($html->find('li[class=ipsPagination_prev]', 0))) {
  175. return $html->find('.ipsPagination_prev a', 0)->href;
  176. }
  177. return null;
  178. }
  179. private function collectTopicDiv($html, $firstrun = true){
  180. $title = $html->find('h1.ipsType_pagetitle', 0)->plaintext;
  181. // Are we on last page?
  182. if($firstrun && !is_null($html->find('.pagination', 0))) {
  183. $active = $html->find('li[class=page active]', 0)->plaintext;
  184. // There are two ways the 'last' page is displayed:
  185. // - With a distict 'last' button (only if there are enough pages)
  186. // - With a button for each page (use last button)
  187. if(!is_null($html->find('li.last', 0))) {
  188. $last = $html->find('li.last a', 0);
  189. } else {
  190. $last = $html->find('li[class=page] a', -1);
  191. }
  192. if($active !== $last->plaintext) {
  193. // Load last page into memory (cached)
  194. $html = getSimpleHTMLDOMCached($last->href);
  195. }
  196. }
  197. foreach(array_reverse($html->find(static::TOPIC_TYPE_DIV)) as $article) {
  198. $item = array();
  199. $item['uri'] = $article->find('a[rel=bookmark]', 0)->href;
  200. $item['author'] = $article->find('.author', 0)->plaintext;
  201. $item['title'] = $item['author'] . ' - ' . $title;
  202. $item['timestamp'] = strtotime($article->find('.published', 0)->getAttribute('title'));
  203. $content = $article->find('[itemprop=commentText]', 0);
  204. $content = $this->scaleImages($content);
  205. $item['content'] = $this->fixContent($content);
  206. $item['enclosures'] = $this->findImages($article->find('.post_body', 0)) ?: null;
  207. $this->items[] = $item;
  208. }
  209. // Return whatever page comes next (previous, as we add in inverse order)
  210. // Do we have a previous page?
  211. if(!is_null($html->find('li.prev', 0))) {
  212. return $html->find('li.prev a', 0)->href;
  213. }
  214. return null;
  215. }
  216. /** Returns all images from the provide HTML DOM */
  217. private function findImages($html){
  218. $images = array();
  219. foreach($html->find('img') as $img) {
  220. $images[] = $img->src;
  221. }
  222. return $images;
  223. }
  224. /** Sets the maximum width and height for all images */
  225. private function scaleImages($html, $width = 400, $height = 400){
  226. foreach($html->find('img') as $img) {
  227. $img->style = "max-width: {$width}px; max-height: {$height}px;";
  228. }
  229. return $html;
  230. }
  231. /** Removes all unnecessary tags and adds formatting */
  232. private function fixContent($html){
  233. // Restore quote highlighting
  234. foreach($html->find('blockquote') as $quote) {
  235. $quote->style = <<<EOD
  236. padding: 0px 15px;
  237. border-width: 1px 1px 1px 2px;
  238. border-style: solid;
  239. border-color: #ededed #e8e8e8 #dbdbdb #666666;
  240. background: #fbfbfb;
  241. EOD;
  242. }
  243. // Remove unnecessary tags
  244. $content = strip_tags(
  245. $html->innertext,
  246. '<p><a><img><ol><ul><li><table><tr><th><td><strong><blockquote><br><hr><h>'
  247. );
  248. return $content;
  249. }
  250. }