123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
- <?php
/**
 * Bridge for forums powered by Invision Power Board (IPB).
 *
 * Given the URI of a forum, subforum or topic, this bridge first checks
 * whether the forum exposes a native feed at "<uri>.xml". If it does, the
 * feed is expanded via the parent FeedExpander. Otherwise the HTML page is
 * scraped:
 * - forum pages produce one item per listed topic,
 * - topic pages produce one item per post, newest first, walking the
 *   pagination backwards until the requested limit is reached.
 *
 * Two page layouts are supported for both forums and topics (see the
 * FORUM_TYPE_* and TOPIC_TYPE_* constants).
 */
class IPBBridge extends FeedExpander {

    const NAME = 'IPB Bridge';
    const URI = 'https://www.invisionpower.com';
    const DESCRIPTION = 'Returns feeds for forums powered by IPB';
    const MAINTAINER = 'logmanoriginal';
    const PARAMETERS = array(
        array(
            'uri' => array(
                'name' => 'URI',
                'type' => 'text',
                'required' => true,
                'title' => 'Insert forum, subforum or topic URI',
                'exampleValue' => 'https://invisioncommunity.com/forums/forum/499-feedback-and-ideas/'
            ),
            'limit' => array(
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'title' => 'Specifies the number of items to return on each request (-1: all)',
                'defaultValue' => 10
            )
        )
    );
    const CACHE_TIMEOUT = 3600;

    // Constants for internal use

    /** Selector of the topic list in the list-based forum layout */
    const FORUM_TYPE_LIST_FILTER = '.cForumTopicTable';
    /** Selector of the topic list in the table-based forum layout */
    const FORUM_TYPE_TABLE_FILTER = '#forum_table';
    /** Selector of a single post in the article-based topic layout */
    const TOPIC_TYPE_ARTICLE = 'article';
    /** Selector of a single post in the div-based topic layout */
    const TOPIC_TYPE_DIV = 'div.post_block';

    /**
     * Returns the URI supplied by the user, or the bridge default URI if
     * no (non-empty) URI was supplied.
     *
     * @return string
     */
    public function getURI(){
        return $this->getInput('uri') ?: parent::getURI();
    }

    /**
     * Collects the feed items for the configured URI.
     *
     * Reports a client error if the URI is not an http(s) URI, points to the
     * main page, or is neither a known forum nor a known topic layout.
     * Reports a server error if the page cannot be requested.
     *
     * @return mixed Whatever the parent feed expansion returns when a native
     * feed is used, nothing otherwise (items are stored in $this->items).
     */
    public function collectData(){
        $input = $this->getInput('uri');

        // The URI is user input that ends up in network requests, so only
        // accept plain http(s) URIs.
        $scheme = parse_url($input, PHP_URL_SCHEME);
        if(!is_string($scheme)
        || !in_array(strtolower($scheme), array('http', 'https'), true)) {
            returnClientError('Provided URI is invalid!');
        }

        // The URI cannot be the mainpage (or anything related).
        // '/' must be rejected as well: stripping the trailing slash below
        // and appending '.xml' would otherwise turn 'https://host/' into
        // 'https://host.xml', i.e. a request to a different host.
        $path = parse_url($input, PHP_URL_PATH);
        if(!is_string($path)
        || in_array($path, array('', '/', '/index.php'), true)) {
            returnClientError('Provided URI is invalid!');
        }

        // Sanitize the URI (because else it won't work)
        $uri = rtrim($input, '/'); // No trailing slashes!

        // Forums might provide feeds, though that's optional.
        // Let's check if there is a valid feed available and, if so, expand
        // exactly the feed URI that was probed.
        // NOTE(review): the previous code probed '<uri>.xml' but expanded
        // '<uri>' (the HTML page) - confirm the feed URI is what is wanted.
        $feedUri = $uri . '.xml';
        if($this->isFeedAvailable($feedUri)) {
            return $this->collectExpandableDatas($feedUri);
        }

        // No valid feed, so do it the hard way
        $html = getSimpleHTMLDOM($uri)
            or returnServerError('Could not request ' . $this->getInput('uri') . '!');

        $limit = $this->getInput('limit');

        // Determine if this is a topic or a forum
        if($this->isTopic($html)) {
            $this->collectTopic($html, $limit);
        } elseif($this->isForum($html)) {
            $this->collectForum($html);
        } else {
            returnClientError('Unknown type!');
        }
    }

    /**
     * Checks whether requesting the given URI is answered with status 200.
     *
     * Only the first status line returned by get_headers() is inspected,
     * independent of the HTTP protocol version or reason phrase.
     *
     * @param string $feedUri URI of the potential feed
     * @return bool True if the first response status is 200
     */
    private function isFeedAvailable($feedUri){
        $headers = get_headers($feedUri);

        // get_headers() returns false on failure
        if(!is_array($headers) || !isset($headers[0])) {
            return false;
        }

        return preg_match('#^HTTP/\d+(?:\.\d+)?\s+200\b#i', $headers[0]) === 1;
    }

    /**
     * Loads a page through the cached DOM loader and reports a server
     * error if that fails.
     *
     * @param string $uri URI of the page to load
     * @return object The loaded HTML DOM
     */
    private function loadPage($uri){
        $html = getSimpleHTMLDOMCached($uri);

        if(!$html) {
            returnServerError('Could not request ' . $uri . '!');
        }

        return $html;
    }

    /**
     * Returns true if the page looks like a forum (topic listing) in one of
     * the two supported layouts.
     *
     * @param object $html HTML DOM of the page
     * @return bool
     */
    private function isForum($html){
        return !is_null($html->find('div[data-controller*=forums.front.forum.forumPage]', 0))
        || !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0));
    }

    /**
     * Returns true if the page looks like a topic (list of posts) in one of
     * the two supported layouts.
     *
     * @param object $html HTML DOM of the page
     * @return bool
     */
    private function isTopic($html){
        return !is_null($html->find('div[data-controller*=core.front.core.commentFeed]', 0))
        || !is_null($html->find(static::TOPIC_TYPE_DIV, 0));
    }

    /**
     * Collects the topics of a forum page, dispatching on the page layout.
     * Reports a client error if the layout is not recognized.
     *
     * @param object $html HTML DOM of the forum page
     */
    private function collectForum($html){
        // There are multiple forum designs in use (depends on version?)
        // 1 - Uses an ordered list (based on https://invisioncommunity.com/forums)
        // 2 - Uses a table (based on https://onehallyu.com)
        if(!is_null($html->find(static::FORUM_TYPE_LIST_FILTER, 0))) {
            $this->collectForumList($html);
        } elseif(!is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0))) {
            $this->collectForumTable($html);
        } else {
            returnClientError('Unknown forum format!');
        }
    }

    /**
     * Collects one item per row of a list-based forum page.
     *
     * @param object $html HTML DOM of the forum page
     */
    private function collectForumList($html){
        foreach($html->find(static::FORUM_TYPE_LIST_FILTER, 0)->children() as $row) {
            // Columns: Title, Statistics, Last modified
            $link = $row->find('a', 0);
            $time = $row->find('time', 0);

            // Skip rows that do not describe a topic (no link or no date)
            if(is_null($link) || is_null($time)) continue;

            $item = array();
            $item['uri'] = $link->href;
            $item['title'] = $link->title;

            // The second link of a row is used as the author
            $author = $row->find('a', 1);
            if(!is_null($author)) {
                $item['author'] = $author->innertext;
            }

            $item['timestamp'] = strtotime($time->getAttribute('datetime'));

            $this->items[] = $item;
        }
    }

    /**
     * Collects one item per row of a table-based forum page.
     *
     * @param object $html HTML DOM of the forum page
     */
    private function collectForumTable($html){
        foreach($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)->children() as $row) {
            // Columns: Icon, Content, Preview, Statistics, Last modified

            // Skip header row
            if(!is_null($row->find('th', 0))) continue;

            $link = $row->find('a', 0);
            $title = $row->find('.title', 0);
            $created = $row->find('[itemprop=dateCreated]', 0);

            // Skip rows that do not describe a topic
            if(is_null($link) || is_null($title) || is_null($created)) continue;

            $item = array();
            $item['uri'] = $link->href;
            $item['title'] = $title->plaintext;
            $item['timestamp'] = strtotime($created->plaintext);

            $this->items[] = $item;
        }
    }

    /**
     * Collects the posts of a topic page, dispatching on the page layout.
     * Reports a client error if the layout is not recognized.
     *
     * @param object $html HTML DOM of the topic page
     * @param int|string|null $limit Maximum number of posts to collect;
     * values that are not positive mean "no limit"
     */
    private function collectTopic($html, $limit){
        // There are multiple topic designs in use (depends on version?)
        // 1 - Uses articles (based on https://invisioncommunity.com/forums)
        // 2 - Uses divs (based on https://onehallyu.com)
        if(!is_null($html->find(static::TOPIC_TYPE_ARTICLE, 0))) {
            $this->collectTopicHistory($html, $limit, 'collectTopicArticle');
        } elseif(!is_null($html->find(static::TOPIC_TYPE_DIV, 0))) {
            $this->collectTopicHistory($html, $limit, 'collectTopicDiv');
        } else {
            returnClientError('Unknown topic format!');
        }
    }

    /**
     * Walks a topic page by page (newest posts first) until no further page
     * is available or the limit is reached.
     *
     * The callback collects the posts of one page into $this->items and
     * returns the URI of the page to visit next, or null if there is none.
     *
     * @param object $html HTML DOM of the page the user supplied
     * @param int|string|null $limit Maximum number of items; values that are
     * not positive (including the documented -1) mean "no limit"
     * @param string $callback Name of the method of this class which
     * collects a single page
     */
    private function collectTopicHistory($html, $limit, $callback){
        // Make sure the callback is valid!
        if(!method_exists($this, $callback))
            returnServerError('Unknown function (\'' . $callback . '\')!');

        $limit = (int)$limit;
        $next = null; // Holds the URI of the next page

        while(true) {
            // $next is only null before the first page has been processed
            $next = $this->$callback($html, is_null($next));

            if(is_null($next) || ($limit > 0 && count($this->items) >= $limit)) {
                break;
            }

            $html = $this->loadPage($next);
        }

        // We might have more items than specified, remove excess.
        // Only slice for positive limits: array_slice() with a negative
        // length would cut items off the end (-1 must return everything)
        // and a length of zero would return nothing at all.
        if($limit > 0) {
            $this->items = array_slice($this->items, 0, $limit);
        }
    }

    /**
     * Collects all posts of one page of an article-based topic.
     *
     * @param object $html HTML DOM of the topic page
     * @param bool $firstrun True for the page supplied by the user. In that
     * case the last page of the topic is loaded and collected instead, so
     * that the newest posts come first.
     * @return string|null URI of the previous page, or null if there is none
     */
    private function collectTopicArticle($html, $firstrun = true){
        $heading = $html->find('h1.ipsType_pageTitle', 0);
        $title = is_null($heading) ? '' : $heading->plaintext;

        // Are we on last page?
        if($firstrun && !is_null($html->find('.ipsPagination', 0))) {
            $last = $html->find('.ipsPagination_last a', 0);
            $active = $html->find('.ipsPagination_active a', 0);

            if(!is_null($last)) {
                $activePage = is_null($active) ? null : $active->{'data-page'};

                if($activePage !== $last->{'data-page'}) {
                    // Load last page into memory (cached)
                    $html = $this->loadPage($last->href);
                }
            }
        }

        // Posts are listed oldest first, items are expected newest first
        foreach(array_reverse($html->find(static::TOPIC_TYPE_ARTICLE)) as $article) {
            $time = $article->find('time', 0);
            $content = $article->find('[data-role=commentContent]', 0);

            // The selector matches any <article>, skip those that are no posts
            if(is_null($time) || is_null($content)) continue;

            $author = $article->find('aside a', 0);

            $item = array();
            $item['uri'] = $time->parent()->href;
            $item['author'] = is_null($author) ? '' : $author->plaintext;
            $item['title'] = $item['author'] . ' - ' . $title;
            $item['timestamp'] = strtotime($time->getAttribute('datetime'));
            $item['content'] = $this->fixContent($this->scaleImages($content));
            $item['enclosures'] = $this->findImages($content) ?: null;

            $this->items[] = $item;
        }

        // Return whatever page comes next (previous, as we add in inverse order)
        // Do we have a previous page? (inactive means no)
        if(!is_null($html->find('li[class=ipsPagination_prev ipsPagination_inactive]', 0))) {
            return null; // No, or no more
        }

        if(is_null($html->find('li[class=ipsPagination_prev]', 0))) {
            return null;
        }

        $prev = $html->find('.ipsPagination_prev a', 0);
        return is_null($prev) ? null : $prev->href;
    }

    /**
     * Collects all posts of one page of a div-based topic.
     *
     * @param object $html HTML DOM of the topic page
     * @param bool $firstrun True for the page supplied by the user. In that
     * case the last page of the topic is loaded and collected instead, so
     * that the newest posts come first.
     * @return string|null URI of the previous page, or null if there is none
     */
    private function collectTopicDiv($html, $firstrun = true){
        $heading = $html->find('h1.ipsType_pagetitle', 0);
        $title = is_null($heading) ? '' : $heading->plaintext;

        // Are we on last page?
        if($firstrun && !is_null($html->find('.pagination', 0))) {
            $active = $html->find('li[class=page active]', 0);

            // There are two ways the 'last' page is displayed:
            // - With a distinct 'last' button (only if there are enough pages)
            // - With a button for each page (use last button)
            if(!is_null($html->find('li.last', 0))) {
                $last = $html->find('li.last a', 0);
            } else {
                $last = $html->find('li[class=page] a', -1);
            }

            if(!is_null($last)) {
                $activePage = is_null($active) ? null : $active->plaintext;

                if($activePage !== $last->plaintext) {
                    // Load last page into memory (cached)
                    $html = $this->loadPage($last->href);
                }
            }
        }

        // Posts are listed oldest first, items are expected newest first
        foreach(array_reverse($html->find(static::TOPIC_TYPE_DIV)) as $article) {
            $link = $article->find('a[rel=bookmark]', 0);
            $content = $article->find('[itemprop=commentText]', 0);

            // Skip blocks that are no regular posts
            if(is_null($link) || is_null($content)) continue;

            $author = $article->find('.author', 0);
            $published = $article->find('.published', 0);
            $body = $article->find('.post_body', 0);

            $item = array();
            $item['uri'] = $link->href;
            $item['author'] = is_null($author) ? '' : $author->plaintext;
            $item['title'] = $item['author'] . ' - ' . $title;

            if(!is_null($published)) {
                $item['timestamp'] = strtotime($published->getAttribute('title'));
            }

            $item['content'] = $this->fixContent($this->scaleImages($content));
            $item['enclosures'] = (is_null($body) ? array() : $this->findImages($body)) ?: null;

            $this->items[] = $item;
        }

        // Return whatever page comes next (previous, as we add in inverse order)
        // Do we have a previous page?
        $prev = $html->find('li.prev a', 0);
        if(!is_null($html->find('li.prev', 0)) && !is_null($prev)) {
            return $prev->href;
        }

        return null;
    }

    /**
     * Returns the sources of all images in the provided HTML DOM.
     * Images without a (non-empty) src attribute are skipped.
     *
     * @param object $html HTML DOM element to search
     * @return array List of image sources
     */
    private function findImages($html){
        $images = array();

        foreach($html->find('img') as $img) {
            if($img->src) {
                $images[] = $img->src;
            }
        }

        return $images;
    }

    /**
     * Sets the maximum width and height for all images by overwriting
     * their style attribute. The DOM element is modified in place.
     *
     * @param object $html HTML DOM element to modify
     * @param int $width Maximum width in pixels
     * @param int $height Maximum height in pixels
     * @return object The same (modified) HTML DOM element
     */
    private function scaleImages($html, $width = 400, $height = 400){
        foreach($html->find('img') as $img) {
            $img->style = "max-width: {$width}px; max-height: {$height}px;";
        }

        return $html;
    }

    /**
     * Removes all unnecessary tags and adds formatting.
     *
     * @param object $html HTML DOM element holding the post content
     * @return string Inner HTML reduced to the allowed tags
     */
    private function fixContent($html){
        // Restore quote highlighting
        foreach($html->find('blockquote') as $quote) {
            $quote->style = <<<EOD
padding: 0px 15px;
border-width: 1px 1px 1px 2px;
border-style: solid;
border-color: #ededed #e8e8e8 #dbdbdb #666666;
background: #fbfbfb;
EOD;
        }

        // Remove unnecessary tags.
        // Heading tags have to be listed one by one: tag names are matched
        // exactly, so '<h>' does not cover <h1> to <h6>.
        $content = strip_tags(
            $html->innertext,
            '<p><a><img><ol><ul><li><table><tr><th><td><strong><blockquote><br><hr>'
            . '<h1><h2><h3><h4><h5><h6>'
        );

        return $content;
    }
}
|