[PinterestBridge] Fix implementation after DOM changes

Due to breaking DOM changes this bridge required re-implementation.
With this fix the bridge will make use of the JSON data embedded in
the returned HTML. The content returned for all contexts is similar
with only a few differences due to limitations of the JSON.

Feeds returned for a given username and board will by default make
use of the provided RSS feed instead of using the custom filter.
This behaviour can be changed by setting the optional parameter
'&r=off' (on by default).

Notice: The JSON data for userdata and search results is very
different, so two functions were implemented to account for that.

References #498
This commit is contained in:
logmanoriginal 2017-04-09 23:26:35 +02:00
parent 4124c707d4
commit 2500d0df93

View file

@ -1,9 +1,9 @@
<?php <?php
class PinterestBridge extends BridgeAbstract { class PinterestBridge extends FeedExpander {
const MAINTAINER = 'pauder'; const MAINTAINER = 'pauder';
const NAME = 'Pinterest Bridge'; const NAME = 'Pinterest Bridge';
const URI = 'http://www.pinterest.com/'; const URI = 'https://www.pinterest.com';
const DESCRIPTION = 'Returns the newest images on a board'; const DESCRIPTION = 'Returns the newest images on a board';
const PARAMETERS = array( const PARAMETERS = array(
@ -15,6 +15,13 @@ class PinterestBridge extends BridgeAbstract {
'b' => array( 'b' => array(
'name' => 'board', 'name' => 'board',
'required' => true 'required' => true
),
'r' => array(
'name' => 'Use provided RSS',
'type' => 'checkbox',
'required' => false,
'defaultValue' => 'checked',
'title' => 'Uncheck to return data via custom filters (more data)'
) )
), ),
'From search' => array( 'From search' => array(
@ -26,81 +33,116 @@ class PinterestBridge extends BridgeAbstract {
); );
public function collectData(){ public function collectData(){
$html = getSimpleHTMLDOM($this->getURI());
if(!$html){
switch($this->queriedContext){ switch($this->queriedContext){
case 'By username and board': case 'By username and board':
returnServerError('Username and/or board not found'); if($this->getInput('r')){
$this->collectExpandableDatas($this->getURI() . '.rss');
} else {
$html = getSimpleHTMLDOMCached($this->getURI());
$this->getUserResults($html);
}
break;
case 'From search': case 'From search':
returnServerError('Could not request Pinterest.'); default:
$html = getSimpleHTMLDOMCached($this->getURI());
$this->getSearchResults($html);
} }
} }
if($this->queriedContext === 'From search'){ private function getUserResults($html){
foreach($html->find('div.pinWrapper') as $div){ $json = json_decode($html->find('#jsInit1', 0)->innertext, true);
$results = $json['tree']['children'][0]['children'][0]['children'][0]['options']['props']['data']['board_feed'];
$username = $json['resourceDataCache'][0]['data']['owner']['username'];
$fullname = $json['resourceDataCache'][0]['data']['owner']['full_name'];
$avatar = $json['resourceDataCache'][0]['data']['owner']['image_small_url'];
foreach($results as $result){
$item = array(); $item = array();
$a = $div->find('a.pinImageWrapper', 0); $item['uri'] = $result['link'];
$img = $a->find('img', 0);
$item['uri'] = $this->getURI() . $a->getAttribute('href'); // Some use regular titles, others provide 'advanced' infos, a few
$item['content'] = '<img src="' // provide even less info. Thus we attempt multiple options.
. htmlentities(str_replace('/236x/', '/736x/', $img->getAttribute('src'))) $item['title'] = trim($result['title']);
. '" alt="" />';
$avatar = $div->find('div.creditImg', 0)->find('img', 0); if($item['title'] === "")
$avatar = $avatar->getAttribute('data-src'); $item['title'] = trim($result['rich_summary']['display_name']);
$avatar = str_replace("\\", "", $avatar);
$username = $div->find('div.creditName', 0); if($item['title'] === "")
$board = $div->find('div.creditTitle', 0); $item['title'] = trim($result['description']);
$item['username'] = $username->innertext; $item['timestamp'] = strtotime($result['created_at']);
$item['fullname'] = $board->innertext; $item['username'] = $username;
$item['fullname'] = $fullname;
$item['avatar'] = $avatar; $item['avatar'] = $avatar;
$item['author'] = $item['username'] . ' (' . $item['fullname'] . ')';
$item['content'] .= '<br /><img align="left" style="margin: 2px 4px;" src="' $item['content'] = '<img align="left" style="margin: 2px 4px;" src="'
. htmlentities($item['avatar']) . htmlentities($item['avatar'])
. '" /> <strong>' . '" /><p><strong>'
. $item['username'] . $item['username']
. '</strong><br />' . '</strong><br>'
. $item['fullname']; . $item['fullname']
. '</p><br><img src="'
. $result['images']['736x']['url']
. '" alt="" /><br><p>'
. $result['description']
. '</p>';
$item['enclosures'] = array($result['images']['orig']['url']);
$item['title'] = $img->getAttribute('alt');
$this->items[] = $item; $this->items[] = $item;
} }
} elseif($this->queriedContext === 'By username and board'){ }
$container = $html->find('SCRIPT[type="application/ld+json"]', 0)
or returnServerError('Unable to find data container!');
$json = json_decode($container->innertext, true); private function getSearchResults($html){
$json = json_decode($html->find('#jsInit1', 0)->innertext, true);
$results = $json['resourceDataCache'][0]['data']['results'];
foreach($json['itemListElement'] as $element){ foreach($results as $result){
$item = array(); $item = array();
$item['uri'] = $element['item']['sharedContent']['author']['url']; $item['uri'] = self::URI . $result['board']['url'];
$item['title'] = $element['item']['name'];
$item['author'] = $element['item']['user']['name']; // Some use regular titles, others provide 'advanced' infos, a few
$item['timestamp'] = strtotime($element['item']['datePublished']); // provide even less info. Thus we attempt multiple options.
$item['content'] = <<<EOD $item['title'] = trim($result['title']);
<a href="{$item['uri']}">
<img src="{$element['item']['image']}"> if($item['title'] === "")
</a> $item['title'] = trim($result['rich_summary']['display_name']);
<p>{$element['item']['text']}</p>
EOD; if($item['title'] === "")
$item['title'] = trim($result['grid_description']);
$item['timestamp'] = strtotime($result['created_at']);
$item['username'] = $result['pinner']['username'];
$item['fullname'] = $result['pinner']['full_name'];
$item['avatar'] = $result['pinner']['image_small_url'];
$item['author'] = $item['username'] . ' (' . $item['fullname'] . ')';
$item['content'] = '<img align="left" style="margin: 2px 4px;" src="'
. htmlentities($item['avatar'])
. '" /><p><strong>'
. $item['username']
. '</strong><br>'
. $item['fullname']
. '</p><br><img src="'
. $result['images']['736x']['url']
. '" alt="" /><br><p>'
. $result['description']
. '</p>';
$item['enclosures'] = array($result['images']['orig']['url']);
$this->items[] = $item; $this->items[] = $item;
} }
} }
}
public function getURI(){ public function getURI(){
switch($this->queriedContext){ switch($this->queriedContext){
case 'By username and board': case 'By username and board':
$uri = self::URI . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b')); $uri = self::URI . '/' . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b'));// . '.rss';
break; break;
case 'From search': case 'From search':
$uri = self::URI . 'search/?q=' . urlencode($this->getInput('q')); $uri = self::URI . '/search/?q=' . urlencode($this->getInput('q'));
break; break;
default: return parent::getURI(); default: return parent::getURI();
} }
@ -110,7 +152,7 @@ EOD;
public function getName(){ public function getName(){
switch($this->queriedContext){ switch($this->queriedContext){
case 'By username and board': case 'By username and board':
$specific = $this->getInput('u') . '-' . $this->getInput('b'); $specific = $this->getInput('u') . ' - ' . $this->getInput('b');
break; break;
case 'From search': case 'From search':
$specific = $this->getInput('q'); $specific = $this->getInput('q');