1
0
Fork 0
forked from blallo/rss-bridge

[GooglePlusPostBridge] Fix content loading

- Do not force language via HTTP header
The header forced the language to French, which broke parsing of the exact
time because of localized spellings (strtotime cannot handle 'semaines'). If
further issues are experienced, try forcing en-us instead.
=> This should really be done in the RSS-Bridge core

- Fix loading problems due to pinned articles
Pinned articles do not provide a timestamp. Looking up the timestamp element
first and parsing it only when it is present avoids the parsing errors.

- Use class names instead of CSS paths
CSS paths change based on the article. Pinned articles provide a different
DOM structure, which caused parsing errors.

Reported via #499
This commit is contained in:
logmanoriginal 2017-03-26 16:40:05 +02:00
parent 88586381e7
commit 3276d4e3d5

View file

@ -19,15 +19,8 @@ class GooglePlusPostBridge extends BridgeAbstract{
public function collectData(){ public function collectData(){
// get content parsed // get content parsed
$html = getSimpleHTMLDOMCached(self::URI . urlencode($this->getInput('username')) . '/posts', $html = getSimpleHTMLDOMCached(self::URI . urlencode($this->getInput('username')) . '/posts')
// force language or returnServerError('No results for this query.');
84600,
false,
stream_context_create(array(
'http' => array(
'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n"
)))
) or returnServerError('No results for this query.');
// get title, url, ... there is a lot of intresting stuff in meta // get title, url, ... there is a lot of intresting stuff in meta
$this->_title = $html->find('meta[property=og:title]', 0)->getAttribute('content'); $this->_title = $html->find('meta[property=og:title]', 0)->getAttribute('content');
@ -41,10 +34,15 @@ class GooglePlusPostBridge extends BridgeAbstract{
$item['id'] = $post->find('div div div', 0)->getAttribute('id'); $item['id'] = $post->find('div div div', 0)->getAttribute('id');
$item['avatar'] = $post->find('div img', 0)->src; $item['avatar'] = $post->find('div img', 0)->src;
$item['uri'] = self::URI . $post->find('div div div a', 1)->href; $item['uri'] = self::URI . $post->find('div div div a', 1)->href;
$item['timestamp'] = strtotime(
'+' . preg_replace('/[^0-9A-Za-z]/', $timestamp = $post->find('a.qXj2He span', 0);
'',
$post->find('div div div a span', 1)->getAttribute('aria-label'))); if($timestamp){
$item['timestamp'] = strtotime('+' . preg_replace(
'/[^0-9A-Za-z]/',
'',
$timestamp->getAttribute('aria-label')));
}
// hashtag to treat : https://plus.google.com/explore/tag // hashtag to treat : https://plus.google.com/explore/tag
// $hashtags = array(); // $hashtags = array();
@ -65,7 +63,7 @@ class GooglePlusPostBridge extends BridgeAbstract{
. $item['avatar'] . $item['avatar']
. '" /></a></div>'; . '" /></a></div>';
$content = $post->find('div div[id^=body] div div', 0); $content = $post->find('div[jsname=EjRJtf]', 0);
// extract plaintext // extract plaintext
$item['content_simple'] = $content->plaintext; $item['content_simple'] = $content->plaintext;
$item['title'] = substr($item['content_simple'], 0, 72) . '...'; $item['title'] = substr($item['content_simple'], 0, 72) . '...';