forked from blallo/rss-bridge
[AmazonPriceTrackerBridge] Improve Amazon scraper logic (#761)
- Now works on all websites, and even with products with multiple prices - Closes #750
This commit is contained in:
parent
53bdfa3bf0
commit
8f9a385b4d
1 changed file with 53 additions and 15 deletions
|
@ -92,6 +92,14 @@ class AmazonPriceTrackerBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the first key of the JSON object stored in an HTML attribute value.
 *
 * The value is HTML-entity-decoded and then parsed as JSON into an
 * associative array; the first key of that array is returned.
 * NOTE(review): the keys are presumably image URLs (the attribute is named
 * "data-a-dynamic-image" by the caller) - confirm against a live page.
 *
 * @param string|false|null $attribute Raw attribute value; a missing
 *                                     attribute is treated as an empty string
 * @return string|int|null First key of the decoded JSON, or null when the
 *                         value is empty, is not valid JSON, or does not
 *                         decode to a non-empty array
 */
private function parseDynamicImage($attribute) {
    $json = json_decode(html_entity_decode((string)$attribute), true);

    // json_decode() may also produce a scalar (e.g. "5" decodes to int 5);
    // count() must only ever see an array, so reject everything else here.
    if (!is_array($json) || count($json) === 0) {
        return null;
    }

    return array_keys($json)[0];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a generated image tag for the product
|
* Returns a generated image tag for the product
|
||||||
*/
|
*/
|
||||||
|
@ -99,11 +107,15 @@ class AmazonPriceTrackerBridge extends BridgeAbstract {
|
||||||
$imageSrc = $html->find('#main-image-container img', 0);
|
$imageSrc = $html->find('#main-image-container img', 0);
|
||||||
|
|
||||||
if ($imageSrc) {
|
if ($imageSrc) {
|
||||||
$imageSrc = $imageSrc ? $imageSrc->getAttribute('data-old-hires') : '';
|
$hiresImage = $imageSrc->getAttribute('data-old-hires');
|
||||||
return <<<EOT
|
$dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
|
||||||
<img width="300" style="max-width:300;max-height:300" src="$imageSrc" alt="{$this->title}" />
|
$image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);
|
||||||
EOT;
|
|
||||||
}
|
}
|
||||||
|
$image = $image ?: 'https://placekitten.com/200/300';
|
||||||
|
|
||||||
|
return <<<EOT
|
||||||
|
<img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" />
|
||||||
|
EOT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -116,6 +128,39 @@ EOT;
|
||||||
return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
|
return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads price information from the "#cerberus-data-metrics" element.
 *
 * Example of the markup this method looks for:
 *
 *   <div id="cerberus-data-metrics" style="display: none;"
 *        data-asin="B00WTHJ5SU" data-asin-price="14.99" data-asin-shipping="0"
 *        data-asin-currency-code="USD" data-substitute-count="-1" ... />
 *
 * @param object $html Parsed page; must provide find($selector, $index)
 * @return array|false Array with the keys 'price', 'currency' and 'shipping'
 *                     (taken from the data-asin-* attributes), or false when
 *                     the element is not present
 */
private function scrapePriceFromMetrics($html) {
    $metrics = $html->find('#cerberus-data-metrics', 0);

    // Without the metrics element there is nothing to read from
    if (!$metrics) {
        return false;
    }

    $price = $metrics->getAttribute('data-asin-price');
    $currency = $metrics->getAttribute('data-asin-currency-code');
    $shipping = $metrics->getAttribute('data-asin-shipping');

    return array(
        'price' => $price,
        'currency' => $currency,
        'shipping' => $shipping
    );
}
|
||||||
|
|
||||||
|
/**
 * Reads the price from the visible price element of the page.
 *
 * Uses the first "span.offer-price" element, or the first ".a-color-price"
 * element if the former is not found. The element text must consist of a
 * currency (three uppercase letters, "£" or "$") followed by an amount made
 * of digits, dots and commas.
 *
 * @param object $html Parsed page; must provide find($selector, $index)
 * @return array|false Array with the keys 'price', 'currency' and 'shipping'
 *                     (shipping is always '0' because it is not scraped
 *                     here), or false when no price element exists or its
 *                     text does not match the expected format
 */
private function scrapePriceGeneric($html) {
    $priceDiv = $html->find('span.offer-price', 0) ?: $html->find('.a-color-price', 0);

    // Neither selector matched: stop here rather than reading a property
    // of a non-object below.
    if (!$priceDiv) {
        return false;
    }

    // Both capture groups are mandatory, so a successful match always
    // fills $matches[1] (currency) and $matches[2] (amount).
    if (preg_match('/^\s*([A-Z]{3}|£|\$)\s?([\d.,]+)\s*$/', $priceDiv->plaintext, $matches) !== 1) {
        return false;
    }

    return [
        'price' => $matches[2],
        'currency' => $matches[1],
        'shipping' => '0'
    ];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrape method for Amazon product page
|
* Scrape method for Amazon product page
|
||||||
* @return [type] [description]
|
* @return [type] [description]
|
||||||
|
@ -125,23 +170,16 @@ EOT;
|
||||||
$this->title = $this->getTitle($html);
|
$this->title = $this->getTitle($html);
|
||||||
$imageTag = $this->getImage($html);
|
$imageTag = $this->getImage($html);
|
||||||
|
|
||||||
$asinData = $html->find('#cerberus-data-metrics', 0);
|
$data = $this->scrapePriceFromMetrics($html) ?: $this->scrapePriceGeneric($html);
|
||||||
|
|
||||||
// <div id="cerberus-data-metrics" style="display: none;"
|
|
||||||
// data-asin="B00WTHJ5SU" data-asin-price="14.99" data-asin-shipping="0"
|
|
||||||
// data-asin-currency-code="USD" data-substitute-count="-1" ... />
|
|
||||||
$currency = $asinData->getAttribute('data-asin-currency-code');
|
|
||||||
$shipping = $asinData->getAttribute('data-asin-shipping');
|
|
||||||
$price = $asinData->getAttribute('data-asin-price');
|
|
||||||
|
|
||||||
$item = array(
|
$item = array(
|
||||||
'title' => $this->title,
|
'title' => $this->title,
|
||||||
'uri' => $this->getURI(),
|
'uri' => $this->getURI(),
|
||||||
'content' => "$imageTag<br/>Price: $price $currency",
|
'content' => "$imageTag<br/>Price: {$data['price']} {$data['currency']}",
|
||||||
);
|
);
|
||||||
|
|
||||||
if ($shipping !== '0') {
|
if ($data['shipping'] !== '0') {
|
||||||
$item['content'] .= "<br>Shipping: $shipping $currency</br>";
|
$item['content'] .= "<br>Shipping: {$data['shipping']} {$data['currency']}</br>";
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
|
|
Loading…
Reference in a new issue