AmazonPriceTrackerBridge.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. <?php
  2. class AmazonPriceTrackerBridge extends BridgeAbstract {
  3. const MAINTAINER = 'captn3m0';
  4. const NAME = 'Amazon Price Tracker';
  5. const URI = 'https://www.amazon.com/';
  6. const CACHE_TIMEOUT = 3600; // 1h
  7. const DESCRIPTION = 'Tracks price for a single product on Amazon';
  8. const PARAMETERS = array(
  9. array(
  10. 'asin' => array(
  11. 'name' => 'ASIN',
  12. 'required' => true,
  13. 'exampleValue' => 'B071GB1VMQ',
  14. // https://stackoverflow.com/a/12827734
  15. 'pattern' => 'B[\dA-Z]{9}|\d{9}(X|\d)',
  16. ),
  17. 'tld' => array(
  18. 'name' => 'Country',
  19. 'type' => 'list',
  20. 'required' => true,
  21. 'values' => array(
  22. 'Australia' => 'com.au',
  23. 'Brazil' => 'com.br',
  24. 'Canada' => 'ca',
  25. 'China' => 'cn',
  26. 'France' => 'fr',
  27. 'Germany' => 'de',
  28. 'India' => 'in',
  29. 'Italy' => 'it',
  30. 'Japan' => 'co.jp',
  31. 'Mexico' => 'com.mx',
  32. 'Netherlands' => 'nl',
  33. 'Spain' => 'es',
  34. 'United Kingdom' => 'co.uk',
  35. 'United States' => 'com',
  36. ),
  37. 'defaultValue' => 'com',
  38. ),
  39. ));
  40. protected $title;
  41. /**
  42. * Generates domain name given a amazon TLD
  43. */
  44. private function getDomainName() {
  45. return 'https://www.amazon.' . $this->getInput('tld');
  46. }
  47. /**
  48. * Generates URI for a Amazon product page
  49. */
  50. public function getURI() {
  51. if (!is_null($this->getInput('asin'))) {
  52. return $this->getDomainName() . '/dp/' . $this->getInput('asin') . '/';
  53. }
  54. return parent::getURI();
  55. }
  56. /**
  57. * Scrapes the product title from the html page
  58. * returns the default title if scraping fails
  59. */
  60. private function getTitle($html) {
  61. $titleTag = $html->find('#productTitle', 0);
  62. if (!$titleTag) {
  63. return $this->getDefaultTitle();
  64. } else {
  65. return trim(html_entity_decode($titleTag->innertext, ENT_QUOTES));
  66. }
  67. }
  68. /**
  69. * Title used by the feed if none could be found
  70. */
  71. private function getDefaultTitle() {
  72. return 'Amazon.' . $this->getInput('tld') . ': ' . $this->getInput('asin');
  73. }
  74. /**
  75. * Returns name for the feed
  76. * Uses title (already scraped) if it has one
  77. */
  78. public function getName() {
  79. if (isset($this->title)) {
  80. return $this->title;
  81. } else {
  82. return parent::getName();
  83. }
  84. }
  85. private function parseDynamicImage($attribute) {
  86. $json = json_decode(html_entity_decode($attribute), true);
  87. if ($json and count($json) > 0) {
  88. return array_keys($json)[0];
  89. }
  90. }
  91. /**
  92. * Returns a generated image tag for the product
  93. */
  94. private function getImage($html) {
  95. $imageSrc = $html->find('#main-image-container img', 0);
  96. if ($imageSrc) {
  97. $hiresImage = $imageSrc->getAttribute('data-old-hires');
  98. $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
  99. $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);
  100. }
  101. $image = $image ?: 'https://placekitten.com/200/300';
  102. return <<<EOT
  103. <img width="300" style="max-width:300;max-height:300" src="$image" alt="{$this->title}" />
  104. EOT;
  105. }
  106. /**
  107. * Return \simple_html_dom object
  108. * for the entire html of the product page
  109. */
  110. private function getHtml() {
  111. $uri = $this->getURI();
  112. return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
  113. }
  114. private function scrapePriceFromMetrics($html) {
  115. $asinData = $html->find('#cerberus-data-metrics', 0);
  116. // <div id="cerberus-data-metrics" style="display: none;"
  117. // data-asin="B00WTHJ5SU" data-asin-price="14.99" data-asin-shipping="0"
  118. // data-asin-currency-code="USD" data-substitute-count="-1" ... />
  119. if ($asinData) {
  120. return [
  121. 'price' => $asinData->getAttribute('data-asin-price'),
  122. 'currency' => $asinData->getAttribute('data-asin-currency-code'),
  123. 'shipping' => $asinData->getAttribute('data-asin-shipping')
  124. ];
  125. }
  126. return false;
  127. }
  128. private function scrapePriceGeneric($html) {
  129. $priceDiv = $html->find('span.offer-price', 0) ?: $html->find('.a-color-price', 0);
  130. preg_match('/^\s*([A-Z]{3}|£|\$)\s?([\d.,]+)\s*$/', $priceDiv->plaintext, $matches);
  131. if (count($matches) === 3) {
  132. return [
  133. 'price' => $matches[2],
  134. 'currency' => $matches[1],
  135. 'shipping' => '0'
  136. ];
  137. }
  138. return false;
  139. }
  140. /**
  141. * Scrape method for Amazon product page
  142. * @return [type] [description]
  143. */
  144. public function collectData() {
  145. $html = $this->getHtml();
  146. $this->title = $this->getTitle($html);
  147. $imageTag = $this->getImage($html);
  148. $data = $this->scrapePriceFromMetrics($html) ?: $this->scrapePriceGeneric($html);
  149. $item = array(
  150. 'title' => $this->title,
  151. 'uri' => $this->getURI(),
  152. 'content' => "$imageTag<br/>Price: {$data['price']} {$data['currency']}",
  153. );
  154. if ($data['shipping'] !== '0') {
  155. $item['content'] .= "<br>Shipping: {$data['shipping']} {$data['currency']}</br>";
  156. }
  157. $this->items[] = $item;
  158. }
  159. }