JustETFBridge.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. <?php
  2. class JustETFBridge extends BridgeAbstract {
  3. const NAME = 'justETF Bridge';
  4. const URI = 'https://www.justetf.com';
  5. const DESCRIPTION = 'Currently only supports the news feed';
  6. const MAINTAINER = 'logmanoriginal';
  7. const PARAMETERS = array(
  8. 'News' => array(
  9. 'full' => array(
  10. 'name' => 'Full Article',
  11. 'type' => 'checkbox',
  12. 'title' => 'Enable to load full articles'
  13. )
  14. ),
  15. 'Profile' => array(
  16. 'isin' => array(
  17. 'name' => 'ISIN',
  18. 'type' => 'text',
  19. 'required' => true,
  20. 'pattern' => '[a-zA-Z]{2}[a-zA-Z0-9]{10}',
  21. 'title' => 'ISIN, consisting of 2-letter country code, 9-character identifier, check character'
  22. ),
  23. 'strategy' => array(
  24. 'name' => 'Include Strategy',
  25. 'type' => 'checkbox',
  26. 'defaultValue' => 'checked'
  27. ),
  28. 'description' => array(
  29. 'name' => 'Include Description',
  30. 'type' => 'checkbox',
  31. 'defaultValue' => 'checked'
  32. )
  33. ),
  34. 'global' => array(
  35. 'lang' => array(
  36. 'name' => 'Language',
  37. 'required' => true,
  38. 'type' => 'list',
  39. 'values' => array(
  40. 'Englisch' => 'en',
  41. 'Deutsch' => 'de',
  42. 'Italiano' => 'it'
  43. ),
  44. 'defaultValue' => 'Englisch'
  45. )
  46. )
  47. );
  48. public function collectData() {
  49. $html = getSimpleHTMLDOM($this->getURI())
  50. or returnServerError('Failed loading contents from ' . $this->getURI());
  51. defaultLinkTo($html, static::URI);
  52. switch($this->queriedContext) {
  53. case 'News':
  54. $this->collectNews($html);
  55. break;
  56. case 'Profile':
  57. $this->collectProfile($html);
  58. break;
  59. }
  60. }
  61. public function getURI() {
  62. $uri = static::URI;
  63. if($this->getInput('lang')) {
  64. $uri .= '/' . $this->getInput('lang');
  65. }
  66. switch($this->queriedContext) {
  67. case 'News':
  68. $uri .= '/news';
  69. break;
  70. case 'Profile':
  71. $uri .= '/etf-profile.html?' . http_build_query(array(
  72. 'isin' => strtoupper($this->getInput('isin'))
  73. ));
  74. break;
  75. }
  76. return $uri;
  77. }
  78. public function getName() {
  79. $name = static::NAME;
  80. $name .= ($this->queriedContext) ? ' - ' . $this->queriedContext : '';
  81. switch($this->queriedContext) {
  82. case 'News': break;
  83. case 'Profile':
  84. if($this->getInput('isin')) {
  85. $name .= ' ISIN ' . strtoupper($this->getInput('isin'));
  86. }
  87. }
  88. if($this->getInput('lang')) {
  89. $name .= ' (' . strtoupper($this->getInput('lang')) . ')';
  90. }
  91. return $name;
  92. }
  93. #region Common
  94. /**
  95. * Fixes dates depending on the choosen language:
  96. *
  97. * de : dd.mm.yy
  98. * en : dd.mm.yy
  99. * it : dd/mm/yy
  100. *
  101. * Basically strtotime doesn't convert dates correctly due to formats
  102. * being hard to interpret. So we use the DateTime object, manually
  103. * fixing dates and times (set to 00:00:00.000).
  104. *
  105. * We don't know the timezone, so just assume +00:00 (or whatever
  106. * DateTime chooses)
  107. */
  108. private function fixDate($date) {
  109. switch($this->getInput('lang')) {
  110. case 'en':
  111. case 'de':
  112. $df = date_create_from_format('d.m.y', $date);
  113. break;
  114. case 'it':
  115. $df = date_create_from_format('d/m/y', $date);
  116. break;
  117. }
  118. date_time_set($df, 0, 0);
  119. // debugMessage(date_format($df, 'U'));
  120. return date_format($df, 'U');
  121. }
  122. private function extractImages($article) {
  123. // Notice: We can have zero or more images (though it should mostly be 1)
  124. $elements = $article->find('img');
  125. $images = array();
  126. foreach($elements as $img) {
  127. // Skip the logo (mostly provided part of a hidden div)
  128. if(substr($img->src, strrpos($img->src, '/') + 1) === 'logo.png')
  129. continue;
  130. $images[] = $img->src;
  131. }
  132. return $images;
  133. }
  134. #endregion
  135. #region News
  136. private function collectNews($html) {
  137. $articles = $html->find('div.newsTopArticle')
  138. or returnServerError('No articles found! Layout might have changed!');
  139. foreach($articles as $article) {
  140. $item = array();
  141. // Common data
  142. $item['uri'] = $this->extractNewsUri($article);
  143. $item['timestamp'] = $this->extractNewsDate($article);
  144. $item['title'] = $this->extractNewsTitle($article);
  145. if($this->getInput('full')) {
  146. $uri = $this->extractNewsUri($article);
  147. $html = getSimpleHTMLDOMCached($uri)
  148. or returnServerError('Failed loading full article from ' . $uri);
  149. $fullArticle = $html->find('div.article', 0)
  150. or returnServerError('No content found! Layout might have changed!');
  151. defaultLinkTo($fullArticle, static::URI);
  152. $item['author'] = $this->extractFullArticleAuthor($fullArticle);
  153. $item['content'] = $this->extractFullArticleContent($fullArticle);
  154. $item['enclosures'] = $this->extractImages($fullArticle);
  155. } else {
  156. $item['content'] = $this->extractNewsDescription($article);
  157. $item['enclosures'] = $this->extractImages($article);
  158. }
  159. $this->items[] = $item;
  160. }
  161. }
  162. private function extractNewsUri($article) {
  163. $element = $article->find('a', 0)
  164. or returnServerError('Anchor not found!');
  165. return $element->href;
  166. }
  167. private function extractNewsDate($article) {
  168. $element = $article->find('div.subheadline', 0)
  169. or returnServerError('Date not found!');
  170. // debugMessage($element->plaintext);
  171. $date = trim(explode('|', $element->plaintext)[0]);
  172. return $this->fixDate($date);
  173. }
  174. private function extractNewsDescription($article) {
  175. $element = $article->find('span.newsText', 0)
  176. or returnServerError('Description not found!');
  177. $element->find('a', 0)->onclick = '';
  178. // debugMessage($element->innertext);
  179. return $element->innertext;
  180. }
  181. private function extractNewsTitle($article) {
  182. $element = $article->find('h3', 0)
  183. or returnServerError('Title not found!');
  184. return $element->plaintext;
  185. }
  186. private function extractFullArticleContent($article) {
  187. $element = $article->find('div.article_body', 0)
  188. or returnServerError('Article body not found!');
  189. // Remove teaser image
  190. $element->find('img.teaser-img', 0)->outertext = '';
  191. // Remove self advertisements
  192. foreach($element->find('.call-action') as $adv) {
  193. $adv->outertext = '';
  194. }
  195. // Remove tips
  196. foreach($element->find('.panel-edu') as $tip) {
  197. $tip->outertext = '';
  198. }
  199. // Remove inline scripts (used for i.e. interactive graphs) as they are
  200. // rendered as a long series of strings
  201. foreach($element->find('script') as $script) {
  202. $script->outertext = '[Content removed! Visit site to see full contents!]';
  203. }
  204. return $element->innertext;
  205. }
  206. private function extractFullArticleAuthor($article) {
  207. $element = $article->find('span[itemprop=name]', 0)
  208. or returnServerError('Author not found!');
  209. return $element->plaintext;
  210. }
  211. #endregion
  212. #region Profile
  213. private function collectProfile($html) {
  214. $item = array();
  215. $item['uri'] = $this->getURI();
  216. $item['timestamp'] = $this->extractProfileDate($html);
  217. $item['title'] = $this->extractProfiletitle($html);
  218. $item['author'] = $this->extractProfileAuthor($html);
  219. $item['content'] = $this->extractProfileContent($html);
  220. $this->items[] = $item;
  221. }
  222. private function extractProfileDate($html) {
  223. $element = $html->find('div.infobox div.vallabel', 0)
  224. or returnServerError('Date not found!');
  225. // debugMessage($element->plaintext);
  226. $date = trim(explode("\r\n", $element->plaintext)[1]);
  227. return $this->fixDate($date);
  228. }
  229. private function extractProfileTitle($html) {
  230. $element = $html->find('span.h1', 0)
  231. or returnServerError('Title not found!');
  232. return $element->plaintext;
  233. }
  234. private function extractProfileContent($html) {
  235. // There are a few thins we are interested:
  236. // - Investment Strategy
  237. // - Description
  238. // - Quote
  239. $strategy = $html->find('div.tab-container div.col-sm-6 p', 0)
  240. or returnServerError('Investment Strategy not found!');
  241. // Description requires a bit of cleanup due to lack of propper identification
  242. $description = $html->find('div.headline', 5)
  243. or returnServerError('Description container not found!');
  244. $description = $description->parent();
  245. foreach($description->find('div') as $div) {
  246. $div->outertext = '';
  247. }
  248. $quote = $html->find('div.infobox div.val', 0)
  249. or returnServerError('Quote not found!');
  250. $quote_html = '<strong>Quote</strong><br><p>' . $quote . '</p>';
  251. $strategy_html = '';
  252. $description_html = '';
  253. if($this->getInput('strategy') === true) {
  254. $strategy_html = '<strong>Strategy</strong><br><p>' . $strategy . '</p><br>';
  255. }
  256. if($this->getInput('description') === true) {
  257. $description_html = '<strong>Description</strong><br><p>' . $description . '</p><br>';
  258. }
  259. return $strategy_html . $description_html . $quote_html;
  260. }
  261. private function extractProfileAuthor($html) {
  262. // Use ISIN + WKN as author
  263. // Notice: "identfier" is not a typo [sic]!
  264. $element = $html->find('span.identfier', 0)
  265. or returnServerError('Author not found!');
  266. return $element->plaintext;
  267. }
  268. #endregion
  269. }