1
0

ElsevierBridge.php 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. <?php
  2. /**
  3. * ElsevierBridge
  4. *
  5. * @name Elsevier Bridge
  6. * @description Returns the recent articles published in Elsevier journals
  7. */
  class ElsevierBridge extends BridgeAbstract{

      /**
       * Registers the bridge metadata and declares its single parameter,
       * "j" (the journal name used to build the listing URL).
       */
      public function loadMetadatas() {
          $this->maintainer = 'Pierre Mazière';
          // Reuse the getters so the name and URI literals live in one place only.
          $this->name = $this->getName();
          $this->uri = $this->getURI();
          $this->description = 'Returns the recent articles published in Elsevier journals';
          $this->update = '2016-08-02';
          $this->parameters[] =
          '[
              {
                  "name" : "Journal name",
                  "identifier" : "j",
                  "required" : "true",
                  "exampleValue" : "academic-pediatrics",
                  "title" : "Insert html-part of your journal"
              }
          ]';
      }

      /**
       * Extracts the list of names from an article as string.
       *
       * @param object $article Article node exposing find()
       * @return string Trimmed text of the first <small> element, or '' if there is none
       */
      public function ExtractArticleName ($article){
          $names = $article->find('small', 0);
          if(!$names){
              return '';
          }
          return trim($names->plaintext);
      }

      /**
       * Extracts the timestamp from an article.
       *
       * The text of the first ".article-info" element depends on the age of
       * the article:
       * - Available online 29 July 2016
       * - July 2016
       * - May–June 2016
       *
       * @param object $article Article node exposing find()
       * @return int Unix timestamp, or 0 when no date can be found or parsed
       */
      public function ExtractArticleTimestamp ($article){
          $time = $article->find('.article-info', 0);
          if(!$time){
              return 0;
          }

          $timestring = trim($time->plaintext);

          // Most specific pattern first. The second pattern also covers month
          // ranges ("May–June 2016"): it matches the closing "June 2016",
          // whatever dash character separates the two months.
          $patterns = array(
              '/\d+\s+\S+\s+\d{4}/',       // "29 July 2016"
              '/[A-Za-z]+\s+\d{4}/',       // "July 2016"
          );

          foreach($patterns as $pattern){
              if(!preg_match($pattern, $timestring, $matches)){
                  continue;
              }
              $timestamp = strtotime($matches[0]);
              // strtotime() returns false for text it cannot parse; in that
              // case give the next, looser pattern a chance.
              if($timestamp !== false){
                  return $timestamp;
              }
          }

          return 0;
      }

      /**
       * Extracts the content from an article.
       *
       * @param object $article Article node exposing find()
       * @return string Trimmed text of the first ".article-content" element, or '' if there is none
       */
      public function ExtractArticleContent ($article){
          $content = $article->find('.article-content', 0);
          if(!$content){
              return '';
          }
          return trim($content->plaintext);
      }

      /**
       * Loads the "recent articles" page of the requested journal and appends
       * one item per ".pod-listing" element to $this->items.
       *
       * @param array $param Request parameters; 'j' holds the journal name
       */
      public function collectData(array $param){
          $journal = isset($param['j']) ? trim($param['j']) : '';
          if($journal === ''){
              $this->returnError('Missing journal name (parameter "j")', 400);
              // NOTE(review): returnError() is expected not to return; the
              // explicit return only guards against that assumption being wrong.
              return;
          }

          // The journal name comes from the request: encode it so it can only
          // ever be a single path segment of the fixed host.
          $uri = $this->getURI().'/'.rawurlencode($journal).'/recent-articles/';
          $html = file_get_html($uri)
              or $this->returnError('No results for Elsevier journal '.$journal, 404);

          foreach($html->find('.pod-listing') as $article){
              // Look the header link up once; skip listings that have none
              // instead of dereferencing null.
              $link = $article->find('.pod-listing-header>a', 0);
              if(!$link){
                  continue;
              }

              $item = new \Item();
              // '?np=y' is appended exactly as before; its meaning is not
              // visible from this file.
              $item->uri = $link->getAttribute('href').'?np=y';
              $item->title = $link->plaintext;
              $item->name = $this->ExtractArticleName($article);
              $item->timestamp = $this->ExtractArticleTimestamp($article);
              $item->content = $this->ExtractArticleContent($article);
              $this->items[] = $item;
          }
      }

      /**
       * @return string Display name of the bridge
       */
      public function getName(){
          return 'Elsevier journals recent articles';
      }

      /**
       * @return string Base URI of the Elsevier journals site
       */
      public function getURI(){
          return 'http://www.journals.elsevier.com';
      }

      /**
       * @return int Cache lifetime in seconds
       */
      public function getCacheDuration(){
          return 43200; // 12h
      }
  }