123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
<?php
/**
 * ElsevierBridge
 *
 * @name Elsevier Bridge
 * @description Returns the recent articles published in Elsevier journals
 */
class ElsevierBridge extends BridgeAbstract{

    /**
     * Fills in the bridge metadata (maintainer, name, URI, description,
     * last update) and declares the single required parameter "j", the
     * journal identifier that is inserted into the listing URL.
     */
    public function loadMetadatas() {
        $this->maintainer = 'Pierre Mazière';
        $this->name = 'Elsevier journals recent articles';
        $this->uri = 'http://www.journals.elsevier.com';
        $this->description = 'Returns the recent articles published in Elsevier journals';
        $this->update = '2016-08-02';

        $this->parameters[] =
        '[
            {
                "name" : "Journal name",
                "identifier" : "j",
                "required" : "true",
                "exampleValue" : "academic-pediatrics",
                "title" : "Insert html-part of your journal"
            }
        ]';
    }

    /**
     * Extracts the list of author names from an article as a string.
     *
     * @param object $article DOM node of one listing entry (must expose find())
     * @return string Trimmed plain text of the first <small> element,
     *                or '' when the article has none
     */
    public function ExtractArticleName ($article){
        $names = $article->find('small', 0);
        if($names)
            return trim($names->plaintext);
        return '';
    }

    /**
     * Extracts the publication timestamp from an article.
     *
     * The text of the '.article-info' element depends on the age of the
     * article:
     *   - Available online 29 July 2016
     *   - July 2016
     *   - May–June 2016
     *
     * A full "day month year" date is preferred. Otherwise the first
     * "month year" pair is used; for a month range that pair is the closing
     * month, because only the closing month is directly followed by
     * whitespace and a year.
     *
     * @param object $article DOM node of one listing entry (must expose find())
     * @return int Unix timestamp, or 0 when no date can be found or parsed
     */
    public function ExtractArticleTimestamp ($article){
        $time = $article->find('.article-info', 0);
        if(!$time){
            return 0;
        }

        $timestring = trim($time->plaintext);

        // Ordered from most to least specific.
        $patterns = array(
            '/\d+\s\S+\s\d{4}/',     // e.g. "29 July 2016"
            '/[A-Za-z]+\s\d{4}/',    // e.g. "July 2016", or "June 2016" out of "May–June 2016"
        );

        foreach($patterns as $pattern){
            if(!preg_match($pattern, $timestring, $matches)){
                continue;
            }

            // strtotime() yields false for text it cannot parse; in that case
            // try the next pattern rather than handing false to the caller.
            $timestamp = strtotime($matches[0]);
            if($timestamp !== false){
                return $timestamp;
            }
        }

        return 0;
    }

    /**
     * Extracts the textual content of an article.
     *
     * @param object $article DOM node of one listing entry (must expose find())
     * @return string Trimmed plain text of the first '.article-content'
     *                element, or '' when the article has none
     */
    public function ExtractArticleContent ($article){
        $content = $article->find('.article-content', 0);
        if($content){
            return trim($content->plaintext);
        }
        return '';
    }

    /**
     * Downloads the "recent articles" page of the requested journal and
     * appends one item per '.pod-listing' entry to $this->items.
     *
     * @param array $param Request parameters; $param['j'] is the journal
     *                     identifier used in the URL
     */
    public function collectData(array $param){
        $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/';

        // NOTE(review): returnError() is defined outside this file; it is
        // presumably expected to abort the request - confirm.
        $html = file_get_html($uri)
            or $this->returnError('No results for Elsevier journal '.$param['j'], 404);

        foreach($html->find('.pod-listing') as $article){
            // The header link supplies both URI and title. Skip entries that
            // lack it instead of calling a method on a missing node.
            $link = $article->find('.pod-listing-header>a', 0);
            if(!$link){
                continue;
            }

            $item = new \Item();
            $item->uri = $link->getAttribute('href').'?np=y';
            $item->title = $link->plaintext;
            $item->name = $this->ExtractArticleName($article);
            $item->timestamp = $this->ExtractArticleTimestamp($article);
            $item->content = $this->ExtractArticleContent($article);
            $this->items[] = $item;
        }
    }

    /**
     * @return string Display name of the bridge
     */
    public function getName(){
        return 'Elsevier journals recent articles';
    }

    /**
     * @return string Base URI of the Elsevier journals site
     */
    public function getURI(){
        return 'http://www.journals.elsevier.com';
    }

    /**
     * @return int Cache lifetime in seconds
     */
    public function getCacheDuration(){
        return 43200; // 12h
    }
}
|