Browse Source

Merge pull request #308 from LogMANOriginal/ElsevierBridge

Elsevier bridge
Mitsu 7 years ago
parent
commit
84847bf85f
1 changed files with 88 additions and 52 deletions
  1. 88 52
      bridges/ElsevierBridge.php

+ 88 - 52
bridges/ElsevierBridge.php

@@ -1,55 +1,91 @@
 <?php
-/**
- * ElsevierBridge
- *
- * @name Elsevier Bridge
- * @description Returns the recent articles published in Elsevier journals
- */
 class ElsevierBridge extends BridgeAbstract{
-  public function loadMetadatas() {
-
-    $this->maintainer = 'Pierre Mazière';
-    $this->name = 'Elsevier journals recent articles';
-    $this->uri = 'http://www.journals.elsevier.com';
-    $this->description = 'Returns the recent articles published in Elsevier journals';
-    $this->update = '2016-06-26';
-
-    $this->parameters=
-      '[
-         {
-           "name" : "Journal name",
-           "identifier" : "j"
-         }
-       ]';
-  }
-
-  public function collectData(array $param){
-    $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/';
-    $html = file_get_html($uri)
-      or $this->returnError('No results for Elsevier journal '.$param['j'], 404);
-
-    foreach($html->find('.pod-listing') as $article){
-
-      $item = new \Item();
-      $item->uri=$article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y';
-      $item->title=$article->find('.pod-listing-header>a',0)->plaintext;
-      $item->name=trim($article->find('small',0)->plaintext);
-      $item->timestamp=strtotime($article->find('.article-info',0)->plaintext);
-      $item->content=trim($article->find('.article-content',0)->plaintext);
-
-      $this->items[]=$item;
-    }
-  }
-
-  public function getName(){
-    return 'Elsevier journals recent articles';
-  }
-
-  public function getURI(){
-    return 'http://www.journals.elsevier.com';
-  }
-
-  public function getCacheDuration(){
-    return 43200; // 12h
-  }
+	/**
+	 * Fills in the descriptive metadata of this bridge (maintainer, name,
+	 * uri, description, update date) and appends the JSON description of
+	 * its single parameter "j" to $this->parameters.
+	 */
+	public function loadMetadatas() {
+
+		$this->maintainer = 'Pierre Mazière';
+		$this->name = 'Elsevier journals recent articles';
+		$this->uri = 'http://www.journals.elsevier.com';
+		$this->description = 'Returns the recent articles published in Elsevier journals';
+		$this->update = '2016-08-02';
+
+		// The parameter description is a JSON string appended as a new entry.
+		// "j" is the value collectData() inserts into the journal URL.
+		// NOTE(review): "required" is the JSON string "true", not a boolean;
+		// confirm that the code consuming this JSON expects a string.
+		$this->parameters[] =
+			'[
+				 {
+					 "name" : "Journal name",
+					 "identifier" : "j",
+					 "required" : "true",
+					 "exampleValue" : "academic-pediatrics",
+					 "title" : "Insert html-part of your journal"
+				 }
+			 ]';
+	}
+
+	/**
+	 * Extracts the list of names from an article as a string.
+	 *
+	 * Returns the trimmed plain text of the article's first 'small'
+	 * element, or an empty string when the article has no such element.
+	 */
+	function ExtractArticleName ($article){
+		$nameNode = $article->find('small', 0);
+		return $nameNode ? trim($nameNode->plaintext) : '';
+	}
+
+	/**
+	 * Extracts the timestamp from an article.
+	 *
+	 * Reads the plain text of the article's first '.article-info' element.
+	 * The format depends on the age of an article:
+	 * - Available online 29 July 2016
+	 * - July 2016
+	 * - May–June 2016
+	 *
+	 * Returns the Unix timestamp parsed from that text, or 0 when the
+	 * element is missing or no parsable date is found.
+	 */
+	function ExtractArticleTimestamp ($article){
+		$time = $article->find('.article-info', 0);
+		if(!$time)
+			return 0;
+
+		$timestring = trim($time->plaintext);
+
+		// Tried in order, most specific first:
+		// 1. day, month and year ("29 July 2016")
+		// 2. month and year ("July 2016"). This also covers month ranges
+		//    such as "May–June 2016", where the month directly in front of
+		//    the year is picked, so ranges need no pattern of their own.
+		$patterns = array(
+			'/(\d+\s\S+\s\d{4})/',
+			'/([A-Za-z]+\s\d{4})/'
+		);
+
+		foreach($patterns as $pattern){
+			if(!preg_match($pattern, $timestring, $matches))
+				continue;
+
+			// Only the captured date is parsed, never surrounding text.
+			// strtotime() returns false for text it cannot parse; in that
+			// case fall through to the next, less specific pattern.
+			$timestamp = strtotime($matches[1]);
+			if($timestamp !== false)
+				return $timestamp;
+		}
+
+		return 0;
+	}
+
+	/**
+	 * Extracts the content from an article.
+	 *
+	 * Returns the trimmed plain text of the article's first
+	 * '.article-content' element, or an empty string when it is absent.
+	 */
+	function ExtractArticleContent ($article){
+		$contentNode = $article->find('.article-content', 0);
+		if(!$contentNode){
+			return '';
+		}
+
+		$text = $contentNode->plaintext;
+		return trim($text);
+	}
+
+	/**
+	 * Fetches the "recent articles" page of the journal named by
+	 * $param['j'] and appends one item per '.pod-listing' element to
+	 * $this->items.
+	 *
+	 * Calls $this->returnError() with code 404 when the page cannot be
+	 * loaded. Listings without a header link are skipped, since neither a
+	 * uri nor a title can be derived for them.
+	 */
+	public function collectData(array $param){
+		$uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/';
+		$html = file_get_html($uri) or $this->returnError('No results for Elsevier journal '.$param['j'], 404);
+
+		foreach($html->find('.pod-listing') as $article){
+			// Look the header link up once and guard against a missing one,
+			// the same way the Extract* helpers guard their lookups.
+			$link = $article->find('.pod-listing-header>a', 0);
+			if(!$link)
+				continue;
+
+			$item = new \Item();
+			$item->uri = $link->getAttribute('href').'?np=y';
+			$item->title = $link->plaintext;
+			$item->name = $this->ExtractArticleName($article);
+			$item->timestamp = $this->ExtractArticleTimestamp($article);
+			$item->content = $this->ExtractArticleContent($article);
+			$this->items[] = $item;
+		}
+	}
+
+	/**
+	 * Returns the display name of this bridge.
+	 */
+	public function getName(){
+		$name = 'Elsevier journals recent articles';
+		return $name;
+	}
+
+	/**
+	 * Returns the base address of the Elsevier journals site.
+	 */
+	public function getURI(){
+		$baseUri = 'http://www.journals.elsevier.com';
+		return $baseUri;
+	}
+
+	/**
+	 * Returns the cache duration in seconds (12 hours).
+	 */
+	public function getCacheDuration(){
+		$twelveHours = 12 * 60 * 60; // 43200 seconds
+		return $twelveHours;
+	}
 }
+?>