|
@@ -1,99 +1,91 @@
|
|
|
<?php
|
|
|
-/**
|
|
|
- * ElsevierBridge
|
|
|
- *
|
|
|
- * @name Elsevier Bridge
|
|
|
- * @description Returns the recent articles published in Elsevier journals
|
|
|
- */
|
|
|
class ElsevierBridge extends BridgeAbstract{
|
|
|
- public function loadMetadatas() {
|
|
|
+ public function loadMetadatas() {
|
|
|
|
|
|
- $this->maintainer = 'Pierre Mazière';
|
|
|
- $this->name = 'Elsevier journals recent articles';
|
|
|
- $this->uri = 'http://www.journals.elsevier.com';
|
|
|
- $this->description = 'Returns the recent articles published in Elsevier journals';
|
|
|
- $this->update = '2016-08-02';
|
|
|
+ $this->maintainer = 'Pierre Mazière';
|
|
|
+ $this->name = 'Elsevier journals recent articles';
|
|
|
+ $this->uri = 'http://www.journals.elsevier.com';
|
|
|
+ $this->description = 'Returns the recent articles published in Elsevier journals';
|
|
|
+ $this->update = '2016-08-02';
|
|
|
|
|
|
- $this->parameters[] =
|
|
|
- '[
|
|
|
- {
|
|
|
- "name" : "Journal name",
|
|
|
- "identifier" : "j",
|
|
|
- "required" : "true",
|
|
|
- "exampleValue" : "academic-pediatrics",
|
|
|
- "title" : "Insert html-part of your journal"
|
|
|
- }
|
|
|
- ]';
|
|
|
- }
|
|
|
+ $this->parameters[] =
|
|
|
+ '[
|
|
|
+ {
|
|
|
+ "name" : "Journal name",
|
|
|
+ "identifier" : "j",
|
|
|
+ "required" : "true",
|
|
|
+ "exampleValue" : "academic-pediatrics",
|
|
|
+ "title" : "Insert html-part of your journal"
|
|
|
+ }
|
|
|
+ ]';
|
|
|
+ }
|
|
|
|
|
|
- // Extracts the list of names from an article as string
|
|
|
- function ExtractArticleName ($article){
|
|
|
- $names = $article->find('small', 0);
|
|
|
- if($names)
|
|
|
- return trim($names->plaintext);
|
|
|
- return '';
|
|
|
- }
|
|
|
+ // Extracts the list of names from an article as string
|
|
|
+ function ExtractArticleName ($article){
|
|
|
+ $names = $article->find('small', 0);
|
|
|
+ if($names)
|
|
|
+ return trim($names->plaintext);
|
|
|
+ return '';
|
|
|
+ }
|
|
|
|
|
|
- // Extracts the timestamp from an article
|
|
|
- function ExtractArticleTimestamp ($article){
|
|
|
- $time = $article->find('.article-info', 0);
|
|
|
- if($time){
|
|
|
- $timestring = trim($time->plaintext);
|
|
|
- /*
|
|
|
- The format depends on the age of an article:
|
|
|
- - Available online 29 July 2016
|
|
|
- - July 2016
|
|
|
- - May–June 2016
|
|
|
- */
|
|
|
- if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){
|
|
|
- return strtotime($matches[0]);
|
|
|
- } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
|
|
|
- return strtotime($matches[0]);
|
|
|
- } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
|
|
|
- return strtotime($matches[0]);
|
|
|
- } else {
|
|
|
- return 0;
|
|
|
- }
|
|
|
- }
|
|
|
- return 0;
|
|
|
- }
|
|
|
+ // Extracts the timestamp from an article
|
|
|
+ function ExtractArticleTimestamp ($article){
|
|
|
+ $time = $article->find('.article-info', 0);
|
|
|
+ if($time){
|
|
|
+ $timestring = trim($time->plaintext);
|
|
|
+ /*
|
|
|
+ The format depends on the age of an article:
|
|
|
+ - Available online 29 July 2016
|
|
|
+ - July 2016
|
|
|
+ - May–June 2016
|
|
|
+ */
|
|
|
+ if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){
|
|
|
+ return strtotime($matches[0]);
|
|
|
+ } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
|
|
|
+ return strtotime($matches[0]);
|
|
|
+ } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
|
|
|
+ return strtotime($matches[0]);
|
|
|
+ } else {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
|
|
|
- // Extracts the content from an article
|
|
|
- function ExtractArticleContent ($article){
|
|
|
- $content = $article->find('.article-content', 0);
|
|
|
- if($content){
|
|
|
- return trim($content->plaintext);
|
|
|
- }
|
|
|
- return '';
|
|
|
- }
|
|
|
+ // Extracts the content from an article
|
|
|
+ function ExtractArticleContent ($article){
|
|
|
+ $content = $article->find('.article-content', 0);
|
|
|
+ if($content){
|
|
|
+ return trim($content->plaintext);
|
|
|
+ }
|
|
|
+ return '';
|
|
|
+ }
|
|
|
|
|
|
- public function collectData(array $param){
|
|
|
- $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/';
|
|
|
- $html = file_get_html($uri)
|
|
|
- or $this->returnError('No results for Elsevier journal '.$param['j'], 404);
|
|
|
+ public function collectData(array $param){
|
|
|
+ $uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/';
|
|
|
+ $html = file_get_html($uri) or $this->returnError('No results for Elsevier journal '.$param['j'], 404);
|
|
|
|
|
|
- foreach($html->find('.pod-listing') as $article){
|
|
|
+ foreach($html->find('.pod-listing') as $article){
|
|
|
+ $item = new \Item();
|
|
|
+ $item->uri = $article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y';
|
|
|
+ $item->title = $article->find('.pod-listing-header>a',0)->plaintext;
|
|
|
+ $item->name = $this->ExtractArticleName($article);
|
|
|
+ $item->timestamp = $this->ExtractArticleTimestamp($article);
|
|
|
+ $item->content = $this->ExtractArticleContent($article);
|
|
|
+ $this->items[] = $item;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- $item = new \Item();
|
|
|
- $item->uri=$article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y';
|
|
|
- $item->title=$article->find('.pod-listing-header>a',0)->plaintext;
|
|
|
- $item->name=$this->ExtractArticleName($article);
|
|
|
- $item->timestamp=$this->ExtractArticleTimestamp($article);
|
|
|
- $item->content=$this->ExtractArticleContent($article);
|
|
|
+ public function getName(){
|
|
|
+ return 'Elsevier journals recent articles';
|
|
|
+ }
|
|
|
|
|
|
- $this->items[]=$item;
|
|
|
- }
|
|
|
- }
|
|
|
+ public function getURI(){
|
|
|
+ return 'http://www.journals.elsevier.com';
|
|
|
+ }
|
|
|
|
|
|
- public function getName(){
|
|
|
- return 'Elsevier journals recent articles';
|
|
|
- }
|
|
|
-
|
|
|
- public function getURI(){
|
|
|
- return 'http://www.journals.elsevier.com';
|
|
|
- }
|
|
|
-
|
|
|
- public function getCacheDuration(){
|
|
|
- return 43200; // 12h
|
|
|
- }
|
|
|
+ public function getCacheDuration(){
|
|
|
+ return 43200; // 12h
|
|
|
+ }
|
|
|
}
|
|
|
+?>
|