forked from blallo/rss-bridge
Merge pull request #308 from LogMANOriginal/ElsevierBridge
Elsevier bridge
This commit is contained in:
commit
84847bf85f
1 changed file with 79 additions and 43 deletions
|
@ -1,10 +1,4 @@
|
||||||
<?php
|
<?php
|
||||||
/**
|
|
||||||
* ElsevierBridge
|
|
||||||
*
|
|
||||||
* @name Elsevier Bridge
|
|
||||||
* @description Returns the recent articles published in Elsevier journals
|
|
||||||
*/
|
|
||||||
class ElsevierBridge extends BridgeAbstract{
|
class ElsevierBridge extends BridgeAbstract{
|
||||||
public function loadMetadatas() {
|
public function loadMetadatas() {
|
||||||
|
|
||||||
|
@ -12,31 +6,72 @@ class ElsevierBridge extends BridgeAbstract{
|
||||||
$this->name = 'Elsevier journals recent articles';
|
$this->name = 'Elsevier journals recent articles';
|
||||||
$this->uri = 'http://www.journals.elsevier.com';
|
$this->uri = 'http://www.journals.elsevier.com';
|
||||||
$this->description = 'Returns the recent articles published in Elsevier journals';
|
$this->description = 'Returns the recent articles published in Elsevier journals';
|
||||||
$this->update = '2016-06-26';
|
$this->update = '2016-08-02';
|
||||||
|
|
||||||
$this->parameters=
|
$this->parameters[] =
|
||||||
'[
|
'[
|
||||||
{
|
{
|
||||||
"name" : "Journal name",
|
"name" : "Journal name",
|
||||||
"identifier" : "j"
|
"identifier" : "j",
|
||||||
|
"required" : "true",
|
||||||
|
"exampleValue" : "academic-pediatrics",
|
||||||
|
"title" : "Insert html-part of your journal"
|
||||||
}
|
}
|
||||||
]';
|
]';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the list of names from an article as a string.
 *
 * Reads the first <small> element of the article node and returns its
 * plain text with surrounding whitespace removed.
 *
 * @param object $article Node exposing find($selector, $index)
 * @return string Trimmed text of the element, or '' when it is absent
 */
function ExtractArticleName ($article){
    $authorNode = $article->find('small', 0);

    return $authorNode ? trim($authorNode->plaintext) : '';
}
|
||||||
|
|
||||||
|
/**
 * Extracts the timestamp from an article.
 *
 * The text of the '.article-info' element depends on the age of an article:
 *   - Available online 29 July 2016
 *   - July 2016
 *   - May–June 2016
 *
 * @param object $article Node exposing find($selector, $index)
 * @return int Unix timestamp as produced by strtotime(), or 0 when the
 *             element is missing, no date is found, or the date cannot
 *             be parsed
 */
function ExtractArticleTimestamp ($article){
    $info = $article->find('.article-info', 0);
    if(!$info){
        return 0;
    }

    $text = trim($info->plaintext);

    // Ordered from most to least specific. For a month range such as
    // "May–June 2016" (en dash or hyphen) the second pattern picks up the
    // closing month, so no dedicated range pattern is required.
    $patterns = array(
        '/\b\d{1,2}\s+[A-Za-z]+\s+\d{4}\b/', // "29 July 2016"
        '/\b[A-Za-z]+\s+\d{4}\b/',           // "July 2016"
    );

    foreach($patterns as $pattern){
        if(!preg_match($pattern, $text, $matches)){
            continue;
        }

        // Collapse whitespace runs so strtotime() sees a canonical string.
        $timestamp = strtotime(preg_replace('/\s+/', ' ', $matches[0]));

        // strtotime() yields false for text that merely looks like a date;
        // try the next pattern rather than leaking false to the caller.
        if($timestamp !== false){
            return $timestamp;
        }
    }

    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the content from an article.
 *
 * Reads the first '.article-content' element of the article node and
 * returns its plain text with surrounding whitespace removed.
 *
 * @param object $article Node exposing find($selector, $index)
 * @return string Trimmed text of the element, or '' when it is absent
 */
function ExtractArticleContent ($article){
    $body = $article->find('.article-content', 0);

    if(!$body){
        return '';
    }

    return trim($body->plaintext);
}
|
||||||
|
|
||||||
/**
 * Loads the "recent articles" page of a journal and appends one item per
 * listed article to $this->items.
 *
 * Each item takes its URI and title from the '.pod-listing-header>a' link
 * of a '.pod-listing' element; name, timestamp and content come from the
 * ExtractArticle* helpers of this class.
 *
 * @param array $param Request parameters; $param['j'] is the journal part
 *                     of the URL (e.g. "academic-pediatrics")
 */
public function collectData(array $param){
    $uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/';
    $html = file_get_html($uri) or $this->returnError('No results for Elsevier journal '.$param['j'], 404);

    foreach($html->find('.pod-listing') as $article){
        // Look the header link up once. A listing without it has neither a
        // URI nor a title, and calling ->getAttribute() on the null result
        // would be a fatal error, so such listings are skipped.
        $link = $article->find('.pod-listing-header>a', 0);
        if(!$link){
            continue;
        }

        $item = new \Item();
        $item->uri = $link->getAttribute('href').'?np=y';
        $item->title = $link->plaintext;
        $item->name = $this->ExtractArticleName($article);
        $item->timestamp = $this->ExtractArticleTimestamp($article);
        $item->content = $this->ExtractArticleContent($article);

        $this->items[] = $item;
    }
}
|
||||||
|
@ -53,3 +88,4 @@ class ElsevierBridge extends BridgeAbstract{
|
||||||
return 43200; // 12h
|
return 43200; // 12h
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
?>
|
Loading…
Reference in a new issue