add LegifranceJOBridge
Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
691d4ad0ee
commit
0eeb511676
1 changed files with 70 additions and 0 deletions
70
bridges/LegifranceJOBridge.php
Normal file
70
bridges/LegifranceJOBridge.php
Normal file
|
@ -0,0 +1,70 @@
|
|||
<?php
|
||||
/**
 * Bridge that turns the Legifrance "Journal Officiel" page into feed items.
 *
 * The page is read as a hierarchy of headings: every h3 is a section, the h4
 * elements found in the node following a section are its subsections, and the
 * h5 elements found in the node following a subsection are its origins. One
 * item is produced per deepest heading available, listing the links found in
 * the node that follows that heading.
 */
class LegifranceJOBridge extends BridgeAbstract{

    const MAINTAINER = 'Pierre Mazière';
    const NAME = 'Journal Officiel de la République Française';
    const URI = 'https://www.legifrance.gouv.fr/affichJO.do';
    const DESCRIPTION = 'Returns the laws and decrees officially registered daily in France';

    const PARAMETERS=array();

    // Values read once from the page header and copied into every item.
    private $author;
    private $timestamp;
    private $uri;

    /**
     * Build one feed item for a heading of the page.
     *
     * The deepest non-null heading among $origin, $subsection and $section
     * decides both the title format and which node's following sibling is
     * scanned for links.
     *
     * @param object $section h3 node of the section
     * @param object|null $subsection h4 node, when the section has subsections
     * @param object|null $origin h5 node, when the subsection has origins
     * @return array item with 'author', 'timestamp', 'uri', 'title' and 'content' keys
     */
    private function extractItem($section,$subsection=null,$origin=null){
        $item=array();
        $item['author']=$this->author;
        $item['timestamp']=$this->timestamp;
        // The number of items collected so far is used as a fragment so that
        // every item gets a distinct URI.
        $item['uri']=$this->uri.'#'.count($this->items);
        $item['title']=$section->plaintext;

        if(!is_null($origin)){
            $item['title']='[ '.$item['title'].' / '.$subsection->plaintext.' ] '.$origin->plaintext;
            $data=$origin;
        }elseif(!is_null($subsection)){
            $item['title']='[ '.$item['title'].' ] '.$subsection->plaintext;
            $data=$subsection;
        }else{
            $data=$section;
        }

        $item['content']='';

        // A heading without a following node has nothing to list; return the
        // item with empty content instead of dereferencing null.
        $container=$data->nextSibling();
        if(!$container){
            return $item;
        }

        foreach($container->find('a') as $content){
            $text=$content->plaintext;
            // The link target is read from the 'resource' attribute of the
            // node that follows the anchor, not from the anchor itself.
            $target=$content->nextSibling();
            if(!$target){
                // No following node to read a target from: keep the text,
                // without a link.
                $item['content'].='<p>'.$text.'</p>';
                continue;
            }
            $href=$target->getAttribute('resource');
            $item['content'].='<p><a href="'.$href.'">'.$text.'</a></p>';
        }
        return $item;
    }

    /**
     * Download the page, read the issue metadata from its header and append
     * one item per section / subsection / origin to $this->items.
     */
    public function collectData(){
        $html=$this->getSimpleHTMLDOM(self::URI);
        if(!$html){
            $this->returnServer('Unable to download '.self::URI);
            // Stop here even if the error helper returns.
            return;
        }

        $title=$html->find('h2.title',0);
        $eli=$html->find('h2.titleELI',0);
        if(!$title || !$eli){
            $this->returnServer('Unable to find the issue title or ELI link in '.self::URI);
            return;
        }

        $this->author=trim($title->plaintext);

        // Keep only the part of the ELI heading starting at the URL.
        $uri=$eli->plaintext;
        $this->uri=trim(substr($uri,strpos($uri,'https')));

        // The text following 'eli/jo/' in the URL is parsed as the issue date.
        // When the marker is absent the timestamp is false, the same value
        // strtotime() yields for an unparsable date.
        $marker='eli/jo/';
        $markerPos=strpos($this->uri,$marker);
        if($markerPos===false){
            $this->timestamp=false;
        }else{
            $this->timestamp=strtotime(substr($this->uri,$markerPos+strlen($marker)));
        }

        foreach($html->find('h3') as $section){
            $sectionBody=$section->nextSibling();
            $subsections=$sectionBody?$sectionBody->find('h4'):array();

            foreach($subsections as $subsection){
                $subsectionBody=$subsection->nextSibling();
                $origins=$subsectionBody?$subsectionBody->find('h5'):array();

                foreach($origins as $origin){
                    $this->items[]=$this->extractItem($section,$subsection,$origin);
                }

                // A subsection only gets its own item when it has no origins.
                if(empty($origins)){
                    $this->items[]=$this->extractItem($section,$subsection);
                }
            }

            // A section only gets its own item when it has no subsections.
            if(empty($subsections)){
                $this->items[]=$this->extractItem($section);
            }
        }
    }
}
|
||||
|
||||
|
Loading…
Reference in a new issue