forked from blallo/rss-bridge
bridges: rename file_get_html to getSimpleHTMLDOM
Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
86515a1560
commit
6e2c7ceaf4
12 changed files with 38 additions and 38 deletions
|
@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract {
|
|||
|
||||
function ExtractContent($url) {
|
||||
#echo $url;
|
||||
$html2 = file_get_html($url);
|
||||
$html2 = getSimpleHTMLDOM($url);
|
||||
|
||||
$text = $html2->find("section[id='article-guts']", 0);
|
||||
/*foreach ($text->find('<aside id="social-left">') as $node)
|
||||
|
@ -43,7 +43,7 @@ class ArstechnicaBridge extends BridgeAbstract {
|
|||
return $text;
|
||||
}
|
||||
|
||||
$html = $this->file_get_html('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
|
||||
$html = $this->getSimpleHTMLDOM('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
|
||||
$limit = 0;
|
||||
|
||||
foreach($html->find('item') as $element) {
|
||||
|
|
|
@ -23,7 +23,7 @@ class BastaBridge extends BridgeAbstract{
|
|||
$item->title = $element->find('title', 0)->innertext;
|
||||
$item->uri = $element->find('guid', 0)->plaintext;
|
||||
$item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
|
||||
$item->content = ReplaceImageUrl($this->file_get_html($item->uri)->find('div.texte', 0)->innertext);
|
||||
$item->content = ReplaceImageUrl($this->getSimpleHTMLDOM($item->uri)->find('div.texte', 0)->innertext);
|
||||
$this->items[] = $item;
|
||||
$limit++;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ class CastorusBridge extends BridgeAbstract {
|
|||
$this->update = '2016-08-17';
|
||||
|
||||
$this->parameters["Get latest changes"] = '[]';
|
||||
$this->parameters["Get latest changes via ZIP code"] =
|
||||
$this->parameters["Get latest changes via ZIP code"] =
|
||||
'[
|
||||
{
|
||||
"name": "ZIP code",
|
||||
|
@ -19,7 +19,7 @@ class CastorusBridge extends BridgeAbstract {
|
|||
"title" : "Insert ZIP code (complete or partial)"
|
||||
}
|
||||
]';
|
||||
$this->parameters["Get latest changes via city name"] =
|
||||
$this->parameters["Get latest changes via city name"] =
|
||||
'[
|
||||
{
|
||||
"name": "City name",
|
||||
|
@ -38,7 +38,7 @@ class CastorusBridge extends BridgeAbstract {
|
|||
|
||||
if(!$title)
|
||||
$this->returnServerError('Cannot find title!');
|
||||
|
||||
|
||||
return htmlspecialchars(trim($title->plaintext));
|
||||
}
|
||||
|
||||
|
@ -48,19 +48,19 @@ class CastorusBridge extends BridgeAbstract {
|
|||
|
||||
if(!$url)
|
||||
$this->returnServerError('Cannot find url!');
|
||||
|
||||
|
||||
return $this->uri . $url->href;
|
||||
}
|
||||
|
||||
// Extracts the time from an activity
|
||||
private function ExtractActivityTime($activity){
|
||||
// Unfortunately the time is part of the parent node,
|
||||
// Unfortunately the time is part of the parent node,
|
||||
// so we have to clear all child nodes first
|
||||
$nodes = $activity->find('*');
|
||||
|
||||
if(!$nodes)
|
||||
$this->returnServerError('Cannot find nodes!');
|
||||
|
||||
|
||||
foreach($nodes as $node){
|
||||
$node->outertext = '';
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ class CastorusBridge extends BridgeAbstract {
|
|||
|
||||
if(!$price)
|
||||
$this->returnServerError('Cannot find price!');
|
||||
|
||||
|
||||
return $price->innertext;
|
||||
}
|
||||
|
||||
|
@ -85,23 +85,23 @@ class CastorusBridge extends BridgeAbstract {
|
|||
if(isset($params['city']))
|
||||
$city_filter = trim($params['city']);
|
||||
|
||||
$html = $this->file_get_html($this->uri);
|
||||
$html = $this->getSimpleHTMLDOM($this->uri);
|
||||
|
||||
if(!$html)
|
||||
$this->returnServerError('Could not load data from ' . $this->uri . '!');
|
||||
|
||||
|
||||
$activities = $html->find('div#activite/li');
|
||||
|
||||
if(!$activities)
|
||||
$this->returnServerError('Failed to find activities!');
|
||||
|
||||
|
||||
foreach($activities as $activity){
|
||||
$item = new \Item();
|
||||
|
||||
$item->title = $this->ExtractActivityTitle($activity);
|
||||
$item->uri = $this->ExtractActivityUrl($activity);
|
||||
$item->timestamp = $this->ExtractActivityTime($activity);
|
||||
$item->content = '<a href="' . $item->uri . '">' . $item->title . '</a><br><p>'
|
||||
$item->content = '<a href="' . $item->uri . '">' . $item->title . '</a><br><p>'
|
||||
. $this->ExtractActivityPrice($activity) . '</p>';
|
||||
|
||||
if(isset($zip_filter) && !(substr($item->title, 0, strlen($zip_filter)) === $zip_filter)){
|
||||
|
|
|
@ -18,7 +18,7 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
|
|||
} return false;
|
||||
}
|
||||
|
||||
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
|
||||
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
|
||||
|
||||
$item = new \Item();
|
||||
$item->uri = $this->getURI().'#'.date('Y-m-d');
|
||||
|
@ -33,4 +33,4 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
|
|||
return 21600; // 6 hours
|
||||
}
|
||||
}
|
||||
?>
|
||||
?>
|
||||
|
|
|
@ -57,7 +57,7 @@ class NextgovBridge extends BridgeAbstract {
|
|||
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32))
|
||||
$this->returnClientError('Invalid "category" parameter.');
|
||||
$url = $this->getURI().'rss/'.$category.'/';
|
||||
$html = $this->file_get_html($url) or $this->returnServerError('Could not request Nextgov: '.$url);
|
||||
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Nextgov: '.$url);
|
||||
$limit = 0;
|
||||
|
||||
foreach ($html->find('item') as $element) {
|
||||
|
@ -69,7 +69,7 @@ class NextgovBridge extends BridgeAbstract {
|
|||
$article_subtitle = $element->find('description', 0)->plaintext;
|
||||
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
||||
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
|
||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
|
||||
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
|
||||
|
||||
$contents = $article->find('div.wysiwyg', 0)->innertext;
|
||||
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
|
||||
|
@ -91,4 +91,4 @@ class NextgovBridge extends BridgeAbstract {
|
|||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ class NumeramaBridge extends BridgeAbstract{
|
|||
}
|
||||
|
||||
$feed = $this->uri.'feed/';
|
||||
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
|
||||
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
|
||||
$limit = 0;
|
||||
|
||||
foreach($html->find('item') as $element) {
|
||||
|
@ -32,7 +32,7 @@ class NumeramaBridge extends BridgeAbstract{
|
|||
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
||||
|
||||
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
||||
$article_html = $this->file_get_html($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
||||
$article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
||||
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
||||
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
||||
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
||||
|
|
|
@ -65,7 +65,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectMoviesData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
@ -73,7 +73,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectSeriesData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
@ -81,7 +81,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectGamesData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
@ -89,7 +89,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectBooksData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
@ -97,7 +97,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectBDsData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
@ -105,7 +105,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
|||
|
||||
private function collectMusicsData() {
|
||||
$html = '';
|
||||
$html = $this->file_get_html('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
|
||||
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
|
||||
$list = $html->find('ul.elpr-list', 0);
|
||||
|
||||
$this->extractDataFromList($list);
|
||||
|
|
|
@ -11,7 +11,7 @@ class ShanaprojectBridge extends BridgeAbstract {
|
|||
// Returns an html object for the Season Anime List (latest season)
|
||||
private function LoadSeasonAnimeList(){
|
||||
// First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List'
|
||||
$html = $this->file_get_html($this->getURI() . '/seasons');
|
||||
$html = $this->getSimpleHTMLDOM($this->getURI() . '/seasons');
|
||||
if(!$html)
|
||||
$this->returnServerError('Could not load \'seasons\' page!');
|
||||
|
||||
|
@ -19,7 +19,7 @@ class ShanaprojectBridge extends BridgeAbstract {
|
|||
if(!$season)
|
||||
$this->returnServerError('Could not find \'Season Anime List\'!');
|
||||
|
||||
$html = $this->file_get_html($this->getURI() . $season->href);
|
||||
$html = $this->getSimpleHTMLDOM($this->getURI() . $season->href);
|
||||
if(!$html)
|
||||
$this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!');
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
|
|||
return $string;
|
||||
}
|
||||
|
||||
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
|
||||
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
|
||||
$limit = 0;
|
||||
|
||||
foreach ($html->find('article') as $element) {
|
||||
|
@ -55,7 +55,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
|
|||
$article_author = trim($element->find('span.vcard', 0)->plaintext);
|
||||
$article_title = $element->find('a.entry-title', 0)->plaintext;
|
||||
$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
|
||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
|
||||
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
|
||||
|
||||
$contents = $article->find('div.articlebodyonly', 0)->innertext;
|
||||
$contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\'');
|
||||
|
@ -73,4 +73,4 @@ class TheHackerNewsBridge extends BridgeAbstract {
|
|||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ class WeLiveSecurityBridge extends BridgeAbstract {
|
|||
}
|
||||
|
||||
$feed = $this->getURI().'feed/';
|
||||
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
|
||||
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
|
||||
$limit = 0;
|
||||
|
||||
foreach ($html->find('item') as $element) {
|
||||
|
@ -59,4 +59,4 @@ class WeLiveSecurityBridge extends BridgeAbstract {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ class WikipediaBridge extends BridgeAbstract{
|
|||
}
|
||||
|
||||
// This will automatically send us to the correct main page in any language (try it!)
|
||||
$html = $this->file_get_html($this->uri . '/wiki');
|
||||
$html = $this->getSimpleHTMLDOM($this->uri . '/wiki');
|
||||
|
||||
if(!$html)
|
||||
$this->returnServerError('Could not load site: ' . $this->uri . '!');
|
||||
|
@ -209,7 +209,7 @@ class WikipediaBridge extends BridgeAbstract{
|
|||
* Loads the full article from a given URI
|
||||
*/
|
||||
private function LoadFullArticle($uri){
|
||||
$content_html = $this->file_get_html($uri);
|
||||
$content_html = $this->getSimpleHTMLDOM($uri);
|
||||
|
||||
if(!$content_html)
|
||||
$this->returnServerError('Could not load site: ' . $uri . '!');
|
||||
|
|
|
@ -237,7 +237,7 @@ class ZDNetBridge extends BridgeAbstract {
|
|||
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed > 64))
|
||||
$this->returnClientError('Invalid "feed" parameter.');
|
||||
$url = $baseUri.trim($feed, '/').'/rss.xml';
|
||||
$html = $this->file_get_html($url) or $this->returnServerError('Could not request ZDNet: '.$url);
|
||||
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request ZDNet: '.$url);
|
||||
$limit = 0;
|
||||
|
||||
foreach ($html->find('item') as $element) {
|
||||
|
@ -247,7 +247,7 @@ class ZDNetBridge extends BridgeAbstract {
|
|||
$article_title = StripCDATA($element->find('title', 0)->plaintext);
|
||||
$article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
|
||||
$article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
|
||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
|
||||
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
|
||||
|
||||
if (!empty($article_author))
|
||||
$author = $article_author;
|
||||
|
|
Loading…
Reference in a new issue