diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php
index 86dfdb0..595160e 100644
--- a/bridges/CADBridge.php
+++ b/bridges/CADBridge.php
@@ -16,7 +16,7 @@ class CADBridge extends FeedExpander {
}
private function CADExtractContent($url) {
- $html3 = $this->get_cached($url);
+ $html3 = $this->getSimpleHTMLDOMCached($url);
// The request might fail due to missing https support or wrong URL
if($html3 == false)
diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php
index e8a4af3..224b309 100644
--- a/bridges/CommonDreamsBridge.php
+++ b/bridges/CommonDreamsBridge.php
@@ -17,7 +17,7 @@ class CommonDreamsBridge extends FeedExpander {
}
private function CommonDreamsExtractContent($url) {
- $html3 = $this->get_cached($url);
+ $html3 = $this->getSimpleHTMLDOMCached($url);
$text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
$html3->clear();
unset ($html3);
diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php
index 829c596..10af594 100644
--- a/bridges/CpasbienBridge.php
+++ b/bridges/CpasbienBridge.php
@@ -1,5 +1,5 @@
getAttribute('class')=='ligne0' ||
$episode->getAttribute('class')=='ligne1')
{
- $htmlepisode=$this->get_cached($episode->find('a', 0)->getAttribute('href'));
+ $htmlepisode=$this->getSimpleHTMLDOMCached($episode->find('a', 0)->getAttribute('href'));
$item = array();
$item['author'] = $episode->find('a', 0)->text();
diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php
index fe4775c..9e9aacd 100644
--- a/bridges/DauphineLibereBridge.php
+++ b/bridges/DauphineLibereBridge.php
@@ -47,7 +47,7 @@ class DauphineLibereBridge extends FeedExpander {
}
private function ExtractContent($url) {
- $html2 = $this->get_cached($url);
+ $html2 = $this->getSimpleHTMLDOMCached($url);
$text = $html2->find('div.column', 0)->innertext;
$text = preg_replace('@@si', '', $text);
return $text;
diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php
index fe08d28..5cbd576 100644
--- a/bridges/DeveloppezDotComBridge.php
+++ b/bridges/DeveloppezDotComBridge.php
@@ -42,7 +42,7 @@ class DeveloppezDotComBridge extends FeedExpander {
}
private function DeveloppezDotComExtractContent($url) {
- $articleHTMLContent = $this->get_cached($url);
+ $articleHTMLContent = $this->getSimpleHTMLDOMCached($url);
$text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext);
$text = utf8_encode($text);
return trim($text);
diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php
index dbc46b9..1934e0b 100644
--- a/bridges/FreenewsBridge.php
+++ b/bridges/FreenewsBridge.php
@@ -13,7 +13,7 @@ class FreenewsBridge extends FeedExpander {
protected function parseItem($newsItem) {
$item = $this->parseRSS_2_0_Item($newsItem);
- $articlePage = $this->get_cached($item['uri']);
+ $articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('.post-container', 0);
$item['content'] = $content->innertext;
diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php
index 2cf846c..73f1b53 100644
--- a/bridges/FuturaSciencesBridge.php
+++ b/bridges/FuturaSciencesBridge.php
@@ -86,7 +86,7 @@ class FuturaSciencesBridge extends FeedExpander {
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']);
- $article = $this->get_cached($item['uri'])
+ $article = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']);
$item['content'] = $this->ExtractArticleContent($article);
$item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article);
diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php
index 4019ae8..dcd951a 100644
--- a/bridges/JapanExpoBridge.php
+++ b/bridges/JapanExpoBridge.php
@@ -1,5 +1,5 @@
get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url);
+ $article_html = $this->getSimpleHTMLDOMCached($url) or $this->returnServerError('Could not request JapanExpo: '.$url);
$header = $article_html->find('header.pageHeadBox', 0);
$timestamp = strtotime($header->find('time', 0)->datetime);
$title_html = $header->find('div.section', 0)->next_sibling();
diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php
index da159ce..a958b77 100644
--- a/bridges/KununuBridge.php
+++ b/bridges/KununuBridge.php
@@ -1,5 +1,5 @@
get_cached($uri);
+ $html = $this->getSimpleHTMLDOMCached($uri);
if($html === false)
$this->returnServerError('Could not load full description!');
diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php
index c723a2f..95bd960 100644
--- a/bridges/LeJournalDuGeekBridge.php
+++ b/bridges/LeJournalDuGeekBridge.php
@@ -17,7 +17,7 @@ class LeJournalDuGeekBridge extends FeedExpander {
}
private function LeJournalDuGeekExtractContent($url) {
- $articleHTMLContent = $this->get_cached($url);
+ $articleHTMLContent = $this->getSimpleHTMLDOMCached($url);
$text = $articleHTMLContent->find('div.post-content', 0)->innertext;
foreach($articleHTMLContent->find('a.more') as $element) {
diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php
index 010228a..f609517 100644
--- a/bridges/LeMondeInformatiqueBridge.php
+++ b/bridges/LeMondeInformatiqueBridge.php
@@ -12,7 +12,7 @@ class LeMondeInformatiqueBridge extends FeedExpander {
protected function parseItem($newsItem){
$item = $this->parseRSS_1_0_Item($newsItem);
- $article_html = $this->get_cached($item['uri'])
+ $article_html = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);
$item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext);
$item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext;
diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php
index 638811d..6f64539 100644
--- a/bridges/LichessBridge.php
+++ b/bridges/LichessBridge.php
@@ -17,7 +17,7 @@ class LichessBridge extends FeedExpander {
}
private function retrieve_lichess_post($blog_post_uri){
- $blog_post_html = $this->get_cached($blog_post_uri);
+ $blog_post_html = $this->getSimpleHTMLDOMCached($blog_post_uri);
$blog_post_div = $blog_post_html->find('#lichess_blog', 0);
$post_chapo = $blog_post_div->find('.shortlede', 0)->innertext;
diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php
index 815a236..3152b09 100644
--- a/bridges/NextInpactBridge.php
+++ b/bridges/NextInpactBridge.php
@@ -17,7 +17,7 @@ class NextInpactBridge extends FeedExpander {
}
private function ExtractContent($url) {
- $html2 = $this->get_cached($url);
+ $html2 = $this->getSimpleHTMLDOMCached($url);
$text = '
'.$html2->find('span.sub_title', 0)->innertext.'
'
.''
.''.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
';
diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php
index 5d26ec5..d706119 100644
--- a/bridges/NextgovBridge.php
+++ b/bridges/NextgovBridge.php
@@ -56,7 +56,7 @@ class NextgovBridge extends FeedExpander {
}
private function ExtractContent($url){
- $article = $this->get_cached($url)
+ $article = $this->getSimpleHTMLDOMCached($url)
or $this->returnServerError('Could not request Nextgov: ' . $url);
$contents = $article->find('div.wysiwyg', 0)->innertext;
diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php
index 6d148ad..4e83cff 100644
--- a/bridges/NiceMatinBridge.php
+++ b/bridges/NiceMatinBridge.php
@@ -17,7 +17,7 @@ class NiceMatinBridge extends FeedExpander {
}
private function NiceMatinExtractContent($url) {
- $html = $this->get_cached($url);
+ $html = $this->getSimpleHTMLDOMCached($url);
if(!$html)
return 'Could not acquire content from url: ' . $url . '!';
diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php
index ead340a..d018fbd 100644
--- a/bridges/NumeramaBridge.php
+++ b/bridges/NumeramaBridge.php
@@ -17,7 +17,7 @@ class NumeramaBridge extends FeedExpander {
}
private function ExtractContent($url){
- $article_html = $this->get_cached($url) or $this->returnServerError('Could not request Numerama: '.$url);
+ $article_html = $this->getSimpleHTMLDOMCached($url) or $this->returnServerError('Could not request Numerama: '.$url);
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
$contents = ''; // add post picture
return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post
diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php
index eee9283..3c3d216 100644
--- a/bridges/TheOatMealBridge.php
+++ b/bridges/TheOatMealBridge.php
@@ -13,7 +13,7 @@ class TheOatmealBridge extends FeedExpander{
protected function parseItem($newsItem) {
$item = $this->parseRSS_1_0_Item($newsItem);
- $articlePage = $this->get_cached($item['uri']);
+ $articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('#comic', 0);
if(is_null($content)) // load alternative
$content = $articlePage->find('#blog', 0);
diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php
index d7a90dc..5feb429 100644
--- a/bridges/WikipediaBridge.php
+++ b/bridges/WikipediaBridge.php
@@ -3,7 +3,7 @@
define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article
define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know...
-class WikipediaBridge extends HttpCachingBridgeAbstract {
+class WikipediaBridge extends BridgeAbstract {
const MAINTAINER = 'logmanoriginal';
const NAME = 'Wikipedia bridge for many languages';
const URI = 'https://www.wikipedia.org/';
@@ -175,7 +175,7 @@ class WikipediaBridge extends HttpCachingBridgeAbstract {
* Loads the full article from a given URI
*/
private function LoadFullArticle($uri){
- $content_html = $this->get_cached($uri);
+ $content_html = $this->getSimpleHTMLDOMCached($uri);
if(!$content_html)
$this->returnServerError('Could not load site: ' . $uri . '!');
diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php
index 30e7e2a..6c7d5f6 100644
--- a/bridges/WordPressBridge.php
+++ b/bridges/WordPressBridge.php
@@ -1,7 +1,7 @@
find('updated', 0)->innertext);
}
- $article_html = $this->get_cached($item['uri']);
+ $article_html = $this->getSimpleHTMLDOMCached($item['uri']);
// Attempt to find most common content div
if(!isset($item['content'])){
diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php
index b723526..a3179be 100644
--- a/bridges/WorldOfTanksBridge.php
+++ b/bridges/WorldOfTanksBridge.php
@@ -1,5 +1,5 @@
href;
// now load that uri from cache
$this->debugMessage("loading page ".$item['uri']);
- $articlePage = $this->get_cached($item['uri']);
+ $articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('.l-content', 0);
HTMLSanitizer::defaultImageSrcTo($content, self::URI);
$item['title'] = $content->find('h1', 0)->innertext;
diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php
index f58e056..abcda79 100644
--- a/lib/BridgeAbstract.php
+++ b/lib/BridgeAbstract.php
@@ -386,4 +386,60 @@ abstract class BridgeAbstract implements BridgeInterface {
, $defaultBRText
, $defaultSpanText);
}
+
+ /**
+ * Fetches a page through a local file cache (cache/pages/<sha1 of url>.cache) and parses it with str_get_html().
+ * @param url url to fetch and cache; use_include_path, context, offset and maxLen are forwarded to getContents(), the remaining optional parameters to str_get_html()
+ * @param duration lifetime of the cache file in seconds (default: 24h/86400s)
+ * @return the value returned by str_get_html() for the page content, i.e. a parsed DOM rather than a string (NOTE(review): presumably false when the download fails - confirm)
+ */
+ public function getSimpleHTMLDOMCached($url
+ , $duration = 86400
+ , $use_include_path = false
+ , $context = null
+ , $offset = 0
+ , $maxLen = null
+ , $lowercase = true
+ , $forceTagsClosed = true
+ , $target_charset = DEFAULT_TARGET_CHARSET
+ , $stripRN = true
+ , $defaultBRText = DEFAULT_BR_TEXT
+ , $defaultSpanText = DEFAULT_SPAN_TEXT){
+ $this->debugMessage('Caching url ' . $url . ', duration ' . $duration);
+
+ $filepath = __DIR__ . '/../cache/pages/' . sha1($url) . '.cache'; // cache file is named after the SHA-1 of the url
+ $this->debugMessage('Cache file ' . $filepath);
+
+ if(file_exists($filepath) && filectime($filepath) < time() - $duration){ // entry is older than $duration: delete it so it is fetched again below
+ unlink ($filepath);
+ $this->debugMessage('Cached file deleted: ' . $filepath);
+ }
+
+ if(file_exists($filepath)){
+ $this->debugMessage('Loading cached file ' . $filepath);
+ touch($filepath); // refreshes the file times on every cache hit; NOTE(review): this may postpone the filectime()-based expiry above - confirm this is intended
+ $content = file_get_contents($filepath);
+ } else {
+ $this->debugMessage('Caching ' . $url . ' to ' . $filepath);
+ $dir = substr($filepath, 0, strrpos($filepath, '/')); // directory part of the cache file path
+
+ if(!is_dir($dir)){
+ $this->debugMessage('Creating directory ' . $dir);
+ mkdir($dir, 0777, true);
+ }
+
+ $content = $this->getContents($url, $use_include_path, $context, $offset, $maxLen);
+ if($content !== false){ // a failed download (false) is not written to the cache
+ file_put_contents($filepath, $content);
+ }
+ }
+
+ return str_get_html($content
+ , $lowercase
+ , $forceTagsClosed
+ , $target_charset
+ , $stripRN
+ , $defaultBRText
+ , $defaultSpanText);
+ }
}
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index abaf121..5566f7c 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -1,6 +1,6 @@
debugMessage('Caching url ' . $url . ', duration ' . $duration);
-
- $filepath = __DIR__ . '/../cache/pages/' . sha1($url) . '.cache';
- $this->debugMessage('Cache file ' . $filepath);
-
- if(file_exists($filepath) && filectime($filepath) < time() - $duration){
- unlink ($filepath);
- $this->debugMessage('Cached file deleted: ' . $filepath);
- }
-
- if(file_exists($filepath)){
- $this->debugMessage('Loading cached file ' . $filepath);
- touch($filepath);
- $content = file_get_contents($filepath);
- } else {
- $this->debugMessage('Caching ' . $url . ' to ' . $filepath);
- $dir = substr($filepath, 0, strrpos($filepath, '/'));
-
- if(!is_dir($dir)){
- $this->debugMessage('Creating directory ' . $dir);
- mkdir($dir, 0777, true);
- }
-
- $content = $this->getContents($url);
- if($content !== false){
- file_put_contents($filepath, $content);
- }
- }
-
- return str_get_html($content);
- }
-}
diff --git a/lib/RssBridge.php b/lib/RssBridge.php
index 6dd2663..0728683 100644
--- a/lib/RssBridge.php
+++ b/lib/RssBridge.php
@@ -12,7 +12,6 @@ require __DIR__ . '/Format.php';
require __DIR__ . '/FormatAbstract.php';
require __DIR__ . '/Bridge.php';
require __DIR__ . '/BridgeAbstract.php';
-require __DIR__ . '/HttpCachingBridgeAbstract.php';
require __DIR__ . '/FeedExpander.php';
require __DIR__ . '/Cache.php';
require __DIR__ . '/CacheAbstract.php';