forked from blallo/rss-bridge
Merge branch 'Caching' of https://github.com/logmanoriginal/rss-bridge
This commit is contained in:
commit
7e2129fa2a
13 changed files with 39 additions and 21 deletions
|
@ -33,7 +33,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{
|
||||||
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
|
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
|
||||||
{
|
{
|
||||||
|
|
||||||
$htmlepisode=str_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
|
$htmlepisode=$this->get_cached($episode->find('a', 0)->getAttribute('href'));
|
||||||
|
|
||||||
$item = array();
|
$item = array();
|
||||||
$item['author'] = $episode->find('a', 0)->text();
|
$item['author'] = $episode->find('a', 0)->text();
|
||||||
|
|
|
@ -25,7 +25,7 @@ class FreenewsBridge extends RssExpander {
|
||||||
}
|
}
|
||||||
// now load that uri from cache
|
// now load that uri from cache
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
$this->debugMessage("now loading page ".$item['uri']);
|
||||||
$articlePage = str_get_html($this->get_cached($item['uri']));
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
|
|
||||||
$content = $articlePage->find('.post-container', 0);
|
$content = $articlePage->find('.post-container', 0);
|
||||||
$item['content'] = $content->innertext;
|
$item['content'] = $content->innertext;
|
||||||
|
|
|
@ -45,7 +45,7 @@ class GawkerBridge extends RssExpander{
|
||||||
try {
|
try {
|
||||||
// now load that uri from cache
|
// now load that uri from cache
|
||||||
$this->debugMessage("loading page ".$item['uri']);
|
$this->debugMessage("loading page ".$item['uri']);
|
||||||
$articlePage = str_get_html($this->get_cached($item['uri']));
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
if(is_object($articlePage)) {
|
if(is_object($articlePage)) {
|
||||||
$content = $articlePage->find('.post-content', 0);
|
$content = $articlePage->find('.post-content', 0);
|
||||||
HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
|
HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
class JapanExpoBridge extends BridgeAbstract{
|
class JapanExpoBridge extends HttpCachingBridgeAbstract {
|
||||||
|
|
||||||
public function loadMetadatas() {
|
public function loadMetadatas() {
|
||||||
$this->maintainer = 'Ginko';
|
$this->maintainer = 'Ginko';
|
||||||
|
@ -64,7 +64,10 @@ class JapanExpoBridge extends BridgeAbstract{
|
||||||
|
|
||||||
if ($fullcontent) {
|
if ($fullcontent) {
|
||||||
if ($count < 5) {
|
if ($count < 5) {
|
||||||
$article_html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request JapanExpo: '.$url);
|
if($this->get_cached_time($url) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($url);
|
||||||
|
|
||||||
|
$article_html = $this->get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url);
|
||||||
$header = $article_html->find('header.pageHeadBox', 0);
|
$header = $article_html->find('header.pageHeadBox', 0);
|
||||||
$timestamp = strtotime($header->find('time', 0)->datetime);
|
$timestamp = strtotime($header->find('time', 0)->datetime);
|
||||||
$title_html = $header->find('div.section', 0)->next_sibling();
|
$title_html = $header->find('div.section', 0)->next_sibling();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
class KununuBridge extends BridgeAbstract{
|
class KununuBridge extends HttpCachingBridgeAbstract {
|
||||||
public function loadMetadatas(){
|
public function loadMetadatas(){
|
||||||
$this->maintainer = "logmanoriginal";
|
$this->maintainer = "logmanoriginal";
|
||||||
$this->name = "Kununu Bridge"; /* This will be replaced later! */
|
$this->name = "Kununu Bridge"; /* This will be replaced later! */
|
||||||
|
@ -248,7 +248,10 @@ class KununuBridge extends BridgeAbstract{
|
||||||
*/
|
*/
|
||||||
private function extract_full_description($uri){
|
private function extract_full_description($uri){
|
||||||
// Load full article
|
// Load full article
|
||||||
$html = $this->getSimpleHTMLDOM($uri);
|
if($this->get_cached_time($uri) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($uri);
|
||||||
|
|
||||||
|
$html = $this->get_cached($uri);
|
||||||
if($html === false)
|
if($html === false)
|
||||||
$this->returnServerError('Could not load full description!');
|
$this->returnServerError('Could not load full description!');
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ class Les400CulsBridge extends RssExpander{
|
||||||
}
|
}
|
||||||
// now load that uri from cache
|
// now load that uri from cache
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
$this->debugMessage("now loading page ".$item['uri']);
|
||||||
// $articlePage = str_get_html($this->get_cached($item['uri']));
|
// $articlePage = $this->get_cached($item['uri']);
|
||||||
|
|
||||||
// $content = $articlePage->find('.post-container', 0);
|
// $content = $articlePage->find('.post-container', 0);
|
||||||
$item['content'] = (string) $newsItem->description;
|
$item['content'] = (string) $newsItem->description;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
class LichessBridge extends BridgeAbstract
|
class LichessBridge extends HttpCachingBridgeAbstract
|
||||||
{
|
{
|
||||||
public function loadMetadatas()
|
public function loadMetadatas()
|
||||||
{
|
{
|
||||||
|
@ -36,7 +36,10 @@ class LichessBridge extends BridgeAbstract
|
||||||
|
|
||||||
private function retrieve_lichess_post($blog_post_uri)
|
private function retrieve_lichess_post($blog_post_uri)
|
||||||
{
|
{
|
||||||
$blog_post_html = $this->getSimpleHTMLDOM($blog_post_uri);
|
if($this->get_cached_time($blog_post_uri) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($blog_post_uri);
|
||||||
|
|
||||||
|
$blog_post_html = $this->get_cached($blog_post_uri);
|
||||||
$blog_post_div = $blog_post_html->find('#lichess_blog', 0);
|
$blog_post_div = $blog_post_html->find('#lichess_blog', 0);
|
||||||
|
|
||||||
$post_chapo = $blog_post_div->find('.shortlede', 0)->innertext;
|
$post_chapo = $blog_post_div->find('.shortlede', 0)->innertext;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
class NumeramaBridge extends BridgeAbstract{
|
class NumeramaBridge extends HttpCachingBridgeAbstract {
|
||||||
|
|
||||||
public function loadMetadatas() {
|
public function loadMetadatas() {
|
||||||
|
|
||||||
|
@ -31,7 +31,10 @@ class NumeramaBridge extends BridgeAbstract{
|
||||||
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
$item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext);
|
||||||
|
|
||||||
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
||||||
$article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
if($this->get_cached_time($article_url) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($article_url);
|
||||||
|
|
||||||
|
$article_html = $this->get_cached($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
||||||
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
||||||
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
||||||
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
||||||
|
|
|
@ -43,7 +43,7 @@ class TheOatmealBridge extends RssExpander{
|
||||||
$item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about;
|
$item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about;
|
||||||
// now load that uri from cache
|
// now load that uri from cache
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
$this->debugMessage("now loading page ".$item['uri']);
|
||||||
$articlePage = str_get_html($this->get_cached($item['uri']));
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
|
|
||||||
$content = $articlePage->find('#comic', 0);
|
$content = $articlePage->find('#comic', 0);
|
||||||
if($content==null) {
|
if($content==null) {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article
|
define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article
|
||||||
define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know...
|
define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know...
|
||||||
|
|
||||||
class WikipediaBridge extends BridgeAbstract{
|
class WikipediaBridge extends HttpCachingBridgeAbstract {
|
||||||
public function loadMetadatas(){
|
public function loadMetadatas(){
|
||||||
$this->maintainer = 'logmanoriginal';
|
$this->maintainer = 'logmanoriginal';
|
||||||
$this->name = 'Wikipedia bridge for many languages';
|
$this->name = 'Wikipedia bridge for many languages';
|
||||||
|
@ -188,7 +188,10 @@ class WikipediaBridge extends BridgeAbstract{
|
||||||
* Loads the full article from a given URI
|
* Loads the full article from a given URI
|
||||||
*/
|
*/
|
||||||
private function LoadFullArticle($uri){
|
private function LoadFullArticle($uri){
|
||||||
$content_html = $this->getSimpleHTMLDOM($uri);
|
if($this->get_cached_time($uri) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($uri);
|
||||||
|
|
||||||
|
$content_html = $this->get_cached($uri);
|
||||||
|
|
||||||
if(!$content_html)
|
if(!$content_html)
|
||||||
$this->returnServerError('Could not load site: ' . $uri . '!');
|
$this->returnServerError('Could not load site: ' . $uri . '!');
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
define('WORDPRESS_TYPE_ATOM', 1); // Content is of type ATOM
|
define('WORDPRESS_TYPE_ATOM', 1); // Content is of type ATOM
|
||||||
define('WORDPRESS_TYPE_RSS', 2); // Content is of type RSS
|
define('WORDPRESS_TYPE_RSS', 2); // Content is of type RSS
|
||||||
class WordPressBridge extends BridgeAbstract {
|
class WordPressBridge extends HttpCachingBridgeAbstract {
|
||||||
|
|
||||||
private $url;
|
private $url;
|
||||||
public $sitename; // Name of the site
|
public $sitename; // Name of the site
|
||||||
|
@ -95,7 +95,10 @@ class WordPressBridge extends BridgeAbstract {
|
||||||
$item['timestamp'] = strtotime($article->find('updated', 0)->innertext);
|
$item['timestamp'] = strtotime($article->find('updated', 0)->innertext);
|
||||||
}
|
}
|
||||||
|
|
||||||
$article_html = $this->getSimpleHTMLDOM($item['uri']);
|
if($this->get_cached_time($item['uri']) <= strtotime('-24 hours'))
|
||||||
|
$this->remove_from_cache($item['uri']);
|
||||||
|
|
||||||
|
$article_html = $this->get_cached($item['uri']);
|
||||||
|
|
||||||
// Attempt to find most common content div
|
// Attempt to find most common content div
|
||||||
if(!isset($item['content'])){
|
if(!isset($item['content'])){
|
||||||
|
|
|
@ -58,7 +58,7 @@ class WorldOfTanksBridge extends HttpCachingBridgeAbstract{
|
||||||
$item['uri'] = $this->uri.$infoLink->href;
|
$item['uri'] = $this->uri.$infoLink->href;
|
||||||
// now load that uri from cache
|
// now load that uri from cache
|
||||||
$this->debugMessage("loading page ".$item['uri']);
|
$this->debugMessage("loading page ".$item['uri']);
|
||||||
$articlePage = str_get_html($this->get_cached($item['uri']));
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
$content = $articlePage->find('.l-content', 0);
|
$content = $articlePage->find('.l-content', 0);
|
||||||
HTMLSanitizer::defaultImageSrcTo($content, $this->uri);
|
HTMLSanitizer::defaultImageSrcTo($content, $this->uri);
|
||||||
$item['title'] = $content->find('h1', 0)->innertext;
|
$item['title'] = $content->find('h1', 0)->innertext;
|
||||||
|
|
|
@ -419,7 +419,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $content;
|
return str_get_html($content);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function get_cached_time($url){
|
public function get_cached_time($url){
|
||||||
|
@ -465,8 +465,8 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
|
||||||
// TODO build this from the variable given to Cache
|
// TODO build this from the variable given to Cache
|
||||||
$cacheDir = __DIR__ . '/../cache/pages/';
|
$cacheDir = __DIR__ . '/../cache/pages/';
|
||||||
$filepath = $this->buildCacheFilePath($url, $cacheDir);
|
$filepath = $this->buildCacheFilePath($url, $cacheDir);
|
||||||
$this->debugMessage('removing from cache \'' . $filepath . '\' WELL, NOT REALLY');
|
$this->debugMessage('removing from cache \'' . $filepath . '\'');
|
||||||
// unlink($filepath);
|
unlink($filepath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue