1
0
Fork 0
forked from blallo/rss-bridge

Add extra bridges, second batch (TPB inside)

This commit is contained in:
Mitsukarenai 2014-05-26 19:45:10 +02:00
parent a23dd1d1e5
commit c07eacfd6a
10 changed files with 602 additions and 0 deletions

View file

@ -0,0 +1,41 @@
<?php
/**
* RssBridgeCollegeDeFrance
* Returns the 10 newest posts from http://www.college-de-france.fr
*
* @name CollegeDeFrance
* @homepage http://www.college-de-france.fr/
* @description Returns the 10 newest posts from CollegeDeFrance
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class CollegeDeFranceBridge extends BridgeAbstract{
    // Scrapes the audio/video listing of college-de-france.fr and exposes the
    // first entries of that listing as feed items.

    /**
     * Fetches the listing page and appends at most 10 entries to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        // French month names and their English counterparts. The two arrays MUST stay
        // index-aligned: str_replace() pairs them by position, so a missing month shifts
        // every following translation (e.g. 'novembre' would become 'October').
        $find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre');
        $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');

        $html = file_get_html('http://www.college-de-france.fr/site/audio-video/_audiovideos.jsp?index=0&prompt=&fulltextdefault=mots-cles...&fulltext=mots-cles...&fields=TYPE2_ACTIVITY&fieldsdefault=0_0&TYPE2=0') or $this->returnError('Could not request CollegeDeFrance.', 404);

        $limit = 0;
        foreach($html->find('li.audio') as $element) {
            // Stop scanning as soon as the 10-item quota is filled.
            if($limit >= 10) {
                break;
            }

            $title = $element->find('span.title', 0);

            $item = new \Item();
            $item->title = $title->plaintext;
            // Translate the French month name so that strtotime() can parse the date.
            $item->timestamp = strtotime(str_replace($find, $replace, $element->find('span.date', 0)->plaintext));
            $item->content = $element->find('span.lecturer', 0)->innertext . ' - ' . $title->innertext;
            $item->uri = $element->find('a', 0)->href;
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'CollegeDeFrance';
    }

    /** @return string Home page of the bridged site. */
    public function getURI(){
        return 'http://www.college-de-france.fr/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600*3; // 3 hours
    }
}

View file

@ -0,0 +1,53 @@
<?php
/**
* RssBridgeFlickrTagUser
* Returns the tagged images from http://www.flickr.com/
* 2014-05-26
*
* @name Flickr TagUser
* @homepage http://www.flickr.com/
* @description Returns the tagged or user images from Flickr
* @maintainer erwang
* @use1(q="keyword")
* @use2(u="username")
*/
class FlickrTagBridge extends BridgeAbstract{
    // Scrapes Flickr photo listings, either for a keyword search (q) or for a user's photo page (u).

    /**
     * Loads the Flickr page matching the request and appends one item per photo to $this->items.
     *
     * @param array $param Request parameters: 'q' (search keyword) or 'u' (Flickr username).
     *                     'q' takes precedence when both are given.
     */
    public function collectData(array $param){
        // Only the page that was actually asked for is requested; no default search is
        // performed before the parameters are validated.
        if (isset($param['q'])) { /* keyword search mode */
            $this->request = $param['q'];
            $html = file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404);
        }
        elseif (isset($param['u'])) { /* user timeline mode */
            $this->request = $param['u'];
            $html = file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404);
        }
        else {
            $this->returnError('You must specify a keyword or a Flickr username.', 400);
            // NOTE(review): returnError() presumably aborts the request; the explicit return
            // keeps this branch safe even if it does not.
            return;
        }

        foreach($html->find('span.photo_container') as $element) {
            $link = $element->find('a',0);

            $item = new \Item();
            $item->uri = 'http://flickr.com'.$link->href;
            // The image URL is read from the 'data-defer-src' attribute of the <img> tag.
            $item->thumbnailUri = $element->find('img',0)->getAttribute('data-defer-src');
            $item->content = '<a href="' . $item->uri . '"><img src="' . $item->thumbnailUri . '" /></a>'; // FIXME: Filter javascript ?
            $item->title = $link->title;
            $this->items[] = $item;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'Flickr Tag';
    }

    /** @return string Base URI of the bridged site section. */
    public function getURI(){
        return 'http://www.flickr.com/search/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 21600; // 6 hours
    }
}

56
bridges/KoreusBridge.php Normal file
View file

@ -0,0 +1,56 @@
<?php
/**
* RssBridgeKoreus
* Returns the 5 newest posts from Koreus (full text)
*
* @name Koreus
* @homepage http://www.koreus.com/
* @description Returns the 5 newest posts from Koreus (full text)
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class KoreusBridge extends BridgeAbstract{
    // Reads the Koreus FeedBurner feed and loads every article page to obtain its full text.

    /**
     * Removes the CDATA wrapper markers from a feed value.
     *
     * Declared as a method (not as a function nested in collectData()) so that calling
     * collectData() more than once cannot trigger a "Cannot redeclare function" fatal error.
     *
     * @param string $string Raw feed value.
     * @return string Value without '<![CDATA[' and ']]>'.
     */
    private function KoreusStripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Downloads an article page and extracts its text block.
     *
     * @param string $url Article URL.
     * @return string Article HTML, or an empty string when the page or the text block is unavailable.
     */
    private function KoreusExtractContent($url) {
        $page = file_get_html($url);
        if (!is_object($page)) {
            return '';
        }
        $body = $page->find('p[class=itemText]', 0);
        if (!is_object($body)) {
            return '';
        }
        // Drop the trailing part that starts with "Sur le m...".
        $text = preg_replace('/(Sur le m.+?)+$/i', '', $body->innertext);
        // NOTE(review): utf8_encode() assumes the page is ISO-8859-1 — confirm against the site.
        return utf8_encode($text);
    }

    /**
     * Fetches the feed and appends the first 5 entries (with full text) to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        $html = file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnError('Could not request Koreus.', 404);

        $limit = 0;
        foreach($html->find('item') as $element) {
            // Each item costs one extra HTTP request, so stop as soon as the quota is reached.
            if($limit >= 5) {
                break;
            }
            $item = new \Item();
            $item->title = $this->KoreusStripCDATA($element->find('title', 0)->innertext);
            $item->uri = $this->KoreusStripCDATA($element->find('guid', 0)->plaintext);
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $this->KoreusExtractContent($item->uri);
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'Koreus';
    }

    /** @return string Home page of the bridged site. */
    public function getURI(){
        return 'http://www.koreus.com/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600; // 1 hour
    }
}

View file

@ -0,0 +1,55 @@
<?php
/**
* RssBridgeMsnMonde
* Returns the 10 newest posts from MSN Actualités (full text)
*
* @name MSN Actu Monde
* @homepage http://news.fr.msn.com/m6-actualite/monde/
* @description Returns the 10 newest posts from MSN Actualités (full text)
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class MsnMondeBridge extends BridgeAbstract{
    // Reads the MSN "Monde" RSS feed and loads every article page to obtain its full text.

    /**
     * Downloads an article page and returns its summary followed by its body.
     *
     * Declared as a method (not as a function nested in collectData()) so that calling
     * collectData() more than once cannot trigger a "Cannot redeclare function" fatal error.
     *
     * @param string $url Article URL.
     * @return string Article HTML; empty when the page cannot be loaded or no known block is found.
     */
    private function MsnMondeExtractContent($url) {
        $page = file_get_html($url);
        if (!is_object($page)) {
            return '';
        }

        // Remove the "et aussi" (related content) placeholder, when present.
        $related = $page->find('div[id=m6_diaponews_placeholder]', 0);
        if (is_object($related)) {
            $related->outertext = '';
        }

        $text = '';
        // Summary block.
        $summary = $page->find('div[class=svsubtorabs]', 0);
        if (is_object($summary)) {
            $text .= $summary->innertext;
        }
        // Article body.
        $article = $page->find('div[id=page1]', 0);
        if (is_object($article)) {
            $text .= $article->innertext;
        }

        // Remove the "Lire aussi" (read also) part.
        return preg_replace('/<p><strong>Lire aussi.*/i', '', $text);
    }

    /**
     * Fetches the feed and appends the first 10 entries (with full text) to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        $html = file_get_html('http://news.fr.msn.com/m6-actualite/RSS/News_RSS_Monde.aspx') or $this->returnError('Could not request MsnMonde.', 404);

        $limit = 0;
        foreach($html->find('item') as $element) {
            // Each item costs one extra HTTP request, so stop as soon as the quota is reached.
            if($limit >= 10) {
                break;
            }
            $item = new \Item();
            $item->title = $element->find('title', 0)->innertext;
            $item->uri = $element->find('guid', 0)->plaintext;
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $this->MsnMondeExtractContent($item->uri);
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'MSN Actu Monde';
    }

    /** @return string Home page of the bridged site section. */
    public function getURI(){
        return 'http://news.fr.msn.com/m6-actualite/monde/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600; // 1 hour
    }
}

View file

@ -0,0 +1,62 @@
<?php
/**
* RssBridgeNiceMatin
* Returns the 10 newest posts from Nice Matin (full text)
*
* @name NiceMatin
* @homepage http://www.nicematin.com/
* @description Returns the 10 newest posts from NiceMatin (full text)
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class NiceMatinBridge extends BridgeAbstract{
    // Reads the Nice Matin "derniere minute" RSS feed and loads every article page for its full text.

    /**
     * Extracts the article URL from the plain text of a feed item.
     *
     * Declared as a method (not as a function nested in collectData()) so that calling
     * collectData() more than once cannot trigger a "Cannot redeclare function" fatal error.
     *
     * @param string $string Plain text of the whole <item> element.
     * @return string The text reduced to the part that starts at the last 'http' and, when a
     *                second 'http' follows an 'html', cut right after that 'html'.
     */
    private function NiceMatinUrl($string) {
        $string = str_replace('</link>', '', $string);
        // Cut everything that follows "html" when another "http" appears after it.
        $string = preg_replace('/html.*http.*/i','html',$string);
        // Drop everything that precedes the (last) "http".
        $string = preg_replace('/.*http/i','http',$string);
        return $string;
    }

    /**
     * Downloads an article page and returns its lead media followed by its body.
     *
     * @param string $url Article URL.
     * @return string Article HTML; empty when the page cannot be loaded or no known block is found.
     */
    private function NiceMatinExtractContent($url) {
        $page = file_get_html($url);
        if (!is_object($page)) {
            return '';
        }
        $text = '';
        $media = $page->find('figure[itemprop=associatedMedia]', 0);
        if (is_object($media)) {
            $text .= $media->innertext;
        }
        $article = $page->find('div[id=content-article]', 0);
        if (is_object($article)) {
            $text .= $article->innertext;
        }
        return $text;
    }

    /**
     * Fetches the feed and appends the first 10 entries (with full text) to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        $html = file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);

        $limit = 0;
        foreach($html->find('item') as $element) {
            // Each item costs one extra HTTP request, so stop as soon as the quota is reached.
            if($limit >= 10) {
                break;
            }
            $item = new \Item();
            $item->title = $element->find('title', 0)->innertext;
            // The URL is recovered from the item's plain text rather than from a child element.
            $item->uri = $this->NiceMatinUrl($element->plaintext);
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $this->NiceMatinExtractContent($item->uri);
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'NiceMatin';
    }

    /** @return string Home page of the bridged site. */
    public function getURI(){
        return 'http://www.nicematin.com/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600; // 1 hour
    }
}

View file

@ -0,0 +1,49 @@
<?php
/**
* RssBridgePlanetLibre
* Returns the 5 newest posts from PlanetLibre (full text)
*
* @name PlanetLibre
* @homepage http://www.planet-libre.org
* @description Returns the 5 newest posts from PlanetLibre (full text)
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class PlanetLibreBridge extends BridgeAbstract{
    // Reads the PlanetLibre RSS feed and loads every post page to obtain its full text.

    /**
     * Removes the CDATA wrapper markers from a feed value.
     *
     * Declared as a method (not as a function nested in collectData()) so that calling
     * collectData() more than once cannot trigger a "Cannot redeclare function" fatal error.
     *
     * @param string $string Raw feed value.
     * @return string Value without '<![CDATA[' and ']]>'.
     */
    private function PlanetLibreStripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Downloads a post page and extracts its text block.
     *
     * @param string $url Post URL.
     * @return string Post HTML, or an empty string when the page or the text block is unavailable.
     */
    private function PlanetLibreExtractContent($url) {
        $page = file_get_html($url);
        if (!is_object($page)) {
            return '';
        }
        $post = $page->find('div[class=post-text]', 0);
        return is_object($post) ? $post->innertext : '';
    }

    /**
     * Fetches the feed and appends the first 5 entries (with full text) to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        $html = file_get_html('http://www.planet-libre.org/rss10.php') or $this->returnError('Could not request PlanetLibre.', 404);

        $limit = 0;
        foreach($html->find('item') as $element) {
            // Each item costs one extra HTTP request, so stop as soon as the quota is reached.
            if($limit >= 5) {
                break;
            }
            $item = new \Item();
            $item->title = $this->PlanetLibreStripCDATA($element->find('title', 0)->innertext);
            $item->uri = $this->PlanetLibreStripCDATA($element->find('guid', 0)->plaintext);
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $this->PlanetLibreExtractContent($item->uri);
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'PlanetLibre';
    }

    /** @return string Home page of the bridged site. */
    public function getURI(){
        return 'http://www.planet-libre.org/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600*2; // 2 hours
    }
}

52
bridges/RaymondBridge.php Normal file
View file

@ -0,0 +1,52 @@
<?php
/**
* RssBridgeRaymond
* Returns the 3 newest posts from Raymond.cc (full text)
*
* @name Raymond
* @homepage http://www.raymond.cc
* @description Returns the 3 newest posts from Raymond.cc (full text)
* @maintainer pit-fgfjiudghdf
* @update 2014-05-26
*/
class RaymondBridge extends BridgeAbstract{
    // Reads the Raymond.cc blog feed and loads every post page to obtain its full text.

    /**
     * Removes the CDATA wrapper markers from a feed value.
     *
     * Declared as a method (not as a function nested in collectData()) so that calling
     * collectData() more than once cannot trigger a "Cannot redeclare function" fatal error.
     *
     * @param string $string Raw feed value.
     * @return string Value without '<![CDATA[' and ']]>'.
     */
    private function raymondStripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Downloads a post page and returns its content reduced to a small set of tags.
     *
     * @param string $url Post URL.
     * @return string Cleaned post HTML, or an empty string when the page or the content block is unavailable.
     */
    private function raymondExtractContent($url) {
        $page = file_get_html($url);
        if (!is_object($page)) {
            return '';
        }
        $entry = $page->find('div.entry-content', 0);
        if (!is_object($entry)) {
            return '';
        }
        // Cut the rest of the line starting at an element carrying class="ad".
        $text = preg_replace('/class="ad".*/', '', $entry->innertext);
        // Keep only basic formatting, links and images.
        $text = strip_tags($text, '<p><a><i><strong><em><img>');
        // strip_tags() removes the <script> tags but leaves their text behind; drop the AdSense call.
        $text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text);
        return $text;
    }

    /**
     * Fetches the feed and appends the first 3 entries (with full text) to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        $html = file_get_html('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404);

        $limit = 0;
        foreach($html->find('item') as $element) {
            // Each item costs one extra HTTP request, so stop as soon as the quota is reached.
            if($limit >= 3) {
                break;
            }
            $item = new \Item();
            $item->title = $this->raymondStripCDATA($element->find('title', 0)->innertext);
            $item->uri = $this->raymondStripCDATA($element->find('guid', 0)->plaintext);
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $this->raymondExtractContent($item->uri);
            $this->items[] = $item;
            $limit++;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'raymond';
    }

    /** @return string Home page of the bridged blog. */
    public function getURI(){
        return 'http://www.raymond.cc/blog';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600*12; // 12 hours
    }
}

91
bridges/Sexactu.php Normal file
View file

@ -0,0 +1,91 @@
<?php
/**
* 2014-05-26
*
* @name Sexactu
* @homepage http://www.gqmagazine.fr/sexactu
* @description Sexactu
* @maintainer Riduidel
* @update 04/02/2014
*/
define("GQ", "http://www.gqmagazine.fr");
class Sexactu extends BridgeAbstract{
    // Scrapes the "Sexactu" column listing of gqmagazine.fr; GQ is the site root used to
    // turn the relative article and image URLs found in the page into absolute ones.

    /**
     * Fetches the column listing and appends one item per article to $this->items.
     *
     * @param array $param Request parameters (not read by this bridge).
     */
    public function collectData(array $param){
        // French month names and their English counterparts. The two arrays MUST stay
        // index-aligned: str_replace() pairs them by position, so a missing month shifts
        // every following translation (e.g. 'novembre' would become 'October').
        $find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre');
        $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');

        $html = file_get_html($this->getURI()) or $this->returnError('Could not request '.$this->getURI(), 404);

        foreach($html->find('.content-holder') as $contentHolder) {
            // only use first list as second one only contains pages numbers
            $articles = $contentHolder->find('ul', 0);
            if(!is_object($articles)) {
                continue;
            }
            foreach($articles->find('li') as $element) {
                // The is_object() checks guard against simple html dom returning non-objects,
                // see stackoverflow for more details : http://stackoverflow.com/a/10828479/15619
                if(!is_object($element)) {
                    continue;
                }
                $titleBlock = $element->find('.title-holder', 0);
                if(!is_object($titleBlock)) {
                    continue;
                }
                $titleDetails = $titleBlock->find('.article-title',0);
                if(!is_object($titleDetails)) {
                    continue;
                }
                $heading = $titleDetails->find('h2', 0);
                $titleData = is_object($heading) ? $heading->find('a',0) : null;
                $titleTimestamp = $titleDetails->find('h4',0);
                if(!is_object($titleData) || !is_object($titleTimestamp)) {
                    continue;
                }

                $item = new Item();
                $item->title = $this->correctCase(trim($titleData->innertext));
                $item->uri = GQ.$titleData->href;

                // Manual date parsing, because the php intl extension may not be available.
                // Only the part after the first comma is kept; when there is no comma the text
                // is used unchanged (a bare strpos()+1 would silently drop its first character).
                $dateText = $titleTimestamp->innertext;
                $commaPosition = strpos($dateText, ',');
                if($commaPosition !== false) {
                    $dateText = substr($dateText, $commaPosition + 1);
                }
                // Multibyte-aware lowercasing so that accented month names match $find.
                $dateText = str_replace($find, $replace, mb_strtolower($dateText, "UTF-8"));
                $item->timestamp = strtotime($dateText);

                $item->name = "Maïa Mazaurette";

                $elementText = $element->find('.text-container', 0);
                if(is_object($elementText)) {
                    // don't forget to replace images server url with gq one
                    foreach($elementText->find('img') as $image) {
                        $image->src = GQ.$image->src;
                    }
                    $item->content = $elementText->innertext;
                }
                else {
                    $item->content = '';
                }
                $this->items[] = $item;
            }
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'Sexactu';
    }

    /** @return string URI of the scraped column listing. */
    public function getURI(){
        return GQ.'/sexactu';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 7200; // 2 hours
    }

    /** @return string Description of the bridge. */
    public function getDescription(){
        return "Sexactu";
    }

    /**
     * Converts a title to sentence case: everything lowercased, then the first character
     * of each '.'-separated sentence uppercased, sentences re-joined with '. '.
     *
     * @param string $str UTF-8 text.
     * @return string Sentence-cased text.
     */
    public function correctCase($str) {
        $sentences = explode('.', mb_strtolower($str, "UTF-8"));
        $result = array();
        foreach ($sentences as $sentence) {
            $sentence = trim($sentence);
            // Multibyte-aware ucfirst(): the byte-wise version cannot uppercase an accented
            // first letter such as 'é'.
            $head = mb_substr($sentence, 0, 1, "UTF-8");
            $tail = mb_substr($sentence, 1, mb_strlen($sentence, "UTF-8"), "UTF-8");
            $result[] = mb_strtoupper($head, "UTF-8").$tail;
        }
        return implode('. ', $result);
    }
}

View file

@ -0,0 +1,49 @@
<?php
/**
* RssBridgeThePirateBay
* Returns the search results from https://thepiratebay.se/ for the given keywords
* 2014-05-25
*
* @name The Pirate Bay
* @homepage https://thepiratebay.se/
* @description Returns results for the keywords
* @maintainer mitsukarenai
* @update 2014-05-26
* @use1(q="keywords")
*/
class ThePirateBayBridge extends BridgeAbstract{
    // Scrapes the search result table of thepiratebay.se for the requested keywords.

    /**
     * Runs the search and appends one item per result row to $this->items.
     *
     * @param array $param Request parameters; 'q' (keywords) is required.
     */
    public function collectData(array $param){
        if (!isset($param['q'])) {
            $this->returnError('You must specify a keyword (?q=...)', 400);
        }

        $html = file_get_html('https://thepiratebay.se/search/'.rawurlencode($param['q']).'/0/99/0') or $this->returnError('Could not request TPB.', 404);

        if (!is_object($html->find('table#searchResult', 0))) {
            $this->returnError('No result for this query', 404);
        }

        foreach($html->find('tr') as $element) {
            // Rows without a detail link (e.g. the table header) are not results: skip them
            // instead of dereferencing a missing element.
            $detailLink = $element->find('a.detLink',0);
            if (!is_object($detailLink)) {
                continue;
            }
            $title = $detailLink->plaintext;
            if (empty($title)) {
                continue;
            }

            $description = $element->find('font',0);
            // The 4th link of the row is used as the download link.
            $download = $element->find('a',3);

            $item = new \Item();
            $item->uri = 'https://thepiratebay.se/'.$detailLink->href;
            $item->id = $item->uri;
            // The publication date is not parsed from the page; the fetch time is used instead.
            $item->timestamp = time();
            $item->title = $title;
            $item->content = (is_object($description) ? $description->plaintext : '')
                .'<br><a href="'.(is_object($download) ? $download->href : '').'">download</a>';
            $this->items[] = $item;
        }
    }

    /** @return string Display name of the bridge. */
    public function getName(){
        return 'The Pirate Bay';
    }

    /** @return string Home page of the bridged site. */
    public function getURI(){
        return 'https://thepiratebay.se/';
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration(){
        return 3600; // 1 hour
    }
}

View file

@ -0,0 +1,94 @@
<?php
/**
* RssBridgeWordpress
* Returns the 3 newest full posts of a Wordpress blog
*
* @name Wordpress Bridge
* @homepage https://wordpress.com/
* @description Returns the 3 newest full posts of a Wordpress blog
* @maintainer aledeg
* @update 2014-05-26
* @use1(url="blog URL (required)", name="blog name")
*/
class WordpressBridge extends BridgeAbstract {
    // Scrapes the front page of a blog for '.post' elements and loads the first three
    // linked posts to build full-text items.

    private $url;
    private $name;

    /**
     * Loads the blog front page and appends up to 3 full posts to $this->items.
     *
     * @param array $param Request parameters: 'url' (required blog URL), 'name' (optional blog name).
     */
    public function collectData(array $param) {
        $this->processParams($param);
        if (!$this->hasUrl()) {
            $this->returnError('You must specify a URL', 400);
        }

        $html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);

        // Stored in a variable first: empty() on a method call result is a fatal error before PHP 5.5.
        $posts = $html->find('.post');
        if (empty($posts)) {
            $this->returnError("Sorry, {$this->url} doesn't seem to be a Wordpress blog.", 404);
            return;
        }

        $i = 0;
        foreach ($posts as $article) {
            // Each post costs one extra HTTP request, so stop as soon as the quota is reached.
            if ($i >= 3) {
                break;
            }
            $i++;
            // The first link found inside the post is taken as its permalink.
            $link = $article->find('a', 0);
            if (!is_object($link)) {
                continue;
            }
            $this->items[] = $this->getDetails($link->href);
        }
    }

    /**
     * Loads a single post page and builds the corresponding item.
     *
     * @param string $uri Post URL.
     * @return Item Item with uri, title, content and timestamp; title and content are left
     *              empty when the expected elements are not found on the page.
     */
    private function getDetails($uri) {
        // Report the failure through the bridge's error channel instead of silently ending the script.
        $html = file_get_html($uri) or $this->returnError("Could not request {$uri}.", 404);

        $item = new \Item();
        $item->uri = $uri;
        $item->title = '';
        $item->content = '';

        $article = $html->find('.post', 0);
        if (is_object($article)) {
            $heading = $article->find('h1', 0);
            if (is_object($heading)) {
                $item->title = $heading->innertext;
            }
            $body = $article->find('.entry-content,.entry', 0);
            if (is_object($body)) {
                $item->content = $this->clearContent($body->innertext);
            }
        }

        $item->timestamp = $this->getDate($uri);
        return $item;
    }

    /**
     * Strips scripts and the "wpa" block from a post body.
     *
     * @param string $content Post HTML.
     * @return string Cleaned HTML.
     */
    private function clearContent($content) {
        // Lazy, case-insensitive and dot-matches-newline: each <script>...</script> pair is
        // removed on its own, including scripts that span several lines.
        $content = preg_replace('/<script\b.*?<\/script>/is', '', $content);
        // Removes from the opening of a div with class "wpa" up to the end of that line.
        $content = preg_replace('/<div class="wpa".*/', '', $content);
        return $content;
    }

    /**
     * Derives the post date from a YYYY/MM/DD segment of its URL.
     *
     * @param string $uri Post URL.
     * @return string Unix timestamp (as formatted by 'U'); the current time when the URL
     *                carries no date segment.
     */
    private function getDate($uri) {
        $found = preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches) === 1;
        $date = new \DateTime($found ? $matches[0] : 'now');
        return $date->format('U');
    }

    /** @return string Display name: the blog name followed by ' - Wordpress Bridge'. */
    public function getName() {
        return "{$this->name} - Wordpress Bridge";
    }

    /** @return string The blog URL given in the request. */
    public function getURI() {
        return $this->url;
    }

    /** @return int Cache lifetime in seconds. */
    public function getCacheDuration() {
        return 3600*3; // 3 hours
    }

    /** @return bool True when a non-empty blog URL was supplied. */
    private function hasUrl() {
        return !empty($this->url);
    }

    /**
     * Copies the request parameters into the bridge; missing parameters become null.
     *
     * @param array $param Request parameters.
     */
    private function processParams($param) {
        $this->url = isset($param['url']) ? $param['url'] : null;
        $this->name = isset($param['name']) ? $param['name'] : null;
    }
}