1
0
Fork 0
forked from blallo/rss-bridge

Merge pull request #161 from ORelio/master

Add AnimeUltime, CNET, LeMondeInformatique, Silicon, 3DSDB, T411
This commit is contained in:
Mitsu 2015-10-22 17:36:24 +02:00
commit 3780eb58bd
6 changed files with 597 additions and 0 deletions

View file

@ -0,0 +1,109 @@
<?php
/**
* AnimeUltimeBridge
* Returns 10 newest releases posted on Anime-Ultime
*
* @name Anime-Ultime
* @homepage http://www.anime-ultime.net/
* @description Returns the 10 newest releases posted on Anime-Ultime <br /> Type = A (Anime), D (Drama), T (Tokusatsu), or leave empty for everything
* @maintainer ORelio
* @update 2015-09-07
* @use1(type="A/D/T/Empty")
*/
class AnimeUltimeBridge extends BridgeAbstract {

    //Human-readable name of the active type filter, used by getName()
    private $filter = 'Releases';

    //Collects up to 10 of the newest releases listed on the history pages of
    //the current month and, if needed, the previous month.
    //$param['type'] may be 'A', 'D' or 'T' to restrict the listing. An empty or
    //missing value selects everything; any other value is reported through
    //returnError() with code 400.
    public function collectData(array $param) {

        //Add type filter if provided
        $typeFilter = '';
        if (!empty($param['type'])) {
            if ($param['type'] == 'A' || $param['type'] == 'D' || $param['type'] == 'T') {
                $typeFilter = $param['type'];
                if ($typeFilter == 'A') { $this->filter = 'Anime'; }
                if ($typeFilter == 'D') { $this->filter = 'Drama'; }
                if ($typeFilter == 'T') { $this->filter = 'Tokusatsu'; }
            } else $this->returnError('The provided type filter is invalid. Expecting A, D, T, or no filter', 400);
        }

        //Build date and filters for making requests.
        //mktime() normalises month 0 to December of the previous year.
        $thismonth = date('mY').$typeFilter;
        $lastmonth = date('mY', mktime(0, 0, 0, date('n') - 1, 1, date('Y'))).$typeFilter;

        //Process each HTML page until having 10 releases
        $processedOK = 0;
        foreach (array($thismonth, $lastmonth) as $requestFilter) {

            //Retrieve page contents
            $website = 'http://www.anime-ultime.net/';
            $url = $website.'history-0-1/'.$requestFilter;
            $html = file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500);

            //A page without the history container offers no release: skip it
            //instead of calling find() on null
            $history = $html->find('div.history', 0);
            if (is_null($history))
                continue;

            //Releases are sorted by day: process each day individually
            foreach ($history->find('h3') as $daySection) {

                //Retrieve day and build date information.
                //The request filter starts with the month (2 digits) and the year (4 digits)
                $dateString = $daySection->plaintext;
                $day = intval(substr($dateString, strpos($dateString, ' ') + 1, 2));
                $item_date = strtotime(str_pad($day, 2, '0', STR_PAD_LEFT).'-'.substr($requestFilter, 0, 2).'-'.substr($requestFilter, 2, 4));

                //<h3>day</h3><br /><table><tr> <-- useful data in table rows.
                //Each step is checked so an unexpected layout only skips that day.
                $lineBreak = $daySection->next_sibling();
                $table = is_null($lineBreak) ? null : $lineBreak->next_sibling();
                $release = is_null($table) ? null : $table->first_child();

                //Process each release of that day, ignoring first table row: contains table headers
                while (!is_null($release) && !is_null($release = $release->next_sibling())) {
                    if (count($release->find('td')) > 0) {

                        //Retrieve metadata from table columns
                        $item_link_element = $release->find('td', 0)->find('a', 0);
                        if (is_null($item_link_element))
                            continue; //Row without a release link: nothing to report

                        $item_uri = $website.$item_link_element->href;
                        $item_name = html_entity_decode($item_link_element->plaintext);
                        //The image path is embedded in the onmouseover attribute, starting at offset 37
                        $item_image = $website.substr($item_link_element->onmouseover, 37, strpos($item_link_element->onmouseover, ' ', 37) - 37);
                        $item_episode = html_entity_decode(str_pad($release->find('td', 1)->plaintext, 2, '0', STR_PAD_LEFT));
                        $item_fansub = $release->find('td', 2)->plaintext;
                        $item_type = $release->find('td', 4)->plaintext;

                        //Retrieve description from description page and convert relative image src into absolute image src
                        $html_item = file_get_contents($item_uri) or $this->returnError('Could not request Anime-Ultime: '.$item_uri, 500);
                        $item_description = substr($html_item, strpos($html_item, 'class="principal_contain" align="center">') + 41);
                        $item_description = substr($item_description, 0, strpos($item_description, '<div id="table">'));
                        $item_description = str_replace('src="images', 'src="'.$website.'images', $item_description);
                        $item_description = str_replace("\r", '', $item_description);
                        $item_description = str_replace("\n", '', $item_description);
                        $item_description = utf8_encode($item_description);

                        //Build and add final item
                        $item = new \Item();
                        $item->uri = $item_uri;
                        $item->title = $item_name.' '.$item_type.' '.$item_episode;
                        $item->author = $item_fansub;
                        $item->timestamp = $item_date;
                        $item->thumbnailUri = $item_image;
                        $item->content = $item_description;
                        $this->items[] = $item;
                        $processedOK++;

                        //Stop processing once limit is reached
                        if ($processedOK >= 10)
                            return;
                    }
                }
            }
        }
    }

    //Feed name, including the selected type filter
    public function getName() {
        return 'Latest '.$this->filter.' - Anime-Ultime Bridge';
    }

    public function getURI() {
        return 'http://www.anime-ultime.net/';
    }

    public function getCacheDuration() {
        return 3600*3; // 3 hours
    }
}

99
bridges/CNETBridge.php Normal file
View file

@ -0,0 +1,99 @@
<?php
/**
* CNET Bridge
* Returns the newest articles
* 2015-09-08
*
* @name CNET News
* @homepage http://www.cnet.com/
* @description Returns the newest articles. <br /> You may specify a topic, else all topics are selected.
* @maintainer ORelio
* @update 2015-09-10
* @use1(topic="Topic name")
*/
class CNETBridge extends BridgeAbstract {

    //Topic requested by the user; empty selects the CNET front page
    private $topicName = '';

    //Returns the text located between the first occurrence of $start and the
    //next occurrence of $end, or false when $start does not occur in $string.
    //Declared as a method: a named function nested in collectData() is
    //registered globally and cannot be declared twice.
    private function ExtractFromDelimiters($string, $start, $end) {
        if (strpos($string, $start) !== false) {
            $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
            $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
            return $section_retrieved;
        }
        return false;
    }

    //Removes every section starting with $start and ending with $end,
    //delimiters included. A section whose $end is missing is left in place.
    private function StripWithDelimiters($string, $start, $end) {
        while (($startPos = strpos($string, $start)) !== false) {
            $section_to_remove = substr($string, $startPos);
            $endPos = strpos($section_to_remove, $end);
            if ($endPos === false)
                break; //Unterminated section: do not chop unrelated characters
            $section_to_remove = substr($section_to_remove, 0, $endPos + strlen($end));
            $string = str_replace($section_to_remove, '', $string);
        }
        return $string;
    }

    //Drops the leading markup up to the first paragraph, then removes scripts,
    //related-links shortcodes and click-to-enlarge anchors
    private function CleanArticle($article_html) {
        $marker = '</script></div><p>';
        $markerPos = strpos($article_html, $marker);
        if ($markerPos !== false) //Only cut when the marker is really there
            $article_html = '<p>'.substr($article_html, $markerPos + strlen($marker));
        $article_html = $this->StripWithDelimiters($article_html, '<script>', '</script>');
        $article_html = $this->StripWithDelimiters($article_html, '<div class="shortcode related-links', '</div>');
        $article_html = $this->StripWithDelimiters($article_html, '<a class="clickToEnlarge">', '</a>');
        return $article_html;
    }

    //Collects up to 4 news articles from the front page or from the topic page
    //given in $param['topic'], fetching each article page for its content
    public function collectData(array $param) {

        if (!empty($param['topic']))
            $this->topicName = $param['topic'];

        $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/');
        $html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500);
        $limit = 0;

        foreach ($html->find('div.socialSharingSmall') as $element) {

            //Stop as soon as enough articles have been collected
            if ($limit >= 4)
                break;

            //Article title and URL are stored as JSON in a data attribute
            $article_meta = json_decode($this->ExtractFromDelimiters($element->outertext, 'data-social-counts-options=\'', '\'>'));
            if (!is_object($article_meta) || empty($article_meta->title) || empty($article_meta->url))
                continue;

            $article_title = $article_meta->title;
            $article_uri = $article_meta->url;

            //Only news articles are reported
            if (strpos($article_uri, '/news/') === false)
                continue;

            $article_html = file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500);
            $article_timestamp = strtotime($this->ExtractFromDelimiters($article_html->innertext, '<time itemprop="datePublished" class="', '">'));

            //Thumbnail: prefer the original image, fall back to the image container
            $thumbnail_container = $article_html->find('div.originalImage', 0);
            if (is_null($thumbnail_container))
                $thumbnail_container = $article_html->find('span.imageContainer', 0);
            $thumbnail_image = is_null($thumbnail_container) ? null : $thumbnail_container->find('img', 0);
            $article_thumbnail = is_null($thumbnail_image) ? null : $thumbnail_image->src;

            $article_content = trim($this->CleanArticle($this->ExtractFromDelimiters($article_html, '</div></div></div><div class="col-8">', '<footer>')));

            $author_element = $article_html->find('a.author', 0);
            $article_author = is_null($author_element) ? '' : trim($author_element->plaintext);

            $item = new \Item();
            $item->uri = $article_uri;
            $item->thumbnailUri = $article_thumbnail;
            $item->title = $article_title;
            $item->author = $article_author;
            $item->timestamp = $article_timestamp;
            $item->content = $article_content;
            $this->items[] = $item;
            $limit++;
        }
    }

    //Feed name, followed by the topic when one was requested
    public function getName() {
        return 'CNET News Bridge'.(empty($this->topicName) ? '' : ' - '.$this->topicName);
    }

    public function getURI() {
        return 'http://www.cnet.com/';
    }

    public function getCacheDuration() {
        return 1800; // 30 minutes
    }
}

View file

@ -0,0 +1,79 @@
<?php
/**
* LeMondeInformatique Bridge
* Returns the newest articles
* 2015-09-08
*
* @name Le Monde Informatique
* @homepage http://www.lemondeinformatique.fr/
* @description Returns the newest articles.
* @maintainer ORelio
* @update 2015-09-08
*/
class LeMondeInformatiqueBridge extends BridgeAbstract {

    //Removes the CDATA wrapper markers from a feed field.
    //Declared as a method: a named function nested in collectData() is
    //registered globally and cannot be declared twice.
    private function StripCDATA($string) {
        $string = str_replace('<![CDATA[', '', $string);
        $string = str_replace(']]>', '', $string);
        return $string;
    }

    //Removes every section starting with $start and ending with $end,
    //delimiters included. A section whose $end is missing is left in place.
    private function StripWithDelimiters($string, $start, $end) {
        while (($startPos = strpos($string, $start)) !== false) {
            $section_to_remove = substr($string, $startPos);
            $endPos = strpos($section_to_remove, $end);
            if ($endPos === false)
                break; //Unterminated section: do not chop unrelated characters
            $section_to_remove = substr($section_to_remove, 0, $endPos + strlen($end));
            $string = str_replace($section_to_remove, '', $string);
        }
        return $string;
    }

    //Removes scripts and the duplicated title heading from the article body
    private function CleanArticle($article_html) {
        $article_html = $this->StripWithDelimiters($article_html, '<script', '</script>');
        $article_html = $this->StripWithDelimiters($article_html, '<h1 class="cleanprint-title">', '</h1>');
        return $article_html;
    }

    //Collects up to 5 articles from the RSS feed, fetching each article page
    //to build the full content
    public function collectData(array $param) {

        $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml';
        $html = file_get_html($feedUrl) or $this->returnError('Could not request LeMondeInformatique: '.$feedUrl, 500);
        $limit = 0;

        foreach ($html->find('item') as $element) {

            //Stop as soon as enough articles have been collected
            if ($limit >= 5)
                break;

            //Retrieve article details. The link is cut out of the raw item markup.
            $article_uri = $element->innertext;
            $article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
            $article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
            $article_html = file_get_html($article_uri) or $this->returnError('Could not request LeMondeInformatique: '.$article_uri, 500);

            //Pages without the article container cannot be converted: skip
            //them instead of calling find() on null
            $article_container = $article_html->find('div#article', 0);
            if (is_null($article_container))
                continue;

            $illustration = $article_container->find('img#illustration', 0);
            $thumbnailUri = is_null($illustration) ? null : $illustration->src;
            $article_content = $this->CleanArticle($article_container->innertext);
            $title_element = $article_html->find('h1.cleanprint-title', 0);
            $article_title = is_null($title_element) ? null : $title_element->plaintext;

            //Build and add final item
            $item = new \Item();
            $item->uri = $article_uri;
            $item->thumbnailUri = $thumbnailUri;
            $item->title = $article_title;
            $item->author = $this->StripCDATA($element->find('dc:creator', 0)->innertext);
            $item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
            $item->content = $article_content;
            $this->items[] = $item;
            $limit++;
        }
    }

    public function getName() {
        return 'Le Monde Informatique';
    }

    public function getURI() {
        return 'http://www.lemondeinformatique.fr/';
    }

    public function getCacheDuration() {
        return 1800; // 30 minutes
    }
}

View file

@ -0,0 +1,134 @@
<?php
/**
* 3DS Scene Releases
* Returns the newest releases
* 2015-09-08
*
* @name 3DS Scene Releases
* @homepage http://www.3dsdb.com/
* @description Returns the newest scene releases.
* @maintainer ORelio
* @update 2015-09-17
*/
class Releases3DSBridge extends BridgeAbstract {

    //Returns the text located between the first occurrence of $start and the
    //next occurrence of $end, or false when $start does not occur in $string.
    //Declared as a method: a named function nested in collectData() is
    //registered globally and cannot be declared twice.
    private function ExtractFromDelimiters($string, $start, $end) {
        if (strpos($string, $start) !== false) {
            $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
            $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
            return $section_retrieved;
        }
        return false;
    }

    //Converts the numeric release type into a label
    private function TypeToString($type) {
        switch ($type) {
            case 1: return '3DS Game';
            case 4: return 'eShop';
            default: return '??? ('.$type.')';
        }
    }

    //Converts the numeric card type into a label
    private function CardToString($card) {
        switch ($card) {
            case 1: return 'Regular (CARD1)';
            case 2: return 'NAND (CARD2)';
            default: return '??? ('.$card.')';
        }
    }

    //Collects the 5 last releases of the 3dsdb XML listing. Each item gets an
    //optional IGN section, the release details and search engine links.
    public function collectData(array $param) {

        $dataUrl = 'http://3dsdb.com/xml.php';
        $xml = file_get_contents($dataUrl) or $this->returnError('Could not request 3dsdb: '.$dataUrl, 500);
        $limit = 0;

        //The listing is walked backwards so that the last entries come first
        foreach (array_reverse(explode('<release>', $xml)) as $element) {

            //Stop as soon as enough releases have been collected
            if ($limit >= 5)
                break;

            //Ignore chunks that are not a complete release entry
            if (strpos($element, '</release>') === false)
                continue;

            $id = $this->ExtractFromDelimiters($element, '<id>', '</id>');
            $name = $this->ExtractFromDelimiters($element, '<name>', '</name>');
            $publisher = $this->ExtractFromDelimiters($element, '<publisher>', '</publisher>');
            $region = $this->ExtractFromDelimiters($element, '<region>', '</region>');
            $group = $this->ExtractFromDelimiters($element, '<group>', '</group>');
            $imagesize = $this->ExtractFromDelimiters($element, '<imagesize>', '</imagesize>');
            $serial = $this->ExtractFromDelimiters($element, '<serial>', '</serial>');
            $titleid = $this->ExtractFromDelimiters($element, '<titleid>', '</titleid>');
            $imgcrc = $this->ExtractFromDelimiters($element, '<imgcrc>', '</imgcrc>');
            $filename = $this->ExtractFromDelimiters($element, '<filename>', '</filename>');
            $releasename = $this->ExtractFromDelimiters($element, '<releasename>', '</releasename>');
            $trimmedsize = $this->ExtractFromDelimiters($element, '<trimmedsize>', '</trimmedsize>');
            $firmware = $this->ExtractFromDelimiters($element, '<firmware>', '</firmware>');
            $type = $this->ExtractFromDelimiters($element, '<type>', '</type>');
            $card = $this->ExtractFromDelimiters($element, '<card>', '</card>');

            if (empty($releasename))
                continue;

            //Retrieve cover art and short desc from IGN, when the search returns a result.
            //Every lookup is checked: a search page without results must not abort the feed.
            $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = '';
            $ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name);
            if ($ignResult = file_get_html($ignSearchUrl)) {
                $ignMedia = $ignResult->find('div.search-item-media', 0);
                $ignImage = is_null($ignMedia) ? null : $ignMedia->find('img', 0);
                $ignDescElement = $ignResult->find('div.search-item-description', 0);
                $ignSubTitle = $ignResult->find('div.search-item-sub-title', 0);
                $ignLinkElement = is_null($ignSubTitle) ? null : $ignSubTitle->find('a', 1);
                $ignDateElement = $ignResult->find('span.publish-date', 0);

                if (!is_null($ignLinkElement)) {
                    $ignLink = $ignLinkElement->href;
                    $ignCoverArt = is_null($ignImage) ? '' : $ignImage->src;
                    $ignDesc = is_null($ignDescElement) ? '' : $ignDescElement->plaintext;
                    $ignDescription = (empty($ignCoverArt) ? '' : '<div><img src="'.$ignCoverArt.'" /></div>')
                        .'<div>'.$ignDesc.' <a href="'.$ignLink.'">More at IGN</a></div>';

                    //Keep the current time when the publish date is missing or unreadable
                    $ignParsedDate = is_null($ignDateElement) ? false : strtotime(trim($ignDateElement->plaintext));
                    if ($ignParsedDate !== false)
                        $ignDate = $ignParsedDate;
                }
            }

            //Main section : Release description from 3DS database
            $releaseDescription = '<h3>Release Details</h3>'
                .'<b>Release ID: </b>'.$id.'<br />'
                .'<b>Game Name: </b>'.$name.'<br />'
                .'<b>Publisher: </b>'.$publisher.'<br />'
                .'<b>Region: </b>'.$region.'<br />'
                .'<b>Group: </b>'.$group.'<br />'
                .'<b>Image size: </b>'.(intval($imagesize)/8).'MB<br />'
                .'<b>Serial: </b>'.$serial.'<br />'
                .'<b>Title ID: </b>'.$titleid.'<br />'
                .'<b>Image CRC: </b>'.$imgcrc.'<br />'
                .'<b>File Name: </b>'.$filename.'<br />'
                .'<b>Release Name: </b>'.$releasename.'<br />'
                .'<b>Trimmed size: </b>'.intval(intval($trimmedsize)/1048576).'MB<br />'
                .'<b>Firmware: </b>'.$firmware.'<br />'
                .'<b>Type: </b>'.$this->TypeToString($type).'<br />'
                .'<b>Card: </b>'.$this->CardToString($card).'<br />';

            //Build search links section to facilitate release search using search engines.
            //urlencode() already turns spaces into '+'; replacing them beforehand
            //would encode them as a literal plus sign (%2B).
            $releaseNameEncoded = urlencode($releasename);
            $searchLinkGoogle = 'https://google.com/?q='.$releaseNameEncoded;
            $searchLinkDuckDuckGo = 'https://duckduckgo.com/?q='.$releaseNameEncoded;
            $searchLinkQwant = 'https://lite.qwant.com/?q='.$releaseNameEncoded.'&t=web';
            $releaseSearchLinks = '<h3>Search this release</h3><ul>'
                .'<li><a href="'.$searchLinkGoogle.'">Search using Google</a></li>'
                .'<li><a href="'.$searchLinkDuckDuckGo.'">Search using DuckDuckGo</a></li>'
                .'<li><a href="'.$searchLinkQwant.'">Search using Qwant</a></li>'
                .'</ul>';

            //Build and add final item with the above three sections
            $item = new \Item();
            $item->title = $name;
            $item->author = $publisher;
            $item->timestamp = $ignDate;
            $item->thumbnailUri = $ignCoverArt;
            $item->uri = empty($ignLink) ? $searchLinkDuckDuckGo : $ignLink;
            $item->content = $ignDescription.$releaseDescription.$releaseSearchLinks;
            $this->items[] = $item;
            $limit++;
        }
    }

    public function getName() {
        return '3DS Scene Releases';
    }

    public function getURI() {
        return 'http://www.3dsdb.com/';
    }

    public function getCacheDuration() {
        return 10800; //3 hours
    }
}

75
bridges/SiliconBridge.php Normal file
View file

@ -0,0 +1,75 @@
<?php
/**
* Silicon Bridge
* Returns the newest articles
* 2015-09-08
*
* @name Silicon.fr
* @homepage http://www.silicon.fr/
* @description Returns the newest articles.
* @maintainer ORelio
* @update 2015-09-08
*/
class SiliconBridge extends BridgeAbstract {

    //Removes the CDATA wrapper markers from a feed field.
    //Declared as a method: a named function nested in collectData() is
    //registered globally and cannot be declared twice.
    private function StripCDATA($string) {
        $string = str_replace('<![CDATA[', '', $string);
        $string = str_replace(']]>', '', $string);
        return $string;
    }

    //Removes every section starting with $start and ending with $end,
    //delimiters included. A section whose $end is missing is left in place.
    private function StripWithDelimiters($string, $start, $end) {
        while (($startPos = strpos($string, $start)) !== false) {
            $section_to_remove = substr($string, $startPos);
            $endPos = strpos($section_to_remove, $end);
            if ($endPos === false)
                break; //Unterminated section: do not chop unrelated characters
            $section_to_remove = substr($section_to_remove, 0, $endPos + strlen($end));
            $string = str_replace($section_to_remove, '', $string);
        }
        return $string;
    }

    //Collects up to 5 articles from the RSS feed, fetching each article page
    //to build the full content
    public function collectData(array $param) {

        $feedUrl = 'http://www.silicon.fr/feed';
        $html = file_get_html($feedUrl) or $this->returnError('Could not request Silicon: '.$feedUrl, 500);
        $limit = 0;

        foreach ($html->find('item') as $element) {

            //Stop as soon as enough articles have been collected
            if ($limit >= 5)
                break;

            //Retrieve article Uri and get that page. The link is cut out of the raw item markup.
            $article_uri = $element->innertext;
            $article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
            $article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
            $article_html = file_get_html($article_uri) or $this->returnError('Could not request Silicon: '.$article_uri, 500);

            //Build article contents from corresponding elements.
            //Missing elements simply contribute nothing.
            $enclosure = $element->find('enclosure', 0);
            $thumbnailUri = is_null($enclosure) ? null : $enclosure->url;
            $excerpt = $article_html->find('div.entry-excerpt', 0);
            $body = $article_html->find('div.entry-content', 0);
            $article_content = '<p><img src="'.$thumbnailUri.'" /></p>'
                .'<p><b>'.(is_null($excerpt) ? '' : $excerpt->plaintext).'</b></p>'
                .(is_null($body) ? '' : $body->innertext);

            //Remove useless scripts left in the page
            $article_content = $this->StripWithDelimiters($article_content, '<script', '</script>');

            //Build and add final item
            $item = new \Item();
            $item->uri = $article_uri;
            $item->thumbnailUri = $thumbnailUri;
            $item->title = $this->StripCDATA($element->find('title', 0)->innertext);
            $item->author = $this->StripCDATA($element->find('dc:creator', 0)->innertext);
            $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
            $item->content = $article_content;
            $this->items[] = $item;
            $limit++;
        }
    }

    public function getName() {
        return 'Silicon Bridge';
    }

    public function getURI() {
        return 'http://www.silicon.fr/';
    }

    public function getCacheDuration() {
        return 1800; // 30 minutes
    }
}

101
bridges/T411Bridge.php Normal file
View file

@ -0,0 +1,101 @@
<?php
/**
* T411Bridge
* Returns 5 newest torrents with specified search criteria
*
* @name T411
* @homepage https://t411.in/
* @description Returns the 5 newest torrents with specified search terms <br /> Use url part after '?' mark when using their search engine
* @maintainer ORelio
* @update 2015-10-22
* @use1(search="search criteria")
*/
class T411Bridge extends BridgeAbstract {

    //Utility function for extracting CDATA fields.
    //Declared as a method: a named function nested in collectData() is
    //registered globally and cannot be declared twice.
    private function StripCDATA($string) {
        $string = str_replace('<![CDATA[', '', $string);
        $string = str_replace(']]>', '', $string);
        return $string;
    }

    //Utility function for removing text based on specified delimiters.
    //A section whose $end is missing is left in place.
    private function StripWithDelimiters($string, $start, $end) {
        while (($startPos = strpos($string, $start)) !== false) {
            $section_to_remove = substr($string, $startPos);
            $endPos = strpos($section_to_remove, $end);
            if ($endPos === false)
                break; //Unterminated section: do not chop unrelated characters
            $section_to_remove = substr($section_to_remove, 0, $endPos + strlen($end));
            $string = str_replace($section_to_remove, '', $string);
        }
        return $string;
    }

    //Collects the 5 newest torrents matching $param['search'], which is the
    //query string used by the T411 search engine (url part after the '?' mark).
    //A missing search is reported through returnError() with code 400.
    public function collectData(array $param) {

        //Ensure proper parameters have been provided
        if (empty($param['search'])) {
            $this->returnError('You must specify a search criteria', 400);
        }

        //Retrieve torrent listing as truncated rss, which does not contain torrent description
        $url = 'http://www.t411.in/torrents/rss/?'.$param['search'].'&order=added&type=desc';
        $html = file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500);
        $limit = 0;

        //Process each item individually
        foreach ($html->find('item') as $element) {

            //Limit total amount of requests
            if ($limit >= 5)
                break;

            //Requests are rate-limited
            sleep(1); //So we need to wait

            //Retrieve data from RSS entry. The ' (S:...)' suffix is dropped from the title.
            $item_uri = $this->StripCDATA($element->find('guid', 0)->plaintext);
            $item_title = $this->StripWithDelimiters($this->StripCDATA($element->find('title', 0)->innertext), ' (S:', ')');
            $item_date = strtotime($element->find('pubDate', 0)->plaintext);

            //Retrieve full description from torrent page; unreachable pages are skipped
            if (!($item_html = file_get_html($item_uri)))
                continue;

            //Retrieve data from page contents. A page without description block
            //is skipped instead of calling find() on null.
            $item_desc = $item_html->find('div.description', 0);
            if (is_null($item_desc))
                continue;
            $profile_link = $item_html->find('a.profile', 0);
            $item_author = is_null($profile_link) ? null : $profile_link->innertext;

            //Retrieve image for thumbnail or generic logo fallback
            $item_image = 'http://www.t411.in/themes/blue/images/logo.png';
            foreach ($item_desc->find('img') as $img) {
                if (strpos($img->src, 'dreamprez') === false) {
                    $item_image = $img->src;
                    break;
                }
            }

            //Build and add final item
            $item = new \Item();
            $item->uri = $item_uri;
            $item->title = $item_title;
            $item->author = $item_author;
            $item->timestamp = $item_date;
            $item->thumbnailUri = $item_image;
            $item->content = utf8_encode($item_desc->innertext);
            $this->items[] = $item;
            $limit++;
        }
    }

    public function getName() {
        return "T411 Bridge";
    }

    public function getURI() {
        return 'https://t411.in';
    }

    public function getCacheDuration() {
        return 3600*3; // 3 hours
    }
}