YoutubeBridge.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. <?php
  2. /**
  3. * RssBridgeYoutube
  4. * Returns the newest videos
  5. * WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
  6. * change: define('MAX_FILE_SIZE', 600000);
  7. * into: define('MAX_FILE_SIZE', 900000); (or more)
  8. */
  class YoutubeBridge extends BridgeAbstract {

      const NAME = 'YouTube Bridge';
      const URI = 'https://www.youtube.com/';
      const CACHE_TIMEOUT = 10800; // 3h
      const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';
      const MAINTAINER = 'mitsukarenai';

      // One entry per query mode handled by collectData(). The 'global'
      // duration options are read by ytBridgeParseHtmlListing() and skipFeeds().
      const PARAMETERS = array(
          'By username' => array(
              'u' => array(
                  'name' => 'username',
                  'exampleValue' => 'test',
                  'required' => true
              )
          ),
          'By channel id' => array(
              'c' => array(
                  'name' => 'channel id',
                  'exampleValue' => '15',
                  'required' => true
              )
          ),
          'By playlist Id' => array(
              'p' => array(
                  'name' => 'playlist id',
                  'exampleValue' => '15'
              )
          ),
          'Search result' => array(
              's' => array(
                  'name' => 'search keyword',
                  'exampleValue' => 'test'
              ),
              'pa' => array(
                  'name' => 'page',
                  'type' => 'number',
                  'exampleValue' => 1
              )
          ),
          'global' => array(
              'duration_min' => array(
                  'name' => 'min. duration (minutes)',
                  'type' => 'number',
                  'title' => 'Minimum duration for the video in minutes',
                  'exampleValue' => 5
              ),
              'duration_max' => array(
                  'name' => 'max. duration (minutes)',
                  'type' => 'number',
                  'title' => 'Maximum duration for the video in minutes',
                  'exampleValue' => 10
              )
          )
      );

      /** @var string Feed title collected while parsing; used by getName(). */
      private $feedName = '';

      /**
       * Loads the watch page of a video and fills in author, description and
       * publication time.
       *
       * Each by-reference argument is left untouched when the page cannot be
       * loaded, when the page carries the unavailable marker, or when the
       * element it is read from is missing.
       *
       * @param string $vid    Video id, appended to "watch?v="
       * @param string $author Receives the name of the first itemListElement
       *                       entry of an embedded JSON script
       * @param string $desc   Receives the inner HTML of #watch-description-text
       * @param int    $time   Receives the timestamp parsed from
       *                       meta[itemprop=datePublished]
       */
      private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){
          $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid");

          // Nothing to inspect when the request failed.
          if(!$html) {
              return;
          }

          // Skip unavailable videos. The result of strpos() has to be compared
          // with false explicitly: a negated strpos() is true when the marker
          // is absent, which would skip every regular page instead.
          if(strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
              return;
          }

          foreach($html->find('script') as $script) {
              $data = trim($script->innertext);

              if(strpos($data, '{') !== 0)
                  continue; // Wrong script

              $json = json_decode($data);

              if(!isset($json->itemListElement))
                  continue; // Wrong script

              $author = $json->itemListElement[0]->item->name;
          }

          $descriptionElement = $html->find('#watch-description-text', 0);
          if(!is_null($descriptionElement))
              $desc = $descriptionElement->innertext;

          $publishedElement = $html->find('meta[itemprop=datePublished]', 0);
          if(!is_null($publishedElement)) {
              $published = strtotime($publishedElement->getAttribute('content'));

              // strtotime() returns false for text it cannot parse; keep the
              // caller's value rather than storing false as a timestamp.
              if($published !== false)
                  $time = $published;
          }
      }

      /**
       * Appends one feed item to $this->items.
       *
       * @param string $vid    Video id
       * @param string $title  Item title
       * @param string $author Item author
       * @param string $desc   HTML placed below the thumbnail in the content
       * @param int    $time   Item timestamp
       */
      private function ytBridgeAddItem($vid, $title, $author, $desc, $time){
          $uri = self::URI . 'watch?v=' . $vid;

          // The thumbnail URL is the base URI with "www." swapped for "img.".
          $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/0.jpg';

          $item = array();
          $item['id'] = $vid;
          $item['title'] = $title;
          $item['author'] = $author;
          $item['timestamp'] = $time;
          $item['uri'] = $uri;
          $item['content'] = '<a href="' . $uri . '"><img src="' . $thumbnailUri . '" /></a><br />' . $desc;

          $this->items[] = $item;
      }

      /**
       * Adds one item per <entry> of a parsed XML feed and stores the feed
       * title in $this->feedName.
       *
       * @param object $xml Parsed feed as returned by ytGetSimpleHTMLDOM()
       */
      private function ytBridgeParseXmlFeed($xml) {
          foreach($xml->find('entry') as $element) {
              $title = $this->ytBridgeFixTitle($element->find('title', 0)->plaintext);
              $author = $element->find('name', 0)->plaintext;
              $desc = $element->find('media:description', 0)->innertext;

              // Make sure the description is easy on the eye :)
              // Escape it, keep its line breaks and turn plain URLs into links.
              $desc = htmlspecialchars($desc);
              $desc = nl2br($desc);
              $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims',
                  '<a href="$1" target="_blank">$1</a> ',
                  $desc);

              $vid = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
              $time = strtotime($element->find('published', 0)->plaintext);

              if(strpos($vid, 'googleads') === false)
                  $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
          }

          $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName()
      }

      /**
       * Walks an HTML video listing and counts, and optionally adds, the
       * videos that pass the duration filter.
       *
       * Entries titled "[Private Video]" and ids containing "googleads" are
       * neither counted nor added. When items are added, at most 10 are taken
       * and each one triggers a watch-page request for its details.
       *
       * @param object $html             Parsed listing page
       * @param string $element_selector Selector matching one listing entry
       * @param string $title_selector   Selector of the title inside an entry
       * @param bool   $add_parsed_items false to only count matching entries
       * @return int Number of entries accepted
       */
      private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) {
          $limit = $add_parsed_items ? 10 : INF;
          $count = 0;

          // Inputs are minutes; unset bounds become "no limit" on either side.
          $duration_min = ($this->getInput('duration_min') ?: -1) * 60;
          $duration_max = ($this->getInput('duration_max') ?: INF) * 60;

          if($duration_max < $duration_min) {
              returnClientError('Max duration must be greater than min duration!');
          }

          foreach($html->find($element_selector) as $element) {
              // Once the limit is reached no further entry can be accepted.
              if($count >= $limit) {
                  break;
              }

              $linkElement = $element->find('a', 0);
              $titleElement = $element->find($title_selector, 0);

              // Without a link or a title there is no video to report.
              if(is_null($linkElement) || is_null($titleElement)) {
                  continue;
              }

              $vid = str_replace('/watch?v=', '', $linkElement->href);
              $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid));
              $title = $this->ytBridgeFixTitle($titleElement->plaintext);

              // An entry without a duration label counts as 0 seconds, so it
              // is only dropped when a minimum duration was requested.
              $durationElement = $element->find('span[class="video-time"]', 0);
              $duration = is_null($durationElement)
                  ? 0
                  : $this->ytBridgeParseDuration($durationElement->plaintext);

              if($duration < $duration_min || $duration > $duration_max) {
                  continue;
              }

              if($title != '[Private Video]' && strpos($vid, 'googleads') === false) {
                  if($add_parsed_items) {
                      $author = '';
                      $desc = '';
                      $time = 0;
                      $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time);
                      $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
                  }
                  $count++;
              }
          }

          return $count;
      }

      /**
       * Converts a duration label into seconds.
       *
       * The duration comes in one of the formats:
       * hh:mm:ss / mm:ss / m:ss
       * 01:03:30 / 15:06 / 1:24
       *
       * The hours group is optional in the pattern, so the three-part form is
       * read as hours:minutes:seconds and the two-part form as minutes:seconds.
       *
       * @param string $durationText Label text; surrounding text is ignored
       * @return int Duration in seconds, 0 when no duration is found
       */
      private function ytBridgeParseDuration($durationText) {
          if(!preg_match('/(?:(\d+):)?(\d{1,2}):(\d{2})/', $durationText, $parts)) {
              return 0;
          }

          // An unmatched hours group is reported as '' and casts to 0.
          return (int)$parts[1] * 3600 + (int)$parts[2] * 60 + (int)$parts[3];
      }

      /**
       * Returns the text of the page's <title> without the " - YouTube"
       * suffix, or an empty string when the page has no <title>.
       *
       * @param object $html Parsed page
       * @return string
       */
      private function ytBridgePageTitle($html) {
          $titleElement = $html->find('title', 0);

          if(is_null($titleElement)) {
              return '';
          }

          return str_replace(' - YouTube', '', $titleElement->plaintext);
      }

      /**
       * Decodes HTML entities in a title.
       *
       * @param string $title
       * @return string
       */
      private function ytBridgeFixTitle($title) {
          // convert both &#1234; and &quot; to UTF-8
          return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
      }

      /**
       * Requests a URL through getSimpleHTMLDOM() with a fixed option set.
       *
       * @param string $url
       * @return mixed Whatever getSimpleHTMLDOM() returns; callers treat a
       *               falsy value as a failed request
       */
      private function ytGetSimpleHTMLDOM($url){
          return getSimpleHTMLDOM(
              $url,
              array(),                // header
              array(),                // opts
              true,                   // lowercase
              true,                   // forceTagsClosed
              DEFAULT_TARGET_CHARSET, // target_charset
              false,                  // stripRN - NOTE(review): presumably differs from the helper's default; confirm
              DEFAULT_BR_TEXT,        // defaultBRText
              DEFAULT_SPAN_TEXT       // defaultSpanText
          );
      }

      /**
       * Collects the items for the requested mode: username, channel id,
       * playlist id or search keyword, checked in that order.
       *
       * User and channel modes read the XML feed unless a duration filter is
       * set, and fall back to the HTML listing. Playlist mode uses the XML
       * feed only when the listing holds at most 15 matching videos and no
       * duration filter is set. Search mode always parses the result page.
       */
      public function collectData(){
          $url_feed = '';
          $url_listing = '';

          if($this->getInput('u')) { /* User and Channel modes */
              $this->request = $this->getInput('u');
              $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
              $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
          } elseif($this->getInput('c')) {
              $this->request = $this->getInput('c');
              $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
              $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
          }

          if(!empty($url_feed) && !empty($url_listing)) {
              if(!$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
                  $this->ytBridgeParseXmlFeed($xml);
              } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) {
                  $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3');

                  // The XML branch takes the name from the feed title; this
                  // branch takes it from the page title so getName() is not
                  // left with an empty name.
                  $this->feedName = $this->ytBridgePageTitle($html); // feedName will be used by getName()
              } else {
                  returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing");
              }
          } elseif($this->getInput('p')) { /* playlist mode */
              $this->request = $this->getInput('p');
              $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request);
              $url_listing = self::URI . 'playlist?list=' . urlencode($this->request);

              $html = $this->ytGetSimpleHTMLDOM($url_listing)
                  or returnServerError("Could not request YouTube. Tried:\n - $url_listing");

              // First pass only counts the videos that pass the filters.
              $item_count = $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a', false);

              if($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
                  $this->ytBridgeParseXmlFeed($xml);
              } else {
                  $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a');
              }

              $this->feedName = 'Playlist: ' . $this->ytBridgePageTitle($html); // feedName will be used by getName()

              // Newest first.
              usort($this->items, function ($item1, $item2) {
                  return $item2['timestamp'] - $item1['timestamp'];
              });
          } elseif($this->getInput('s')) { /* search mode */
              $this->request = $this->getInput('s');

              $page = 1;
              if($this->getInput('pa'))
                  $page = (int)preg_replace('/[^0-9]/', '', $this->getInput('pa'));

              $url_listing = self::URI
                  . 'results?search_query='
                  . urlencode($this->request)
                  . '&page='
                  . $page
                  . '&filters=video&search_sort=video_date_uploaded';

              $html = $this->ytGetSimpleHTMLDOM($url_listing)
                  or returnServerError("Could not request YouTube. Tried:\n - $url_listing");

              $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3 > a');
              $this->feedName = 'Search: ' . $this->ytBridgePageTitle($html); // feedName will be used by getName()
          } else { /* no valid mode */
              returnClientError("You must specify either:\n - YouTube username (?u=...)\n"
                  . " - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
          }
      }

      /**
       * Tells whether the XML feeds must be bypassed, which is the case as
       * soon as a duration filter is set. Durations are only read from the
       * HTML listings in this class.
       *
       * @return bool
       */
      private function skipFeeds() {
          return ($this->getInput('duration_min') || $this->getInput('duration_max'));
      }

      /**
       * Returns the collected feed name followed by " - YouTube", or the
       * parent's name when no context was queried or no name was collected.
       *
       * @return string
       */
      public function getName(){
          // Name depends on queriedContext:
          switch($this->queriedContext) {
              case 'By username':
              case 'By channel id':
              case 'By playlist Id':
              case 'Search result':
                  // No collected name yet: use the parent's name rather
                  // than a bare " - YouTube".
                  if($this->feedName === '') {
                      return parent::getName();
                  }
                  return $this->feedName . ' - YouTube'; // We already know it's a bridge, right?
              default:
                  return parent::getName();
          }
      }
  }