DanbooruBridge.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. <?php
  /**
   * Bridge that turns a Danbooru post listing page into feed items.
   *
   * The scraping details live in constants (PATHTODATA, IDATTRIBUTE,
   * TAGATTRIBUTE) that are read through late static binding, and in protected
   * methods, so a derived class can adapt them.
   */
  class DanbooruBridge extends BridgeAbstract {

      const MAINTAINER = 'mitsukarenai, logmanoriginal';
      const NAME = 'Danbooru';
      const URI = 'http://donmai.us/';
      const CACHE_TIMEOUT = 1800; // 30min
      const DESCRIPTION = 'Returns images from given page';

      const PARAMETERS = array(
          'global' => array(
              'p' => array(
                  'name' => 'page',
                  'defaultValue' => 1,
                  'type' => 'number'
              ),
              't' => array(
                  'name' => 'tags'
              )
          ),
          0 => array()
      );

      // Selector matching one post element on the listing page
      const PATHTODATA = 'article';
      // Attribute of the post element that carries the post id
      const IDATTRIBUTE = 'data-id';
      // Attribute of the post's <img> that carries the tag list
      const TAGATTRIBUTE = 'alt';

      /**
       * Builds the URI of the listing page to request.
       *
       * @return string Bridge URI followed by the 'posts' path with the 'page'
       * and 'tags' query parameters taken from the inputs 'p' and 't'.
       */
      protected function getFullURI(){
          // The tag input is cast to string before encoding: 't' has no default
          // value, so it is presumably null when omitted (TODO confirm against
          // BridgeAbstract::getInput), and PHP 8.1+ deprecates passing null to
          // urlencode(). The resulting URI is unchanged for every other value.
          return $this->getURI()
              . 'posts?page=' . $this->getInput('p')
              . '&tags=' . urlencode((string)$this->getInput('t'));
      }

      /**
       * Reads the tags of a post from its first <img> child.
       *
       * @param object $element Post element (one match of PATHTODATA)
       * @return mixed Value of the TAGATTRIBUTE attribute of the first <img>
       */
      protected function getTags($element){
          return $element->find('img', 0)->getAttribute(static::TAGATTRIBUTE);
      }

      /**
       * Converts one post element into a feed item.
       *
       * @param object $element Post element (one match of PATHTODATA)
       * @return array Item with the keys 'uri', 'postid', 'timestamp', 'tags',
       * 'title' and 'content'
       */
      protected function getItemFromElement($element){
          // Fix links
          defaultLinkTo($element, $this->getURI());

          $item = array();
          $item['uri'] = $element->find('a', 0)->href;

          // Keep only the digits of the id attribute
          $item['postid'] = (int)preg_replace(
              '/[^0-9]/',
              '',
              $element->getAttribute(static::IDATTRIBUTE)
          );

          // No date is read from the element; the time of the request is used
          $item['timestamp'] = time();

          $thumbnailUri = $element->find('img', 0)->src;
          $item['tags'] = $this->getTags($element);
          $item['title'] = $this->getName() . ' | ' . $item['postid'];
          $item['content'] = '<a href="'
              . $item['uri']
              . '"><img src="'
              . $thumbnailUri
              . '" /></a><br>Tags: '
              . $item['tags'];

          return $item;
      }

      /**
       * Requests the listing page and appends one item per post element to
       * $this->items.
       */
      public function collectData(){
          $content = getContents($this->getFullURI())
              or returnServerError('Could not request ' . $this->getName());

          $html = Fix_Simple_Html_Dom::str_get_html($content);

          // str_get_html() returns false instead of a DOM object when the input
          // is larger than MAX_FILE_SIZE. Report that as an error instead of
          // failing on a method call on a boolean. returnServerError() is
          // expected to abort here, as the 'or' idiom above already relies on.
          if($html === false) {
              returnServerError('Could not parse the response of ' . $this->getName());
          }

          foreach($html->find(static::PATHTODATA) as $element) {
              $this->items[] = $this->getItemFromElement($element);
          }
      }
  }
  /**
   * This class is a monkey patch to 'extend' simplehtmldom to recognize <source>
   * tags (HTML5) as self closing tags. This patch should be removed once
   * simplehtmldom is fixed. This seems to be an issue with more tags:
   * https://sourceforge.net/p/simplehtmldom/bugs/83/
   *
   * The tag itself is valid according to Mozilla:
   *
   * The HTML <picture> element serves as a container for zero or more <source>
   * elements and one <img> element to provide versions of an image for different
   * display device scenarios. The browser will consider each of the child <source>
   * elements and select one corresponding to the best match found; if no matches
   * are found among the <source> elements, the file specified by the <img>
   * element's src attribute is selected. The selected image is then presented in
   * the space occupied by the <img> element.
   *
   * -- https://developer.mozilla.org/en-US/docs/Web/HTML/Element/picture
   *
   * Notice: This class uses parts of the original simplehtmldom, adjusted to pass
   * the guidelines of RSS-Bridge (formatting)
   */
  final class Fix_Simple_Html_Dom extends simple_html_dom {

      /* Same list as in simple_html_dom, with 'source' appended as last entry */
      protected $self_closing_tags = array(
          'img' => 1,
          'br' => 1,
          'input' => 1,
          'meta' => 1,
          'link' => 1,
          'hr' => 1,
          'base' => 1,
          'embed' => 1,
          'spacer' => 1,
          'source' => 1
      );

      /**
       * Parses an HTML string with the patched parser.
       *
       * Taken from simplehtmldom; the only difference is that the DOM object is
       * an instance of this class instead of 'simple_html_dom'.
       *
       * All arguments after $str are handed to the constructor unchanged;
       * $lowercase and $stripRN are additionally handed to load().
       *
       * @return Fix_Simple_Html_Dom|false The loaded DOM, or false if $str is
       * empty or longer than MAX_FILE_SIZE
       */
      public static function str_get_html($str,
          $lowercase = true,
          $forceTagsClosed = true,
          $target_charset = DEFAULT_TARGET_CHARSET,
          $stripRN = true,
          $defaultBRText = DEFAULT_BR_TEXT,
          $defaultSpanText = DEFAULT_SPAN_TEXT)
      {
          // The class is final, so 'self' always refers to Fix_Simple_Html_Dom
          $parser = new self(
              null,
              $lowercase,
              $forceTagsClosed,
              $target_charset,
              $stripRN,
              $defaultBRText,
              $defaultSpanText
          );

          $isParsable = !empty($str) && strlen($str) <= MAX_FILE_SIZE;

          if (!$isParsable) {
              // Nothing to load: release the parser and signal the failure
              $parser->clear();
              return false;
          }

          $parser->load($str, $lowercase, $stripRN);

          return $parser;
      }
  }