FilterMoreBridge.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. <?php
  /**
   * Bridge that re-publishes a user-supplied feed, keeping only the items
   * that satisfy a configurable set of filters (title/body/author regular
   * expressions, date bounds, presence of media).
   *
   * Each configured filter yields true or false for an item; filters that are
   * not configured yield null and are ignored. The individual results are
   * combined with AND or OR (see 'conj_type') and may be inverted
   * (see 'invert_filter').
   */
  class FilterMoreBridge extends FeedExpander {
      const MAINTAINER = 'boyska';
      const NAME = 'FilterMore';
      const CACHE_TIMEOUT = 2;
      const DESCRIPTION = 'Filters a feed of your choice';
      const URI = 'https://git.lattuta.net/boyska/rss-bridge';
      const PARAMETERS = array(array(
          'url' => array(
              'name' => 'Feed URL',
              'required' => true,
          ),
          'conj_type' => array(
              'name' => 'Conjunction type type',
              'type' => 'list',
              'required' => false,
              'values' => array(
                  'All conditions must be met' => 'and',
                  'Any condition must be met' => 'or',
              ),
              // Must be one of the values above; the previous default
              // ('permit') was not a selectable value.
              'defaultValue' => 'and',
          ),
          'title_re' => array(
              'name' => 'Filter item title (regular expression, see php.net/pcre_match for details)',
              'required' => false,
              'exampleValue' => '/breaking\ news/i',
          ),
          'body_re' => array(
              'name' => 'Filter body (regular expression)',
              'required' => false,
          ),
          'author_re' => array(
              'name' => 'Filter author (regular expression)',
              'required' => false,
              'exampleValue' => '/(technology|politics)/i',
          ),
          'newer_than' => array(
              'name' => 'Filter date: ok if newer than the value (see php.net/strtotime for details)',
              'required' => false,
              'exampleValue' => '-14 days',
          ),
          'older_than' => array(
              'name' => 'Filter date: ok if older than the value (see php.net/strtotime for details)',
              'required' => false,
              'exampleValue' => '-1 hour',
          ),
          'has_media' => array(
              'name' => 'Has at least 1 media inside',
              'type' => 'checkbox',
              'required' => false,
              'defaultValue' => false,
          ),
          'invert_filter' => array(
              'name' => 'Invert filter result',
              'type' => 'checkbox',
              'required' => false,
              'defaultValue' => false,
          ),
      ));

      /**
       * Parses one feed entry and applies the configured filters to it.
       *
       * @param object $newItem Feed entry node; it is traversed through its
       *                        `enclosure` and `link` children and their
       *                        attributes.
       * @return array|null The parsed item (with 'enclosures' reduced to a
       *                    list of URLs) when it passes the filters or when no
       *                    filter is configured; null when it is filtered out.
       * @throws Exception When a configured regular expression or time
       *                   specification cannot be applied.
       */
      protected function parseItem($newItem){
          $item = parent::parseItem($newItem);

          // Filters see the full attribute maps of the enclosures; they are
          // reduced to plain URLs only after the filters have run.
          $item['enclosures'] = $this->extractEnclosures($newItem);

          $filters = [
              'filterByTitle',
              'filterByBody',
              'filterByAuthor',
              'filterByDateNewer',
              'filterByDateOlder',
              'filterByMedia',
          ];
          $results = [];
          foreach($filters as $filter) {
              $filter_res = $this->$filter($item);
              if($filter_res === null) continue; // filter not configured
              $results[] = $filter_res;
          }

          $item['enclosures'] = $this->enclosureUrls($item['enclosures']);

          if(count($results) === 0) {
              // No filter configured: every item passes.
              return $item;
          }

          if($this->getConjType() === 'and') {
              $result = !in_array(false, $results, true);
          } else { // or
              $result = in_array(true, $results, true);
          }

          if($this->getInvertResult()) {
              $result = !$result;
          }

          return $result ? $item : null;
      }

      /**
       * Collects the enclosures of a feed entry as attribute maps.
       *
       * `enclosure` children contribute all of their attributes, with
       * 'length' normalised to an integer (0 when the attribute is absent).
       * `link` children with rel="enclosure" contribute their 'href' under
       * the 'url' key.
       *
       * @param object $newItem Feed entry node.
       * @return array List of associative arrays, one per enclosure.
       */
      private function extractEnclosures($newItem){
          $enclosures = [];

          if(isset($newItem->enclosure)) {
              foreach($newItem->enclosure as $encl) {
                  $serialized = [];
                  foreach($encl->attributes() as $key => $value) {
                      $serialized[$key] = (string)$value;
                  }
                  // 'length' is optional in practice; do not read a missing key.
                  $serialized['length'] = isset($serialized['length'])
                      ? intval($serialized['length'])
                      : 0;
                  $enclosures[] = $serialized;
              }
          }

          if(isset($newItem->link)) {
              foreach($newItem->link as $el) {
                  if(((string)$el['rel']) !== 'enclosure') continue;
                  $enclosures[] = ['url' => (string)$el['href']];
              }
          }

          return $enclosures;
      }

      /**
       * Reduces enclosure attribute maps to a list of their URLs.
       *
       * Entries that carry no 'url' key are skipped instead of producing a
       * null entry.
       *
       * @param array $enclosures List as built by extractEnclosures().
       * @return array List of URL strings.
       */
      private function enclosureUrls($enclosures){
          $urls = [];
          foreach($enclosures as $e) {
              if(!isset($e['url'])) continue;
              $urls[] = $e['url'];
          }
          return $urls;
      }

      /**
       * Three-way comparison.
       *
       * @param mixed $a
       * @param mixed $b
       * @return int 1 if $a > $b, -1 if $a < $b, 0 otherwise.
       */
      private function cmp($a, $b) {
          if($a > $b) return 1;
          if($a < $b) return -1;
          return 0;
      }

      /**
       * Tests a field against a user-supplied regular expression.
       *
       * @param mixed $field Value to test; it is cast to string.
       * @param string|null $re Pattern including delimiters; null or '' means
       *                        the filter is not configured.
       * @return bool|null null when the filter is not configured, otherwise
       *                   whether the pattern matched.
       * @throws Exception When preg_match() reports a failure for the pattern.
       */
      private function filterByFieldRegexp($field, $re){
          if($re === null || $re === '') return null;

          $matched = preg_match($re, (string)$field);
          if($matched === false) {
              // Same policy as filterByDate(): report unusable input instead
              // of silently treating it as "no match".
              throw new Exception('Invalid regular expression: ' . $re);
          }
          return $matched === 1;
      }

      /**
       * Filter on the item title using the 'title_re' input.
       *
       * @param array $item Parsed item.
       * @return bool|null See filterByFieldRegexp().
       */
      protected function filterByTitle($item){
          $re = $this->getInput('title_re');
          $title = isset($item['title']) ? $item['title'] : '';
          return $this->filterByFieldRegexp($title, $re);
      }

      /**
       * Filter on the item content using the 'body_re' input.
       *
       * @param array $item Parsed item.
       * @return bool|null See filterByFieldRegexp().
       */
      protected function filterByBody($item){
          $re = $this->getInput('body_re');
          $content = isset($item['content']) ? $item['content'] : '';
          return $this->filterByFieldRegexp($content, $re);
      }

      /**
       * Filter on the item author using the 'author_re' input.
       *
       * @param array $item Parsed item.
       * @return bool|null See filterByFieldRegexp().
       */
      protected function filterByAuthor($item){
          $re = $this->getInput('author_re');
          $author = isset($item['author']) ? $item['author'] : '';
          return $this->filterByFieldRegexp($author, $re);
      }

      /**
       * Compares the item timestamp with a strtotime() expression taken from
       * the given input.
       *
       * @param array $item Parsed item; a missing 'timestamp' is treated as 0.
       * @param string $input Name of the input holding the time expression.
       * @param int $expected Required cmp() result: 1 for "item is newer",
       *                      -1 for "item is older".
       * @return bool|null null when the input is not set, otherwise whether
       *                   the comparison produced $expected.
       * @throws Exception When strtotime() cannot parse the input value.
       */
      private function filterByDate($item, $input, $expected){
          $val = $this->getInput($input);
          if($val === null || $val === '') return null;

          $ts = strtotime($val);
          if($ts === false) {
              throw new Exception("Invalid time specification: " . $val);
          }

          $timestamp = isset($item['timestamp']) ? $item['timestamp'] : 0;
          $cmp = $this->cmp($timestamp, $ts); // 1 if newer, -1 if older
          return $cmp === $expected;
      }

      /**
       * Filter passing items strictly newer than the 'newer_than' input.
       *
       * @param array $item Parsed item.
       * @return bool|null See filterByDate().
       */
      protected function filterByDateNewer($item){
          return $this->filterByDate($item, 'newer_than', 1);
      }

      /**
       * Filter passing items strictly older than the 'older_than' input.
       *
       * @param array $item Parsed item.
       * @return bool|null See filterByDate().
       */
      protected function filterByDateOlder($item){
          return $this->filterByDate($item, 'older_than', -1);
      }

      /**
       * Filter passing items that have at least one enclosure.
       *
       * @param array $item Parsed item whose 'enclosures' key holds a list.
       * @return bool|null null when 'has_media' is not enabled, otherwise
       *                   whether the item has any enclosure.
       */
      protected function filterByMedia($item) {
          if(!$this->getInput('has_media')) return null;
          return count($item['enclosures']) > 0;
      }

      /**
       * @return mixed Value of the 'conj_type' input; 'and' selects AND,
       *               anything else is handled as OR by parseItem().
       */
      protected function getConjType(){
          return $this->getInput('conj_type');
      }

      /**
       * @return mixed Value of the 'invert_filter' input (used as a boolean).
       */
      protected function getInvertResult(){
          return $this->getInput('invert_filter');
      }

      /**
       * @return string The 'url' input when it is non-empty, otherwise the
       *                URI reported by the parent class.
       */
      public function getURI(){
          $url = $this->getInput('url');
          if(empty($url)) {
              $url = parent::getURI();
          }
          return $url;
      }

      /**
       * Validates the 'url' input and loads the feed from getURI().
       *
       * On an HttpException the load is attempted one more time; a second
       * failure propagates to the caller.
       */
      public function collectData(){
          $url = $this->getInput('url');
          // Anchored, case-insensitive scheme check: a bare 'http' prefix
          // test would accept e.g. 'httpfoo://' and reject 'HTTP://'.
          if($url && !preg_match('#^https?://#i', $url)) {
              // just in case someone find a way to access local files by playing with the url
              returnClientError('The url parameter must either refer to http or https protocol.');
          }
          try{
              $this->collectExpandableDatas($this->getURI());
          } catch (HttpException $e) {
              // Retry once.
              $this->collectExpandableDatas($this->getURI());
          }
      }
  }