WordPressBridge.php 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. <?php
  2. class WordPressBridge extends FeedExpander {
  3. const MAINTAINER = 'aledeg';
  4. const NAME = 'Wordpress Bridge';
  5. const URI = 'https://wordpress.org/';
  6. const CACHE_TIMEOUT = 10800; // 3h
  7. const DESCRIPTION = 'Returns the newest full posts of a Wordpress powered website';
  8. const PARAMETERS = array( array(
  9. 'url' => array(
  10. 'name' => 'Blog URL',
  11. 'required' => true
  12. )
  13. ));
  14. private function clearContent($content){
  15. $content = preg_replace('/<script[^>]*>[^<]*<\/script>/', '', $content);
  16. $content = preg_replace('/<div class="wpa".*/', '', $content);
  17. $content = preg_replace('/<form.*\/form>/', '', $content);
  18. return $content;
  19. }
  20. protected function parseItem($newItem){
  21. $item = parent::parseItem($newItem);
  22. $article_html = getSimpleHTMLDOMCached($item['uri']);
  23. $article = null;
  24. switch(true) {
  25. case !is_null($article_html->find('article', 0)):
  26. // most common content div
  27. $article = $article_html->find('article', 0);
  28. break;
  29. case !is_null($article_html->find('.single-content', 0)):
  30. // another common content div
  31. $article = $article_html->find('.single-content', 0);
  32. break;
  33. case !is_null($article_html->find('.post-content', 0)):
  34. // another common content div
  35. $article = $article_html->find('.post-content', 0);
  36. break;
  37. case !is_null($article_html->find('.post', 0)):
  38. // for old WordPress themes without HTML5
  39. $article = $article_html->find('.post', 0);
  40. break;
  41. }
  42. if(!is_null($article)) {
  43. $item['content'] = $this->clearContent($article->innertext);
  44. }
  45. return $item;
  46. }
  47. public function getURI(){
  48. $url = $this->getInput('url');
  49. if(empty($url)) {
  50. $url = parent::getURI();
  51. }
  52. return $url;
  53. }
  54. public function collectData(){
  55. if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') {
  56. // just in case someone find a way to access local files by playing with the url
  57. returnClientError('The url parameter must either refer to http or https protocol.');
  58. }
  59. try{
  60. $this->collectExpandableDatas($this->getURI() . '/feed/atom/');
  61. } catch (HttpException $e) {
  62. $this->collectExpandableDatas($this->getURI() . '/?feed=atom');
  63. }
  64. }
  65. }