From 2c954a848df01ec0dde94b4b8a59eb4726d9e92a Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 00:06:14 +0200 Subject: [PATCH 1/7] [Wikipedia] Create bridge for multiple languages --- bridges/WikipediaBridge.php | 176 ++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 bridges/WikipediaBridge.php diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php new file mode 100644 index 0000000..e348df1 --- /dev/null +++ b/bridges/WikipediaBridge.php @@ -0,0 +1,176 @@ +maintainer = 'logmanoriginal'; + $this->name = 'Wikipedia bridge for \'Today\'s featured article...\' for many languages'; + $this->uri = 'https://www.wikipedia.org/'; + $this->description = 'Returns \'Today\'s featured article...\' for a language of your choice'; + $this->update = '2016-08-07'; + + $this->parameters[] = + '[ + { + "name": "Language", + "identifier": "language", + "type": "list", + "required": "true", + "title": "Select your language", + "exampleValue": "English", + "values": [ + { + "name": "English", + "value": "en" + }, + { + "name": "German", + "value": "de" + }, + { + "name": "French", + "value": "fr" + }, + { + "name": "Esperanto", + "value": "eo" + } + ] + }, + { + "name": "Load full article", + "identifier": "fullarticle", + "type": "checkbox", + "required": "false", + "title": "Activate to always load the full article", + "exampleValue": "false" + } + ]'; + } + + public function collectData(array $params){ + if(!isset($params['language'])) + $this->returnError('You must specify a valid language via \'&language=\'!', 400); + + if(!$this->CheckLanguageCode(strtolower($params['language']))) + $this->returnError('The language code you provided (\'' . $params['language'] . '\') is not supported!', 400); + + $fullArticle = false; + if(isset($params['fullarticle'])) + $fullArticle = $params['fullarticle'] === 'on' ? true : false; + + // We store the correct URI as URI of this bridge (so it can be used later!) + $this->uri = 'https://' . strtolower($params['language']) . '.wikipedia.org'; + + // While we at it let's also update the name for the feed + $this->name = 'Today\'s featured articles from ' . strtolower($params['language']) . '.wikipedia.org'; + + // This will automatically send us to the correct main page in any language (try it!) + $html = $this->file_get_html($this->uri . '/wiki'); + + if(!$html) + $this->returnError('Could not load site: ' . $this->uri . '!', 404); + + /* + * Now read content depending on the language (make sure to create one function per language!) + * We build the function name automatically, just make sure you create a private function ending + * with your desired language code, where the language code is upper case! (en -> GetContentsEN). + */ + $function = 'GetContents' . strtoupper($params['language']); + + if(!method_exists($this, $function)) + $this->returnError('A function to get the contents for your langauage is missing (\'' . $function . '\')!', 501); + + /* + * The method takes care of creating all items. + */ + $this->$function($html, $fullArticle); + } + + /** + * Returns true if the language code is part of the parameters list + */ + private function CheckLanguageCode($languageCode){ + $parameter = json_decode($this->parameters[0], true); + $languages = $parameter[0]['values']; + + $language_names = array(); + + foreach($languages as $language) + $language_names[] = $language['value']; + + return in_array($languageCode, $language_names); + } + + /* + * Adds a new item to $items using a generic operation (should work for most (all?) wikis) + */ + private function AddElementGeneric($element, $fullArticle){ + // Clean the bottom of the featured article + $element->find('div', -1)->outertext = ''; + + $item = new \Item(); + $item->uri = $this->uri . $element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p', 0)->find('a', 0)->title; + + if(!$fullArticle) + $item->content = strip_tags(str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext), '


'); + else + $item->content = $this->LoadFullArticle($item->uri); + + $this->items[] = $item; + } + + /** + * Loads the full article from a given URI + */ + private function LoadFullArticle($uri){ + $content_html = $this->file_get_html($uri); + + if(!$content_html) + $this->returnError('Could not load site: ' . $uri . '!', 404); + + $content = $content_html->find('#mw-content-text', 0); + + if(!$content) + $this->returnError('Could not find content in page: ' . $uri . '!', 404); + + // Let's remove a couple of things from the article + $content->find('#toc', 0)->outertext = ''; // 'Contents' table + + foreach($content->find('ol.references') as $reference) // References + $reference->outertext = ''; + + return str_replace('href="/', 'href="' . $this->uri . '/', $content->innertext); + } + + /** + * Implementation for de.wikipedia.org + */ + private function GetContentsDE($html, $fullArticle){ + $element = $html->find('div[id=mf-tfa]', 0); + $this->AddElementGeneric($element, $fullArticle); + } + + /** + * Implementation for fr.wikipedia.org + */ + private function GetContentsFR($html, $fullArticle){ + $element = $html->find('div[id=accueil-lumieresur]', 0); + $this->AddElementGeneric($element, $fullArticle); + } + + /** + * Implementation for en.wikipedia.org + */ + private function GetContentsEN($html, $fullArticle){ + $element = $html->find('div[id=mp-tfa]', 0); + $this->AddElementGeneric($element, $fullArticle); + } + + /** + * Implementation for eo.wikipedia.org + */ + private function GetContentsEO($html, $fullArticle){ + $element = $html->find('div[id=mf-artikolo-de-la-semajno]', 0); + $this->AddElementGeneric($element, $fullArticle); + } +} From d4435e0b13f706cdd3a84b7ccd17e46ad44cc2bd Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 00:07:28 +0200 Subject: [PATCH 2/7] [Wikipedia] Remove other bridges All previous bridges are now completely covered by the new WikipediaBridge! --- bridges/WikipediaDEBridge.php | 48 ----------------------------------- bridges/WikipediaENBridge.php | 44 -------------------------------- bridges/WikipediaEOBridge.php | 44 -------------------------------- bridges/WikipediaFRBridge.php | 46 --------------------------------- 4 files changed, 182 deletions(-) delete mode 100644 bridges/WikipediaDEBridge.php delete mode 100644 bridges/WikipediaENBridge.php delete mode 100644 bridges/WikipediaEOBridge.php delete mode 100644 bridges/WikipediaFRBridge.php diff --git a/bridges/WikipediaDEBridge.php b/bridges/WikipediaDEBridge.php deleted file mode 100644 index 5bb974a..0000000 --- a/bridges/WikipediaDEBridge.php +++ /dev/null @@ -1,48 +0,0 @@ -maintainer = "cnlpete"; - $this->name = "Wikipedia DE Today's Featured Article..."; - $this->uri = "https://de.wikipedia.org/"; - $this->description = "Returns the highlighted en.wikipedia.org article."; - $this->update = "2015-11-04"; - - } - - public function collectData(array $param){ - $html = ''; - $host = 'http://de.wikipedia.org'; - // If you want HTTPS access instead, uncomment the following line: - //$host = 'https://de.wikipedia.org'; - $link = '/wiki/Wikipedia:Hauptseite'; - - $html = $this->file_get_html($host.$link) or $this->returnError('Could not request Wikipedia DE.', 404); - - $element = $html->find('div[id=mf-tfa]', 0); - $element->find('div', -1)->outertext = ''; - - $item = new \Item(); - $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; - $item->title = $element->find('p',0)->find('a',0)->title; - - $html2 = $this->file_get_html($item->uri) or $this->returnError('Could not request Wikipedia DE '.$item->title.'.', 404); - $element2 = $html2->find('div[id=mw-content-text]', 0); - $item->content = str_replace('href="/', 'href="'.$host.'/', $element2->innertext); - - $this->items[] = $item; - } - - public function getName(){ - return 'Wikipedia DE "Today\'s Featured Article"'; - } - - public function getURI(){ - return 'https://de.wikipedia.org/wiki/Wikipedia:Hauptseite'; - } - - public function getCacheDuration(){ - return 3600*8; // 8 hours - } -} diff --git a/bridges/WikipediaENBridge.php b/bridges/WikipediaENBridge.php deleted file mode 100644 index 37f1f57..0000000 --- a/bridges/WikipediaENBridge.php +++ /dev/null @@ -1,44 +0,0 @@ -maintainer = "gsurrel"; - $this->name = "Wikipedia EN 'Today's Featured Article...'"; - $this->uri = "https://en.wikipedia.org/"; - $this->description = "Returns the highlighted en.wikipedia.org article."; - $this->update = "2014-05-25"; - - } - - public function collectData(array $param){ - $html = ''; - $host = 'http://en.wikipedia.org'; - // If you want HTTPS access instead, uncomment the following line: - //$host = 'https://en.wikipedia.org'; - $link = '/wiki/Main_Page'; - - $html = $this->file_get_html($host.$link) or $this->returnError('Could not request Wikipedia EN.', 404); - - $element = $html->find('div[id=mp-tfa]', 0); - // Clean the bottom of the featured article - $element->find('div', -1)->outertext = ''; - $item = new \Item(); - $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; - $item->title = $element->find('p',0)->find('a',0)->title; - $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); - $this->items[] = $item; - } - - public function getName(){ - return 'Wikipedia EN "Today\'s Featued Article"'; - } - - public function getURI(){ - return 'https://en.wikipedia.org/wiki/Main_Page'; - } - - public function getCacheDuration(){ - return 3600*4; // 4 hours - } -} diff --git a/bridges/WikipediaEOBridge.php b/bridges/WikipediaEOBridge.php deleted file mode 100644 index 4ca8864..0000000 --- a/bridges/WikipediaEOBridge.php +++ /dev/null @@ -1,44 +0,0 @@ -maintainer = "gsurrel"; - $this->name = "Wikipedia EO 'Artikolo de la semajno'"; - $this->uri = "https://eo.wikipedia.org/"; - $this->description = "Returns the highlighted eo.wikipedia.org article."; - $this->update = "2014-05-25"; - - } - - public function collectData(array $param){ - $html = ''; - $host = 'http://eo.wikipedia.org'; - // If you want HTTPS access instead, uncomment the following line: - //$host = 'https://eo.wikipedia.org'; - $link = '/wiki/Vikipedio:%C4%88efpa%C4%9Do'; - - $html = $this->file_get_html($host.$link) or $this->returnError('Could not request Wikipedia EO.', 404); - - $element = $html->find('div[id=mf-artikolo-de-la-semajno]', 0); - // Link to article - $link = $element->find('p', 3)->find('a', 0); - $item = new \Item(); - $item->uri = $host.$link->href; - $item->title = $element->find('p',0)->find('i',0)->innertext; - $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); - $this->items[] = $item; - } - - public function getName(){ - return 'Wikipedia EO "Artikolo de la semajno"'; - } - - public function getURI(){ - return 'https://eo.wikipedia.org/wiki/Vikipedio:%C4%88efpa%C4%9Do'; - } - - public function getCacheDuration(){ - return 3600*12; // 12 hours - } -} diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php deleted file mode 100644 index 293d7c9..0000000 --- a/bridges/WikipediaFRBridge.php +++ /dev/null @@ -1,46 +0,0 @@ -maintainer = "gsurrel"; - $this->name = "Wikipedia FR 'Lumière sur...'"; - $this->uri = "https://fr.wikipedia.org/"; - $this->description = "Returns the highlighted fr.wikipedia.org article."; - $this->update = "2016-06-04"; - - } - - public function collectData(array $param){ - $html = ''; - $host = 'http://fr.wikipedia.org'; - // If you want HTTPS access instead, uncomment the following line: - //$host = 'https://fr.wikipedia.org'; - $link = '/wiki/Wikip%C3%A9dia:Accueil_principal'; - - $html = $this->file_get_html($host.$link) or $this->returnError('Could not request Wikipedia FR.', 404); - - $element = $html->find('div[id=mf-lumieresur]', 0); - # Use the "Lire la suite" link to dependably get the title of the article - # usually it's a child of a li.BA element (Bon article) - # occasionally it's a li.AdQ (Article de qualité) - $lirelasuite_link = $element->find('.BA > i > a, .AdQ > i > a', 0); - $item = new \Item(); - $item->uri = $host.$lirelasuite_link->href; - $item->title = $lirelasuite_link->title; - $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); - $this->items[] = $item; - } - - public function getName(){ - return 'Wikipedia FR "Lumière sur..."'; - } - - public function getURI(){ - return 'https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal'; - } - - public function getCacheDuration(){ - return 3600*4; // 4 hours - } -} From 0b923ab76a78c2d660d9521289c19d4506f94d6b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 11:26:45 +0200 Subject: [PATCH 3/7] [Wikipedia] Improve title search algorithm --- bridges/WikipediaBridge.php | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index e348df1..c69b9b1 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -107,9 +107,18 @@ class WikipediaBridge extends BridgeAbstract{ // Clean the bottom of the featured article $element->find('div', -1)->outertext = ''; + // The title and URI of the article is best defined in an anchor containint the string '...' ('full article ...') + $target = $element->find('p/a', 0); // We'll use the first anchor as fallback + foreach($element->find('//a') as $anchor){ + if(strpos($anchor->innertext, '...') !== false){ + $target = $anchor; + break; + } + } + $item = new \Item(); - $item->uri = $this->uri . $element->find('p', 0)->find('a', 0)->href; - $item->title = $element->find('p', 0)->find('a', 0)->title; + $item->uri = $this->uri . $target->href; + $item->title = $target->title; if(!$fullArticle) $item->content = strip_tags(str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext), '


'); From 17c9d8fc3713b0d2c828a2e55113c4c68064f8bb Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 11:29:00 +0200 Subject: [PATCH 4/7] [Wikipedia] Fix typo --- bridges/WikipediaBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index c69b9b1..005a0a9 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -61,7 +61,7 @@ class WikipediaBridge extends BridgeAbstract{ $this->uri = 'https://' . strtolower($params['language']) . '.wikipedia.org'; // While we at it let's also update the name for the feed - $this->name = 'Today\'s featured articles from ' . strtolower($params['language']) . '.wikipedia.org'; + $this->name = 'Today\'s featured article from ' . strtolower($params['language']) . '.wikipedia.org'; // This will automatically send us to the correct main page in any language (try it!) $html = $this->file_get_html($this->uri . '/wiki'); From 04b4dd1db986ec6abee9609af8ac6c5c5c6fa553 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 12:51:09 +0200 Subject: [PATCH 5/7] [Wikipedia] Add support for 'Did you know?' articles --- bridges/WikipediaBridge.php | 155 +++++++++++++++++++++++++++++++----- 1 file changed, 136 insertions(+), 19 deletions(-) diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index 005a0a9..8434c8e 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -1,10 +1,14 @@ maintainer = 'logmanoriginal'; - $this->name = 'Wikipedia bridge for \'Today\'s featured article...\' for many languages'; + $this->name = 'Wikipedia bridge for many languages'; $this->uri = 'https://www.wikipedia.org/'; - $this->description = 'Returns \'Today\'s featured article...\' for a language of your choice'; + $this->description = 'Returns articles for a language of your choice'; $this->update = '2016-08-07'; $this->parameters[] = @@ -35,6 +39,24 @@ class WikipediaBridge extends BridgeAbstract{ } ] }, + { + "name": "Subject", + "identifier": "subject", + "type": "list", + "required": "true", + "title": "What subject are you interested in?", + "exampleValue": "Today\'s featured article", + "values": [ + { + "name": "Today\'s featured article", + "value": "tfa" + }, + { + "name": "Did you know...", + "value": "dyk" + } + ] + }, { "name": "Load full article", "identifier": "fullarticle", @@ -53,6 +75,22 @@ class WikipediaBridge extends BridgeAbstract{ if(!$this->CheckLanguageCode(strtolower($params['language']))) $this->returnError('The language code you provided (\'' . $params['language'] . '\') is not supported!', 400); + if(!isset($params['subject'])) + $this->returnError('You must specify a valid subject via \'&subject=\'!', 400); + + $subject = WIKIPEDIA_SUBJECT_TFA; + switch($params['subject']){ + case 'tfa': + $subject = WIKIPEDIA_SUBJECT_TFA; + break; + case 'dyk': + $subject = WIKIPEDIA_SUBJECT_DYK; + break; + default: + $subject = WIKIPEDIA_SUBJECT_TFA; + break; + } + $fullArticle = false; if(isset($params['fullarticle'])) $fullArticle = $params['fullarticle'] === 'on' ? true : false; @@ -61,7 +99,17 @@ class WikipediaBridge extends BridgeAbstract{ $this->uri = 'https://' . strtolower($params['language']) . '.wikipedia.org'; // While we at it let's also update the name for the feed - $this->name = 'Today\'s featured article from ' . strtolower($params['language']) . '.wikipedia.org'; + switch($subject){ + case WIKIPEDIA_SUBJECT_TFA: + $this->name = 'Today\'s featured article from ' . strtolower($params['language']) . '.wikipedia.org'; + break; + case WIKIPEDIA_SUBJECT_DYK: + $this->name = 'Did you know? - articles from ' . strtolower($params['language']) . '.wikipedia.org'; + break; + default: + $this->name = 'Articles from ' . strtolower($params['language']) . '.wikipedia.org'; + break; + } // This will automatically send us to the correct main page in any language (try it!) $html = $this->file_get_html($this->uri . '/wiki'); @@ -82,7 +130,7 @@ class WikipediaBridge extends BridgeAbstract{ /* * The method takes care of creating all items. */ - $this->$function($html, $fullArticle); + $this->$function($html, $subject, $fullArticle); } /** @@ -100,10 +148,19 @@ class WikipediaBridge extends BridgeAbstract{ return in_array($languageCode, $language_names); } + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function ReplaceURIInHTMLElement($element){ + return str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext); + } + /* * Adds a new item to $items using a generic operation (should work for most (all?) wikis) */ - private function AddElementGeneric($element, $fullArticle){ + private function AddTodaysFeaturedArticleGeneric($element, $fullArticle){ // Clean the bottom of the featured article $element->find('div', -1)->outertext = ''; @@ -121,13 +178,31 @@ class WikipediaBridge extends BridgeAbstract{ $item->title = $target->title; if(!$fullArticle) - $item->content = strip_tags(str_replace('href="/', 'href="' . $this->uri . '/', $element->innertext), '


'); + $item->content = strip_tags($this->ReplaceURIInHTMLElement($element), '


'); else $item->content = $this->LoadFullArticle($item->uri); $this->items[] = $item; } + /* + * Adds a new item to $items using a generic operation (should work for most (all?) wikis) + */ + private function AddDidYouKnowGeneric($element, $fullArticle){ + foreach($element->find('ul', 0)->find('li') as $entry){ + $item = new \Item(); + $item->uri = $this->uri . $entry->find('a', 0)->href; + $item->title = $entry->find('a', 0)->title; + + if(!$fullArticle) + $item->content = $this->ReplaceURIInHTMLElement($entry); + else + $item->content = $this->LoadFullArticle($item->uri); + + $this->items[] = $item; + } + } + /** * Loads the full article from a given URI */ @@ -143,7 +218,9 @@ class WikipediaBridge extends BridgeAbstract{ $this->returnError('Could not find content in page: ' . $uri . '!', 404); // Let's remove a couple of things from the article - $content->find('#toc', 0)->outertext = ''; // 'Contents' table + $table = $content->find('#toc', 0); // Table of contents + if(!$table === false) + $table->outertext = ''; foreach($content->find('ol.references') as $reference) // References $reference->outertext = ''; @@ -154,32 +231,72 @@ class WikipediaBridge extends BridgeAbstract{ /** * Implementation for de.wikipedia.org */ - private function GetContentsDE($html, $fullArticle){ - $element = $html->find('div[id=mf-tfa]', 0); - $this->AddElementGeneric($element, $fullArticle); + private function GetContentsDE($html, $subject, $fullArticle){ + switch($subject){ + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mf-tfa]', 0); + $this->AddTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mf-dyk]', 0); + $this->AddDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } } /** * Implementation for fr.wikipedia.org */ - private function GetContentsFR($html, $fullArticle){ - $element = $html->find('div[id=accueil-lumieresur]', 0); - $this->AddElementGeneric($element, $fullArticle); + private function GetContentsFR($html, $subject, $fullArticle){ + switch($subject){ + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=accueil-lumieresur]', 0); + $this->AddTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=SaviezVous]', 0); + $this->AddDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } } /** * Implementation for en.wikipedia.org */ - private function GetContentsEN($html, $fullArticle){ - $element = $html->find('div[id=mp-tfa]', 0); - $this->AddElementGeneric($element, $fullArticle); + private function GetContentsEN($html, $subject, $fullArticle){ + switch($subject){ + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mp-tfa]', 0); + $this->AddTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mp-dyk]', 0); + $this->AddDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } } /** * Implementation for eo.wikipedia.org */ - private function GetContentsEO($html, $fullArticle){ - $element = $html->find('div[id=mf-artikolo-de-la-semajno]', 0); - $this->AddElementGeneric($element, $fullArticle); + private function GetContentsEO($html, $subject, $fullArticle){ + switch($subject){ + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mf-artikolo-de-la-semajno]', 0); + $this->AddTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mw-content-text]', 0)->find('table', 4)->find('td', 4); + $this->AddDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } } } From 1cfb8c02a07681e3f10459b085c00e6304602f33 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 13:02:10 +0200 Subject: [PATCH 6/7] [Wikipedia] Use entire entry as title for 'Did you know' --- bridges/WikipediaBridge.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index 8434c8e..535b11a 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -191,8 +191,10 @@ class WikipediaBridge extends BridgeAbstract{ private function AddDidYouKnowGeneric($element, $fullArticle){ foreach($element->find('ul', 0)->find('li') as $entry){ $item = new \Item(); + + // We can only use the first anchor, there is no way of finding the 'correct' one if there are multiple $item->uri = $this->uri . $entry->find('a', 0)->href; - $item->title = $entry->find('a', 0)->title; + $item->title = strip_tags($entry->innertext); if(!$fullArticle) $item->content = $this->ReplaceURIInHTMLElement($entry); From 13d42746d79a9d9257609258eec2f05434a574ea Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 7 Aug 2016 13:26:30 +0200 Subject: [PATCH 7/7] [Wikipedia] Fix typo --- bridges/WikipediaBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index 535b11a..5fb48f9 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -1,6 +1,6 @@