Kaynağa Gözat

microsoft quotes and UTF8 encoding fixed for

Developpez.com
Paul Vayssiere 10 yıl önce
ebeveyn
işleme
2dd8f789fc
1 değiştirilmiş dosya ile 23 ekleme ve 5 silme
  1. 23 5
      bridges/DeveloppezDotComBridge.php

+ 23 - 5
bridges/DeveloppezDotComBridge.php

@@ -19,23 +19,41 @@ class DeveloppezDotComBridge extends BridgeAbstract{
             return $string;
         }
 
+        function convert_smart_quotes($string)//Replaces badly encoded Microsoft Word curly quotes and dash with plain ASCII equivalents; technique from http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php
+        {
+            $search = array(chr(145),
+                            chr(146),
+                            chr(147),
+                            chr(148),
+                            chr(151));
+
+            $replace = array("'",
+                             "'",
+                             '"',
+                             '"',
+                             '-');
+
+            return str_replace($search, $replace, $string);
+        }
+
         function DeveloppezDotComExtractContent($url) {
             $articleHTMLContent = file_get_html($url);
-            $text = $text.$articleHTMLContent->find('div.content', 0)->innertext;
-            $text = strip_tags($text, '<p><b><a><blockquote><img><em><br/><br><ul><li>');
-            return $text;
+            $text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext);
+            $text = utf8_encode($text);
+            return trim($text);
         }
 
         $rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404);
     	$limit = 0;
 
     	foreach($rssFeed->find('item') as $element) {
-            if($limit < 15) {
+            if($limit < 2) {
                 $item = new \Item();
                 $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext);
                 $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext);
                 $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
-                $item->content = DeveloppezDotComExtractContent($item->uri);
+                $content = DeveloppezDotComExtractContent($item->uri);
+                $item->content = strlen($content) ? $content : $element->description;//If no content was extracted (e.g. the item is a tutorial), we just keep the original description
                 $this->items[] = $item;
                 $limit++;
             }