Explorar o código

af_redditimgur updates:
1. remove special handling for imgur albums/pages because it's full-on cancerous json garbage now with no scrapeable layout
2. skip all urls leading to reddit.com while trying to embed content
3. implement generic last resort embedding based on meta og:image element

Andrew Dolgov hai 6 anos
pai
achega
b70c3daffe
Modificáronse 1 ficheiros con 43 adicións e 6 borrados
  1. 43 6
      plugins/af_redditimgur/init.php

+ 43 - 6
plugins/af_redditimgur/init.php

@@ -84,15 +84,16 @@ class Af_RedditImgur extends Plugin {
 		$img_entries = $xpath->query("(//img[@src])");
 		$img_entries = $xpath->query("(//img[@src])");
 
 
 		$found = false;
 		$found = false;
+		//$debug = 1;
 
 
 		foreach ($entries as $entry) {
 		foreach ($entries as $entry) {
-			if ($entry->hasAttribute("href")) {
+			if ($entry->hasAttribute("href") && strpos($entry->getAttribute("href"), "reddit.com") === FALSE) {
 
 
 				_debug("processing href: " . $entry->getAttribute("href"), $debug);
 				_debug("processing href: " . $entry->getAttribute("href"), $debug);
 
 
 				$matches = array();
 				$matches = array();
 
 
-				if (preg_match("/^https?:\/\/twitter.com\/(.*?)\/status\/(.*)/", $entry->getAttribute("href"), $matches)) {
+				if (!$found && preg_match("/^https?:\/\/twitter.com\/(.*?)\/status\/(.*)/", $entry->getAttribute("href"), $matches)) {
 					_debug("handling as twitter: " . $matches[1] . " " . $matches[2], $debug);
 					_debug("handling as twitter: " . $matches[1] . " " . $matches[2], $debug);
 
 
 					$oembed_result = fetch_file_contents("https://publish.twitter.com/oembed?url=" . urlencode($entry->getAttribute("href")));
 					$oembed_result = fetch_file_contents("https://publish.twitter.com/oembed?url=" . urlencode($entry->getAttribute("href")));
@@ -285,7 +286,7 @@ class Af_RedditImgur extends Plugin {
 
 
 				// linked albums & pages
 				// linked albums & pages
 
 
-				if (!$found && preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches) ||
+				/*if (!$found && preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches) ||
 					preg_match("/^https?:\/\/(m\.)?imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) {
 					preg_match("/^https?:\/\/(m\.)?imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) {
 
 
 					_debug("Handling as an imgur page/album/gallery", $debug);
 					_debug("Handling as an imgur page/album/gallery", $debug);
@@ -339,7 +340,7 @@ class Af_RedditImgur extends Plugin {
 							if ($debug) print_r($urls);
 							if ($debug) print_r($urls);
 						}
 						}
 					}
 					}
-				}
+				} */
 
 
 				// wtf is this even
 				// wtf is this even
 				if (!$found && preg_match("/^https?:\/\/gyazo\.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) {
 				if (!$found && preg_match("/^https?:\/\/gyazo\.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) {
@@ -356,6 +357,41 @@ class Af_RedditImgur extends Plugin {
 
 
 					$found = true;
 					$found = true;
 				}
 				}
+
+				// let's try meta properties
+				if (!$found) {
+					_debug("looking for meta og:image", $debug);
+
+					$content = fetch_file_contents(["url" => $entry->getAttribute("href"),
+						"http_accept" => "text/*"]);
+
+					if ($content) {
+						$cdoc = new DOMDocument();
+
+						if (@$cdoc->loadHTML($content)) {
+							$cxpath = new DOMXPath($cdoc);
+
+							$og_image = $cxpath->query("//meta[@property='og:image']")->item(0);
+
+							if ($og_image) {
+
+								$og_src = $og_image->getAttribute("content");
+
+								if ($og_src) {
+									$img = $doc->createElement('img');
+									$img->setAttribute("src", $og_src);
+
+									$br = $doc->createElement('br');
+									$entry->parentNode->insertBefore($img, $entry);
+									$entry->parentNode->insertBefore($br, $entry);
+
+									$found = true;
+								}
+							}
+						}
+					}
+				}
+
 			}
 			}
 
 
 			// remove tiny thumbnails
 			// remove tiny thumbnails
@@ -534,8 +570,9 @@ class Af_RedditImgur extends Plugin {
 
 
 				if ($content_type && strpos($content_type, "text/html") !== FALSE) {
 				if ($content_type && strpos($content_type, "text/html") !== FALSE) {
 
 
-					$tmp = fetch_file_contents(array("url" => $url,
-						"useragent" => $useragent_compat));
+					$tmp = fetch_file_contents(["url" => $url,
+						"useragent" => $useragent_compat,
+						"http_accept" => "text/html"]);
 
 
 					if ($debug) _debug("tmplen: " . mb_strlen($tmp));
 					if ($debug) _debug("tmplen: " . mb_strlen($tmp));