Browse Source

[YoutubeBridge] Fix author name parsing that breaks the bridge

The author name is parsed by searching a string within the entire
HTML document:

$author = $html->innertext;
$author = substr($author, strpos($author, '"author=') + 8);
$author = substr($author, 0, strpos($author, '\u0026'));

This solution returns large portions of the HTML document if
the strpos function returns false (not found), which is treated as zero.

This commit replaces the previous implementation with one that searches
for a specific script tag and uses the JSON data inside it.

References #580
logmanoriginal 6 years ago
parent
commit
4f6277b6b5
1 changed file with 13 additions and 3 deletions
  1. 13 3
      bridges/YoutubeBridge.php

+ 13 - 3
bridges/YoutubeBridge.php

@@ -56,9 +56,19 @@ class YoutubeBridge extends BridgeAbstract {
 			return;
 		}
 
-		$author = $html->innertext;
-		$author = substr($author, strpos($author, '"author=') + 8);
-		$author = substr($author, 0, strpos($author, '\u0026'));
+		foreach($html->find('script') as $script){
+			$data = trim($script->innertext);
+
+			if(strpos($data, '{') !== 0)
+				continue; // Wrong script
+
+			$json = json_decode($data);
+
+			if(!isset($json->itemListElement))
+				continue; // Wrong script
+
+			$author = $json->itemListElement[0]->item->name;
+		}
 
 		if(!is_null($html->find('div#watch-description-text', 0)))
 			$desc = $html->find('div#watch-description-text', 0)->innertext;