Add af_* plugin workaround for already-processed articles

This commit is contained in:
Andrew Dolgov 2013-02-23 12:07:46 +04:00
parent 389859b5d4
commit e02555c1b8
6 changed files with 128 additions and 108 deletions

View file

@ -516,14 +516,20 @@
_debug("update_rss_feed: applying plugin filters..");
}
// TODO: unify with the id check below
$result = db_query($link, "SELECT plugin_data FROM ttrss_entries
WHERE guid = '".db_escape_string($entry_guid)."'");
// FIXME: not sure that filtering by owner_uid is a good idea here; a base entry may exist without a matching user entry (?)
$result = db_query($link, "SELECT plugin_data,title,content,link,tag_cache,author FROM ttrss_entries, ttrss_user_entries
WHERE ref_id = id AND guid = '".db_escape_string($entry_guid)."' AND owner_uid = $owner_uid");
if (db_num_rows($result) != 0) {
$entry_plugin_data = db_fetch_result($result, 0, "plugin_data");
$stored_article = array("title" => db_fetch_result($result, 0, "title"),
"content" => db_fetch_result($result, 0, "content"),
"link" => db_fetch_result($result, 0, "link"),
"tags" => explode(",", db_fetch_result($result, 0, "tag_cache")),
"author" => db_fetch_result($result, 0, "author"));
} else {
$entry_plugin_data = "";
$stored_article = array();
}
$article = array("owner_uid" => $owner_uid, // read only
@ -533,7 +539,8 @@
"link" => $entry_link,
"tags" => $entry_tags,
"plugin_data" => $entry_plugin_data,
"author" => $entry_author);
"author" => $entry_author,
"stored" => $stored_article);
foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $plugin) {
$article = $plugin->hook_article_filter($article);

View file

@ -20,35 +20,37 @@ class Af_Buttersafe extends Plugin {
function hook_article_filter($article) {
$owner_uid = $article["owner_uid"];
if (strpos($article["guid"], "buttersafe.com") !== FALSE &&
strpos($article["plugin_data"], "buttersafe,$owner_uid:") === FALSE) {
if (strpos($article["guid"], "buttersafe.com") !== FALSE) {
if (strpos($article["plugin_data"], "buttersafe,$owner_uid:") === FALSE) {
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false;
$basenode = false;
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])');
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])');
$matches = array();
$matches = array();
foreach ($entries as $entry) {
foreach ($entries as $entry) {
if (preg_match("/(http:\/\/buttersafe.com\/comics\/\d{4}.*)/i", $entry->getAttribute("src"), $matches)) {
if (preg_match("/(http:\/\/buttersafe.com\/comics\/\d{4}.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry;
break;
$basenode = $entry;
break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "buttersafe,$owner_uid:" . $article["plugin_data"];
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} else if (isset($article["stored"]["content"])) {
$article["content"] = $article["stored"]["content"];
}
$article["plugin_data"] = "buttersafe,$owner_uid:" . $article["plugin_data"];
}
return $article;

View file

@ -20,35 +20,37 @@ class Af_Explosm extends Plugin {
function hook_article_filter($article) {
$owner_uid = $article["owner_uid"];
if (strpos($article["link"], "explosm.net/comics") !== FALSE &&
strpos($article["plugin_data"], "explosm,$owner_uid:") === FALSE) {
if (strpos($article["link"], "explosm.net/comics") !== FALSE) {
if (strpos($article["plugin_data"], "explosm,$owner_uid:") === FALSE) {
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false;
$basenode = false;
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array();
$matches = array();
foreach ($entries as $entry) {
foreach ($entries as $entry) {
if (preg_match("/(http:\/\/.*\/db\/files\/Comics\/.*)/i", $entry->getAttribute("src"), $matches)) {
if (preg_match("/(http:\/\/.*\/db\/files\/Comics\/.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry;
break;
$basenode = $entry;
break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "explosm,$owner_uid:" . $article["plugin_data"];
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} else if (isset($article["stored"]["content"])) {
$article["content"] = $article["stored"]["content"];
}
$article["plugin_data"] = "explosm,$owner_uid:" . $article["plugin_data"];
}
return $article;

View file

@ -20,34 +20,37 @@ class Af_GoComics extends Plugin {
function hook_article_filter($article) {
$owner_uid = $article["owner_uid"];
if (strpos($article["guid"], "gocomics.com") !== FALSE && strpos($article["plugin_data"], "gocomics,$owner_uid:") === FALSE) {
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
if (strpos($article["guid"], "gocomics.com") !== FALSE) {
if (strpos($article["plugin_data"], "gocomics,$owner_uid:") === FALSE) {
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false;
$basenode = false;
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array();
$matches = array();
foreach ($entries as $entry) {
foreach ($entries as $entry) {
if (preg_match("/(http:\/\/assets.amuniversal.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
if (preg_match("/(http:\/\/assets.amuniversal.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
$entry->setAttribute("src", $matches[0]);
$basenode = $entry;
break;
$entry->setAttribute("src", $matches[0]);
$basenode = $entry;
break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "gocomics,$owner_uid:" . $article["plugin_data"];
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} else if (isset($article["stored"]["content"])) {
$article["content"] = $article["stored"]["content"];
}
$article["plugin_data"] = "gocomics,$owner_uid:" . $article["plugin_data"];
}
return $article;

View file

@ -20,35 +20,37 @@ class Af_PennyArcade extends Plugin {
function hook_article_filter($article) {
$owner_uid = $article["owner_uid"];
if (strpos($article["link"], "penny-arcade.com") !== FALSE && strpos($article["title"], "Comic:") !== FALSE &&
strpos($article["plugin_data"], "pennyarcade,$owner_uid:") === FALSE) {
if (strpos($article["link"], "penny-arcade.com") !== FALSE && strpos($article["title"], "Comic:") !== FALSE) {
if (strpos($article["plugin_data"], "pennyarcade,$owner_uid:") === FALSE) {
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false;
$basenode = false;
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
if ($doc) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array();
$matches = array();
foreach ($entries as $entry) {
foreach ($entries as $entry) {
if (preg_match("/(http:\/\/art.penny-arcade.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
if (preg_match("/(http:\/\/art.penny-arcade.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry;
break;
$basenode = $entry;
break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "pennyarcade,$owner_uid:" . $article["plugin_data"];
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} else if (isset($article["stored"]["content"])) {
$article["content"] = $article["stored"]["content"];
}
$article["plugin_data"] = "pennyarcade,$owner_uid:" . $article["plugin_data"];
}
return $article;

View file

@ -25,46 +25,50 @@ class Af_Unburn extends Plugin {
if ((strpos($article["link"], "feedproxy.google.com") !== FALSE ||
strpos($article["link"], "/~r/") !== FALSE ||
strpos($article["link"], "feedsportal.com") !== FALSE) &&
strpos($article["plugin_data"], "unburn,$owner_uid:") === FALSE) {
strpos($article["link"], "feedsportal.com") !== FALSE)) {
$ch = curl_init($article["link"]);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
if (strpos($article["plugin_data"], "unburn,$owner_uid:") === FALSE) {
$contents = @curl_exec($ch);
$ch = curl_init($article["link"]);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
$real_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$contents = @curl_exec($ch);
curl_close($ch);
$real_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
if ($real_url) {
/* remove the rest of it */
curl_close($ch);
$query = parse_url($real_url, PHP_URL_QUERY);
if ($real_url) {
/* remove the rest of it */
if ($query && strpos($query, "utm_source") !== FALSE) {
$args = array();
parse_str($query, $args);
$query = parse_url($real_url, PHP_URL_QUERY);
foreach (array("utm_source", "utm_medium", "utm_campaign") as $param) {
if (isset($args[$param])) unset($args[$param]);
if ($query && strpos($query, "utm_source") !== FALSE) {
$args = array();
parse_str($query, $args);
foreach (array("utm_source", "utm_medium", "utm_campaign") as $param) {
if (isset($args[$param])) unset($args[$param]);
}
$new_query = http_build_query($args);
if ($new_query != $query) {
$real_url = str_replace("?$query", "?$new_query", $real_url);
}
}
$new_query = http_build_query($args);
$real_url = preg_replace("/\?$/", "", $real_url);
if ($new_query != $query) {
$real_url = str_replace("?$query", "?$new_query", $real_url);
}
$article["plugin_data"] = "unburn,$owner_uid:" . $article["plugin_data"];
$article["link"] = $real_url;
}
$real_url = preg_replace("/\?$/", "", $real_url);
$article["plugin_data"] = "unburn,$owner_uid:" . $article["plugin_data"];
$article["link"] = $real_url;
} else if (isset($article["stored"]["link"])) {
$article["link"] = $article["stored"]["link"];
}
}