implement experimental article on-import data filters
This commit is contained in:
parent
bf2611195f
commit
cc85704f3c
4 changed files with 111 additions and 2 deletions
14
classes/filter.php
Normal file
14
classes/filter.php
Normal file
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
/**
 * Pass-through article filter.
 *
 * Keeps the handle it was constructed with and returns every article it is
 * given without modification.
 */
class Filter {

    /**
     * Handle supplied at construction time.
     * NOTE(review): presumably the database link the caller also hands to
     * db_query() -- confirm against the call site.
     */
    protected $link;

    /**
     * Remember the supplied handle on the instance.
     *
     * @param mixed $link value stored as-is in $this->link
     */
    public function __construct($link) {
        $this->link = $link;
    }

    /**
     * Filter a single article.
     *
     * @param array $article article data
     * @return array the very same article data, untouched
     */
    public function filter_article($article) {
        return $article;
    }

}
|
||||
?>
|
47
classes/filter/redditimgur.php
Normal file
47
classes/filter/redditimgur.php
Normal file
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
/**
 * Article filter that inlines directly linked i.imgur.com JPEG images for
 * articles whose link points at a reddit.com subreddit.
 */
class Filter_RedditImgur {

    /**
     * Rewrite the HTML content of a reddit article.
     *
     * For articles whose "link" contains "reddit.com/r/" and whose "content"
     * mentions "i.imgur.com":
     *  - every <a href> pointing at an i.imgur.com ".jpg" URL is replaced by
     *    an <img> element using that URL as its src;
     *  - the parent element of every pre-existing <img src> is removed from
     *    the document (this is how thumbnails are dropped; note that no
     *    actual size check is performed).
     * The content is then replaced by the serialized <body> element.
     *
     * Any other article, or one whose content cannot be parsed, is returned
     * unchanged.
     *
     * @param array $article article data; the "link" and "content" keys are read
     * @return array the article, with "content" possibly rewritten
     */
    function filter_article($article) {

        // Nothing to do when the keys this filter relies on are absent.
        if (!isset($article["link"]) || !isset($article["content"])) {
            return $article;
        }

        if (strpos($article["link"], "reddit.com/r/") === FALSE) {
            return $article;
        }

        if (strpos($article["content"], "i.imgur.com") === FALSE) {
            return $article;
        }

        $doc = new DOMDocument();

        // loadHTML() signals failure through its return value; the document
        // object itself is always truthy, so test the result of the call.
        // Warnings about sloppy markup are suppressed on purpose.
        if (!@$doc->loadHTML($article["content"])) {
            return $article;
        }

        $xpath = new DOMXPath($doc);
        $entries = $xpath->query('(//a[@href]|//img[@src])');

        foreach ($entries as $entry) {

            if ($entry->hasAttribute("href")) {
                $href = $entry->getAttribute("href");

                // Dots are escaped so only a literal "i.imgur.com/" host and a
                // literal ".jpg" suffix qualify as a direct image link.
                if (preg_match("/i\.imgur\.com\/.*?\.jpg/", $href)) {
                    $img = $doc->createElement('img');
                    $img->setAttribute("src", $href);

                    $entry->parentNode->replaceChild($img, $entry);
                }
            }

            // remove tiny thumbnails: drop the element wrapping each
            // pre-existing image, provided it is still attached to a parent
            if ($entry->hasAttribute("src")) {
                $wrapper = $entry->parentNode;

                if ($wrapper && $wrapper->parentNode) {
                    $wrapper->parentNode->removeChild($wrapper);
                }
            }
        }

        $node = $doc->getElementsByTagName('body')->item(0);

        if ($node) {
            $article["content"] = $doc->saveXML($node, LIBXML_NOEMPTYTAG);
        }

        return $article;
    }
}
|
||||
?>
|
|
@ -7,11 +7,19 @@
|
|||
/**
 * Class autoloader.
 *
 * Maps a class name to a lower-cased relative path (underscores become
 * directory separators) and requires the first matching file, looking in
 * ../plugins before ../classes relative to this file's directory. Does
 * nothing when neither location has the file.
 *
 * NOTE(review): __autoload() is deprecated as of PHP 7.2 and removed in
 * PHP 8.0; spl_autoload_register() is the replacement.
 *
 * @param string $class name of the class being loaded
 */
function __autoload($class) {
    $class_file = str_replace("_", "/", strtolower(basename($class)));
    $base_dir = dirname(__FILE__);

    // plugins take precedence over the bundled classes
    foreach (array("plugins", "classes") as $subdir) {
        $file = $base_dir . "/../" . $subdir . "/" . $class_file . ".php";

        if (file_exists($file)) {
            require $file;
            return;
        }
    }
}
|
||||
|
||||
mb_internal_encoding("UTF-8");
|
||||
|
|
|
@ -398,6 +398,23 @@
|
|||
_debug("update_rss_feed: " . count($filters) . " filters loaded.");
|
||||
}
|
||||
|
||||
$filter_plugins = array();
|
||||
|
||||
if (defined('_ARTICLE_FILTER_PLUGINS')) {
|
||||
foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) {
|
||||
$pclass = "filter_" . trim($p);
|
||||
|
||||
if (class_exists($pclass)) {
|
||||
$plugin = new $pclass($link);
|
||||
array_push($filter_plugins, $plugin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded.");
|
||||
}
|
||||
|
||||
if ($use_simplepie) {
|
||||
$iterator = $rss->get_items();
|
||||
} else {
|
||||
|
@ -651,8 +668,6 @@
|
|||
|
||||
$entry_content = db_escape_string($entry_content, false);
|
||||
|
||||
$content_hash = "SHA1:" . sha1(strip_tags($entry_content));
|
||||
|
||||
$entry_title = db_escape_string($entry_title);
|
||||
$entry_link = db_escape_string($entry_link);
|
||||
$entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
|
||||
|
@ -757,6 +772,31 @@
|
|||
_debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
|
||||
}
|
||||
|
||||
// TODO: less memory-hungry implementation
|
||||
if (count($filter_plugins) > 0) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: applying plugin filters...");
|
||||
}
|
||||
|
||||
$article = array("owner_uid" => $owner_uid,
|
||||
"title" => $entry_title,
|
||||
"content" => $entry_content,
|
||||
"link" => $entry_link,
|
||||
"tags" => $entry_tags,
|
||||
"author" => $entry_author);
|
||||
|
||||
foreach ($filter_plugins as $plugin) {
|
||||
$article = $plugin->filter_article($article);
|
||||
}
|
||||
|
||||
$entry_title = $article["title"];
|
||||
$entry_content = $article["content"];
|
||||
$entry_tags = $article["tags"];
|
||||
$entry_author = $article["author"];
|
||||
}
|
||||
|
||||
$content_hash = "SHA1:" . sha1(strip_tags($entry_content));
|
||||
|
||||
db_query($link, "BEGIN");
|
||||
|
||||
if (db_num_rows($result) == 0) {
|
||||
|
|
Loading…
Reference in a new issue