rssfuncs.php 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413
  1. <?php
  // Daemon tuning defaults.
  //
  // Each constant is only defined when nothing loaded earlier has defined it
  // already: define() on an existing constant raises an "already defined"
  // diagnostic and keeps the old value anyway, so the guard yields the same
  // values without the noise.

  // Number of days; update_daemon_common() only updates feeds whose owner has
  // logged in within this window. A value <= 0 disables that check.
  if (!defined('DAEMON_UPDATE_LOGIN_LIMIT')) {
      define('DAEMON_UPDATE_LOGIN_LIMIT', 30);
  }

  // Default maximum number of feeds update_daemon_common() handles per call.
  if (!defined('DAEMON_FEED_LIMIT')) {
      define('DAEMON_FEED_LIMIT', 100);
  }

  // NOTE(review): presumably the pause, in seconds, between daemon runs; it is
  // not referenced in the visible part of this file -- confirm against callers.
  if (!defined('DAEMON_SLEEP_INTERVAL')) {
      define('DAEMON_SLEEP_INTERVAL', 120);
  }
  /**
   * Rebuild the ttrss_feedbrowser_cache table from the current subscriptions.
   *
   * Candidate feeds are the (at most) 1000 most-subscribed feed_url / site_url /
   * title groups in ttrss_feeds, excluding every feed URL for which at least one
   * subscription is private, carries auth_login/auth_pass, or matches the
   * pattern '%:%@%/%'. The cache is emptied and refilled inside one
   * BEGIN/COMMIT pair, keeping only the first row seen for each feed URL.
   *
   * @param mixed $link Database link
   * @return integer Number of rows inserted into the cache.
   */
  function update_feedbrowser_cache($link) {
      $candidates = db_query($link, "SELECT feed_url, site_url, title, COUNT(id) AS subscribers
          FROM ttrss_feeds WHERE (SELECT COUNT(id) = 0 FROM ttrss_feeds AS tf
              WHERE tf.feed_url = ttrss_feeds.feed_url
              AND (private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%'))
          GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

      db_query($link, "BEGIN");
      db_query($link, "DELETE FROM ttrss_feedbrowser_cache");

      $inserted = 0;

      while ($row = db_fetch_assoc($candidates)) {
          $subscribers = db_escape_string($row["subscribers"]);
          $feed_url = db_escape_string($row["feed_url"]);
          $title = db_escape_string($row["title"]);
          $site_url = db_escape_string($row["site_url"]);

          // The candidate query groups by title and site URL as well, so the
          // same feed URL can show up more than once; keep only the first hit.
          $existing = db_query($link, "SELECT subscribers FROM
              ttrss_feedbrowser_cache WHERE feed_url = '$feed_url'");

          if (db_num_rows($existing) != 0) {
              continue;
          }

          db_query($link, "INSERT INTO ttrss_feedbrowser_cache
              (feed_url, site_url, title, subscribers) VALUES ('$feed_url',
              '$site_url', '$title', '$subscribers')");

          $inserted++;
      }

      db_query($link, "COMMIT");

      return $inserted;
  }
  /**
   * Update a batch of feeds.
   *
   * Used by the update daemons to refresh up to $limit feeds per call. A feed is
   * selected only when it is due for an update (its own update_interval, or the
   * owner's DEFAULT_UPDATE_INTERVAL preference when that is 0, has elapsed, or
   * it has never been updated) and when no update of it was started within the
   * last five minutes. Unless SINGLE_USER_MODE is set or
   * DAEMON_UPDATE_LOGIN_LIMIT is <= 0, feeds of users who have not logged in
   * within DAEMON_UPDATE_LOGIN_LIMIT days are left out.
   *
   * After the batch has been processed, send_headlines_digests() is called.
   *
   * @param mixed $link Database link
   * @param integer $limit Maximum number of feeds in update batch. Defaults to DAEMON_FEED_LIMIT; a falsy value removes the limit.
   * @param boolean $from_http Set to true when calling from HTTP. Not read by this function body; kept so existing callers keep working.
   * @param boolean $debug Set to false to disable debug output. Defaults to true.
   * @return void
   */
  function update_daemon_common($link, $limit = DAEMON_FEED_LIMIT, $from_http = false, $debug = true) {
      // define() on an existing constant raises an "already defined"
      // diagnostic, so guard it: this function can run more than once in the
      // same process.
      if (!defined('PREFS_NO_CACHE')) {
          define('PREFS_NO_CACHE', true);
      }

      // Only update feeds of users who have logged in recently.
      if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
          if (DB_TYPE == "pgsql") {
              $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
          } else {
              $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
          }
      } else {
          $login_thresh_qpart = "";
      }

      // Test if the feed needs an update (update interval exceeded).
      // update_interval = 0 means "use the owner's DEFAULT_UPDATE_INTERVAL".
      if (DB_TYPE == "pgsql") {
          $update_limit_qpart = "AND ((
              ttrss_feeds.update_interval = 0
              AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
          ) OR (
              ttrss_feeds.update_interval > 0
              AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
          ) OR ttrss_feeds.last_updated IS NULL
          OR last_updated = '1970-01-01 00:00:00')";
      } else {
          $update_limit_qpart = "AND ((
              ttrss_feeds.update_interval = 0
              AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
          ) OR (
              ttrss_feeds.update_interval > 0
              AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
          ) OR ttrss_feeds.last_updated IS NULL
          OR last_updated = '1970-01-01 00:00:00')";
      }

      // Test if the feed is currently being updated by another process.
      if (DB_TYPE == "pgsql") {
          $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '5 minutes')";
      } else {
          $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 5 MINUTE))";
      }

      // Optional cap on the number of feeds in this batch.
      $query_limit = "";
      if ($limit) $query_limit = sprintf("LIMIT %d", $limit);

      $random_qpart = sql_random_function();

      // Search for feeds needing an update.
      $result = db_query($link, "SELECT ttrss_feeds.feed_url,ttrss_feeds.id, ttrss_feeds.owner_uid,
              ".SUBSTRING_FOR_DATE."(ttrss_feeds.last_updated,1,19) AS last_updated,
              ttrss_feeds.update_interval
          FROM
              ttrss_feeds, ttrss_users, ttrss_user_prefs
          WHERE
              ttrss_feeds.owner_uid = ttrss_users.id
              AND ttrss_users.id = ttrss_user_prefs.owner_uid
              AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
              $login_thresh_qpart $update_limit_qpart
              $updstart_thresh_qpart
          ORDER BY $random_qpart $query_limit");

      if ($debug) _debug(sprintf("Scheduled %d feeds to update...\n", db_num_rows($result)));

      // Collect the whole batch first, keyed by feed id, to minimize the risk
      // of double feed updates.
      $feeds_to_update = array();
      while ($line = db_fetch_assoc($result)) {
          $feeds_to_update[$line['id']] = $line;
      }

      // Stamp last_update_started for the whole batch before downloading
      // anything, so the stamp is not delayed by feed downloads and another
      // process will not pick the same feeds.
      $feed_ids = array_keys($feeds_to_update);
      if ($feed_ids) {
          db_query($link, sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
              WHERE id IN (%s)", implode(',', $feed_ids)));
      }

      expire_cached_files($debug);

      // Call the feed update function for each feed in the batch.
      while ($line = array_pop($feeds_to_update)) {
          if ($debug) _debug("Feed: " . $line["feed_url"] . ", " . $line["last_updated"]);

          update_rss_feed($link, $line["id"], true);

          sleep(1); // prevent flood (FIXME make this an option?)
      }

      // Send feed digests by email if needed.
      send_headlines_digests($link, $debug);
  } // function update_daemon_common
  /**
   * Fetch a feed through an OAuth-signed request and parse it with MagpieRSS.
   *
   * The user's token is read from ttrss_users.twitter_oauth (stored as JSON).
   * On failure the reason is written to last_error on the matching
   * ttrss_feeds row(s).
   *
   * @param mixed $link Database link
   * @param string $url Feed URL to request; its query string is passed on as request parameters.
   * @param integer $owner_uid Id of the user whose OAuth token is used.
   * @return MagpieRSS|false Parsed feed when the request returns HTTP 200, false otherwise.
   */
  function fetch_twitter_rss($link, $url, $owner_uid) {
      require_once 'lib/tmhoauth/tmhOAuth.php';
      require_once "lib/magpierss/rss_fetch.inc";
      require_once 'lib/magpierss/rss_utils.inc';

      // The id is interpolated into SQL below; force it to an integer.
      $owner_uid = (int) $owner_uid;

      $result = db_query($link, "SELECT twitter_oauth FROM ttrss_users
          WHERE id = $owner_uid");

      $access_token = json_decode(db_fetch_result($result, 0, 'twitter_oauth'), true);
      $url_escaped = db_escape_string($url);

      if (!$access_token) {
          db_query($link, "UPDATE ttrss_feeds
              SET last_error = 'OAuth information not found.'
              WHERE feed_url = '$url_escaped' AND owner_uid = $owner_uid");

          return false;
      }

      $tmhOAuth = new tmhOAuth(array(
          'consumer_key' => CONSUMER_KEY,
          'consumer_secret' => CONSUMER_SECRET,
          'user_token' => $access_token['oauth_token'],
          'user_secret' => $access_token['oauth_token_secret'],
      ));

      $code = $tmhOAuth->request('GET', $url,
          convertUrlQuery(parse_url($url, PHP_URL_QUERY)));

      if ($code == 200) {
          $content = $tmhOAuth->response['response'];

          // Guarded so that a repeat call in the same process does not trigger
          // an "already defined" diagnostic.
          if (!defined('MAGPIE_CACHE_ON')) {
              define('MAGPIE_CACHE_ON', false);
          }

          $rss = new MagpieRSS($content, MAGPIE_OUTPUT_ENCODING,
              MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

          return $rss;
      }

      // %d keeps the status code numeric inside the SQL literal.
      db_query($link, sprintf("UPDATE ttrss_feeds
          SET last_error = 'OAuth authorization failed (%d).'
          WHERE feed_url = '%s' AND owner_uid = %d", $code, $url_escaped, $owner_uid));

      // Previously this path fell off the end and returned null; return false
      // explicitly so both failure paths agree.
      return false;
  }
  159. function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
  160. $override_url = false) {
  161. global $plugins;
  162. require_once "lib/simplepie/simplepie.inc";
  163. require_once "lib/magpierss/rss_fetch.inc";
  164. require_once 'lib/magpierss/rss_utils.inc';
  165. $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
  166. if (!$_REQUEST["daemon"] && !$ignore_daemon) {
  167. return false;
  168. }
  169. if ($debug_enabled) {
  170. _debug("update_rss_feed: start");
  171. }
  172. if (!$ignore_daemon) {
  173. if (DB_TYPE == "pgsql") {
  174. $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
  175. } else {
  176. $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
  177. }
  178. $result = db_query($link, "SELECT id,update_interval,auth_login,
  179. auth_pass,cache_images,update_method,last_updated
  180. FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
  181. } else {
  182. $result = db_query($link, "SELECT id,update_interval,auth_login,
  183. feed_url,auth_pass,cache_images,update_method,last_updated,
  184. mark_unread_on_update, owner_uid, update_on_checksum_change,
  185. pubsub_state
  186. FROM ttrss_feeds WHERE id = '$feed'");
  187. }
  188. if (db_num_rows($result) == 0) {
  189. if ($debug_enabled) {
  190. _debug("update_rss_feed: feed $feed NOT FOUND/SKIPPED");
  191. }
  192. return false;
  193. }
  194. $update_method = db_fetch_result($result, 0, "update_method");
  195. $last_updated = db_fetch_result($result, 0, "last_updated");
  196. $owner_uid = db_fetch_result($result, 0, "owner_uid");
  197. $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result,
  198. 0, "mark_unread_on_update"));
  199. $update_on_checksum_change = sql_bool_to_bool(db_fetch_result($result,
  200. 0, "update_on_checksum_change"));
  201. $pubsub_state = db_fetch_result($result, 0, "pubsub_state");
  202. db_query($link, "UPDATE ttrss_feeds SET last_update_started = NOW()
  203. WHERE id = '$feed'");
  204. $auth_login = db_fetch_result($result, 0, "auth_login");
  205. $auth_pass = db_fetch_result($result, 0, "auth_pass");
  206. if ($update_method == 0)
  207. $update_method = DEFAULT_UPDATE_METHOD + 1;
  208. // 1 - Magpie
  209. // 2 - SimplePie
  210. // 3 - Twitter OAuth
  211. if ($update_method == 2)
  212. $use_simplepie = true;
  213. else
  214. $use_simplepie = false;
  215. if ($debug_enabled) {
  216. _debug("update method: $update_method (feed setting: $update_method) (use simplepie: $use_simplepie)\n");
  217. }
  218. if ($update_method == 1) {
  219. $auth_login = urlencode($auth_login);
  220. $auth_pass = urlencode($auth_pass);
  221. }
  222. $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
  223. $fetch_url = db_fetch_result($result, 0, "feed_url");
  224. $feed = db_escape_string($feed);
  225. if ($auth_login && $auth_pass ){
  226. $url_parts = array();
  227. preg_match("/(^[^:]*):\/\/(.*)/", $fetch_url, $url_parts);
  228. if ($url_parts[1] && $url_parts[2]) {
  229. $fetch_url = $url_parts[1] . "://$auth_login:$auth_pass@" . $url_parts[2];
  230. }
  231. }
  232. if ($override_url)
  233. $fetch_url = $override_url;
  234. if ($debug_enabled) {
  235. _debug("update_rss_feed: fetching [$fetch_url]...");
  236. }
  237. // Ignore cache if new feed or manual update.
  238. $cache_age = (is_null($last_updated) || $last_updated == '1970-01-01 00:00:00') ?
  239. -1 : get_feed_update_interval($link, $feed) * 60;
  240. if ($update_method == 3) {
  241. $rss = fetch_twitter_rss($link, $fetch_url, $owner_uid);
  242. } else if ($update_method == 1) {
  243. define('MAGPIE_CACHE_AGE', $cache_age);
  244. define('MAGPIE_CACHE_ON', !$no_cache);
  245. define('MAGPIE_FETCH_TIME_OUT', 60);
  246. define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
  247. $rss = @fetch_rss($fetch_url);
  248. } else {
  249. $simplepie_cache_dir = CACHE_DIR . "/simplepie";
  250. if (!is_dir($simplepie_cache_dir)) {
  251. mkdir($simplepie_cache_dir);
  252. }
  253. $rss = new SimplePie();
  254. $rss->set_useragent(SELF_USER_AGENT);
  255. # $rss->set_timeout(10);
  256. $rss->set_feed_url($fetch_url);
  257. $rss->set_output_encoding('UTF-8');
  258. //$rss->force_feed(true);
  259. if ($debug_enabled) {
  260. _debug("feed update interval (sec): " .
  261. get_feed_update_interval($link, $feed)*60);
  262. }
  263. $rss->enable_cache(!$no_cache);
  264. if (!$no_cache) {
  265. $rss->set_cache_location($simplepie_cache_dir);
  266. $rss->set_cache_duration($cache_age);
  267. }
  268. $rss->init();
  269. }
  270. // print_r($rss);
  271. if ($debug_enabled) {
  272. _debug("update_rss_feed: fetch done, parsing...");
  273. }
  274. $feed = db_escape_string($feed);
  275. if ($update_method == 2) {
  276. $fetch_ok = !$rss->error();
  277. } else {
  278. $fetch_ok = !!$rss;
  279. }
  280. if ($fetch_ok) {
  281. if ($debug_enabled) {
  282. _debug("update_rss_feed: processing feed data...");
  283. }
  284. // db_query($link, "BEGIN");
  285. if (DB_TYPE == "pgsql") {
  286. $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
  287. } else {
  288. $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
  289. }
  290. $result = db_query($link, "SELECT title,icon_url,site_url,owner_uid,
  291. (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
  292. favicon_needs_check
  293. FROM ttrss_feeds WHERE id = '$feed'");
  294. $registered_title = db_fetch_result($result, 0, "title");
  295. $orig_icon_url = db_fetch_result($result, 0, "icon_url");
  296. $orig_site_url = db_fetch_result($result, 0, "site_url");
  297. $favicon_needs_check = sql_bool_to_bool(db_fetch_result($result, 0,
  298. "favicon_needs_check"));
  299. $owner_uid = db_fetch_result($result, 0, "owner_uid");
  300. if ($use_simplepie) {
  301. $site_url = db_escape_string(trim($rss->get_link()));
  302. } else {
  303. $site_url = db_escape_string(trim($rss->channel["link"]));
  304. }
  305. // weird, weird Magpie
  306. if (!$use_simplepie) {
  307. if (!$site_url) $site_url = db_escape_string($rss->channel["link_"]);
  308. }
  309. $site_url = rewrite_relative_url($fetch_url, $site_url);
  310. $site_url = substr($site_url, 0, 250);
  311. if ($debug_enabled) {
  312. _debug("update_rss_feed: checking favicon...");
  313. }
  314. if ($favicon_needs_check) {
  315. check_feed_favicon($site_url, $feed, $link);
  316. db_query($link, "UPDATE ttrss_feeds SET favicon_last_checked = NOW()
  317. WHERE id = '$feed'");
  318. }
  319. if (!$registered_title || $registered_title == "[Unknown]") {
  320. if ($use_simplepie) {
  321. $feed_title = db_escape_string($rss->get_title());
  322. } else {
  323. $feed_title = db_escape_string($rss->channel["title"]);
  324. }
  325. if ($debug_enabled) {
  326. _debug("update_rss_feed: registering title: $feed_title");
  327. }
  328. db_query($link, "UPDATE ttrss_feeds SET
  329. title = '$feed_title' WHERE id = '$feed'");
  330. }
  331. if ($site_url && $orig_site_url != $site_url) {
  332. db_query($link, "UPDATE ttrss_feeds SET
  333. site_url = '$site_url' WHERE id = '$feed'");
  334. }
  335. // print "I: " . $rss->channel["image"]["url"];
  336. if (!$use_simplepie) {
  337. $icon_url = db_escape_string(trim($rss->image["url"]));
  338. } else {
  339. $icon_url = db_escape_string(trim($rss->get_image_url()));
  340. }
  341. $icon_url = rewrite_relative_url($fetch_url, $icon_url);
  342. $icon_url = substr($icon_url, 0, 250);
  343. if ($icon_url && $orig_icon_url != $icon_url) {
  344. db_query($link, "UPDATE ttrss_feeds SET icon_url = '$icon_url' WHERE id = '$feed'");
  345. }
  346. if ($debug_enabled) {
  347. _debug("update_rss_feed: loading filters...");
  348. }
  349. $filters = load_filters($link, $feed, $owner_uid);
  350. // if ($debug_enabled) {
  351. // print_r($filters);
  352. // }
  353. if ($use_simplepie) {
  354. $iterator = $rss->get_items();
  355. } else {
  356. $iterator = $rss->items;
  357. if (!$iterator || !is_array($iterator)) $iterator = $rss->entries;
  358. if (!$iterator || !is_array($iterator)) $iterator = $rss;
  359. }
  360. if (!is_array($iterator)) {
  361. /* db_query($link, "UPDATE ttrss_feeds
  362. SET last_error = 'Parse error: can\'t find any articles.'
  363. WHERE id = '$feed'"); */
  364. // clear any errors and mark feed as updated if fetched okay
  365. // even if it's blank
  366. if ($debug_enabled) {
  367. _debug("update_rss_feed: entry iterator is not an array, no articles?");
  368. }
  369. db_query($link, "UPDATE ttrss_feeds
  370. SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
  371. return; // no articles
  372. }
  373. if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) {
  374. if ($debug_enabled) _debug("update_rss_feed: checking for PUSH hub...");
  375. $feed_hub_url = false;
  376. if ($use_simplepie) {
  377. $links = $rss->get_links('hub');
  378. if ($links && is_array($links)) {
  379. foreach ($links as $l) {
  380. $feed_hub_url = $l;
  381. break;
  382. }
  383. }
  384. } else {
  385. $atom = $rss->channel['atom'];
  386. if ($atom) {
  387. if ($atom['link@rel'] == 'hub') {
  388. $feed_hub_url = $atom['link@href'];
  389. }
  390. if (!$feed_hub_url && $atom['link#'] > 1) {
  391. for ($i = 2; $i <= $atom['link#']; $i++) {
  392. if ($atom["link#$i@rel"] == 'hub') {
  393. $feed_hub_url = $atom["link#$i@href"];
  394. break;
  395. }
  396. }
  397. }
  398. } else {
  399. $feed_hub_url = $rss->channel['link_hub'];
  400. }
  401. }
  402. if ($debug_enabled) _debug("update_rss_feed: feed hub url: $feed_hub_url");
  403. if ($feed_hub_url && function_exists('curl_init') &&
  404. !ini_get("open_basedir")) {
  405. require_once 'lib/pubsubhubbub/subscriber.php';
  406. $callback_url = get_self_url_prefix() .
  407. "/public.php?op=pubsub&id=$feed";
  408. $s = new Subscriber($feed_hub_url, $callback_url);
  409. $rc = $s->subscribe($fetch_url);
  410. if ($debug_enabled)
  411. _debug("update_rss_feed: feed hub url found, subscribe request sent.");
  412. db_query($link, "UPDATE ttrss_feeds SET pubsub_state = 1
  413. WHERE id = '$feed'");
  414. }
  415. }
  416. if ($debug_enabled) {
  417. _debug("update_rss_feed: processing articles...");
  418. }
  419. foreach ($iterator as $item) {
  420. $hook_params = array("item" => &$item, "feed" => $feed);
  421. $plugins->hook('rss_update_item', $hook_params);
  422. if ($_REQUEST['xdebug'] == 2) {
  423. print_r($item);
  424. }
  425. if ($use_simplepie) {
  426. $entry_guid = $item->get_id();
  427. if (!$entry_guid) $entry_guid = $item->get_link();
  428. if (!$entry_guid) $entry_guid = make_guid_from_title($item->get_title());
  429. } else {
  430. $entry_guid = $item["id"];
  431. if (!$entry_guid) $entry_guid = $item["guid"];
  432. if (!$entry_guid) $entry_guid = $item["about"];
  433. if (!$entry_guid) $entry_guid = $item["link"];
  434. if (!$entry_guid) $entry_guid = make_guid_from_title($item["title"]);
  435. }
  436. if ($debug_enabled) {
  437. _debug("update_rss_feed: guid $entry_guid");
  438. }
  439. if (!$entry_guid) continue;
  440. $entry_timestamp = "";
  441. if ($use_simplepie) {
  442. $entry_timestamp = strtotime($item->get_date());
  443. } else {
  444. $rss_2_date = $item['pubdate'];
  445. $rss_1_date = $item['dc']['date'];
  446. $atom_date = $item['issued'];
  447. if (!$atom_date) $atom_date = $item['updated'];
  448. if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
  449. if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
  450. if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
  451. }
  452. if ($entry_timestamp == "" || $entry_timestamp == -1 || !$entry_timestamp) {
  453. $entry_timestamp = time();
  454. $no_orig_date = 'true';
  455. } else {
  456. $no_orig_date = 'false';
  457. }
  458. $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
  459. if ($debug_enabled) {
  460. _debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
  461. }
  462. if ($use_simplepie) {
  463. $entry_title = $item->get_title();
  464. } else {
  465. $entry_title = trim(strip_tags($item["title"]));
  466. }
  467. if ($use_simplepie) {
  468. $entry_link = $item->get_link();
  469. } else {
  470. // strange Magpie workaround
  471. $entry_link = $item["link_"];
  472. if (!$entry_link) $entry_link = $item["link"];
  473. }
  474. $entry_link = rewrite_relative_url($site_url, $entry_link);
  475. if ($debug_enabled) {
  476. _debug("update_rss_feed: title $entry_title");
  477. _debug("update_rss_feed: link $entry_link");
  478. }
  479. if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
  480. $entry_link = strip_tags($entry_link);
  481. if ($use_simplepie) {
  482. $entry_content = $item->get_content();
  483. if (!$entry_content) $entry_content = $item->get_description();
  484. } else {
  485. $entry_content = $item["content:escaped"];
  486. if (!$entry_content) $entry_content = $item["content:encoded"];
  487. if (!$entry_content && is_array($entry_content)) $entry_content = $item["content"]["encoded"];
  488. if (!$entry_content) $entry_content = $item["content"];
  489. if (is_array($entry_content)) $entry_content = $entry_content[0];
  490. // Magpie bugs are getting ridiculous
  491. if (trim($entry_content) == "Array") $entry_content = false;
  492. if (!$entry_content) $entry_content = $item["atom_content"];
  493. if (!$entry_content) $entry_content = $item["summary"];
  494. if (!$entry_content ||
  495. strlen($entry_content) < strlen($item["description"])) {
  496. $entry_content = $item["description"];
  497. };
  498. // WTF
  499. if (is_array($entry_content)) {
  500. $entry_content = $entry_content["encoded"];
  501. if (!$entry_content) $entry_content = $entry_content["escaped"];
  502. }
  503. }
  504. if ($cache_images && is_writable(CACHE_DIR . '/images'))
  505. $entry_content = cache_images($entry_content, $site_url, $debug_enabled);
  506. if ($_REQUEST["xdebug"] == 2) {
  507. print "update_rss_feed: content: ";
  508. print $entry_content;
  509. print "\n";
  510. }
  511. $entry_content_unescaped = $entry_content;
  512. if ($use_simplepie) {
  513. $entry_comments = strip_tags($item->data["comments"]);
  514. if ($item->get_author()) {
  515. $entry_author_item = $item->get_author();
  516. $entry_author = $entry_author_item->get_name();
  517. if (!$entry_author) $entry_author = $entry_author_item->get_email();
  518. $entry_author = db_escape_string($entry_author);
  519. }
  520. } else {
  521. $entry_comments = strip_tags($item["comments"]);
  522. $entry_author = db_escape_string(strip_tags($item['dc']['creator']));
  523. if ($item['author']) {
  524. if (is_array($item['author'])) {
  525. if (!$entry_author) {
  526. $entry_author = db_escape_string(strip_tags($item['author']['name']));
  527. }
  528. if (!$entry_author) {
  529. $entry_author = db_escape_string(strip_tags($item['author']['email']));
  530. }
  531. }
  532. if (!$entry_author) {
  533. $entry_author = db_escape_string(strip_tags($item['author']));
  534. }
  535. }
  536. }
  537. if (preg_match('/^[\t\n\r ]*$/', $entry_author)) $entry_author = '';
  538. $entry_guid = db_escape_string(strip_tags($entry_guid));
  539. $entry_guid = mb_substr($entry_guid, 0, 250);
  540. $result = db_query($link, "SELECT id FROM ttrss_entries
  541. WHERE guid = '$entry_guid'");
  542. $entry_content = db_escape_string($entry_content, false);
  543. $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
  544. $entry_title = db_escape_string($entry_title);
  545. $entry_link = db_escape_string($entry_link);
  546. $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
  547. $entry_author = mb_substr($entry_author, 0, 250);
  548. if ($use_simplepie) {
  549. $num_comments = 0; #FIXME#
  550. } else {
  551. $num_comments = db_escape_string($item["slash"]["comments"]);
  552. }
  553. if (!$num_comments) $num_comments = 0;
  554. if ($debug_enabled) {
  555. _debug("update_rss_feed: looking for tags [1]...");
  556. }
  557. // parse <category> entries into tags
  558. $additional_tags = array();
  559. if ($use_simplepie) {
  560. $additional_tags_src = $item->get_categories();
  561. if (is_array($additional_tags_src)) {
  562. foreach ($additional_tags_src as $tobj) {
  563. array_push($additional_tags, $tobj->get_term());
  564. }
  565. }
  566. if ($debug_enabled) {
  567. _debug("update_rss_feed: category tags:");
  568. print_r($additional_tags);
  569. }
  570. } else {
  571. $t_ctr = $item['category#'];
  572. if ($t_ctr == 0) {
  573. $additional_tags = array();
  574. } else if ($t_ctr > 0) {
  575. $additional_tags = array($item['category']);
  576. if ($item['category@term']) {
  577. array_push($additional_tags, $item['category@term']);
  578. }
  579. for ($i = 0; $i <= $t_ctr; $i++ ) {
  580. if ($item["category#$i"]) {
  581. array_push($additional_tags, $item["category#$i"]);
  582. }
  583. if ($item["category#$i@term"]) {
  584. array_push($additional_tags, $item["category#$i@term"]);
  585. }
  586. }
  587. }
  588. // parse <dc:subject> elements
  589. $t_ctr = $item['dc']['subject#'];
  590. if ($t_ctr > 0) {
  591. array_push($additional_tags, $item['dc']['subject']);
  592. for ($i = 0; $i <= $t_ctr; $i++ ) {
  593. if ($item['dc']["subject#$i"]) {
  594. array_push($additional_tags, $item['dc']["subject#$i"]);
  595. }
  596. }
  597. }
  598. }
  599. if ($debug_enabled) {
  600. _debug("update_rss_feed: looking for tags [2]...");
  601. }
  602. /* taaaags */
  603. // <a href="..." rel="tag">Xorg</a>, //
  604. $entry_tags = null;
  605. preg_match_all("/<a.*?rel=['\"]tag['\"].*?\>([^<]+)<\/a>/i",
  606. $entry_content_unescaped, $entry_tags);
  607. $entry_tags = $entry_tags[1];
  608. $entry_tags = array_merge($entry_tags, $additional_tags);
  609. $entry_tags = array_unique($entry_tags);
  610. for ($i = 0; $i < count($entry_tags); $i++)
  611. $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
  612. if ($debug_enabled) {
  613. _debug("update_rss_feed: unfiltered tags found:");
  614. print_r($entry_tags);
  615. }
  616. # sanitize content
  617. $entry_content = sanitize_article_content($entry_content);
  618. $entry_title = sanitize_article_content($entry_title);
  619. if ($debug_enabled) {
  620. _debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
  621. }
  622. db_query($link, "BEGIN");
  623. if (db_num_rows($result) == 0) {
  624. if ($debug_enabled) {
  625. _debug("update_rss_feed: base guid not found");
  626. }
  627. // base post entry does not exist, create it
  628. $result = db_query($link,
  629. "INSERT INTO ttrss_entries
  630. (title,
  631. guid,
  632. link,
  633. updated,
  634. content,
  635. content_hash,
  636. no_orig_date,
  637. date_updated,
  638. date_entered,
  639. comments,
  640. num_comments,
  641. author)
  642. VALUES
  643. ('$entry_title',
  644. '$entry_guid',
  645. '$entry_link',
  646. '$entry_timestamp_fmt',
  647. '$entry_content',
  648. '$content_hash',
  649. $no_orig_date,
  650. NOW(),
  651. NOW(),
  652. '$entry_comments',
  653. '$num_comments',
  654. '$entry_author')");
  655. } else {
  656. // we keep encountering the entry in feeds, so we need to
  657. // update date_updated column so that we don't get horrible
  658. // dupes when the entry gets purged and reinserted again e.g.
  659. // in the case of SLOW SLOW OMG SLOW updating feeds
  660. $base_entry_id = db_fetch_result($result, 0, "id");
  661. db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()
  662. WHERE id = '$base_entry_id'");
  663. }
  664. // now it should exist, if not - bad luck then
  665. $result = db_query($link, "SELECT
  666. id,content_hash,no_orig_date,title,
  667. ".SUBSTRING_FOR_DATE."(date_updated,1,19) as date_updated,
  668. ".SUBSTRING_FOR_DATE."(updated,1,19) as updated,
  669. num_comments
  670. FROM
  671. ttrss_entries
  672. WHERE guid = '$entry_guid'");
  673. $entry_ref_id = 0;
  674. $entry_int_id = 0;
  675. if (db_num_rows($result) == 1) {
  676. if ($debug_enabled) {
  677. _debug("update_rss_feed: base guid found, checking for user record");
  678. }
  679. // this will be used below in update handler
  680. $orig_content_hash = db_fetch_result($result, 0, "content_hash");
  681. $orig_title = db_fetch_result($result, 0, "title");
  682. $orig_num_comments = db_fetch_result($result, 0, "num_comments");
  683. $orig_date_updated = strtotime(db_fetch_result($result,
  684. 0, "date_updated"));
  685. $ref_id = db_fetch_result($result, 0, "id");
  686. $entry_ref_id = $ref_id;
  687. // check for user post link to main table
  688. // do we allow duplicate posts with same GUID in different feeds?
  689. if (get_pref($link, "ALLOW_DUPLICATE_POSTS", $owner_uid, false)) {
  690. $dupcheck_qpart = "AND (feed_id = '$feed' OR feed_id IS NULL)";
  691. } else {
  692. $dupcheck_qpart = "";
  693. }
  694. /* Collect article tags here so we could filter by them: */
  695. $article_filters = get_article_filters($filters, $entry_title,
  696. $entry_content, $entry_link, $entry_timestamp, $entry_author,
  697. $entry_tags);
  698. if ($debug_enabled) {
  699. _debug("update_rss_feed: article filters: ");
  700. if (count($article_filters) != 0) {
  701. print_r($article_filters);
  702. }
  703. }
  704. if (find_article_filter($article_filters, "filter")) {
  705. db_query($link, "COMMIT"); // close transaction in progress
  706. continue;
  707. }
  708. $score = calculate_article_score($article_filters);
  709. if ($debug_enabled) {
  710. _debug("update_rss_feed: initial score: $score");
  711. }
  712. $query = "SELECT ref_id, int_id FROM ttrss_user_entries WHERE
  713. ref_id = '$ref_id' AND owner_uid = '$owner_uid'
  714. $dupcheck_qpart";
  715. // if ($_REQUEST["xdebug"]) print "$query\n";
  716. $result = db_query($link, $query);
  717. // okay it doesn't exist - create user entry
  718. if (db_num_rows($result) == 0) {
  719. if ($debug_enabled) {
  720. _debug("update_rss_feed: user record not found, creating...");
  721. }
  722. if ($score >= -500 && !find_article_filter($article_filters, 'catchup')) {
  723. $unread = 'true';
  724. $last_read_qpart = 'NULL';
  725. } else {
  726. $unread = 'false';
  727. $last_read_qpart = 'NOW()';
  728. }
  729. if (find_article_filter($article_filters, 'mark') || $score > 1000) {
  730. $marked = 'true';
  731. } else {
  732. $marked = 'false';
  733. }
  734. if (find_article_filter($article_filters, 'publish')) {
  735. $published = 'true';
  736. } else {
  737. $published = 'false';
  738. }
  739. // N-grams
  740. if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
  741. $result = db_query($link, "SELECT COUNT(*) AS similar FROM
  742. ttrss_entries,ttrss_user_entries
  743. WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'
  744. AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD."
  745. AND owner_uid = $owner_uid");
  746. $ngram_similar = db_fetch_result($result, 0, "similar");
  747. if ($debug_enabled) {
  748. _debug("update_rss_feed: N-gram similar results: $ngram_similar");
  749. }
  750. if ($ngram_similar > 0) {
  751. $unread = 'false';
  752. }
  753. }
  754. $result = db_query($link,
  755. "INSERT INTO ttrss_user_entries
  756. (ref_id, owner_uid, feed_id, unread, last_read, marked,
  757. published, score, tag_cache, label_cache, uuid)
  758. VALUES ('$ref_id', '$owner_uid', '$feed', $unread,
  759. $last_read_qpart, $marked, $published, '$score', '', '', '')");
  760. if (PUBSUBHUBBUB_HUB && $published == 'true') {
  761. $rss_link = get_self_url_prefix() .
  762. "/public.php?op=rss&id=-2&key=" .
  763. get_feed_access_key($link, -2, false, $owner_uid);
  764. $p = new Publisher(PUBSUBHUBBUB_HUB);
  765. $pubsub_result = $p->publish_update($rss_link);
  766. }
  767. $result = db_query($link,
  768. "SELECT int_id FROM ttrss_user_entries WHERE
  769. ref_id = '$ref_id' AND owner_uid = '$owner_uid' AND
  770. feed_id = '$feed' LIMIT 1");
  771. if (db_num_rows($result) == 1) {
  772. $entry_int_id = db_fetch_result($result, 0, "int_id");
  773. }
  774. } else {
  775. if ($debug_enabled) {
  776. _debug("update_rss_feed: user record FOUND");
  777. }
  778. $entry_ref_id = db_fetch_result($result, 0, "ref_id");
  779. $entry_int_id = db_fetch_result($result, 0, "int_id");
  780. }
  781. if ($debug_enabled) {
  782. _debug("update_rss_feed: RID: $entry_ref_id, IID: $entry_int_id");
  783. }
  784. $post_needs_update = false;
  785. $update_insignificant = false;
  786. if ($orig_num_comments != $num_comments) {
  787. $post_needs_update = true;
  788. $update_insignificant = true;
  789. }
  790. if ($content_hash != $orig_content_hash) {
  791. $post_needs_update = true;
  792. $update_insignificant = false;
  793. }
  794. if (db_escape_string($orig_title) != $entry_title) {
  795. $post_needs_update = true;
  796. $update_insignificant = false;
  797. }
  798. // if post needs update, update it and mark all user entries
  799. // linking to this post as updated
  800. if ($post_needs_update) {
  801. if (defined('DAEMON_EXTENDED_DEBUG')) {
  802. _debug("update_rss_feed: post $entry_guid needs update...");
  803. }
  804. // print "<!-- post $orig_title needs update : $post_needs_update -->";
  805. db_query($link, "UPDATE ttrss_entries
  806. SET title = '$entry_title', content = '$entry_content',
  807. content_hash = '$content_hash',
  808. updated = '$entry_timestamp_fmt',
  809. num_comments = '$num_comments'
  810. WHERE id = '$ref_id'");
  811. if (!$update_insignificant) {
  812. if ($mark_unread_on_update) {
  813. db_query($link, "UPDATE ttrss_user_entries
  814. SET last_read = null, unread = true WHERE ref_id = '$ref_id'");
  815. } else if ($update_on_checksum_change) {
  816. db_query($link, "UPDATE ttrss_user_entries
  817. SET last_read = null WHERE ref_id = '$ref_id'
  818. AND unread = false");
  819. }
  820. }
  821. }
  822. }
  823. db_query($link, "COMMIT");
  824. if ($debug_enabled) {
  825. _debug("update_rss_feed: assigning labels...");
  826. }
  827. assign_article_to_labels($link, $entry_ref_id, $article_filters,
  828. $owner_uid);
  829. if ($debug_enabled) {
  830. _debug("update_rss_feed: looking for enclosures...");
  831. }
  832. // enclosures
  833. $enclosures = array();
  834. if ($use_simplepie) {
  835. $encs = $item->get_enclosures();
  836. if (is_array($encs)) {
  837. foreach ($encs as $e) {
  838. $e_item = array(
  839. $e->link, $e->type, $e->length);
  840. array_push($enclosures, $e_item);
  841. }
  842. }
  843. } else {
  844. // <enclosure>
  845. $e_ctr = $item['enclosure#'];
  846. if ($e_ctr > 0) {
  847. $e_item = array($item['enclosure@url'],
  848. $item['enclosure@type'],
  849. $item['enclosure@length']);
  850. array_push($enclosures, $e_item);
  851. for ($i = 0; $i <= $e_ctr; $i++ ) {
  852. if ($item["enclosure#$i@url"]) {
  853. $e_item = array($item["enclosure#$i@url"],
  854. $item["enclosure#$i@type"],
  855. $item["enclosure#$i@length"]);
  856. array_push($enclosures, $e_item);
  857. }
  858. }
  859. }
  860. // <media:content>
  861. // can there be many of those? yes -fox
  862. $m_ctr = $item['media']['content#'];
  863. if ($m_ctr > 0) {
  864. $e_item = array($item['media']['content@url'],
  865. $item['media']['content@medium'],
  866. $item['media']['content@length']);
  867. array_push($enclosures, $e_item);
  868. for ($i = 0; $i <= $m_ctr; $i++ ) {
  869. if ($item["media"]["content#$i@url"]) {
  870. $e_item = array($item["media"]["content#$i@url"],
  871. $item["media"]["content#$i@medium"],
  872. $item["media"]["content#$i@length"]);
  873. array_push($enclosures, $e_item);
  874. }
  875. }
  876. }
  877. }
  878. if ($debug_enabled) {
  879. _debug("update_rss_feed: article enclosures:");
  880. print_r($enclosures);
  881. }
  882. db_query($link, "BEGIN");
  883. foreach ($enclosures as $enc) {
  884. $enc_url = db_escape_string($enc[0]);
  885. $enc_type = db_escape_string($enc[1]);
  886. $enc_dur = db_escape_string($enc[2]);
  887. $result = db_query($link, "SELECT id FROM ttrss_enclosures
  888. WHERE content_url = '$enc_url' AND post_id = '$entry_ref_id'");
  889. if (db_num_rows($result) == 0) {
  890. db_query($link, "INSERT INTO ttrss_enclosures
  891. (content_url, content_type, title, duration, post_id) VALUES
  892. ('$enc_url', '$enc_type', '', '$enc_dur', '$entry_ref_id')");
  893. }
  894. }
  895. db_query($link, "COMMIT");
  896. // check for manual tags (we have to do it here since they're loaded from filters)
  897. foreach ($article_filters as $f) {
  898. if ($f[0] == "tag") {
  899. $manual_tags = trim_array(explode(",", $f[1]));
  900. foreach ($manual_tags as $tag) {
  901. if (tag_is_valid($tag)) {
  902. array_push($entry_tags, $tag);
  903. }
  904. }
  905. }
  906. }
  907. // Skip boring tags
  908. $boring_tags = trim_array(explode(",", mb_strtolower(get_pref($link,
  909. 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
  910. $filtered_tags = array();
  911. $tags_to_cache = array();
  912. if ($entry_tags && is_array($entry_tags)) {
  913. foreach ($entry_tags as $tag) {
  914. if (array_search($tag, $boring_tags) === false) {
  915. array_push($filtered_tags, $tag);
  916. }
  917. }
  918. }
  919. $filtered_tags = array_unique($filtered_tags);
  920. if ($debug_enabled) {
  921. _debug("update_rss_feed: filtered article tags:");
  922. print_r($filtered_tags);
  923. }
  924. // Save article tags in the database
  925. if (count($filtered_tags) > 0) {
  926. db_query($link, "BEGIN");
  927. foreach ($filtered_tags as $tag) {
  928. $tag = sanitize_tag($tag);
  929. $tag = db_escape_string($tag);
  930. if (!tag_is_valid($tag)) continue;
  931. $result = db_query($link, "SELECT id FROM ttrss_tags
  932. WHERE tag_name = '$tag' AND post_int_id = '$entry_int_id' AND
  933. owner_uid = '$owner_uid' LIMIT 1");
  934. if ($result && db_num_rows($result) == 0) {
  935. db_query($link, "INSERT INTO ttrss_tags
  936. (owner_uid,tag_name,post_int_id)
  937. VALUES ('$owner_uid','$tag', '$entry_int_id')");
  938. }
  939. array_push($tags_to_cache, $tag);
  940. }
  941. /* update the cache */
  942. $tags_to_cache = array_unique($tags_to_cache);
  943. $tags_str = db_escape_string(join(",", $tags_to_cache));
  944. db_query($link, "UPDATE ttrss_user_entries
  945. SET tag_cache = '$tags_str' WHERE ref_id = '$entry_ref_id'
  946. AND owner_uid = $owner_uid");
  947. db_query($link, "COMMIT");
  948. }
  949. if ($debug_enabled) {
  950. _debug("update_rss_feed: article processed");
  951. }
  952. }
  953. if (!$last_updated) {
  954. if ($debug_enabled) {
  955. _debug("update_rss_feed: new feed, catching it up...");
  956. }
  957. catchup_feed($link, $feed, false, $owner_uid);
  958. }
  959. if ($debug_enabled) {
  960. _debug("purging feed...");
  961. }
  962. purge_feed($link, $feed, 0, $debug_enabled);
  963. db_query($link, "UPDATE ttrss_feeds
  964. SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
  965. // db_query($link, "COMMIT");
  966. } else {
  967. if ($use_simplepie) {
  968. $error_msg = mb_substr($rss->error(), 0, 250);
  969. } else {
  970. $error_msg = mb_substr(magpie_error(), 0, 250);
  971. }
  972. if ($debug_enabled) {
  973. _debug("update_rss_feed: error fetching feed: $error_msg");
  974. }
  975. $error_msg = db_escape_string($error_msg);
  976. db_query($link,
  977. "UPDATE ttrss_feeds SET last_error = '$error_msg',
  978. last_updated = NOW() WHERE id = '$feed'");
  979. }
  980. if ($use_simplepie) {
  981. unset($rss);
  982. }
  983. if ($debug_enabled) {
  984. _debug("update_rss_feed: done");
  985. }
  986. }
  /**
   * Downloads the images referenced by <img src="..."> tags of an HTML
   * fragment into the local image cache and points the tags at image.php.
   *
   * Every image is stored as CACHE_DIR/images/<sha1 of the absolute URL>.png.
   * A tag is rewritten only when its cache file exists after the download
   * attempt; otherwise the original src is kept.
   *
   * @param string $html     HTML fragment to process
   * @param string $site_url base URL handed to rewrite_relative_url() to
   *                         resolve relative src values
   * @param bool   $debug    when true, every processed image is logged via _debug()
   * @return string serialized <body> element of the parsed document, or $html
   *                unchanged when the parser produced no <body> element
   */
  function cache_images($html, $site_url, $debug) {
    $cache_dir = CACHE_DIR . "/images";

    libxml_use_internal_errors(true);

    // Forces the HTML parser to treat the fragment as UTF-8.
    $charset_hack = '<head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
    </head>';

    $doc = new DOMDocument();
    $doc->loadHTML($charset_hack . $html);

    // With internal errors enabled libxml keeps every parse error in memory
    // until it is cleared explicitly; without this the buffer grows with each
    // processed article.
    libxml_clear_errors();

    $xpath = new DOMXPath($doc);

    // The XPath predicate already guarantees that a src attribute is present.
    $entries = $xpath->query('(//img[@src])');

    foreach ($entries as $entry) {
      $src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
      $local_filename = $cache_dir . "/" . sha1($src) . ".png";

      if ($debug) _debug("cache_images: downloading: $src to $local_filename");

      if (!file_exists($local_filename)) {
        $file_content = fetch_file_contents($src);

        // Responses of 1 KB or less are not cached.
        if ($file_content && strlen($file_content) > 1024) {
          file_put_contents($local_filename, $file_content);
        }
      }

      if (file_exists($local_filename)) {
        // base64 output may contain '+', '/' and '='; '+' would be decoded as
        // a space on the receiving side, so the value has to be URL-encoded.
        // NOTE(review): assumes image.php reads the already-decoded request
        // parameter (e.g. $_GET['url']) -- confirm.
        $entry->setAttribute('src', SELF_URL_PATH . '/image.php?url=' .
          urlencode(base64_encode($src)));
      }
    }

    $node = $doc->getElementsByTagName('body')->item(0);

    // saveXML(null) would serialize the complete document, including the
    // injected <head>; hand back the untouched input instead.
    if (!$node) {
      return $html;
    }

    return $doc->saveXML($node, LIBXML_NOEMPTYTAG);
  }
  /**
   * Deletes cache files that have not been modified for more than seven days.
   *
   * The magpie, simplepie, images and export subdirectories of CACHE_DIR are
   * scanned; a subdirectory that is not writable is skipped.
   *
   * @param bool $debug when true, progress is reported through _debug()
   * @return void
   */
  function expire_cached_files($debug) {
    $max_age = 86400 * 7; // seven days, in seconds
    $now = time();

    foreach (array("magpie", "simplepie", "images", "export") as $dir) {
      $cache_dir = CACHE_DIR . "/$dir";

      if ($debug) _debug("Expiring $cache_dir");

      $num_deleted = 0;

      if (is_writable($cache_dir)) {
        $files = glob("$cache_dir/*");

        // glob() returns false on error and an empty array when nothing matches
        if ($files) {
          foreach ($files as $file) {
            // glob() also lists subdirectories, which unlink() cannot remove
            if (is_dir($file) && !is_link($file)) continue;

            // false when the file disappeared after glob() ran
            $mtime = filemtime($file);
            if ($mtime === false) continue;

            // count only the files that were really removed
            if ($now - $mtime > $max_age && unlink($file)) {
              ++$num_deleted;
            }
          }
        }
      }

      if ($debug) _debug("Removed $num_deleted files.");
    }
  }
  /**
   * Source: http://www.php.net/manual/en/function.parse-url.php#104527
   * Returns the url query as associative array
   *
   * The query is split on '&'; each part is split on its FIRST '=' only, so
   * values that contain '=' themselves (e.g. base64 padding) stay intact.
   * Names and values are returned as found, without URL-decoding. A part
   * without '=' maps to null, empty parts are ignored, and when a name occurs
   * more than once the last value wins.
   *
   * @param string query
   * @return array params
   */
  function convertUrlQuery($query) {
    $params = array();

    foreach (explode('&', $query) as $param) {
      // skip the empty pieces produced by '', 'a=1&' or 'a=1&&b=2'
      if ($param === '') continue;

      // limit of 2: everything after the first '=' belongs to the value
      $item = explode('=', $param, 2);

      $params[$item[0]] = isset($item[1]) ? $item[1] : null;
    }

    return $params;
  }
  1051. ?>