forked from blallo/rss-bridge
Resolve content links
Use some code to resolve content links to bypass shorteners...
This commit is contained in:
parent
30f339e3b2
commit
ff4ccf985f
1 changed file with 55 additions and 14 deletions
|
@ -37,6 +37,39 @@ class TwitterBridgeTweaked extends BridgeAbstract{
|
|||
return implode(' ', $U);
|
||||
}
|
||||
|
||||
// (c) Kraoc / urlclean
|
||||
// https://github.com/kraoc/Leed-market/blob/master/urlclean/urlclean.plugin.disabled.php
|
||||
/**
 * Resolve a (possibly shortened) link to its final destination and strip
 * tracking parameters from the result.
 *
 * When the curl extension is available (and safe_mode is off) the link is
 * requested with redirects followed, and the URL curl ended up on replaces
 * the input. If the handle cannot be created or the request fails, the
 * input link is kept as-is. In every case the "xtor" / "utm_" cleanup below
 * is applied before returning.
 *
 * @param string $link URL to resolve and clean
 * @return string the resolved (when possible) and cleaned URL
 */
private function resolve_url($link) {
    // Fallback: crawl to the real URL (slowest method, and it reveals this
    // server to every host in the redirect chain).
    if (function_exists('curl_init') && !ini_get('safe_mode')) {
        // The user agent and the handle must exist before any option is set.
        $ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.16 (KHTML, like Gecko) Chrome/24.0.1304.0 Safari/537.16';
        $ch = curl_init();

        if ($ch !== false) {
            curl_setopt($ch, CURLOPT_USERAGENT, $ua);
            curl_setopt($ch, CURLOPT_URL, $link);
            curl_setopt($ch, CURLOPT_HEADER, true);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Bound the work done per link so a redirect loop or a stalled
            // host cannot hang the whole feed generation.
            curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            // >>> anonymization
            curl_setopt($ch, CURLOPT_COOKIESESSION, true);
            curl_setopt($ch, CURLOPT_REFERER, '');
            // <<< anonymization

            // The response itself is not needed; only where the request landed.
            $response = curl_exec($ch);
            if ($response !== false) {
                $effective = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
                if (is_string($effective) && $effective !== '') {
                    $link = $effective;
                }
            }
        }
    }

    $link = preg_replace("/[&#?]xtor=(.)+/", "", $link); // remove: xtor
    // NOTE(review): this pattern also consumes '&' and therefore every
    // parameter that follows the first utm_ one, up to a '#' or the end of
    // the URL — confirm that is intended.
    $link = preg_replace("/utm_([^&#]|(&))+&*/", "", $link); // remove: utm_

    // cleanup end of url
    $link = preg_replace("/\?&/", "", $link);
    if (substr($link, -1) === '?') {
        $link = substr($link, 0, -1);
    }

    return $link;
}
|
||||
|
||||
public function collectData(array $param){
|
||||
$html = '';
|
||||
if (isset($param['q'])) { /* keyword search mode */
|
||||
|
@ -88,6 +121,14 @@ class TwitterBridgeTweaked extends BridgeAbstract{
|
|||
$item->title = preg_replace('|www\.[a-z\.0-9]+|i', '', $item->title); // remove www. links
|
||||
$item->title = $this->cleaner($item->title); // remove all remaining links
|
||||
$item->title = trim($item->title); // remove extra spaces at beginning and end
|
||||
|
||||
// convert all content links to real ones
|
||||
$regex = "/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/";
|
||||
$item->content = preg_replace_callback($regex, function($url) {
|
||||
// do stuff with $url[0] here
|
||||
return $this->resolve_url($url[0]);
|
||||
}, $item->content);
|
||||
|
||||
// put out
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue