Refined the “strip” function a bit more (added some regexes, slightly modified some others)

This commit is contained in:
pezcurrel 2024-08-25 19:55:58 +02:00
parent 1558507795
commit c1f0e8779e

View file

@ -965,21 +965,21 @@ function nullyp($str) {
function strip($str,$uri) {
if (nullemp($str)) return(null);
//echo '<pre>'.htmlentities($str).'</pre>';
//echo '<pre>'.$str.'</pre>';
$str=preg_replace(['#^\s*#m','#[\r\n]#'],['',' '],$str);// strip all spaces from empty lines, then all carriage return and new line chars
$str=preg_replace('#<br\s+/>#i','<br>',$str);// convert all "<br />" to "<br>"
$str=preg_replace('#<br\s*/>#i','<br>',$str);// convert all "<br />" to "<br>"
$str=preg_replace('#<a\s+[^>]*></a>#i','',$str);
$str=preg_replace('#<a\s+[^>]*href="(?![a-zA-Z]+://)([^"]+)#i','<a href="https://'.$uri.'$1',$str);// if an href is not preceded by a protocol scheme, it's relative ...
$str=str_ireplace('</a><a','</a> <a',$str);// always put at least one space between links
//$str=preg_replace(['#</p><br>#i','#</li><br>#i','#</ul><br>#i','#<ul><br>#i'],['</p>','</li>','</ul>','<ul>'],$str);
$str=preg_replace(['#<b>#i','#</b>#i','#<i>#i','#</i>#i'],['<strong>','</strong>','<em>','</em>'],$str);
$str=preg_replace('#<p[^>]*>\s*</p>#is','',$str);
$str=preg_replace(['#\s*(&nbsp;)*\s*</p>#is','#<p[^>]*>\s*</p>#is'],['</p>',''],$str);
$str=preg_replace(['#<p[^>]*>(.*)</p>#Ui','#<div[^>]>(.*)</div>#Ui'],['<br><br>$1<br><br>','<br><br>$1<br><br>'],$str);
$str=strip_tags($str,'<a><br><ol><ul><li><strong><em><small><img><h1><h2><h3><h4><h5><h6>');
$str=preg_replace('#<([^>]*)\s(style|class)="[^"]*"([^>]*)>#i','<$1$3>',$str);
$str=preg_replace(['#<h[1-6][^>]*>#i','#</h[1-6]>#i'],['<br><br><span class="exh">','</span><br><br>'],$str);
$str=preg_replace(['#<([ou])l[^>]*>#i','#</([ou])l>#i'],['<br><br><$1l class="nobott">','</$1l><br><br>'],$str);
$str=preg_replace(['#<br>\s+<br>#i','#^(<br>)+#i','#(<br>){3,}#i','#(<br>)+$#i','#</([ou])l>\s*(<br>)+#i','#</li>\s*(<br>)+#i'],['<br><br>','','<br><br>','','</$1l><br>','</li><br>'],$str);
$str=preg_replace(['#\s+$#','#<br>\s+<br>#i','#^(<br>)+#i','#(<br>){3,}#i','#(<br>)+$#i','#</([ou])l>\s*(<br>)+#i','#</li>\s*(<br>)+#i'],['','<br><br>','','<br><br>','','</$1l><br>','</li><br>'],$str);
// this part below is to try and assign the css "nobott" css class to a possible closing <ol>/<ul>, to avoid the useless and UGLY last bottom-margin :-)
$str=preg_replace(['#<ol[^>]*>(.*)</ol>$#i','#<ul[^>]*>(.*)</ul>$#i'],['<ol class="nobott">$1</ol>','<ul class="nobott">$1</ul>'],$str);
return $str;