Strip HTML and JavaScript from a string.

“Convert HTML” makes use of the built-in PHP functions chr() and preg_replace().

<?php

// $document should contain an HTML document.
// This will remove HTML tags, javascript sections
// and white space. It will also convert some
// common HTML entities to their text equivalent.
// The plain-text result is left in $text.

// Guarded so that including this script more than once (it works on the
// caller's $document / $text variables) does not redeclare the function.
if (!function_exists('stripHtmlAndJavascript')) {
    /**
     * Convert an HTML document to plain text.
     *
     * Steps, in order:
     *  1. remove <script>...</script> elements together with their content;
     *  2. remove every remaining tag (opening, closing, comments, doctype);
     *  3. collapse a line break followed by whitespace to the line break alone;
     *  4. decode a small set of named entities (quot, amp, lt, gt, nbsp, iexcl,
     *     cent, pound, copy) and decimal character references.
     *
     * Entities are decoded in ONE pass, so "&amp;lt;" yields the text "&lt;"
     * rather than being decoded twice into "<".
     *
     * Characters 128-255 are emitted as single bytes via chr(), i.e. the output
     * is ISO-8859-1 for those code points. Decimal references above 255 cannot
     * be represented that way and are left untouched instead of being wrapped
     * modulo 256 by chr().
     *
     * NOTE(review): this is a text converter, not a sanitizer; do not rely on it
     * to make untrusted HTML safe for re-output.
     *
     * @param string $document HTML source to convert.
     *
     * @return string Plain-text rendering of $document.
     *
     * @throws RuntimeException If PCRE fails (e.g. backtrack limit exceeded).
     */
    function stripHtmlAndJavascript(string $document): string
    {
        // Pattern => replacement; applied sequentially in this order.
        $structural = [
            '~<script[^>]*?>.*?</script>~si' => '',   // Strip out javascript
            '~<[/!]*?[^<>]*?>~si'            => '',   // Strip out HTML tags
            '~([\r\n])\s+~'                  => '$1', // Strip out white space
        ];

        $stripped = preg_replace(
            array_keys($structural),
            array_values($structural),
            $document
        );
        if ($stripped === null) {
            throw new RuntimeException(
                'PCRE error ' . preg_last_error() . ' while stripping markup'
            );
        }

        $named = [
            'quot'  => '"',
            'amp'   => '&',
            'lt'    => '<',
            'gt'    => '>',
            'nbsp'  => ' ',
            'iexcl' => chr(161),
            'cent'  => chr(162),
            'pound' => chr(163),
            'copy'  => chr(169),
        ];

        // The /e modifier (evaluate replacement as PHP) no longer exists in
        // PHP 7+, so numeric references are resolved in a callback instead.
        $decoded = preg_replace_callback(
            '~&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);~i',
            static function (array $match) use ($named): string {
                $name = strtolower($match[1]);

                if ($name[0] !== '#') {
                    return $named[$name];
                }

                // Drop leading zeros so the length check below is meaningful
                // and absurdly long digit runs never reach the int cast.
                $digits = ltrim(substr($name, 1), '0');
                if (strlen($digits) > 3) {
                    return $match[0];
                }

                $code = (int) $digits;
                if ($code > 255) {
                    return $match[0]; // not representable as a single byte
                }

                // &#160; is rendered as a plain space, like &nbsp;.
                return $code === 160 ? ' ' : chr($code);
            },
            $stripped
        );
        if ($decoded === null) {
            throw new RuntimeException(
                'PCRE error ' . preg_last_error() . ' while decoding entities'
            );
        }

        return $decoded;
    }
}

// An unset or null $document is treated as an empty document.
$text = stripHtmlAndJavascript((string) ($document ?? ''));

?>