<?php

// Converts the HTML held in $document to plain text, stored in $text.
//
// Input:  $document - a string (or array of strings) containing HTML.
//                     An unset or null $document is treated as an empty string.
// Output: $text     - the converted text, or null if the PCRE engine reports
//                     an error.
//
// The rules below are applied in order: <script> sections are dropped, all
// remaining tags are dropped, a line break followed by further white space is
// collapsed to that single line break character, and a handful of common HTML
// entities are replaced by their text equivalent. The replacement characters
// are single bytes (chr(161) and so on), so the result is single-byte text,
// not UTF-8.

$search = array(
    "'<script[^>]*?>.*?</script>'si", // Strip out javascript
    "'<[\/\!]*?[^<>]*?>'si",          // Strip out HTML tags
    "'([\r\n])[\s]+'",                // Strip out white space
    "'&(quot|#34);'i",                // Replace HTML entities
    "'&(amp|#38);'i",
    "'&(lt|#60);'i",
    "'&(gt|#62);'i",
    "'&(nbsp|#160);'i",
    "'&(iexcl|#161);'i",
    "'&(cent|#162);'i",
    "'&(pound|#163);'i",
    "'&(copy|#169);'i",
);

// Replacements, positionally matched to $search.
$replace = array(
    "",
    "",
    "\\1",
    "\"",
    "&",
    "<",
    ">",
    " ",
    chr(161),
    chr(162),
    chr(163),
    chr(169),
);

// Decodes one decimal numeric entity match (full match in [0], digits in [1]).
// This replaces the former '&#(\d+);'e rule: the /e (eval) modifier no longer
// exists in PHP 7+, and its replacement was the constant chr(1) instead of the
// character for the captured code. Codes above 255 do not fit in the
// single-byte output produced by the other rules, so those entities are left
// untouched rather than being wrapped modulo 256 by chr().
$decodeNumericEntity = function ($match) {
    $code = (int) $match[1];
    if ($code < 0 || $code > 255) {
        return $match[0];
    }
    return chr($code);
};

$text = preg_replace($search, $replace, isset($document) ? $document : '');

// Numeric entities are decoded last, exactly where the old rule sat in the
// list, so the order of substitutions is unchanged. Skip the pass when the
// first one failed (preg_replace returns null on a PCRE error).
if ($text !== null) {
    $text = preg_replace_callback("'&#(\d+);'", $decodeNumericEntity, $text);
}