PHP 清除 Word 生成的无用标签,并保留常见标签

本文阅读 0 分钟
首页 PHP笔记 正文
  /**
   * Clean up HTML pasted or exported from MS Word, keeping only an allow-list of tags.
   *
   * Steps, in order:
   *   1. typographic quotes and em dashes are replaced by ASCII equivalents;
   *   2. HTML entities are decoded;
   *   3. C-style block comments are removed;
   *   4. a space is inserted between '<' and a following digit ('<1' => '< 1');
   *   5. strip_tags() is applied with $allowtags;
   *   6. runs of two or more whitespace characters are trimmed/collapsed;
   *   7. strong/b, em/i and u elements are rewritten as bare <b>, <i>, <u> (attributes dropped);
   *   8. any HTML comments still present are removed.
   *
   * NOTE(review): entities are decoded before strip_tags() and attributes on allowed
   * tags are kept, so the result should not be treated as sanitised against XSS.
   *
   * @param string $content   Raw HTML/text to clean.
   * @param string $allowtags Allow-list handed to strip_tags().
   * @return string The cleaned markup.
   */
  function ClearHtml1($content, $allowtags = '<p><span><strong><br/><br><div><ul><li><a><ol><i>')
  {
      $content = (string) $content;

      // Kept only so the global mbstring regex encoding is still set for callers, as before;
      // this function itself no longer uses any mb_ereg* routine.
      if (function_exists('mb_regex_encoding')) {
          mb_regex_encoding('UTF-8');
      }

      // preg_replace() returns null on failure (e.g. malformed UTF-8 with the /u modifier).
      // Keep the text from before the failed step instead of silently wiping the content.
      $replaceOrKeep = function ($pattern, $replacement, $subject) {
          $result = preg_replace($pattern, $replacement, $subject);
          return $result === null ? $subject : $result;
      };

      // Replace MS "smart" punctuation first. strtr() works on bytes, so it cannot fail
      // on malformed input the way a /u regex would.
      $content = strtr($content, array(
          '‘' => '\'',
          '’' => '\'',
          '“' => '"',
          '”' => '"',
          '—' => '-',
      ));

      // Some MS headers mix encoded and unencoded entities; decode them all up front.
      $content = html_entity_decode($content, ENT_QUOTES, 'UTF-8');

      // Remove C-style block comments: embedded in HTML comments they can keep strip_tags()
      // from removing the surrounding HTML comment. The original passed a delimited pattern
      // to mb_eregi_replace(), which treats the delimiters as literal text and never matched.
      if (strpos($content, '/*') !== false) {
          $content = $replaceOrKeep('#/\*.*?\*/#su', '', $content);
      }

      // Separate '<' from a following number so arithmetic like '<1' is less likely to be
      // taken for a tag by strip_tags() (somewhat application specific).
      $content = $replaceOrKeep('/<([0-9]+)/', '< $1', $content);

      $content = strip_tags($content, $allowtags);

      // Drop a run of 2+ whitespace characters at the very start or end of the string,
      // then collapse any remaining run of 2+ whitespace characters to a single space.
      $content = $replaceOrKeep(
          array('/^\s\s+/', '/\s\s+$/', '/\s\s+/u'),
          array('', '', ' '),
          $content
      );

      // Simplify emphasis tags and drop their attributes (inline CSS etc.).
      // \b stops <br> / <ul> / <img> from being mistaken for <b> / <u> / <i>, and the
      // back-reference makes the closing tag match the opening one.
      $content = $replaceOrKeep(
          array(
              '#<(strong|b)\b[^>]*>(.*?)</\1>#isu',
              '#<(em|i)\b[^>]*>(.*?)</\1>#isu',
              '#<u\b[^>]*>(.*?)</u>#isu',
          ),
          array(
              '<b>$2</b>',
              '<i>$2</i>',
              '<u>$1</u>',
          ),
          $content
      );

      // Newer Word exports ('if gte mso 9' conditionals) can leave HTML comments behind
      // after strip_tags(). Remove each one individually: the non-greedy match keeps the
      // text that sits between two separate comments.
      $content = $replaceOrKeep('/<!--.*?-->/su', '', $content);

      return $content;
  }
解压密码: detechn或detechn.com

免责声明

本站所有资源出自互联网收集整理,本站不参与制作,如果侵犯了您的合法权益,请联系本站,我们会及时删除。

本站发布资源来源于互联网,可能存在水印或者引流等信息,请用户自行鉴别,做一个有主见和判断力的用户。

本站资源仅供研究、学习交流之用,若用于商业用途,请购买正版授权,否则产生的一切后果将由下载用户自行承担。

php二分查找算法(折半查找算法)
« 上一篇 08-21
php 如何将数组转JSON
下一篇 » 08-21

发表评论