PHP 清除 Word 中无用的标签，并保留常见标签
/**
 * Clean an HTML fragment pasted or exported from Microsoft Word.
 *
 * Processing order:
 *   1. Typographic quotes and the em dash are replaced by ASCII equivalents.
 *   2. HTML entities are decoded (UTF-8, ENT_QUOTES).
 *   3. C-style block comments are removed.
 *   4. A space is inserted between "<" and a following digit.
 *   5. Every tag not listed in $allowtags is removed with strip_tags().
 *   6. Runs of two or more whitespace characters are collapsed (or removed
 *      at the very start / very end of the string).
 *   7. Bold / italic / underline tags that survived step 5 are normalised to
 *      attribute-free <b>, <i> and <u>.
 *   8. Any HTML comment still present is removed.
 *
 * Note: because entities are decoded in step 2, the returned string is NOT
 * HTML-escaped; text that was entity-encoded markup in the input is treated
 * as real markup by the later steps.
 *
 * @param string $content   HTML fragment to clean (expected to be UTF-8).
 * @param string $allowtags Whitelist in the format accepted by strip_tags().
 *
 * @return string The cleaned fragment; an empty string for empty input.
 */
function ClearHtml1($content, $allowtags = '<p><span><strong><br/><br><div><ul><li><a><ol><i>')
{
    $content = (string) $content;
    if ($content === '') {
        return '';
    }

    // preg_replace() returns null on failure (e.g. invalid UTF-8 with the "u"
    // modifier, or the backtrack limit being hit). Fall back to the untouched
    // subject so one failing step does not wipe out the whole document.
    $safeReplace = function ($pattern, $replacement, $subject) {
        $result = preg_replace($pattern, $replacement, $subject);
        return $result === null ? $subject : $result;
    };

    // Replace MS "smart" punctuation first. Plain byte replacement is safe for
    // UTF-8 and, unlike a "u"-mode regex, cannot fail on malformed input.
    $content = str_replace(
        ['‘', '’', '“', '”', '—'],
        ["'", "'", '"', '"', '-'],
        $content
    );

    // Some MS headers mix encoded and raw entities; decode them all so the
    // following steps see one consistent representation.
    $content = html_entity_decode($content, ENT_QUOTES, 'UTF-8');

    // Strip C-style comments: embedded inside HTML comments they can keep
    // strip_tags() from removing the surrounding HTML comment.
    $content = $safeReplace('#/\*.*?\*/#s', '', $content);

    // Separate "<" from a following number ("<1" becomes "< 1") so arithmetic
    // expressions are less likely to be mistaken for tags (application specific).
    $content = $safeReplace('/<([0-9]+)/', '< $1', $content);

    $content = strip_tags($content, $allowtags);

    // Drop a whitespace run (2+ chars) at the very start or end of the string,
    // and squeeze any other run of 2+ whitespace characters to a single space.
    $content = $safeReplace(
        ['/^\s\s+/u', '/\s\s+$/u', '/\s\s+/u'],
        ['', '', ' '],
        $content
    );

    // Normalise emphasis tags and drop their attributes (inline CSS etc.).
    // The lookahead requires the tag name to end right there, so look-alike
    // tags such as <br>, <img> or <ul> are left untouched. Opening and closing
    // tags are rewritten independently, which also copes with nesting.
    $content = $safeReplace(
        [
            '#<(?:strong|b)(?=[\s/>])[^>]*>#iu',
            '#</(?:strong|b)\s*>#iu',
            '#<(?:em|i)(?=[\s/>])[^>]*>#iu',
            '#</(?:em|i)\s*>#iu',
            '#<u(?=[\s/>])[^>]*>#iu',
            '#</u\s*>#iu',
        ],
        ['<b>', '</b>', '<i>', '</i>', '<u>', '</u>'],
        $content
    );

    // Newer Word exports use conditional comments ("if gte mso 9") that may
    // survive strip_tags(). Remove each leftover comment individually; the
    // match is non-greedy so text between two comments is preserved.
    if (strpos($content, '<!--') !== false) {
        $content = $safeReplace('/<!--.*?-->/s', '', $content);
    }

    return $content;
}
-
解压密码: detechn或detechn.com
免责声明
本站所有资源出自互联网收集整理,本站不参与制作,如果侵犯了您的合法权益,请联系本站我们会及时删除。
本站发布资源来源于互联网,可能存在水印或者引流等信息,请用户自行鉴别,做一个有主见和判断力的用户。
本站资源仅供研究、学习交流之用,若使用商业用途,请购买正版授权,否则产生的一切后果将由下载用户自行承担。