前端之家收集整理的这篇文章主要介绍了如何利用正则表达式去除所有 HTML 标签,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
// Sample HTML markup used as the input of the tag-stripping demo; Page_Load
// replaces it with the filtered result.
protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''> 说是道 ";
/// <summary>
/// Strips every HTML tag from <c>str</c> except &lt;img&gt;, &lt;br&gt;, &lt;p&gt; and &lt;/p&gt;,
/// and stores the filtered markup back into <c>str</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender,EventArgs e)
{
    // Alternative patterns for other stripping policies:
    //   @"<[^>]*>"                     removes every tag
    //   @"<script[^>]*?>.*?</script>"  removes script elements together with their content
    //   @"<img[^>]*>"                  removes only <img> tags
    //   @"<(?!br).*?>"                 removes every tag except those starting with "br"
    //   @"<table[^>]*?>.*?</table>"    removes tables together with their content

    // Keep only img, br, p and /p. The \b anchor makes the lookahead compare the
    // whole tag name, so tags that merely start with a kept name (<pre>, <param>)
    // are still removed. [^>]* is used instead of .*? so that a tag whose
    // attributes span several lines is matched as well.
    string regexstr = @"<(?!(?:img|br|/?p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}
asp中正则表达式去除HTML标记(窃自eWebEditor)
2009年12月31日 星期四 下午 12:40
Function ExecReg(re, content)
    ' Deletes every match of the regular expression pattern `re` from `content`
    ' and returns the resulting string. Matching is global (all occurrences)
    ' and uses the RegExp default of case-sensitive comparison.
    Dim matcher
    Set matcher = New RegExp
    With matcher
        .Pattern = re
        .Global = True
    End With
    ExecReg = matcher.Replace(content, "")
End Function
Function DecodeFilter(html)
    ' Returns a lower-cased copy of `html` with script hooks, layout/formatting
    ' tags, class/style attributes, XML declarations, namespaced tags and
    ' embedded objects removed. Only the tags themselves are deleted; the text
    ' between an opening and a closing tag is kept.
    '
    ' The caller's variable is left untouched: VBScript passes arguments ByRef
    ' by default, so all work is done on a local copy.
    Dim cleaned, tagName, attrName, attrRe

    ' Lower-casing lets the case-sensitive ExecReg patterns match upper-case
    ' markup; note that it also lower-cases the visible text.
    cleaned = LCase(html)

    ' Remove client-side scripting: <script> tags, script URL schemes, event
    ' handler name prefixes and numeric character reference prefixes.
    ' These three patterns have no tag context, so they also match inside
    ' ordinary text.
    cleaned = ExecReg("</?script[^>]*>", cleaned)
    cleaned = ExecReg("(javascript|jscript|vbscript|vbs):", cleaned)
    cleaned = ExecReg("on(mouse|exit|error|click|key)", cleaned)
    cleaned = ExecReg("&#", cleaned)

    ' Remove opening and closing forms of the listed tags. \b after the name
    ' stops short names from matching longer ones (b vs. br/body, a vs. abbr,
    ' p vs. pre, th vs. thead, li vs. link).
    For Each tagName In Array("table", "tr", "th", "td", "a", "p", "img", "div", _
                              "ul", "li", "tbody", _
                              "h1", "h2", "h3", "h4", "h5", "h6", _
                              "b", "strong", _
                              "font", "marquee", _
                              "object", "param", "embed")
        cleaned = ExecReg("</?" & tagName & "\b[^>]*>", cleaned)
    Next

    ' Remove class="..." and style="..." attributes from the remaining tags.
    ' ExecReg always replaces with an empty string, which would delete the
    ' whole tag, so a local RegExp re-emits the text around the attribute
    ' through the $1$2 backreferences.
    Set attrRe = New RegExp
    attrRe.Global = True
    For Each attrName In Array("class", "style")
        attrRe.Pattern = "(<[^>]+)\s" & attrName & "=(?:""[^""]*""|'[^']*'|[^\s>]*)([^>]*>)"
        cleaned = attrRe.Replace(cleaned, "$1$2")
    Next
    Set attrRe = Nothing

    ' Remove XML declarations such as <?xml ...>
    cleaned = ExecReg("<\?xml[^>]*>", cleaned)
    ' Remove namespaced tags such as <o:p></o:p>
    cleaned = ExecReg("</?[a-z]+:[^>]*>", cleaned)

    DecodeFilter = cleaned
End Function
Function RemoveHTML(strText)
    ' Returns strText with every "<...>" tag deleted; the text between tags
    ' is kept as is.
    Dim tagMatcher
    Set tagMatcher = New RegExp
    With tagMatcher
        .Global = True
        .Pattern = "<[^>]*>"
    End With
    RemoveHTML = tagMatcher.Replace(strText, "")
End Function
Function nohtml(str)
    ' Returns a copy of `str` with HTML tags removed and all space characters
    ' stripped. Each tag is first replaced by a single space so adjacent text
    ' runs stay separated during tag removal; the spaces are removed afterwards.
    '
    ' The work is done on a local copy because VBScript passes arguments ByRef
    ' by default and the caller's variable must not be overwritten.
    Dim re, text
    text = str

    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True

    ' Opening (and any other) tags. The character class excludes ">" as well
    ' as "<" so a match ends at the tag's own ">" instead of running on to a
    ' later ">" that appears in the text.
    re.Pattern = "(\<.[^\<\>]*\>)"
    text = re.Replace(text, " ")

    ' Closing tags.
    re.Pattern = "(\<\/[^\<\>]*\>)"
    text = re.Replace(text, " ")

    Set re = Nothing

    text = Replace(text, " ", "")
    ' NOTE(review): the original second Replace call had lost an argument and
    ' raised a runtime error. Stripping the &nbsp; entity is the presumed
    ' intent - confirm against the original eWebEditor source.
    text = Replace(text, "&nbsp;", "")

    nohtml = text
End Function
注:在 Java 中可以用 "html内容".replaceAll("<[^>]*>","") 去除所有 HTML 标签。