/**
 * Small demonstrations of {@link String#replaceAll(String, String)} with regular expressions.
 *
 * <p>Case 1 removes the dots from a stuttered sentence and collapses each run of a repeated
 * character to a single character. Case 2 sorts a space-separated list of IPv4 addresses by
 * zero-padding every segment to three digits, sorting the padded strings, and stripping the
 * padding again.
 */
public class Test {

    /**
     * Runs both demonstrations and prints their results to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // case 1: remove the "." characters and the repeated characters
        String line = "我我我..我要..要要要.....学学学学学...编编编....程程程程程程";
        line = line.replaceAll("\\.", "");
        // (.)\1+ matches a character followed by one or more copies of itself; keep one copy.
        System.out.println(line.replaceAll("(.)\\1+", "$1"));

        // case 2: sort IP addresses
        String ip = "192.168.1.254 102.49.23.13 10.10.10.10 2.2.2.2 8.109.90.30";
        // Prefix every number with two zeros, then keep only its last three digits, so that
        // every segment (of up to three digits) becomes exactly three digits wide.
        ip = ip.replaceAll("(\\d+)", "00$1");
        ip = ip.replaceAll("0*(\\d{3})", "$1");
        System.out.println("排序前:" + ip);

        // With fixed-width segments, plain string ordering equals numeric ordering.
        String[] paddedAddresses = ip.split(" +");
        java.util.Arrays.sort(paddedAddresses);
        ip = String.join(" ", paddedAddresses);

        // Strip the leading zeros that were added for sorting.
        ip = ip.replaceAll("0*(\\d+)", "$1");
        System.out.println("排序后:" + ip);
    }
}
2,多行匹配html
Matcher matcher = Pattern.compile("<div\\s*class=\\s*\"area\"\\s*id=\\s*\"area2\"\\s*>\\s*<h2>.*</h2>" + "\\s*<ul>\\s*<li>(?<re>.*?)?</li>\\s*</ul>\\s*</div>",Pattern.DOTALL).matcher(text);,
3,常用场景
非中文: [^\u4E00-\u9FA5]+
提取网页content: <Meta ([^>]*?)(keywords|description|KEYWORDS|DESCRIPTION).*?content="?(?<content>[^">]+)
去除标签: <title[^>]*>.*?</title>|<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<!--.*?-->|&(#*)\w+;|\s+|<[^>]*>
提取网页编码: <Meta([^>]*?)charset\s*=\s*(?<charset>[^">]+)