前端之家收集整理的这篇文章主要介绍了
正则表达式,模拟网络爬虫小例子,
前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
package cn.zhengze;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small "web crawler" style demo: scans a local HTML file line by line and
 * collects every substring that looks like an e-mail address according to a
 * regular expression.
 */
public class netbug {

    /**
     * Reads {@code mail.html} from the current working directory, extracts all
     * e-mail-like substrings and prints each one on its own line to standard
     * output.
     *
     * @param args command-line arguments; not used
     * @throws IOException if {@code mail.html} cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file = new File("mail.html");
        // One or more word characters, '@', an alphanumeric host label, then
        // one to three dot-separated groups of two or three letters.
        String regex = "\\w+@[a-zA-Z0-9]+(\\.[a-zA-Z]{2,3}){1,3}";
        List<String> mailList = getMails(file, regex);
        for (String mail : mailList) {
            System.out.println(mail);
        }
    }

    /**
     * Returns every substring of {@code file} that matches {@code regex}.
     *
     * <p>The file is processed one line at a time, so a match can never span a
     * line break. Matches are returned in the order they appear in the file;
     * duplicates are kept.
     *
     * @param file  text file to scan
     * @param regex regular expression applied to each line
     * @return all matches in order of appearance; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> getMails(File file, String regex)
            throws IOException {
        List<String> list = new ArrayList<String>();
        // try-with-resources guarantees the reader (and the underlying file
        // handle) is closed on both normal completion and on an exception.
        // Note: FileReader decodes the file using the JVM's default charset.
        try (BufferedReader bufr = new BufferedReader(new FileReader(file))) {
            Pattern p = Pattern.compile(regex);
            String line;
            while ((line = bufr.readLine()) != null) {
                Matcher m = p.matcher(line);
                while (m.find()) {
                    list.add(m.group());
                }
            }
        }
        return list;
    }
}