Demo源码如下:
Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;

namespace _12提取html中的所有的Email地址
{
    /// <summary>
    /// Demo program: reads "1.htm", extracts every e-mail address with a
    /// grouped regular expression, prints each address together with its
    /// capture groups, and finally prints how many addresses belong to each
    /// of several common mail providers.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Reads "1.htm" from the current working directory,
        /// writes the extracted addresses and the per-provider statistics to
        /// the console, then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string html = File.ReadAllText("1.htm");

            // Extract e-mail addresses; parentheses define the capture groups:
            //   group 1 - user name (the part before '@')
            //   group 2 - the first domain label right after '@' (e.g. "163")
            //   group 3 - a dot-suffix such as ".com"; the group repeats, and
            //             Groups[3].Value holds the last repetition captured
            // The character class inside group 3 needs its own '+' quantifier:
            // without it each repetition consumes a dot plus exactly ONE
            // character, so "user@163.com" would be cut off at "user@163.c".
            string regEmail = @"([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9]+)(\.[a-zA-Z0-9]+)+";
            MatchCollection mc = Regex.Matches(html, regEmail);

            // Count the users of these common mail providers:
            // 163, 126, sohu, gmail, qq, sina, yahoo, hotmail
            int count_163 = 0;
            int count_126 = 0;
            int count_gmail = 0;
            int count_qq = 0;
            int count_sohu = 0;
            int count_sina = 0;
            int count_yahoo = 0;
            int count_hotmail = 0;

            foreach (Match match in mc)
            {
                // match.Value and match.Groups[0].Value hold the same string:
                // the complete matched address. Both are printed on purpose so
                // the output demonstrates that they are identical.
                Console.WriteLine(match.Value);
                Console.WriteLine(match.Groups[0].Value); // 0: complete address
                Console.WriteLine(match.Groups[1].Value); // 1: user name
                Console.WriteLine(match.Groups[2].Value); // 2: domain name
                Console.WriteLine(match.Groups[3].Value); // 3: dot-suffix

                Console.WriteLine(match.Value); // print every address

                // Capture groups are read through match.Groups[]. Index 0 is
                // the whole match, so the real groups start at index 1.
                // ToLowerInvariant() keeps the comparison culture-independent;
                // a culture-sensitive ToLower() maps 'I' to a dotless 'ı'
                // under Turkish cultures, so "SINA", "GMAIL" and "HOTMAIL"
                // would never reach their case labels.
                switch (match.Groups[2].Value.ToLowerInvariant())
                {
                    case "163":
                        count_163++;
                        break;
                    case "126":
                        count_126++;
                        break;
                    case "gmail":
                        count_gmail++;
                        break;
                    case "qq":
                        count_qq++;
                        break;
                    case "sohu":
                        count_sohu++;
                        break;
                    case "sina":
                        count_sina++;
                        break;
                    case "yahoo":
                        count_yahoo++;
                        break;
                    case "hotmail":
                        count_hotmail++;
                        break;
                }
            }

            Console.WriteLine("=============统计信息============");
            Console.WriteLine("邮箱总数:{0}", mc.Count);
            Console.WriteLine("网易163邮箱用户数:{0}", count_163);
            Console.WriteLine("网易126邮箱用户数:{0}", count_126);
            Console.WriteLine("gmail邮箱用户数:{0}", count_gmail);
            Console.WriteLine("QQ邮箱用户数:{0}", count_qq);
            Console.WriteLine("sohu邮箱用户数:{0}", count_sohu);
            Console.WriteLine("sina邮箱用户数:{0}", count_sina);
            Console.WriteLine("yahoo邮箱用户数:{0}", count_yahoo);
            Console.WriteLine("hotmail邮箱用户数:{0}", count_hotmail);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
“1.htm”截图如下:
通过调试器查看读取到的完整网页字符串,截图如下:
输出结果如下:
Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571
原文链接:https://www.f2er.com/regex/362718.html