重点是先抓取 <td> 里的内容，其它的就好说了：
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace ReadFile
{
    /// <summary>
    /// Console utility that reads a saved HTML page from disk and prints every
    /// <c>&lt;td class="c-default" &gt;</c> cell it contains.
    /// </summary>
    class Program
    {
        /// <summary>Location of the saved HTML file to scan.</summary>
        private const string FilePath = @"E:\vs\bd.txt";

        /// <summary>
        /// Matches one <c>&lt;td class="c-default" &gt;...&lt;/td&gt;</c> element. The tempered
        /// <c>(?!&lt;/?td&gt;)</c> lookahead stops the lazy body from running across a bare
        /// <c>&lt;td&gt;</c> or <c>&lt;/td&gt;</c> tag.
        /// </summary>
        private const string PatternBlock = "<td class=\"c-default\" >(?:(?!</?td>)[\\s\\S])*?</td>";

        /// <summary>
        /// Entry point: reads the file as UTF-8, prints each matching cell preceded by a
        /// separator line, then waits for a key press so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // File.ReadAllText opens, reads and closes the file itself, so no reader
            // is left undisposed.
            string fileContent = File.ReadAllText(FilePath, Encoding.UTF8);

            foreach (Match cell in FindCells(fileContent))
            {
                Console.WriteLine("================================================");
                Console.WriteLine(cell.Value);
            }

            Console.Read();
        }

        /// <summary>
        /// Strips line breaks and tabs from <paramref name="html"/> and returns every
        /// match of <see cref="PatternBlock"/> in the flattened text.
        /// </summary>
        /// <param name="html">Raw HTML text to search.</param>
        /// <returns>The collection of matched cells, in document order.</returns>
        private static MatchCollection FindCells(string html)
        {
            // Flatten the markup first so each cell is a single line when printed.
            string flattened = Regex.Replace(html, "[\r\n\t]+", "");

            // No RegexOptions needed: the pattern has no ^/$ anchors, so Multiline
            // would not change what is matched.
            return Regex.Matches(flattened, PatternBlock);
        }
    }
}