利用正则表达式获取博客园随笔(四)

前端之家收集整理的这篇文章主要介绍了利用正则表达式获取博客园随笔(四),前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

我们前天讲到了需要加入多线程来缓解界面卡死的现象,那现在就让我来给大家介绍一个由博客园的某位大牛写的线程池。(实在是忘了是谁了……)

线程池的代码奉上:

  /// <summary>
  /// A small static work queue that runs <see cref="ThreadStart"/> tasks on
  /// <see cref="BackgroundWorker"/> threads while capping how many run at once.
  /// </summary>
  /// <remarks>
  /// All shared state is guarded by a single lock. A worker slot is released on the
  /// worker thread itself (in a <c>finally</c> block) rather than in
  /// <c>RunWorkerCompleted</c>: that event is posted to the synchronization context
  /// captured when the worker was started, so a caller that blocks a UI thread inside
  /// <see cref="MyQueueUserWorkItem"/> would otherwise never see a slot become free.
  /// </remarks>
  sealed class MyThreadPool
  {
      // Guards the queue, the running-task counter and the min/max limits.
      private static readonly object lockObj = new object();
      // Tasks waiting for a free worker slot.
      private static readonly Queue<ThreadStart> threadStartQueue = new Queue<ThreadStart>();
      // Number of tasks currently executing. A plain counter is safe because it is only
      // touched under lockObj; a set of delegates would merge equal delegates into one entry.
      private static int activeWorkerCount = 0;
      // Maximum number of tasks allowed to run at the same time (never below 1).
      private static int maxThreadWorkerCount = 1;
      // Minimum worker count; only used to clamp the maximum.
      private static int minThreadWorkerCount = 0;

      /// <summary>
      /// Sets the maximum number of tasks that may run concurrently.
      /// </summary>
      /// <param name="maxThreadCount">
      /// Requested limit. Values below the configured minimum are raised to the minimum,
      /// and the effective limit is never less than 1 so that queued work can always progress.
      /// </param>
      public static void SetMaxWorkThreadCount(int maxThreadCount)
      {
          lock (lockObj)
          {
              int requested = minThreadWorkerCount > maxThreadCount
                  ? minThreadWorkerCount
                  : maxThreadCount;
              maxThreadWorkerCount = requested < 1 ? 1 : requested;
              // A dispatcher waiting for a slot may now be allowed to continue.
              Monitor.PulseAll(lockObj);
          }
      }

      /// <summary>
      /// Sets the minimum worker count.
      /// </summary>
      /// <param name="minThreadCount">
      /// Requested minimum. Values above the current maximum are lowered to the maximum.
      /// </param>
      public static void SetMinWorkThreadCount(int minThreadCount)
      {
          lock (lockObj)
          {
              minThreadWorkerCount = minThreadCount > maxThreadWorkerCount
                  ? maxThreadWorkerCount
                  : minThreadCount;
          }
      }

      /// <summary>
      /// Queues every task in <paramref name="threadStartArray"/> and dispatches them,
      /// blocking the caller until the queue has been drained. Tasks that were already
      /// dispatched may still be running when this method returns.
      /// </summary>
      /// <param name="threadStartArray">Tasks to run.</param>
      /// <exception cref="System.ArgumentNullException">
      /// <paramref name="threadStartArray"/> is null.
      /// </exception>
      public static void MyQueueUserWorkItem(List<ThreadStart> threadStartArray)
      {
          if (threadStartArray == null)
          {
              throw new System.ArgumentNullException("threadStartArray");
          }

          AddAllThreadsToPool(threadStartArray);
          ExcuteTask();
      }

      /// <summary>
      /// Appends a single task to the pending queue.
      /// </summary>
      /// <param name="ts">Task to enqueue.</param>
      private static void AddThreadToQueue(ThreadStart ts)
      {
          lock (lockObj)
          {
              threadStartQueue.Enqueue(ts);
          }
      }

      /// <summary>
      /// Appends every task in the list to the pending queue, in order.
      /// </summary>
      /// <param name="threadStartArray">Tasks to enqueue.</param>
      private static void AddAllThreadsToPool(List<ThreadStart> threadStartArray)
      {
          foreach (var threadStart in threadStartArray)
          {
              AddThreadToQueue(threadStart);
          }
      }

      /// <summary>
      /// Dispatch loop: while tasks are pending, starts one whenever fewer than the
      /// maximum are running; otherwise waits until a running task signals completion.
      /// </summary>
      private static void ExcuteTask()
      {
          lock (lockObj)
          {
              // The queue is checked and dequeued under the same lock, so concurrent
              // callers can never dequeue from an empty queue.
              while (threadStartQueue.Count > 0)
              {
                  if (activeWorkerCount < maxThreadWorkerCount)
                  {
                      ExcuteTaskByThread(threadStartQueue.Dequeue());
                  }
                  else
                  {
                      // Releases lockObj while waiting; woken by ReleaseWorkerSlot
                      // or by a change of the maximum.
                      Monitor.Wait(lockObj);
                  }
              }
          }
      }

      /// <summary>
      /// Starts one task on a <see cref="BackgroundWorker"/> and accounts for the slot it occupies.
      /// </summary>
      /// <param name="threadStart">Task to run.</param>
      private static void ExcuteTaskByThread(ThreadStart threadStart)
      {
          lock (lockObj)
          {
              activeWorkerCount++;
          }

          BackgroundWorker worker = new BackgroundWorker();
          worker.DoWork += (o, e) =>
          {
              try
              {
                  threadStart.Invoke();
              }
              finally
              {
                  // Free the slot on the worker thread, even if the task threw.
                  ReleaseWorkerSlot();
              }
          };

          try
          {
              worker.RunWorkerAsync();
          }
          catch
          {
              // The task never started, so its slot must not stay reserved.
              ReleaseWorkerSlot();
              throw;
          }
      }

      /// <summary>
      /// Marks one running task as finished and wakes any dispatcher waiting for a slot.
      /// </summary>
      private static void ReleaseWorkerSlot()
      {
          lock (lockObj)
          {
              activeWorkerCount--;
              Monitor.PulseAll(lockObj);
          }
      }
  }
View Code

然后呢再奉上有所修改的和新增的方法代码

 /// <summary>
 /// Splits the page HTML into individual post fragments and schedules one
 /// <c>Cnblogs</c> parsing task per fragment on <c>MyThreadPool</c>.
 /// </summary>
 /// <param name="Html">HTML of a blog listing page.</param>
 /// <returns>
 /// The shared <c>results</c> list. Parsing tasks append to it on worker threads, and the
 /// pool call returns once every task has been dispatched, so the list may still be
 /// growing when this method returns.
 /// </returns>
 public List<CnblogsResult> getResult(string Html)
 {
     // One match per post: everything inside a post_item_body div.
     Regex postPattern = new Regex("<div class=\"post_item_body\">(?<content>.*?)<div class=\"clear\"></div>", RegexOptions.Singleline);
     if (!postPattern.IsMatch(Html))
     {
         return results;
     }

     MatchCollection posts = postPattern.Matches(Html);
     List<ThreadStart> jobs = new List<ThreadStart>();
     for (int index = 0; index < posts.Count; index++)
     {
         // Declared inside the loop so each lambda captures its own copy.
         chuancanshu jobArgs = new chuancanshu { item = posts[index], i = index + 1 };
         jobs.Add(() => Cnblogs(jobArgs));
     }

     // Let at most 5 parsing tasks run at the same time.
     MyThreadPool.SetMaxWorkThreadCount(5);
     MyThreadPool.MyQueueUserWorkItem(jobs);
     return results;
 }
View Code
 // Serializes access to the shared results list, which is appended to from several worker threads.
 private readonly object resultsLock = new object();

 /// <summary>
 /// Parses one post fragment into a <c>CnblogsResult</c>, appends it to the shared
 /// <c>results</c> list and notifies <c>getResults</c> subscribers.
 /// </summary>
 /// <param name="obj">
 /// A boxed <c>chuancanshu</c> carrying the post's regex match and its 1-based rank.
 /// </param>
 /// <remarks>
 /// Runs on pool worker threads, so the list is only touched under a lock and subscribers
 /// receive a snapshot copy that other workers cannot modify while it is being read.
 /// A result is appended even when the fragment does not match the property pattern;
 /// in that case its fields keep their default values.
 /// </remarks>
 private void  Cnblogs(object obj)
 {
     // Extracts link, title, author and publish time from a single post fragment.
     Regex regexProperty = new Regex("<h3><a.*?href=\"(?<href>.*?)\".*?>(?<Title>.*?)</a></h3>.*?<a .*? class=\"lightblue\".*?>(?<Author>.*?)</a>.*?发布于.*?(?<time>.*?)<span", RegexOptions.Singleline);
     chuancanshu ccs = (chuancanshu)obj;

     CnblogsResult result = new CnblogsResult();
     // Match once and test Success instead of running IsMatch and Match separately.
     Match property = regexProperty.Match(ccs.item.Value);
     if (property.Success)
     {
         result.Title = property.Groups["Title"].Value;
         result.Author = property.Groups["Author"].Value;
         result.time = property.Groups["time"].Value;
         result.href = property.Groups["href"].Value;
         result.Rank = ccs.i;
     }

     List<CnblogsResult> snapshot;
     lock (resultsLock)
     {
         results.Add(result);
         snapshot = new List<CnblogsResult>(results);
     }

     // Copy the delegate first so a concurrent unsubscribe cannot null it between check and call.
     var handler = getResults;
     if (handler != null)
     {
         handler(snapshot);
     }
 }
View Code
/// <summary>
/// Argument bundle handed to a single <c>Cnblogs</c> parsing task.
/// </summary>
struct chuancanshu
{
    /// <summary>1-based position of the post on the page; stored as the result's rank.</summary>
    public int i;

    /// <summary>Regex match holding the HTML fragment of one post.</summary>
    public Match item;
}
View Code

在这里呢,我把

        // Shared result list: Cnblogs appends one entry per processed post, and getResult returns this list.
        List<CnblogsResult> results = new List<CnblogsResult>();

这个变量提取出来,作为类的成员字段(公共变量)了。

最后,奉上本例子的源码:点这里下载

猜你在找的正则表达式相关文章