动态正则匹配

前端之家收集整理的这篇文章主要介绍了动态正则匹配,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

需求:

1、写一个动态正则;

2、只要写出日志的Schema就可以获取到日志的正则。

package com.donews.util

import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer

/**
  * Created by yuhui on 2016/8/5.
  */

/***
例子:       www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" China 22 Beijing
第一版本    "$domain $ip - $remote_user [$timestamp] \"$http_url\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" $country $region $city"

例子 :      www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
第二版本    "$domain $ip - $remote_user [$timestamp] \"$http_url\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$e_ip\" \"$country\" \"$region\" \"$city\""

例子 :     www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" "http://www.donews.com/media/201408/2834414.shtm" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
第三版本    $domain $http_x_forwarded_for - $remote_user [$timestamp] "$http_url" "$url" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$e_ip" "$country" "$region" "$city"
 */


/**
  * Builds a log-line regular expression from a space-separated schema such as
  * `$domain $ip - [$timestamp] "$http_url"` and uses it to split log lines
  * into their captured fields.
  *
  * Every schema token becomes exactly one capture group; groups are joined by `\s`
  * and the whole expression is anchored with `^` and `$`.
  */
object DynamicRegex {

  /** Schema most recently assigned by the caller (`main` sets it before calling `tran`). */
  var cmd = ""

  /** Regular expression source used by [[lineToGroup]]. */
  var regex = ""

  /** A schema variable: `$` followed by one or more word characters. */
  private val Variable = Pattern.compile("\\$\\w+")

  /** A token made only of non-word characters (for example a literal `-`). */
  private val PunctuationOnly = Pattern.compile("^(\\W+)$")

  /** Last compiled form of `regex`, so `lineToGroup` does not recompile it for every line. */
  @volatile private var compiled: (String, Pattern) = ("", Pattern.compile(""))

  /**
    * Translates a schema into an anchored regular expression.
    *
    * Token rules:
    *  - only non-word characters (e.g. `-`)            => `(\W+)`
    *  - no variable but word characters (e.g. `GET`)   => the escaped literal, captured
    *  - starts with a variable (`$a`, `$a:$b`)         => `([\S]+)`, with every separator
    *    between variables matched literally; text after the last variable is absorbed
    *    by the final `[\S]+`
    *  - variable wrapped in literal text (`[$t]`)      => `(prefix.+suffix)`; `.+` is used
    *    because such fields may contain spaces
    *
    * Empty tokens (empty schema, consecutive spaces) are skipped.
    *
    * @param cmd the space-separated schema
    * @return the regular expression source, e.g. `^([\S]+)\s(\[.+\])$`
    */
  def tran(cmd: String): String =
    cmd.split(" ")
      .filter(_.nonEmpty)
      .map(tokenToGroup)
      .mkString("^", "\\s", "$")

  /** Converts one non-empty schema token into its capture group (without the joining `\s`). */
  private def tokenToGroup(token: String): String = {
    val m = Variable.matcher(token)
    val spans = ArrayBuffer[(Int, Int)]()
    while (m.find()) {
      spans += ((m.start(), m.end()))
    }

    if (spans.isEmpty) {
      if (PunctuationOnly.matcher(token).find()) "(\\W+)"
      else "(" + escape(token) + ")"
    } else if (spans.head._1 == 0) {
      val sb = new StringBuilder("([\\S]+")
      // Each joint keeps its own separator; one single-character class per separator
      // character keeps the historical `[:]` form for the common one-character case.
      spans.zip(spans.tail).foreach { case ((_, prevEnd), (nextStart, _)) =>
        token.substring(prevEnd, nextStart).foreach(c => sb.append(classLiteral(c)))
        sb.append("[\\S]+")
      }
      sb.append(")").toString
    } else {
      // Literal text before the first and after the last variable frames the field.
      val prefix = token.substring(0, spans.head._1)
      val suffix = token.substring(spans.last._2)
      "(" + escape(prefix) + ".+" + escape(suffix) + ")"
    }
  }

  /** Single-character class for `c`, escaping the characters that are special inside `[...]`. */
  private def classLiteral(c: Char): String =
    if ("\\[]^-&".indexOf(c) >= 0) "[\\" + c + "]" else "[" + c + "]"

  /**
    * Escapes literal text for use inside a regular expression.
    *
    * A backslash is placed before every character except `"`, letters, digits and
    * surrogate halves. Letters and digits must stay bare: a backslash in front of them
    * forms a regex construct (`\b`, `\d`, `\1`, ...) instead of a literal.
    *
    * @param original the literal text
    * @return the text with regex metacharacters backslash-escaped
    */
  def escape(original: String): String = {
    val sb = new StringBuilder()
    original.foreach { c =>
      val bare = c == '"' || Character.isLetterOrDigit(c) || Character.isSurrogate(c)
      if (!bare) sb.append('\\')
      sb.append(c)
    }
    sb.toString
  }

  /**
    * Applies the current `regex` to a log line and collects the captured groups.
    *
    * @param line the log line
    * @return groups 1..n of every match, in order; empty when the line does not match.
    *         A group that did not participate in a match is returned as `null`.
    */
  def lineToGroup(line: String): ArrayBuffer[String] = {
    val groups = ArrayBuffer[String]()
    val m = compiledRegex().matcher(line)
    while (m.find()) {
      for (i <- 1 to m.groupCount()) {
        groups += m.group(i)
      }
    }
    groups
  }

  /** Returns the compiled form of `regex`, recompiling only when the source string changed. */
  private def compiledRegex(): Pattern = {
    val source = regex
    val cached = compiled
    if (cached._1 == source) cached._2
    else {
      val fresh = Pattern.compile(source)
      compiled = (source, fresh)
      fresh
    }
  }

  /** Demo: builds the regex for the third schema version and prints the fields of a sample line. */
  def main(args: Array[String]): Unit = {

    cmd = "$domain $http_x_forwarded_for - $remote_user [$timestamp] \"$http_url\" \"$url\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$e_ip\" \"$country\" \"$region\" \"$city\""
    regex = tran(cmd)
    println(regex)
    val log = "www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] \"GET /media/201408/2834414.shtm HTTP/1.1\" \"http://www.donews.com/media/201408/2834414.shtm\" 200 11296 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\" \"-\" \"China\" \"22\" \"Beijing\""
    lineToGroup(log).foreach(x => println(x))

  }
}

输出结果:

^([\S]+)\s([\S]+)\s(\W+)\s([\S]+)\s(\[.+\])\s(".+")\s(".+")\s([\S]+)\s([\S]+)\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")$
www.donews.com
123.125.71.72
-
-
[28/Nov/2016:11:08:50 +0800]
"GET /media/201408/2834414.shtm HTTP/1.1"
"http://www.donews.com/media/201408/2834414.shtm"
200
11296
"-"
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
"-"
"China"
"22"
"Beijing"

猜你在找的正则表达式相关文章