# Log analysis implementation (using regular expressions)
import datetime
import re
# Sample access-log line (combined log format) used to demonstrate the parser.
logline = '''183.60.212.153 - - [19/Feb/2013:10:23:29 +0800] "GET /o2o/media.html?menu=3 HTTP/1.1" 200 16691 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'''

# Named groups let the match be turned straight into a dict keyed by field
# name. Compiled once at import time instead of on every call.
# NOTE: the referer field is matched literally as "-", so lines that carry a
# real referer do not match.
_LOG_PATTERN = re.compile(
    r'(?P<remote>[\d.]{7,}) - - '
    r'\[(?P<time>[^\[\]]+)\] '
    r'"(?P<request>[^"]+)" '
    r'(?P<status>\d{3}) '
    r'(?P<size>\d+) '
    r'"-" '
    r'"(?P<useragent>[^"]+)"\s?'
)


def _parse_time(text):
    """Convert a ``19/Feb/2013:10:23:29 +0800`` stamp to an aware datetime."""
    return datetime.datetime.strptime(text, '%d/%b/%Y:%H:%M:%S %z')


def _parse_request(text):
    """Split a request line into a dict of method, url and protocol."""
    # zip() stops at the shortest input, so missing tokens simply leave the
    # corresponding keys out and extra tokens are ignored.
    return dict(zip(('method', 'url', 'protocol'), text.split()))


# Per-field converters; fields without an entry keep their raw string value.
operations = {
    'time': _parse_time,
    'request': _parse_request,
    'status': int,
    'size': int,
}


def log_clean(line: str):
    """Parse one access-log line into a dict of typed fields.

    Args:
        line: A single log line; at most one trailing whitespace character
            (for example a newline) is tolerated.

    Returns:
        A dict with the keys ``remote``, ``time``, ``request``, ``status``,
        ``size`` and ``useragent``, where each value has been passed through
        the matching converter in ``operations``. Returns ``None`` when the
        line does not match the expected layout.

    Raises:
        ValueError: If the line matches but its timestamp cannot be parsed.
    """
    matcher = _LOG_PATTERN.fullmatch(line)
    if matcher is None:
        # Unparseable line: signal it to the caller instead of raising.
        return None

    record = {}
    for name, raw in matcher.groupdict().items():
        convert = operations.get(name)
        record[name] = raw if convert is None else convert(raw)
    return record


print(log_clean(logline))