py正则分析日志 自己用的

前端之家收集整理的这篇文章主要介绍了py正则分析日志(自己用的),前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

日后再做整理

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by weilai on 2016/12/21
# Tool that automatically analyses the train-ticket send and receive logs, wl 20161221
# 用于分析火车票发送和收通知内容 和时差
# v0.1


import os
import datetime
import time
import json
import re


# Base directories (Windows drive paths); both log kinds share one root.
path = 'X:\\AccDetail'
send_log_base_path = receive_log_base_path = 'X:\\InteractionDetail'

now = datetime.datetime.now()
# The day folder is pinned to a fixed date; the now-based value stays disabled.
today_dir = '20161221'  # now.strftime('%Y%m%d')
# Per-day directories: <base directory>\<day folder>.
work_dir = '\\'.join((path, today_dir))
send_log_path = '\\'.join((send_log_base_path, today_dir))
receive_log_path = '\\'.join((receive_log_base_path, today_dir))
# Accumulators for parsed send entries, parsed receive entries and the
# combined send/receive results.
book_list, receive_list, ticket_list = [], [], []

# One log entry: captures everything between [BEGIN] and [END] (non-greedy).
# [\s\S.] matches any character including newlines; the '.' inside the
# character class is a literal dot and is already covered by \S.
pattern_log_body = r'\[BEGIN\]([\s\S.]*?)\[END\]'
# Timestamp field: captures 'YYYY-MM-DD HH:MM:SS.mmm'.
pattern_log_time = r'\[L_B\]时间戳:(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\[L_E\]'
# Interval field: captures exactly seven digits, a dot and three digits.
pattern_log_run_time = r'\[L_B\]时间间隔:(\d{7}\.\d{3})\[L_E\]'
# Custom-message field whose value is 'book' (marks a send-side booking entry).
pattern_log_func_name = r'\[L_B\](自定义消息:book)\[L_E\]'
# Send-side entry: the sent content follows a 'j=' prefix, the received content has none.
pattern_log_send_content = r'\[L_B\]发送内容:j=([\s\S.]*?)\[L_E\]'
pattern_log_receive_content = r'\[L_B\]接收内容:([\s\S.]*?)\[L_E\]'

# Notification entry: here the 'j=' prefix is on the received content instead.
pattern_log_notice_func_name = r'\[L_B\](自定义消息:预定结果通知)\[L_E\]'
pattern_log_notice_send_content = r'\[L_B\]发送内容:([\s\S.]*?)\[L_E\]'
pattern_log_notice_receive_content = r'\[L_B\]接收内容:j=([\s\S.]*?)\[L_E\]'


class InteractiveInfo:
    """Record for a single parsed log entry (first analysis pass).

    All fields are class-level defaults; the parsing functions overwrite
    the ones they extract on each instance.
    """
    # Values taken from the log entry itself.
    log_time = log_name = log_run_time = ''
    log_param = log_result = ''
    # Identifiers and message text taken from the JSON payloads.
    flow_id = order_id = out_order_id = ''
    msg = ''
    # Path of the log file the entry was read from.
    log_file = ''
    valid_data = False
    # Display strings for train, passengers and ticket details.
    train = passager = ticket = ''


class TicketInfo:
    """Combined send/receive record for one flow id (final analysis result).

    All fields are class-level defaults that analyse_diff fills in per
    instance.
    """
    # Identifiers of the matched request.
    ticket_order_id = ticket_out_order_id = ticket_flow_id = ''
    # Log timestamps of the send entry and of the matched receive entry.
    ticket_send_time = ticket_receive_time = ''
    # Seconds between send and receive, and number of receive entries seen.
    time_diff = receive_times = 0
    info_from_file = ''
    msg = ''
    source_send_data = source_receive_data = None

# ------------------------------------------------


def regex_text(pattern_str,txt):
    """Return every match of pattern_str in txt, or None when nothing matches.

    The pattern is applied with re.M; the result has the shape produced by
    re.findall (captured groups when the pattern has groups).
    """
    found = re.findall(pattern_str, txt, re.M)
    return found if found else None


def regex_log_body(txt):
    """Return the bodies of all [BEGIN]...[END] entries in txt, or None if there are none."""
    return regex_text(pattern_log_body,txt)


def regex_log_time(txt):
    """Return the timestamp strings matched by pattern_log_time in txt, or None if there are none."""
    return regex_text(pattern_log_time,txt)


def regex_log_run_time(txt):
    """Return the interval values matched by pattern_log_run_time in txt, or None if there are none."""
    return regex_text(pattern_log_run_time,txt)


def regex_log_func_name(txt):
    """Return the 'book' custom-message matches in txt, or None when txt is not a booking entry."""
    return regex_text(pattern_log_func_name,txt)


def regex_log_send_content(txt):
    """Return the sent-content payloads (text after 'j=') found in txt, or None if there are none."""
    return regex_text(pattern_log_send_content,txt)


def regex_log_receive_content(txt):
    """Return the received-content payloads found in txt, or None if there are none."""
    return regex_text(pattern_log_receive_content,txt)


def regex_log_notice_func_name(txt):
    """Return the notification custom-message matches in txt, or None when txt is not a notification entry."""
    return regex_text(pattern_log_notice_func_name,txt)


def regex_log_notice_receive_content(txt):
    """Return the received-content payloads (text after 'j=') of a notification entry, or None if there are none."""
    return regex_text(pattern_log_notice_receive_content,txt)
# ----------------------------------------------


def analyse_send_file(log_file_path):
    """Parse one send-side log file and collect its booking entries.

    Prints the file path, reads the file as UTF-8, splits it into
    [BEGIN]...[END] entries and, for every entry carrying the 'book'
    custom message, appends an InteractiveInfo to the module-level
    book_list. A file without any entry contributes nothing.

    Raises TypeError if a booking entry lacks the timestamp, interval,
    sent-content or received-content field (the regex helpers return
    None in that case), and whatever json.loads raises on a malformed
    payload.
    """
    with open(log_file_path,'r',encoding='utf-8') as f:
        print(log_file_path)
        log_file_str = f.read()
    # regex_log_body returns None rather than an empty list when the file
    # holds no entry, so fall back to an empty list before iterating.
    for log_item in regex_log_body(log_file_str) or []:
        log_name = regex_log_func_name(log_item)
        if log_name is None:
            # Not a booking entry.
            continue
        send_log = InteractiveInfo()
        send_log.log_name = log_name[0]
        send_log.log_file = log_file_path
        send_log.log_time = regex_log_time(log_item)[0]
        send_log.log_run_time = regex_log_run_time(log_item)[0]
        # The sent payload supplies order, passengers and train; the
        # received payload supplies the flow id and the message.
        get_send_json(regex_log_send_content(log_item)[0],send_log)
        get_send_result_json(regex_log_receive_content(log_item)[0],send_log)
        book_list.append(send_log)
# ---------------------------------------------------


def get_send_result_json(json_str,send_obj):
    """Copy the flow id and message of a booking reply onto send_obj.

    json_str is the reply JSON text; its 'i' value is stored as a string in
    send_obj.flow_id and its 'm' value, followed by '|', is appended to
    send_obj.msg.
    """
    reply = json.loads(json_str)
    send_obj.flow_id = str(reply['i'])
    send_obj.msg += reply['m'] + "|"


def get_send_json(json_str,send_obj):
    """Fill send_obj with order id, passenger names and train info from a booking request.

    All values come from the 'd' object of the request JSON: 'n' becomes
    order_id, the 'pn' of every item in 'p' is comma-joined into passager,
    and 'c', 'd', 'fn', 'tn' are joined with '|' into train.
    """
    order = json.loads(json_str)['d']
    send_obj.order_id = order['n']
    send_obj.passager = ','.join(person['pn'] for person in order['p'])
    train_parts = (order['c'], order['d'], order['fn'], order['tn'])
    send_obj.train = '{0}|{1}|{2}|{3}'.format(*train_parts)
# -----------------------------------------------


def analyse_receive_file(log_file_path):
    """Parse one receive-side log file and collect its notification entries.

    Prints the file path, reads the file as UTF-8, splits it into
    [BEGIN]...[END] entries and, for every entry carrying the notification
    custom message, appends an InteractiveInfo to the module-level
    receive_list. A file without any entry contributes nothing. The sent
    content of a notification is deliberately not parsed.

    Raises TypeError if a notification entry lacks the timestamp, interval
    or received-content field (the regex helpers return None in that
    case), and whatever json.loads raises on a malformed payload.
    """
    with open(log_file_path,encoding='utf-8') as f:
        print(log_file_path)
        log_file_str = f.read()
    # regex_log_body returns None rather than an empty list when the file
    # holds no entry, so fall back to an empty list before iterating.
    for log_item in regex_log_body(log_file_str) or []:
        log_name = regex_log_notice_func_name(log_item)
        if log_name is None:
            # Not a notification entry.
            continue
        receive_log = InteractiveInfo()
        receive_log.log_name = log_name[0]
        receive_log.log_file = log_file_path
        receive_log.log_time = regex_log_time(log_item)[0]
        receive_log.log_run_time = regex_log_run_time(log_item)[0]
        receive_content = regex_log_notice_receive_content(log_item)
        get_receive_result_json(receive_content[0],receive_log)
        receive_list.append(receive_log)


def get_receive_result_json(json_str,receive_obj):
    """Copy ids, message and ticket details of a notification onto receive_obj.

    json_str is the notification JSON text. Its 'i' value becomes flow_id,
    'd.i' becomes out_order_id and 'd.n' becomes order_id (all as strings);
    'm' followed by '|' is appended to msg. The ticket string is taken from
    'd.t' (followed by '|') when present and is replaced by the
    comma-joined 'tn|si|pr' of every item in 'd.p' when that is present.
    Newlines are removed from the resulting ticket string.
    """
    json_val = json.loads(json_str)
    receive_obj.flow_id = str(json_val['i'])
    detail = json_val['d']
    receive_obj.out_order_id = str(detail['i'])
    receive_obj.order_id = str(detail['n'])
    receive_obj.msg += json_val['m'] + "|"
    ticket = receive_obj.ticket
    if 't' in detail:
        ticket = detail['t'] + '|'
    if 'p' in detail:
        # The per-passenger list takes precedence over the plain 't' text.
        ticket = ','.join(p['tn'] + '|' + p['si'] + '|' + p['pr'] for p in detail['p'])
    # str.replace returns a new string, so the result has to be stored back;
    # discarding it would leave the newlines in place.
    receive_obj.ticket = ticket.replace('\n','')


def get_receive_json(json_str,send_obj):
    """Placeholder for parsing the sent content of a notification; does nothing."""
    return None
    # -----------------------------------------------
# -----------------------------------------------


def analyse_file(analyse_dir,dir_str,analyse_func):
    """Run analyse_func on every file of the matching sub-directories.

    analyse_dir is the directory to scan, dir_str the substring a
    sub-directory name must contain, and analyse_func a callable that
    receives the path of each entry found directly inside a matching
    sub-directory. Entries of analyse_dir that match dir_str but are not
    directories are skipped.

    Raises OSError (e.g. FileNotFoundError) if analyse_dir cannot be listed.
    """
    for dir_name in os.listdir(analyse_dir):
        if dir_str not in dir_name:
            continue
        # os.path.join uses the platform separator instead of a hard-coded
        # backslash; on Windows the resulting path is the same as before.
        full_path = os.path.join(analyse_dir, dir_name)
        if not os.path.isdir(full_path):
            # A plain file whose name happens to match cannot be listed.
            continue
        for file_name in os.listdir(full_path):
            analyse_func(os.path.join(full_path, file_name))
# ----------------------------------------------


def analyse_send_log(analyse_dir):
    """Run analyse_send_file on the files of every sub-directory of analyse_dir whose name contains the send-service name."""
    analyse_file(analyse_dir,'火车票交互服务',analyse_send_file)


def analyse_receive_log(analyse_dir):
    """Run analyse_receive_file on the files of every sub-directory of analyse_dir whose name contains the notification-site name."""
    analyse_file(analyse_dir,'火车票业务收通知网站基线',analyse_receive_file)


def show_send_list(source_list):
    """Print the parsed send entries, one numbered line each, ordered by log time.

    The sorted copy of source_list is also stored in the module-level
    book_list.
    """
    global book_list
    book_list = sorted(source_list,key=lambda entry: entry.log_time)
    line_format = '{0})=={1}=={2}=={3}=={4}=={5};'
    for number, entry in enumerate(book_list, 1):
        print(line_format.format(
            str(number),
            entry.log_time,
            entry.log_name,
            entry.flow_id,
            entry.order_id,
            entry.msg,
        ))


def show_receive_list(source_list):
    """Print the parsed receive entries, one numbered line each, ordered by log time.

    The sorted copy of source_list is also stored in the module-level
    receive_list. Each line shows index, log time, log name, flow id,
    order id and message; when the message is just '|' (empty text), the
    ticket string is appended to it.
    """
    global receive_list
    receive_list = sorted(source_list,key=lambda x: x.log_time)
    p_template = '{0})=={1}=={2}=={3}=={4}=={5};'
    for i, item in enumerate(receive_list, 1):
        m = item.msg
        if m == '|':
            m += item.ticket
        # The template has six placeholders, so all six values must be
        # supplied; passing only the index and message raises IndexError.
        p_str = p_template.format(str(i),item.log_time,item.log_name,item.flow_id,item.order_id,m)
        print(p_str)
# ----------------------------------------------


def analyse_diff(send_list,result_list):
    """Pair send entries with receive entries by flow id and compute the delay.

    For every send entry with a non-empty flow_id, each receive entry with
    the same flow_id is considered in order. The first one creates a
    TicketInfo in the module-level ticket_list, with time_diff set to
    receive time minus send time in seconds (milliseconds included); each
    further one only increments receive_times on that TicketInfo.

    Raises ValueError if a log time does not match
    'YYYY-MM-DD HH:MM:SS.fff'.
    """
    time_format = "%Y-%m-%d %H:%M:%S.%f"
    for b_item in send_list:
        if b_item.flow_id == '':
            # Entries without a flow id cannot be matched.
            continue
        for r_item in result_list:
            if b_item.flow_id != r_item.flow_id:
                continue
            existing = next((x for x in ticket_list if x.ticket_flow_id == b_item.flow_id), None)
            if existing is not None:
                existing.receive_times += 1
                continue
            t = TicketInfo()
            t.ticket_send_time = b_item.log_time
            t.ticket_receive_time = r_item.log_time
            t.ticket_flow_id = b_item.flow_id
            t.ticket_order_id = b_item.order_id
            t.ticket_out_order_id = r_item.out_order_id
            t.msg = b_item.msg + '|' + r_item.msg
            t.receive_times = 1
            t.info_from_file = '{f1}|{f2}'.format(f1=b_item.log_file,f2=r_item.log_file)
            # datetime.strptime keeps the fractional seconds parsed by %f;
            # going through time.strptime drops them and skews the delay
            # by up to a second.
            received_at = datetime.datetime.strptime(t.ticket_receive_time, time_format)
            sent_at = datetime.datetime.strptime(t.ticket_send_time, time_format)
            t.time_diff = (received_at - sent_at).total_seconds()
            ticket_list.append(t)
# -------------------------------------------------


def show_result_list(source_list):
    """Print the combined send/receive results, one numbered line each, ordered by send time.

    The sorted copy of source_list is also stored in the module-level
    ticket_list.
    """
    global ticket_list
    ticket_list = sorted(source_list,key=lambda ticket: ticket.ticket_send_time)
    line_format = '{0})={1}=={2}=={3}-{4}[{5}秒]{6}次'
    for number, ticket in enumerate(ticket_list, 1):
        # info_from_file is passed as an eighth argument although the
        # template has no placeholder for it, so it is not printed.
        values = (
            str(number),
            ticket.ticket_flow_id,
            ticket.ticket_order_id,
            ticket.ticket_send_time,
            ticket.ticket_receive_time,
            str(ticket.time_diff),
            str(ticket.receive_times),
            ticket.info_from_file,
        )
        print(line_format.format(*values))
# ---------------------------------------------------


# Step 1: parse the send-side logs of the day and print them ordered by log time.
print('/***********火车票发送交互服务日志[{0}]***********/'.format(today_dir))
analyse_send_log(send_log_path)
show_send_list(book_list)
# Step 2: parse the notification logs of the day and print them ordered by log time.
print('/***********火车票接收通知日志[{0}]***********/'.format(today_dir))
analyse_receive_log(receive_log_path)
show_receive_list(receive_list)
# Step 3: pair send and receive entries by flow id and print the time differences.
print('/***********火车票占座时差计算[{0}]***********/'.format(today_dir))
analyse_diff(book_list,receive_list)
show_result_list(ticket_list)

# Sample log entry, used below to try out the [BEGIN]...[END] body pattern.
xx = """[BEGIN]
[L_B]日志ID:bb1f4d67-162f-47d0-90b0-c27bfbe218cb[L_E]
[L_B]时间点:636179106143493242[L_E]
[L_B]跟踪ID:00517WCF16122109492672990[L_E]
[L_B]时间戳:2016-12-21 09:50:14.349[L_E]
[L_B]时间间隔:0047620.052[L_E]
[L_B]应用程序名:火车票交互服务v0029[L_E]
[L_B]模块名:火车票交互[L_E]
[L_B]方法名:[L_E]
[L_B]方法说明:[L_E]
[L_B]进程ID:25968[L_E]
[L_B]线程ID:27[L_E]
[L_B]发送地址:http://121.41.85.136/a.ashx[L_E]
[L_B]异常ID:[L_E]
[L_B]异常级别:[L_E]
[L_B]自定义消息:book[L_E]
[L_B]交互类型:[L_E]
[L_B]关键信息1:1612210950140910f0500236688[L_E]
[L_B]关键信息2:[L_E]
[L_B]机器名:WIN-VDHFP5GHJ89[L_E]
[L_B]本地IP:172.17.1.242[L_E]
[L_B]发送内容:j={"a": true,"b":[{"PassagerID":1,"TicketKeyID":"1688"}],"c": "s55666","d": {  "c":"D2244","d":"2016-12-31","n":"161221095016770236688","w":false,"p":[{"pi":"555555555555555554","pn":"某","po":1,"ic":"1","xc":"O","pc":"1"}],"fc":"ICW","fn":"成都东","tc":"NIW","tn":"遂宁"    },"f": "book","r":"http://9999.com:1401/Notify","s": "c722f24f1ec88271a3c0aed7e70f6835","t": "20161221095014","w": 30000 }[L_E]
[L_B]接收内容:{
  "c": 0,"d": null,"m": "请等待异步","s": true,"w": true,"i": 2016122100000000298
}[L_E]
[L_B]路由:r+zYFqAUxzhIEINyz9RyMppo9SMJ1oVwbqlQwyzn0Wol+lxGUB1MwjcWz88Pa5ah7StDIUP/0XBirsjsZMiW0BfC8lHJzAILcpwLva4Q8/zrzra0mjlKFg@@[L_E]
[END]"""
# Extract the entry body with the same pattern as pattern_log_body and print the match list.
y = re.findall(r'\[BEGIN\]([\s\S.]*?)\[END\]',xx,re.M)
print(y)

猜你在找的正则表达式相关文章