简单后缀表达式转nfa 正则匹配 Python

前端之家收集整理的这篇文章主要介绍了简单后缀表达式转nfa 正则匹配 Python前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

简单学习了状态机的知识

先记录下来

global stack

class State:
    #counter = 0
    def __init__(self,c,out = None,out1 = None):
        self.c = c
        self.out = out
        self.out1 = out1
        self.lastlist = 0
        #State.counter += 1  
        
class Fragment:
    def __init__(self,begin,end = None):
        self.begin = begin
        self.end = end

_BEGIN = '^'  
_NULL = ' '  
def make_begin():  
    return Fragment(State(_BEGIN))
def make_null():
    return Fragment(State(_NULL))

_END = '$'
def make_end():
    return Fragment(State(_END)) 

def make_atom(char):  
    a = State(char)
    print "make_atom",a.__class__
    return Fragment(a,a)  

def make_seq(frag1,frag2):  
    if frag2 is None: return frag1  
    else:  
        #print frag1.end.__class__
        frag1.end.out = frag2.begin  
        #frag1.end = frag2.begin  
        return Fragment(frag1.begin,frag2.end)

_Split = '`'   
_MERGE = '!'     
def SplitState(state1,state2):
    return State(_Split,out = state1,out1 = state2)
def MergeState(state1,state2):
    a = State(_MERGE)
    print state1.__class__
    state1.out = a
    state2.out = a
    return a

def make_alt(frag1,frag2):  
    split = SplitState(frag1.begin,frag2.begin)  
    merge = MergeState(frag1.end,frag2.end)  
    return Fragment(split,merge)   
 
    
def build(string):
    global stack,k
    alpha = "abcdefghijklmnopqrstuvwxyz"
    c = string 
    if c == '$':
        e = make_end()
        k = k - 1
        stack[k] = make_seq(stack[k],e)
        print k
    elif c in alpha :
        stack[k] = make_atom(c)
        k = k + 1
    elif c == '|':
        k = k - 1
        e1 = stack[k]
        k = k - 1
        e2 = stack[k]
        stack[k] = make_alt(e1,e2)
        k = k + 1
    elif c == '.':
        k = k - 1
        e1 = stack[k]
        k = k - 1 
        e2 = stack[k]
        e1 = make_seq(e2,e1)
        stack[k] = e1
        k = k + 1
    elif c == '?':
        k = k - 1
        e1 = stack[k]
        split_state = State(_Split,out = e1.begin)
        merge_state = State(_MERGE)
        split_state.out1 = merge_state
        e1.end.out = merge_state
        stack[k] = Fragment(split_state,merge_state)
        k = k + 1
    elif c == '*':
        k = k - 1
        e1 = stack[k]
        split_state = State(_Split,out = e1.begin)
        merge_state = State(_MERGE)
        split_state.out1 = merge_state
        e1.end.out = split_state
        stack[k] = Fragment(split_state,merge_state)
        k = k + 1
        
        
def post2nfa(string):
    global stack,k
    stack = []
    k = 0
    for l in range(1000):
        stack.append(make_null())
        
    for i in string:
        build(i)
    
def match(string):
    global stack
    nfa = stack[0]
    curr_set = { nfa.begin }
    match_pos = []
    i = 0
    while i < len(string):
        match_set = set()
        while curr_set:
            s = curr_set.pop()
            if s.c == _BEGIN or s.c == _MERGE:
                curr_set.add(s.out)
                #print "match ",str(s.c),"out:",str(s.out.c)
            elif s.c == _Split:
                curr_set.add(s.out)
                curr_set.add(s.out1)
                #print "add",s.out1.c,s.out.c
            elif s.c == string[i]:
                match_set.add(s.out)
                print "match char","next",str(s.out.c)
                if s.out.c == _END:
                    match_pos.append(i)
                    print "matched",i
            #print "curr_set:",#for s in curr_set:
            #    print s.c,else:
                #print "pass "
                continue
            
        if match_set:
            curr_set = { x for x in match_set}
            print "match set ",for z in match_set:
                print z.c,i = i + 1
        else:
            return match_pos
    return match_pos
            
                
    

实例:
>>> post2nfa("ka|c.a*.d.$")
make_atom __main__.State
make_atom __main__.State
__main__.State
make_atom __main__.State
make_atom __main__.State
make_atom __main__.State
0
>>> a = match("kcaaad")
match char k next !
match set  ! match char c next `
match set  ` match char a next `
match set  ` match char a next `
match set  ` match char a next `
match set  ` match char d next $
matched 5
match set  $
>>> print a
[5]
>>> a = match("kcaaadasdas")
match char k next !
match set  ! match char c next `
match set  ` match char a next `
match set  ` match char a next `
match set  ` match char a next `
match set  ` match char d next $
matched 5
match set  $
>>> print a
[5]
>>> 
在第五位匹配成功

猜你在找的正则表达式相关文章