简单学习了状态机的知识
先记录下来
global stack class State: #counter = 0 def __init__(self,c,out = None,out1 = None): self.c = c self.out = out self.out1 = out1 self.lastlist = 0 #State.counter += 1 class Fragment: def __init__(self,begin,end = None): self.begin = begin self.end = end _BEGIN = '^' _NULL = ' ' def make_begin(): return Fragment(State(_BEGIN)) def make_null(): return Fragment(State(_NULL)) _END = '$' def make_end(): return Fragment(State(_END)) def make_atom(char): a = State(char) print "make_atom",a.__class__ return Fragment(a,a) def make_seq(frag1,frag2): if frag2 is None: return frag1 else: #print frag1.end.__class__ frag1.end.out = frag2.begin #frag1.end = frag2.begin return Fragment(frag1.begin,frag2.end) _Split = '`' _MERGE = '!' def SplitState(state1,state2): return State(_Split,out = state1,out1 = state2) def MergeState(state1,state2): a = State(_MERGE) print state1.__class__ state1.out = a state2.out = a return a def make_alt(frag1,frag2): split = SplitState(frag1.begin,frag2.begin) merge = MergeState(frag1.end,frag2.end) return Fragment(split,merge) def build(string): global stack,k alpha = "abcdefghijklmnopqrstuvwxyz" c = string if c == '$': e = make_end() k = k - 1 stack[k] = make_seq(stack[k],e) print k elif c in alpha : stack[k] = make_atom(c) k = k + 1 elif c == '|': k = k - 1 e1 = stack[k] k = k - 1 e2 = stack[k] stack[k] = make_alt(e1,e2) k = k + 1 elif c == '.': k = k - 1 e1 = stack[k] k = k - 1 e2 = stack[k] e1 = make_seq(e2,e1) stack[k] = e1 k = k + 1 elif c == '?': k = k - 1 e1 = stack[k] split_state = State(_Split,out = e1.begin) merge_state = State(_MERGE) split_state.out1 = merge_state e1.end.out = merge_state stack[k] = Fragment(split_state,merge_state) k = k + 1 elif c == '*': k = k - 1 e1 = stack[k] split_state = State(_Split,out = e1.begin) merge_state = State(_MERGE) split_state.out1 = merge_state e1.end.out = split_state stack[k] = Fragment(split_state,merge_state) k = k + 1 def post2nfa(string): global stack,k stack = [] k = 0 for l in range(1000): stack.append(make_null()) for i in string: build(i) def match(string): global stack nfa = stack[0] curr_set = { nfa.begin } match_pos = [] i = 0 while i < len(string): match_set = set() while curr_set: s = curr_set.pop() if s.c == _BEGIN or s.c == _MERGE: curr_set.add(s.out) #print "match ",str(s.c),"out:",str(s.out.c) elif s.c == _Split: curr_set.add(s.out) curr_set.add(s.out1) #print "add",s.out1.c,s.out.c elif s.c == string[i]: match_set.add(s.out) print "match char","next",str(s.out.c) if s.out.c == _END: match_pos.append(i) print "matched",i #print "curr_set:",#for s in curr_set: # print s.c,else: #print "pass " continue if match_set: curr_set = { x for x in match_set} print "match set ",for z in match_set: print z.c,i = i + 1 else: return match_pos return match_pos
实例:
>>> post2nfa("ka|c.a*.d.$") make_atom __main__.State make_atom __main__.State __main__.State make_atom __main__.State make_atom __main__.State make_atom __main__.State 0 >>> a = match("kcaaad") match char k next ! match set ! match char c next ` match set ` match char a next ` match set ` match char a next ` match set ` match char a next ` match set ` match char d next $ matched 5 match set $ >>> print a [5] >>> a = match("kcaaadasdas") match char k next ! match set ! match char c next ` match set ` match char a next ` match set ` match char a next ` match set ` match char a next ` match set ` match char d next $ matched 5 match set $ >>> print a [5] >>>在第五位匹配成功 原文链接:https://www.f2er.com/regex/359361.html