这是较为简单的正则表达式引擎
目前只支持. + | 三种特殊字符
发现bug 请联系
已有封装好的函数,请直接使用
未来将逐渐更新
例子如图
// regulatr.cpp : 定义控制台应用程序的入口点。
//
/*______________________________________________________
regular expression engine
written by 唐hz
2015.8.10
_______________________________________________________*/
#include "stdafx.h"
#include<iostream>
#include<vector>
using namespace std;
struct State;
#define error -1//此处不确定
#define normal 1
#define finish 16
#define mul_way 2
#define start 8
#define any 32
#define error_back 64
#define self_loop1 4
struct Linker{
State *from;
State *to;
char request;
};
struct LinkerList{
Linker l;
struct LinkerList *next;
};
struct State{
int status;
int position;
LinkerList * in;
LinkerList * out;
};
struct Position{
State* p;
int i;
};
State g_State[100];
int g_next[100];
int g_start,g_finish,g_maxi=-1;
int g_can_not_any=-1;
void next(char *p){
int n=strlen(p);
int i=1,j=0;
g_next[j]=-1;
while(i<n){
if(p[i]==p[j])
g_next[i++]=j++;
else {
if(j!=0)
j=g_next[j-1]+1;
else{
j=0;
g_next[i++]=-1;
}
}
}
}
void addlinker(LinkerList **a,char b,State *c,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp=*a;
while(temp->next!=NULL){
temp=temp->next;
}
temp->next=new LinkerList;
temp=temp->next;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=NULL;
return ;
}
void addlinker2(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp=*a,*temp1;
while(temp->next&&temp->next->next!=NULL){
temp=temp->next;
}
temp1=temp->next;
temp->next=new LinkerList;
temp=temp->next;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=temp1;
return ;
}
void addlinker1(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp;
temp=new LinkerList;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=*a;
*a=temp;
return ;
}
void connect(State &a,State &b,char r,int x=0){
if(x==0){
addlinker(&(a.out),r,&a,&b);
addlinker(&(b.in),&b);}
else if(x==1){
addlinker1(&(a.out),&b);
addlinker1(&(b.in),&b);
}
else if(x==2){
addlinker2(&(a.out),&b);
addlinker2(&(b.in),&b);
}
}
void compile(char *p,char*q){
int i=0,j=0;
while(i<(int)strlen(q)){
if(q[i]=='|'){
i+=2;
}
else if(q[i]=='+'){
i++;
}
else p[j++]=q[i++];
}
p[j]='\0';
}
void construction(char *p){
char q[100];
compile(q,p);
next(q);
int i=0;
for(int j=0;j<100;j++){
g_State[j].in=g_State[j].out=NULL;
g_State[j].status=0;//
g_State[j].position=j;
}
g_State[0].status|=start;
connect(g_State[0],g_State[0],error,0);
while(*p!='\n')
{
switch(*p){
case '|':
if(i>0) i--;
p++;
g_State[i].status|=mul_way;
break;
case '+':
i--;
p--;
connect(g_State[i],g_State[i],*p,2);
g_State[i].status|=self_loop1;
i++;
p+=2;
break;
default:
if(!(g_State[i].status&start)&&!(g_State[i].status&error_back)){
g_State[i].status|=error_back;
connect(g_State[i],g_State[g_next[i-1]+1],0);}
if(*p=='.'){
g_State[i].status|=any;
}
connect(g_State[i],g_State[i+1],1);
g_State[i].status|=normal;
i++;
p++;}
}
g_State[i].status|=finish;
}//查int 字节
void realease(){
LinkerList *temp,*temp1;
for(int i=0;i<100;i++)
{
temp=g_State[i].in;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
temp=g_State[i].out;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
}
}
bool NextState(Position a,char *p,vector<Position> & find,int i){
auto temp=a.p->out;
Position rec;
bool j=false;
while(temp){
if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){
j=true;
if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){
g_can_not_any=a.p->position;
}
rec.p=((temp->l).to);
rec.i=i+1;
if(rec.i<(int)strlen(p))
find.push_back(rec);
}
else
{
if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){
if(((temp->l).to)->position<g_can_not_any){
cout<<"使g_can_not_any回复初值\n";
g_can_not_any=-1;}
rec.p=((temp->l).to);
rec.i=i;
if(!(rec.p->status&start))
find.push_back(rec);
}
}
temp=temp->next;
}
if(j) return true;
return false;
}
bool isexist( char *p){
vector<Position> find;
Position temp;
temp.p=g_State;
temp.i=0;
find.push_back(temp);
g_start=1;
g_finish=0;
g_maxi=-1;
bool canfinish=false;
while(!find.empty()){
if(temp.p->status&finish) {g_finish=temp.i;
break;
}
if(temp.i>g_maxi)
g_maxi=temp.i;
find.pop_back();
NextState(temp,p,find,temp.i);
if(!find.empty())
temp=find.back();
else if(g_maxi<(int)strlen(p)){
temp.i=g_maxi+1;
temp.p=g_State;
g_start=temp.i+1;//人是从1数而机器从0数,故加1
find.push_back(temp);
}
}
if(g_finish>=g_start) return true;
return false;
}
void output(char *p,int a,int b){
cout<<"\n位于"<<a<<"和"<<b<<"之间\n";
for(int i=a-1;i<b;i++)
cout<<p[i];
cout<<"\n";
}
void findWholeMatch(char *p){
char *str=p;
int rec=0;
while(isexist(p)&&rec<(int)strlen(str)){
output(str,g_start+rec,g_finish+rec);
rec+=g_finish;
p+=g_finish;
}
}
int main(){
char expression[100];
char content[1000];
fgets(expression,100,stdin);
fgets(content,1000,stdin);
construction(expression);
cout<<"构建完成 \n";
/*
if(isexist(content)){
cout<<"存在\n";
cout<<"开始在"<<g_start<<"结束在"<<g_finish<<"\n";
output(content,g_start,g_finish);
}
else cout<<"不存在\n";
*/
findWholeMatch(content);
system("pause");
realease();
}// regulatr.cpp : 定义控制台应用程序的入口点。
//
/*______________________________________________________
regular expression engine
written by 唐hz
2015.8.10
_______________________________________________________*/
#include "stdafx.h"
#include<iostream>
#include<vector>
using namespace std;
struct State;
#define error -1//此处不确定
#define normal 1
#define finish 16
#define mul_way 2
#define start 8
#define any 32
#define error_back 64
#define self_loop1 4
struct Linker{
State *from;
State *to;
char request;
};
struct LinkerList{
Linker l;
struct LinkerList *next;
};
struct State{
int status;
int position;
LinkerList * in;
LinkerList * out;
};
struct Position{
State* p;
int i;
};
State g_State[100];
int g_next[100];
int g_start,g_maxi=-1;
int g_can_not_any=-1;
void next(char *p){
int n=strlen(p);
int i=1,j=0;
g_next[j]=-1;
while(i<n){
if(p[i]==p[j])
g_next[i++]=j++;
else {
if(j!=0)
j=g_next[j-1]+1;
else{
j=0;
g_next[i++]=-1;
}
}
}
}
void addlinker(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp;
temp=new LinkerList;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=*a;
*a=temp;
return ;
}
void connect(State &a,&b);
}
}
void compile(char *p,j=0;
while(i<(int)strlen(q)){
if(q[i]=='|'){
i+=2;
}
else if(q[i]=='+'){
i++;
}
else p[j++]=q[i++];
}
p[j]='\0';
}
void construction(char *p){
char q[100];
compile(q,0);
while(*p!='\n')
{
switch(*p){
case '|':
if(i>0) i--;
p++;
g_State[i].status|=mul_way;
break;
case '+':
i--;
p--;
connect(g_State[i],1);
g_State[i].status|=normal;
i++;
p++;}
}
g_State[i].status|=finish;
}//查int 字节
void realease(){
LinkerList *temp,*temp1;
for(int i=0;i<100;i++)
{
temp=g_State[i].in;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
temp=g_State[i].out;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
}
}
bool NextState(Position a,int i){
auto temp=a.p->out;
Position rec;
bool j=false;
while(temp){
if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){
j=true;
if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){
g_can_not_any=a.p->position;
}
rec.p=((temp->l).to);
rec.i=i+1;
if(rec.i<(int)strlen(p))
find.push_back(rec);
}
else
{
if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){
if(((temp->l).to)->position<g_can_not_any){
cout<<"使g_can_not_any回复初值\n";
g_can_not_any=-1;}
rec.p=((temp->l).to);
rec.i=i;
if(!(rec.p->status&start))
find.push_back(rec);
}
}
temp=temp->next;
}
if(j) return true;
return false;
}
bool isexist( char *p){
vector<Position> find;
Position temp;
temp.p=g_State;
temp.i=0;
find.push_back(temp);
g_start=1;
g_finish=0;
g_maxi=-1;
bool canfinish=false;
while(!find.empty()){
if(temp.p->status&finish) {g_finish=temp.i;
break;
}
if(temp.i>g_maxi)
g_maxi=temp.i;
find.pop_back();
NextState(temp,g_finish+rec);
rec+=g_finish;
p+=g_finish;
}
}
int main(){
char expression[100];
char content[1000];
fgets(expression,g_finish);
}
else cout<<"不存在\n";
*/
findWholeMatch(content);
system("pause");
realease();
}
采用了NFA,未来将逐步更新。