前端之家收集整理的这篇文章主要介绍了《ICU正则表达式初试》,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
学习了一下ICU下的正则表达式用法,随即编码一试,代码如下:
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "unicode/regex.h"
using namespace std;
int reg_exp_match(const char*pat,const char* sour);
#pragma comment(lib,"icuuc.lib")
#pragma comment(lib,"icuin.lib")
int main()
{
    // Subject text: three yyyy-mm-dd dates buried in unrelated characters.
    const char* const text = "ab2013-04-26dfd6f2012-12-09dhaac8kj2016-05-12hdacc4s";
    // Date-like pattern: 2-4 digits, '-', 1-2 digits, '-', 1-2 digits.
    // (Earlier experiments used "a[abc]c+[0-9]" and an anchored, grouped
    // variant of the date pattern.)
    const char* const pattern = "[0-9]{2,4}-[0-9]{1,2}-[0-9]{1,2}";

    // Print how many matches were found.
    const int matches = reg_exp_match(pattern, text);
    cout << matches << endl;

    // Keep the console window open; "pause" is a Windows shell command.
    system("pause");
    return 0;
}
int reg_exp_match(const char*pat,const char* sour)
{
int num = 0;
int LEN = strlen(sour);
UConverter *cv = NULL;
RegexPattern *REPattern = NULL;///正则表达式
RegexMatcher *REMatcher = NULL;//匹配器
UErrorCode status = U_ZERO_ERROR;
UnicodeString patString(pat);
//Unicode正则表达式组装,这些函数经常代替构造函数来创建RegexPattern对象
REPattern = RegexPattern::compile(patString,status);
if (U_FAILURE(status))
{
return 0;
}
//把母串转换为Unicode
UnicodeString inputString(sour);
//剔除字符串中的一些序列
UnicodeString unEscapedInput = inputString.unescape();
//创建一个正则表达式匹配器
REMatcher = REPattern->matcher(unEscapedInput,status);
if (U_FAILURE(status))
{
return 0;
}
//UnicodeString ss = REMatcher->input();//返回母串
//UnicodeString ss = REMatcher->refreshInputText("123",status);
//UnicodeString another = (UnicodeString)"abc5fdhaac2kjhdacc1h";
//REMatcher->reset(another);
////用replaceStr替换母串中第一个的正则表达式字符串
/*UnicodeString substring = REMatcher->replaceFirst(replaceStr,status);*/
int32_t leng = inputString.length();
char *result = new char[4*leng];
memset(result,4*leng);
cv = ucnv_open("GB18030",&status);
ucnv_fromUChars(cv,result,4*leng,inputString.getBuffer(),leng,&status);
ucnv_close(cv);
int64_t pos = 0;
LEN = strlen(result);
printf("%s\n",result);
UnicodeString dest[10];
//该函数以模式串为分隔符将待处理的字符串分为几组存放到数组dest中
int32_t gp = REMatcher->split(inputString,dest,10,status);
//int64_t pos = 0;
//int32_t count = 0;
//if(REMatcher->find())
//{
// //必须在find函数之后str才有值
// UnicodeString str = REMatcher->group(status);
//}
//groupCount函数根据正则表达式中的括号数目来计数的
//count = REMatcher->groupCount();
while(pos < LEN)
{
if(REMatcher->find(pos,status))
{
/*if(REMatcher->find())
{*/
//必须在find函数之后str才有值
UnicodeString str = REMatcher->group(status);
/*}*/
num++;
pos = REMatcher->end64(status);
}
else
{
break;
}
}
//count = REMatcher->groupCount();
return num;
}
原文链接:https://www.f2er.com/regex/362453.html