今天看到一个XML解析挑战赛,借此机会抽出了一个XML解析库贴在这里,有需要的朋友可以拿去用,超级强大。
关于这个挑战赛想说一下:按理说XML的每个元素都应该带有标签(tag),可是题目给出的XML字符串却不是这样,估计出题的人对XML理解得不够透彻。
这里贴出地址:点击打开链接(注:原超链接未随文本保留)
#include <string> #include <vector> #include <map> #include <sstream> #include <algorithm> using namespace std; typedef unsigned char byte; int wmemicmp(wchar_t * src,wchar_t * dst,size_t count) { if (src == NULL || dst == NULL || count == 0) { return 0; } else { int f = 0,l = 0; while (count--) { f = toupper(*src++); l = toupper(*dst++); if (f != l) { break; } } return (f - l); } } int memrcmp(char * src,char * dst,size_t rcount) { if (src == NULL || dst == NULL || rcount == 0) { return 0; } else { int f = 0,l = 0; while (rcount--) { f = *src--; l = *dst--; if (f != l) { break; } } return (f - l); } } int wmemrcmp(wchar_t * src,l = 0; while (rcount--) { f = *src--; l = *dst--; if (f != l) { break; } } return (f - l); } } int memricmp(char * src,l = 0; while (rcount--) { f = toupper(*src--); l = toupper(*dst--); if (f != l) { break; } } return (f - l); } } int wmemricmp(wchar_t * src,l = 0; while (rcount--) { f = toupper(*src--); l = toupper(*dst--); if (f != l) { break; } } return (f - l); } } char* memmem(char * src,size_t srclen,size_t dstlen) { if (src == NULL || dst == NULL || srclen < dstlen) { return (0); } else if (srclen == dstlen) { return (memcmp(src,dst,dstlen) == 0 ? (char*)src : 0); } else { for (int i = 0,j = srclen - dstlen; i <= j; i++) { if (memcmp(src + i,dstlen) == 0) { return (src + i); } } return (0); } return 0; } wchar_t* wmemmem(wchar_t * src,size_t dstlen) { if (src == NULL || dst == NULL || srclen < dstlen) { return (0); } else if (srclen == dstlen) { return (wmemcmp(src,dstlen) == 0 ? 
(wchar_t*)src : 0); } else { for (int i = 0,j = srclen - dstlen; i <= j; i++) { if (wmemcmp(src + i,dstlen) == 0) { return (src + i); } } return (0); } } char* memimem(char * src,size_t dstlen) { if (src == NULL || dst == NULL || srclen < dstlen) { return (0); } else if (srclen == dstlen) { return (memicmp(src,j = srclen - dstlen; i <= j; i++) { if (memicmp(src + i,dstlen) == 0) { return (src + i); } } return (0); } } wchar_t* wmemimem(wchar_t * src,size_t dstlen) { if (src == NULL || dst == NULL || srclen < dstlen) { return (0); } else if (srclen == dstlen) { return (wmemicmp(src,j = srclen - dstlen; i <= j; i++) { if (wmemicmp(src + i,dstlen) == 0) { return (src + i); } } return (0); } } char* memrmem(char * src,size_t srcrlen,size_t dstrlen) { if (src == NULL || dst == NULL || srcrlen < dstrlen) { return (0); } else if (srcrlen == dstrlen) { return (memrcmp(src,dstrlen) == 0 ? (char*)src : 0); } else { for (int i = 0,j = srcrlen - dstrlen; i <= j; i++) { if (memrcmp(src - i,dstrlen) == 0) { return (src - i); } } return (0); } return 0; } wchar_t* wmemrmem(wchar_t * src,size_t dstrlen) { if (src == NULL || dst == NULL || srcrlen < dstrlen) { return (0); } else if (srcrlen == dstrlen) { return (wmemrcmp(src,dstrlen) == 0 ? 
(wchar_t*)src : 0); } else { for (int i = 0,j = srcrlen - dstrlen; i <= j; i++) { if (wmemrcmp(src - i,dstrlen) == 0) { return (src - i); } } return (0); } } char* memrimem(char * src,size_t dstrlen) { if (src == NULL || dst == NULL || srcrlen < dstrlen) { return (0); } else if (srcrlen == dstrlen) { return (memricmp(src,j = srcrlen - dstrlen; i <= j; i++) { if (memricmp(src - i,dstrlen) == 0) { return (src - i); } } return (0); } } wchar_t* wmemrimem(wchar_t * src,size_t dstrlen) { if (src == NULL || dst == NULL || srcrlen < dstrlen) { return (0); } else if (srcrlen == dstrlen) { return (wmemricmp(src,j = srcrlen - dstrlen; i <= j; i++) { if (wmemricmp(src - i,dstrlen) == 0) { return (src - i); } } return (0); } } char* memrep(char* src,int srclen,char* dst,int dstlen,char* r,int rlen,char* t,int tlen) { int brk = 0; int src_cnt = 0; int dst_cnt = 0; while(src_cnt < srclen && dst_cnt < dstlen) { brk = 1; char* temp = memmem(src + src_cnt,srclen - src_cnt,r,rlen); if (temp) { int move_cnt = temp - (src + src_cnt); if(move_cnt <= (dstlen - dst_cnt)) { memmove(dst + dst_cnt,src + src_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; if(tlen <= dstlen - dst_cnt) { memmove(dst + dst_cnt,t,tlen); src_cnt += rlen; dst_cnt += tlen; brk = 0; } } } else { int move_cnt = srclen - src_cnt; if(move_cnt <= (dstlen - dst_cnt)) { memmove(dst + dst_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; } } if (brk) { break; } } return (brk ? 
0 : dst); } wchar_t* wmemrep(wchar_t* src,wchar_t* dst,wchar_t* r,wchar_t* t,int tlen) { int brk = 0; int src_cnt = 0; int dst_cnt = 0; while(src_cnt < srclen && dst_cnt < dstlen) { brk = 1; wchar_t* temp = wmemmem(src + src_cnt,rlen); if (temp) { int move_cnt = temp - (src + src_cnt); if(move_cnt <= (dstlen - dst_cnt)) { wmemmove(dst + dst_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; if(tlen <= dstlen - dst_cnt) { wmemmove(dst + dst_cnt,tlen); src_cnt += rlen; dst_cnt += tlen; brk = 0; } } } else { int move_cnt = srclen - src_cnt; if(move_cnt <= (dstlen - dst_cnt)) { wmemmove(dst + dst_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; } } if (brk) { break; } } return (brk ? 0 : dst); } char* memirep(char* src,int tlen) { int brk = 0; int src_cnt = 0; int dst_cnt = 0; while(src_cnt < srclen && dst_cnt < dstlen) { brk = 1; char* temp = memimem(src + src_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; } } if (brk) { break; } } return (brk ? 0 : dst); } wchar_t* wmemirep(wchar_t* src,int tlen) { int brk = 0; int src_cnt = 0; int dst_cnt = 0; while(src_cnt < srclen && dst_cnt < dstlen) { brk = 1; wchar_t* temp = wmemimem(src + src_cnt,move_cnt); src_cnt += move_cnt; dst_cnt += move_cnt; } } if (brk) { break; } } return (brk ? 
0 : dst); } char* memtrimleft(char* src,char* spec,int speclen) { if (src == NULL || spec == NULL) { return (0); } else { for (int i = 0,j = srclen; i < j; i++) { if (!memmem(spec,speclen,src + i,1)) { if (i != 0) { memmove(src,srclen - i); } return src; } } return (0); } } wchar_t* wmemtrimleft(wchar_t* src,wchar_t* spec,j = srclen; i < j; i++) { if (!wmemmem(spec,1)) { if (i != 0) { wmemmove(src,srclen - i); } return src; } } return (0); } } char* memtrimright(char* src,int speclen) { if (src == NULL || spec == NULL) { return (0); } else { for (int i = srclen - 1,j = 0; i >= j; i--) { if (!memmem(spec,1)) { break; } src[i] = 0; } return src; } } wchar_t* wmemtrimright(wchar_t* src,j = 0; i >= j; i--) { if (!wmemmem(spec,1)) { break; } src[i] = 0; } return src; } } char* memtrim(char* src,int speclen) { src = memtrimleft(src,srclen,spec,speclen); src = memtrimright(src,speclen); return src; } wchar_t* wmemtrim(wchar_t* src,int speclen) { src = wmemtrimleft(src,speclen); src = wmemtrimright(src,speclen); return src; } char* memskip(char* src,1)) { return (src + i); } } return (0); } } wchar_t* wmemskip(wchar_t* src,1)) { return (src + i); } } return (0); } } char* memrskip(char* src,int srcrlen,j = srcrlen; i < j; i++) { if (!memmem(spec,src - i,1)) { return (src - i); } } return (0); } } wchar_t* wmemrskip(wchar_t* src,j = srcrlen; i < j; i++) { if (!wmemmem(spec,1)) { return (src - i); } } return (0); } } char* membrk(char* src,j = srclen; i < j; i++) { if (memmem(spec,1)) { return (src + i); } } return (0); } } wchar_t* wmembrk(wchar_t* src,j = srclen; i < j; i++) { if (wmemmem(spec,1)) { return (src + i); } } return (0); } } char* memrbrk(char* src,j = srcrlen; i < j; i++) { if (memmem(spec,1)) { return (src - i); } } return (0); } } wchar_t* wmemrbrk(wchar_t* src,j = srcrlen; i < j; i++) { if (wmemmem(spec,1)) { return (src - i); } } return (0); } } char* memnext(char* src,int speclen,char* cat,int catlen,char* brk,int brklen) { int i = 0,j = srclen; for (; i 
< j; i++) { if (memmem(cat,catlen,1)) { //cat char* nxt = memmem(src + i + 1,j - i - 1,1); if (!nxt) break; i += nxt - (src + i); //continue; } else if (memmem(brk,brklen,1)) { //break break; } else if (memmem(spec,1)) { break; } } return ((i < srclen) ? (src + i) : 0); } wchar_t* wmemnext(wchar_t* src,wchar_t* cat,wchar_t* brk,j = srclen; for (; i < j; i++) { if (wmemmem(cat,1)) { //cat wchar_t* nxt = wmemmem(src + i + 1,1); if (!nxt) break; i += nxt - (src + i); //continue; } else if (wmemmem(brk,1)) { //break break; } else if (wmemmem(spec,1)) { break; } } return ((i < srclen) ? (src + i) : 0); } char* memnexts(char* src,j = srclen - speclen; for (; i < j; i++) { if (memmem(cat,1)) { //break break; } else if (memcmp(spec,speclen) == 0) { break; } } return ((i < srclen) ? (src + i) : 0); } wchar_t* wmemnexts(wchar_t* src,j = srclen - speclen; for (; i < j; i++) { if (wmemmem(cat,1)) { //break break; } else if (wmemcmp(spec,speclen) == 0) { break; } } return ((i < srclen) ? (src + i) : 0); } char* meminexts(char* src,j = srclen - speclen; for (; i < j; i++) { if (memimem(cat,1)) { //cat char* nxt = memimem(src + i + 1,1); if (!nxt) break; i += nxt - (src + i); //continue; } else if (memimem(brk,1)) { //break break; } else if (memicmp(spec,speclen) == 0) { break; } } return ((i < srclen) ? (src + i) : 0); } wchar_t* wmeminexts(wchar_t* src,j = srclen - speclen; for (; i < j; i++) { if (wmemimem(cat,1)) { //cat wchar_t* nxt = wmemimem(src + i + 1,1); if (!nxt) break; i += nxt - (src + i); //continue; } else if (wmemimem(brk,1)) { //break break; } else if (wmemicmp(spec,speclen) == 0) { break; } } return ((i < srclen) ? 
(src + i) : 0); } char* strnstr(char* string,int len,char* substr) { return memmem(string,len,substr,strlen(substr)); } wchar_t* wcsnstr(wchar_t* string,wchar_t* substr) { return wmemmem(string,wcslen(substr)); } char* stristr(char* str1,char* str2) { char *cp = (char *) str1; char *s1,*s2; if ( !*str2 ) return((char *)str1); while (*cp) { s1 = cp; s2 = (char *) str2; while ( *s1 && *s2 && !(toupper(*s1)-toupper(*s2)) ) s1++,s2++; if (!*s2) return(cp); cp++; } return(NULL); } wchar_t* wcsistr (wchar_t * str1,wchar_t* str2) { wchar_t *cp = (wchar_t *) str1; wchar_t *s1,*s2; if ( !*str2 ) return((wchar_t *)str1); while (*cp) { s1 = cp; s2 = (wchar_t *) str2; while ( *s1 && *s2 && !(toupper(*s1)-toupper(*s2)) ) s1++,s2++; if (!*s2) return(cp); cp++; } return(NULL); } char* strnistr(char* string,char* spec) { return memimem(string,strlen(spec)); } wchar_t* wcsnistr(wchar_t* string,wchar_t* spec) { return wmemimem(string,wcslen(spec)); } char* strichr (char * string,int ch) { while (*string && (toupper(*string) != toupper(ch))) string++; if (toupper(*string) == toupper(ch)) return string; return(NULL); } wchar_t* wcsichr (wchar_t * string,int ch) { while (*string && (toupper(*string) != toupper(ch))) string++; if (toupper(*string) == toupper(ch)) return string; return(NULL); } char* strnchr(char* string,char ch) { while (len-- > 0) { if (*string != ch) { string++; } else { break; } } return ((len >= 0) ? string : 0); } wchar_t* wcsnchr(wchar_t* string,wchar_t ch) { while (len-- > 0) { if (*string != ch) { string++; } else { break; } } return ((len >= 0) ? string : 0); } char* strnichr(char* string,char ch) { while (len-- > 0) { if (toupper(*string) != toupper(ch)) { string++; } else { break; } } return ((len >= 0) ? string : 0); } wchar_t* wcsnichr(wchar_t* string,wchar_t ch) { while (len-- > 0) { if (toupper(*string) != toupper(ch)) { string++; } else { break; } } return ((len >= 0) ? 
string : 0); } char* strtrimleft(char* string,char* spec) { char* str = string; while (*str && strchr(spec,*str)) str++; if (str != string) memmove(string,str,strlen(string) - (str - string)); return string; } wchar_t* wcstrimleft(wchar_t* string,wchar_t* spec) { wchar_t* str = string; while (*str && wcschr(spec,*str)) str++; if (str != string) wmemmove(string,wcslen(string) - (str - string)); return string; } char* strntrimleft(char* string,char* spec) { return memtrimleft(string,strlen(spec)); } wchar_t* wcsntrimleft(wchar_t* string,wchar_t* spec) { return wmemtrimleft(string,wcslen(spec)); } char* strtrimright(char* string,char* spec) { int len = strlen(string); char* str = string + len; while (str != string && strchr(spec,*str)) str--; if (str - string < len) *str = 0; return string; } wchar_t* wcstrimright(wchar_t* string,wchar_t* spec) { int len = wcslen(string); wchar_t* str = string + len; while (str != string && wcschr(spec,*str)) str--; if (str - string < len) *str = 0; return string; } char* strntrimright(char* string,char* spec) { return memtrimright(string,strlen(spec)); } wchar_t* wcsntrimright(wchar_t* string,wchar_t* spec) { return wmemtrimright(string,wcslen(spec)); } char* strtrim(char* string,char* spec) { return strtrimright(strtrimleft(string,spec),spec); } wchar_t* wcstrim(wchar_t* string,wchar_t* spec) { return wcstrimright(wcstrimleft(string,spec); } char* strntrim(char* string,char* spec) { return memtrim(string,strlen(spec)); } wchar_t* wcsntrim(wchar_t* string,wchar_t* spec) { return wmemtrim(string,wcslen(spec)); } char* strrep(char* string,char* t) { int rlen = strlen(r); int tlen = strlen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { char* str = string; while(*str) { if (strncmp(str,rlen) == 0) { strncpy(str,tlen); if (rtspace) { strcpy(str + tlen,str + rlen); } str += tlen; } else { str++; } } return string; } } wchar_t* wcsrep(wchar_t* string,wchar_t* t) { int rlen = wcslen(r); int tlen = wcslen(t); int rtspace 
= rlen - tlen; if (rtspace < 0) { return 0; } else { wchar_t* str = string; while(*str) { if (wcsncmp(str,rlen) == 0) { wcsncpy(str,tlen); if (rtspace) { wcscpy(str + tlen,str + rlen); } str += tlen; } else { str++; } } return string; } } char* strnrep(char* string,char* t) { int rlen = strlen(r); int tlen = strlen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { char* str = string; while(len-- > 0) { if (strncmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } wchar_t* wcsnrep(wchar_t* string,wchar_t* t) { int rlen = wcslen(r); int tlen = wcslen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { wchar_t* str = string; while(len-- > 0) { if (wcsncmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } char* strirep(char* string,char* t) { int rlen = strlen(r); int tlen = strlen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { char* str = string; while(*str) { if (strnicmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } wchar_t* wcsirep(wchar_t* string,wchar_t* t) { int rlen = wcslen(r); int tlen = wcslen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { wchar_t* str = string; while(*str) { if (wcsnicmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } char* strnirep(char* string,char* t) { int rlen = strlen(r); int tlen = strlen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { char* str = string; while(len-- > 0) { if (strnicmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } wchar_t* wcsnirep(wchar_t* string,wchar_t* t) { int rlen = wcslen(r); int tlen = wcslen(t); int rtspace = rlen - tlen; if (rtspace < 0) { return 0; } else { wchar_t* str = string; while(len-- > 0) { if (wcsnicmp(str,str + rlen); } str += tlen; } else { str++; } } return string; } } char* strrepchr(char* string,char r,char t) { char* str = string; while(*str) { if (*str == r) { *str = t; } 
str++; } return string; } wchar_t* wcsrepchr(wchar_t* string,wchar_t r,wchar_t t) { wchar_t* str = string; while(*str) { if (*str == r) { *str = t; } str++; } return string; } char* strnrepchr(char* string,char t) { char* str = string; while(len-- > 0) { if (*str == r) { *str = t; } str++; } return string; } wchar_t* wcsnrepchr(wchar_t* string,wchar_t t) { wchar_t* str = string; while(len-- > 0) { if (*str == r) { *str = t; } str++; } return string; } char* strirepchr(char* string,char t) { char* str = string; while(*str) { if (toupper(*str) == toupper(r)) { *str = t; } str++; } return string; } wchar_t* wcsirepchr(wchar_t* string,wchar_t t) { wchar_t* str = string; while(*str) { if (toupper(*str) == toupper(r)) { *str = t; } str++; } return string; } char* strnirepchr(char* string,char t) { char* str = string; while(len-- > 0) { if (toupper(*str) == toupper(r)) { *str = t; } str++; } return string; } wchar_t* wcsnirepchr(wchar_t* string,wchar_t t) { wchar_t* str = string; while(len-- > 0) { if (toupper(*str) == toupper(r)) { *str = t; } str++; } return string; } char* strskipchr(char* string,char spec) { while(*string && spec == *string) string++; return (*string ? string : 0); } wchar_t* wcsskipchr(wchar_t* string,wchar_t spec) { while(*string && spec == *string) string++; return (*string ? string : 0); } char* strnskipchr(char* string,char spec) { while(len-- && spec == *string) string++; return ((len >= 0) ? string : 0); } wchar_t* wcsnskipchr(wchar_t* string,wchar_t spec) { while(len-- && spec == *string) string++; return ((len >= 0) ? string : 0); } char* strskip(char* string,char* spec) { while(*string && strchr(spec,*string)) string++; return (*string ? string : 0); } wchar_t* wcsskip(wchar_t* string,wchar_t* spec) { while(*string && wcschr(spec,*string)) string++; return (*string ? 
string : 0); } char* strnskip(char* string,char* spec) { return memskip(string,strlen(spec)); } wchar_t* wcsnskip(wchar_t* string,wchar_t* spec) { return wmemskip(string,wcslen(spec)); } char* strrskip(char* string,char* spec) { return memrskip(string,rlen,strlen(spec)); } wchar_t* wcsrskip(wchar_t* string,wchar_t* spec) { return wmemrskip(string,wcslen(spec)); } char* strbrkchr(char* string,char spec) { while(*string && spec != *string) string++; return (*string ? string : 0); } wchar_t* wcsbrkchr(wchar_t* string,wchar_t spec) { while(*string && spec != *string) string++; return (*string ? string : 0); } char* strnbrkchr(char* string,char spec) { while(len-- && spec != *string) string++; return ((len >= 0) ? string : 0); } wchar_t* wcsnbrkchr(wchar_t* string,wchar_t spec) { while(len-- && spec != *string) string++; return ((len >= 0) ? string : 0); } char* strrbrkchr(char* string,char spec) { while(rlen-- && spec != *string) string--; return ((rlen >= 0) ? string : 0); } wchar_t* wcsrbrkchr(wchar_t* string,wchar_t spec) { while(rlen-- && spec != *string) string--; return ((rlen >= 0) ? string : 0); } char* strbrk(char* string,char* spec) { while(*string && !strchr(spec,*string)) string++; return (*string ? string : 0); } wchar_t* wcsbrk(wchar_t* string,wchar_t* spec) { while(*string && !wcschr(spec,*string)) string++; return (*string ? 
string : 0); } char* strnbrk(char* string,char* spec) { return membrk(string,strlen(spec)); } wchar_t* wcsnbrk(wchar_t* string,wchar_t* spec) { return wmembrk(string,wcslen(spec)); } char* strrbrk(char* string,char* spec) { return memrbrk(string,strlen(spec)); } wchar_t* wcsrbrk(wchar_t* string,wchar_t* spec) { return wmemrbrk(string,wcslen(spec)); } char* strnext(char* string,char* brk) { while (*string) { if (strchr(cat,*string)) { //cat string string = strchr(string + 1,*string); if (!string) break; } else if (strchr(brk,*string)) { //break string break; } else if (strchr(spec,*string)) { break; } string++; } return ((string && *string) ? string : 0); } wchar_t* wcsnext(wchar_t* string,wchar_t* brk) { while (*string) { if (wcschr(cat,*string)) { //cat string string = wcschr(string + 1,*string); if (!string) break; } else if (wcschr(brk,*string)) { //break string break; } else if (wcschr(spec,*string)) { break; } string++; } return ((string && *string) ? string : 0); } char* strnnext(char* string,char* brk) { while (len-- > 0) { if (strchr(cat,*string)) { //cat string string = strnchr(string + 1,len - 1,*string)) { break; } string++; } return ((len >= 0) ? string : 0); } wchar_t* wcsnnext(wchar_t* string,wchar_t* brk) { while (len-- > 0) { if (wcschr(cat,*string)) { //cat string string = wcsnchr(string + 1,*string)) { break; } string++; } return ((len >= 0) ? 
string : 0); } char* strnexts(char* string,char* brk) { return memnexts(string,strlen(string),strlen(spec),cat,strlen(cat),brk,strlen(brk)); } wchar_t* wcsnexts(wchar_t* string,wchar_t* brk) { return wmemnexts(string,wcslen(string),wcslen(spec),wcslen(cat),wcslen(brk)); } char* strinexts(char* string,char* brk) { return meminexts(string,strlen(brk)); } wchar_t* wcsinexts(wchar_t* string,wchar_t* brk) { return wmeminexts(string,wcslen(brk)); } char* strnnexts(char* string,strlen(brk)); } wchar_t* wcsnnexts(wchar_t* string,wcslen(brk)); } char* strninexts(char* string,strlen(brk)); } wchar_t* wcsninexts(wchar_t* string,wcslen(brk)); } #define CP_GBK 936 // -- 简体中文GBK #define CP_BIG5 950 // -- 繁体中文Big5 #define CP_UTF8 65001 // UTF-8 translation unsigned int strn2encode(char* str,unsigned int cp) { if (len < 3) { } else if(0 == strnicmp(str,"GBK",3)) { cp = CP_GBK; } else if(0 == strnicmp(str,"GB2312",6)) { cp = CP_GBK; } else if(0 == strnicmp(str,"BIG5",4)) { cp = CP_BIG5; } else if(0 == strnicmp(str,"UTF-8",5)) { cp = CP_UTF8; } return cp; } unsigned int wcsn2encode(wchar_t* str,unsigned int cp) { if (len < 3) { } else if(0 == wcsnicmp(str,L"GBK",3)) { cp = CP_GBK; } else if(0 == wcsnicmp(str,L"GB2312",6)) { cp = CP_GBK; } else if(0 == wcsnicmp(str,L"BIG5",4)) { cp = CP_BIG5; } else if(0 == wcsnicmp(str,L"UTF-8",5)) { cp = CP_UTF8; } return cp; } unsigned int str2encode(char* str,unsigned int cp) { return strn2encode(str,strlen(str),cp); } unsigned int wcs2encode(wchar_t* str,unsigned int cp) { return wcsn2encode(str,wcslen(str),cp); } class strref : public std::pair<char*,int> { typedef std::pair<char*,int> base; public: strref() : base() {} strref(const char* str,int len = -1) { first = (char*)str; second = len < 0 ? (str ? 
strlen(first) : 0) : len; } strref(const std::string& str) { first = (char*)str.c_str(); second = str.size(); } operator const char* () const { return first; } const char* c_str() const { return first; } int size() const { return second; } }; struct strrefless { bool operator()(const strref& s1,const strref& s2) const { return strncmp(s1.first,s2.first,min(s1.second,s2.second)) < 0; } }; struct strrefiless { bool operator()(const strref& s1,const strref& s2) const { return strnicmp(s1.first,s2.second)) < 0; } }; typedef std::vector<strref> strrefarray; typedef std::pair<strref,strref> strrefpair; struct strrefpairless { bool operator()(const strrefpair& s1,const strrefpair& s2) const { return strncmp(s1.first.first,s2.first.first,min(s1.first.second,s2.first.second)) < 0; } }; struct strrefpairiless { bool operator()(const strrefpair& s1,const strrefpair& s2) const { return strnicmp(s1.first.first,s2.first.second)) < 0; } }; typedef std::vector<strrefpair> strrefpairarray; typedef std::map<strref,strref,strrefless> strref2strref; typedef std::map<strref,strrefiless> strrefi2strref; void strnkeyvalue(strrefpairarray& key2value,char* str,char* spec = "=",char* cat = "\"",char* brk = " \t\r\n") { char* nil = str + len; char* begin = str; while(begin && begin < nil) { begin = strnskip(begin,nil - begin,brk); if (begin) { strrefpair keyvalue; strref & key = keyvalue.first; char* end = 0; if(strchr(cat,*begin)) { end = strnchr(begin + 1,nil - begin - 1,*begin); if (end) { begin += 1; key.first = begin; key.second = end - begin; end = strnskip(end + 1,nil - end - 1,brk); } } else { end = strnnext(begin + 1,"",brk); if (end) { key.first = begin; key.second = end - begin; if (strchr(brk,*end)) { end = strnskip(end + 1,brk); } } } if(end && strchr(spec,*end)) { begin = strnskip(end + 1,brk); if (begin) { strref& val = keyvalue.second; if (strchr(cat,*begin); begin += 1; val.first = begin; if (end) { val.second = end - begin; end += 1; } } else { end = strnbrk(begin,brk); 
// NOTE(review): in this text every definition is collapsed onto a few very long physical
// lines, so each `//` comment below extends to the end of its physical line. Several
// parameter lists and call argument lists also look truncated (for example `len` is used
// by stresc2value() and `str` by strmarkup() although neither appears in the visible
// signature). Restore the line breaks and the missing tokens from the original source
// before building. The notes inserted between the lines describe the code as written.
//
// The next physical line holds, in order:
//  * The closing part of a definition that begins before this point: it stores `begin` and
//    a length (end - begin, or nil - begin when `end` is null) into `val` and appends
//    `keyvalue` to `key2value`.
//  * An anonymous enum of node kinds: node_element (0), node_cdata_section,
//    node_processing_instruction, node_comment, node_document_type.
//  * The start of template class strrefnode<tbase> (tbase defaults to strref):
//      - members: `parent`, `type` (byte), `data` (strref) and `childs` (vector of
//        strrefelem*). `first`/`second` are not declared here; presumably they come from
//        tbase — TODO confirm.
//      - the constructor only stores `parent`; the destructor calls clear() and nulls `parent`.
//      - clear() zeroes type/first/second and calls clearchild(); clearchild() zeroes `data`,
//        deletes every pointer held in `childs` and empties the vector.
//      - strnode(str, len) calls clear(), skips " \t\n\r" via strnskip() and returns 0 if the
//        next character is not '<'. Otherwise it records `first` and classifies the node:
//          "<?"        -> node_processing_instruction, terminated by "?>"
//          "<!["       -> node_cdata_section, terminated by "]]>"
//          "<!-"       -> node_comment, terminated by "-->"
//          other "<!"  -> node_document_type (no terminator string)
//          "</"        -> end tag of the parent: advances past the result of strnnext() and
//                         leaves the loop without setting `second` (the null check on that
//                         result is only present as a commented-out assert).
//        For kinds with a terminator it searches with strnnexts() and, when found, sets
//        `second` to the length up to and including the terminator. Otherwise it calls
//        strnnext() (argument list truncated here); for an element whose preceding character
//        is '/', `second` is set to end one character past the position strnnext() returned.
//        The remaining element handling continues on the following physical line.
val.first = begin; if (end) { val.second = end - begin; } } if (!end) { val.second = nil - begin; } key2value.push_back(keyvalue); } else { break; } } begin = end; } } } enum { node_element = 0,node_cdata_section,node_processing_instruction,node_comment,node_document_type,}; template<class tbase = strref> class strrefnode : public tbase { typedef strrefnode<tbase> strrefelem; typedef std::vector<strrefelem*> strrefelemarray; public: strrefelem* parent; byte type; strref data; strrefelemarray childs; public: strrefnode(strrefelem* parent) { this->parent = parent; } ~strrefnode() { clear(); parent = 0; } void clear() { type = 0; first = 0; second = 0; clearchild(); } void clearchild() { data.first = 0; data.second = 0; for (size_t i = 0,j = childs.size(); i < j; ++i) { delete childs[i]; } childs.clear(); } char* strnode(char* str,int len) { clear(); char* nil = str + len; char* next = str; while (next && next < nil) { next = strnskip(next,nil - next," \t\n\r"); //ignore space if (!next) { break; } if (*next == '<') { //element // Started with <,could be: // <!--...--> comment // <!DOCTYPE ...> dtd // <?target ...?> processing instruction // <![CDATA[...]]> cdata section // <NAME ...> element // first = next; type = node_element; char* node_end = 0; next++; if (*next == '?') { type = node_processing_instruction; node_end = "?>"; } else if (*next == '!') { if (*(next + 1) == '[') { type = node_cdata_section; node_end = "]]>"; } else if (*(next + 1) == '-') { type = node_comment; node_end = "-->"; } else { type = node_document_type; } } else if (*next == '/') { //parent node end </....> next = strnnext(next + 1,nil - next - 1,">","\"",""); //assert(next); next++; //second = next - first; break; } if (node_end) { next = strnnexts(next + 1,node_end,""); if (next) { next += strlen(node_end); second = next - first; } break; } else { next = strnnext(next,""); if (!next) { break; } if (type == node_element) { if (*(next - 1) == '/') { next++; second = next - first; } else { 
// The next physical line holds, in order:
//  * The rest of strrefnode::strnode() for an element that is not self-closing:
//      - `data.first` is set just past the start tag, then a child strrefelem is allocated
//        per iteration and asked to parse from `next`.
//      - the loop stops when the child found no '<' (child->first is null) or when the child
//        starts with "</"; in the latter case `second` spans to `next` (or to `nil` when
//        `next` is null) and `data.second` is the distance from data.first to that end tag.
//        Any other child is appended to `childs`.
//        NOTE(review): on both `break` paths the freshly allocated child is neither deleted
//        nor stored in `childs` — looks like a leak; confirm.
//      - if `second` is still 0 afterwards, "</" is searched from data.first with strnnexts()
//        to set data.second, and strnnext() is then used to set `second`.
//      - a non-element node without a terminator string just sets `second` to end one
//        character past the position returned by strnnext().
//      - strnode() returns `next` when it is non-null and does not point at a NUL, else 0.
//  * class strrefattr: a strref extended with `name` (strref) and `attrs` (strrefpairarray).
//  * typedef strrefelement = strrefnode<strrefattr>.
//  * strelement(elem): does nothing unless elem.type is node_element and elem.first and
//    elem.second are non-zero. The name starts right after '<' and ends where strnbrk() stops
//    on one of " \t>\r\n"; strnnext() is then used with "/>" to find the end of the attribute
//    area, which is handed to strnkeyvalue() to fill elem.attrs (argument lists truncated here).
//  * The start of stresc2value(): tables for the five entity names "lt;", "amp;", "gt;",
//    "apos;", "quot;", their lengths, and the replacement characters < & > ' ". `val` is
//    emptied and reserve(len + 1) is called, then the input is scanned for '&'; "&#"
//    introduces a numeric character reference (handled on the following physical line).
next++; data.first = next; do { strrefelem * child = new strrefelem(this); next = child->strnode(next,nil - next); if (!child->first) { //error no "<" ... break; } if (*(child->first + 1) == '/') { second = next ? next - first : nil - first; data.second = child->first - data.first; break; } else { childs.push_back(child); } } while (next); if (!second) { //find data end if no child next = strnnexts(data.first,nil - data.first,"</",""); if (next) { data.second = next - data.first; next = strnnext(next,""); if (next) { next++; second = next - first; } } } } } else { next++; second = next - first; } } } else { return 0; } break; } return (next && *next) ? next : 0; } }; class strrefattr : public strref { public: strref name; strrefpairarray attrs; }; typedef strrefnode<strrefattr> strrefelement; void strelement(strrefelement& elem) { if (elem.type != node_element) { return; } if (elem.first && elem.second) { char* nil = elem.first + elem.second; char* next = elem.first; next++; //skip '<' elem.name.first = next; elem.name.second = 0; next = strnbrk(next," \t>\r\n"); if (next) { elem.name.second = next - elem.name.first; char* attr_end = strnnext(next,"/>",""); if (attr_end) { strnkeyvalue(elem.attrs,next,attr_end - next); } } } } void stresc2value(char* str,std::string& val) { static char* code[] = { "lt;","amp;","gt;","apos;","quot;" }; static int codelen[] = { 3,4,3,5,5 }; static char* symbol = "<&>\'\""; val = ""; val.reserve(len + 1); char* nil = str + len; char* next = str; while (next && next < nil) { if (*next == '&') { bool converted = false; char* nnext = next + 1; // Is it a numeric character reference? if (*nnext == '#') { // Is it a hex number? 
// The next physical line holds, in order:
//  * The rest of stresc2value():
//      - numeric references are read in base 10, or base 16 after an 'x'; at most 7
//        characters are scanned for the terminating ';'.
//      - when ';' is found the number is converted with strtol() and appended as one byte
//        (< 0x80), as 2-byte UTF-8 (< 0x800) or as 3-byte UTF-8 for everything else.
//        NOTE(review): values above 0xFFFF also take the 3-byte branch — confirm whether
//        a 4-byte form is needed.
//      - otherwise the text after '&' is compared with each entry of code[] using strnicmp();
//        on a match the corresponding character of `symbol` is appended.
//      - an '&' that was not converted, and every other character, is copied unchanged.
//  * The start of strelement2value(elem, val), which serialises a node into a stringstream:
//      - a non-element node, or an element whose name length is 0, is written as its raw
//        text [first, first + second).
//      - otherwise it writes "<", the name and a space, then every attribute as
//        key="value" followed by a space.
//      - "/>" is written when data.second is 0 and there are no children; otherwise ">"
//        followed by the raw data (no children) or by each child serialised recursively,
//        then "</" (the closing tag is completed on the following physical line).
int base = 10; nnext++; if (*nnext == 'x') { nnext++; base = 16; } char* nnext_end = nnext; // Look for terminating semi-colon within 7 characters int n7Len = 0; while (n7Len < 7 && nnext_end < nil && *nnext_end != ';') {// only ASCII digits 0-9,A-F,a-f expected ++n7Len; nnext_end++; } // process unicode if (*nnext_end == ';') { int num = strtol(nnext,NULL,base); if (num < 0x80) val += (char)num; else if (num < 0x800) {// Convert to 2-byte UTF-8 val += (char)(((num&0x7c0)>>6) | 0xc0); val += (char)((num&0x3f) | 0x80); } else { // Convert to 3-byte UTF-8 val += (char)(((num&0xf000)>>12) | 0xe0); val += (char)(((num&0xfc0)>>6) | 0x80); val += (char)((num&0x3f) | 0x80); } next = nnext_end + 1; converted = true; } } else { // Look for matching &code; for (int match = 0; match < 5; ++match) { if (strnicmp(code[match],nnext,codelen[match]) == 0) { val += symbol[match]; next = nnext + codelen[match]; converted = true; break; } } } // If the code is not converted,leave it as is if (!converted) { val += *next; next++; } } else { val += *next; next++; } } } void strelement2value(strrefelement& elem,std::string& val) { std::stringstream ss; if (elem.type != node_element) { ss.write(elem.first,elem.second); } else { if (!elem.name.second) { ss.write(elem.first,elem.second); } else { ss << "<"; ss.write(elem.name.first,elem.name.second); ss << " "; strrefpairarray::iterator it_attr = elem.attrs.begin(); for (; it_attr != elem.attrs.end(); ++it_attr) { //if (it_attr->first.second && it_attr->second.second) { ss.write(it_attr->first.first,it_attr->first.second); ss << "=\""; ss.write(it_attr->second.first,it_attr->second.second); ss << "\" "; //} } if (!elem.data.second && elem.childs.empty()) { ss << "/>"; } else { ss << ">"; if (elem.childs.empty()) { ss.write(elem.data.first,elem.data.second); } else { for (size_t i = 0,j = elem.childs.size(); i < j; ++i) { std::string tmp; strelement2value(*elem.childs[i],tmp); ss << tmp; } } ss << "</"; 
// The next physical line holds, in order:
//  * The end of strelement2value(): writes the element name and ">" to finish the closing
//    tag, then assigns the accumulated text to `val`.
//  * typedef strrefelementarray = std::vector<strrefelement*>.
//  * strmarkup(elements, ..., len): while `next` lies inside [str, str + len) it allocates a
//    strrefelement with a null parent, runs strnode() and strelement() on it, and keeps it in
//    `elements` when its `first` is set; otherwise the element is deleted.
//  * class ParserA: owns m_str; Set() copies its argument into m_str and returns a reference
//    to the stored copy; Get() returns that reference.
//  * The start of class MarkupParserA (derived from ParserA):
//      - m_elements holds the nodes produced by strmarkup(); m_pcurelement and m_pcurchild
//        are the "current element" and "current child" cursors.
//      - the constructor nulls both cursors. The destructor calls m_elements.clear() and
//        nulls the cursors.
//        NOTE(review): the destructor does not `delete` the stored pointers, whereas Set()
//        deletes each one before clearing — looks like a leak; confirm.
//      - Set(str): resets the cursors, deletes the previous elements, stores the text through
//        Base::Set() and parses m_str's own buffer with strmarkup(), so the element strrefs
//        point into m_str.
//      - Get(): forwards to Base::Get().
//      - GetSize(): child count of the current element, or the size of m_elements when there
//        is no current element.
//      - GetName(strref&, child): copies the name of m_pcurchild (child == true) or of
//        m_pcurelement (child == false); returns false when that pointer is null.
//      - GetName(std::string&, child) and GetTagName(std::string&, child) wrap the above;
//        the char* overload of GetTagName starts here.
ss.write(elem.name.first,elem.name.second); ss << ">"; } } } val = ss.str(); } typedef std::vector<strrefelement*> strrefelementarray; void strmarkup(strrefelementarray& elements,int len) { char* nil = str + len; char* next = str; while (next && next < nil) { strrefelement *elem = new strrefelement(0); next = elem->strnode(next,nil - next); strelement(*elem); if (elem->first) { elements.push_back(elem); } else { delete elem; } } } class ParserA { protected: std::string m_str; public: const std::string& Set(std::string& str) { m_str = str; return m_str; } const std::string& Get() { return m_str; } }; class MarkupParserA : public ParserA { typedef ParserA Base; protected: strrefelementarray m_elements; strrefelement* m_pcurelement; //current element strrefelement* m_pcurchild; //cur child public: MarkupParserA() { //m_elements; m_pcurelement = 0; //current element m_pcurchild = 0; //cur child } ~MarkupParserA() { m_elements.clear(); m_pcurelement = 0; //current element m_pcurchild = 0; //cur child } const std::string& Set(std::string& str) { m_pcurelement = 0; m_pcurchild = 0; for (size_t i = 0,j = m_elements.size(); i < j; ++i) { delete m_elements[i]; } m_elements.clear(); Base::Set(str); strmarkup(m_elements,(char*)m_str.c_str(),m_str.size()); return m_str; } const std::string& Get() { return Base::Get(); } int GetSize() { if (m_pcurelement) { return m_pcurelement->childs.size(); } return m_elements.size(); } bool GetName(strref& name,bool child = false) { if (!child) { if (m_pcurelement) { name = m_pcurelement->name; return true; } } else { if (m_pcurchild) { name = m_pcurchild->name; return true; } } return false; } bool GetName(std::string& name,bool child = false) { strref strname; if (GetName(strname,child)) { name.assign(strname.first,strname.second); return true; } return false; } std::string& GetTagName(std::string& name,bool child = false) { GetName(name,child); return name; } char* GetTagName(char* name,bool child = false) { strref strname; 
// The next physical line continues inside class MarkupParserA:
//  * Rest of GetTagName(char*): copies at most `len` characters of the name and
//    NUL-terminates (`len` is not part of the visible signature).
//  * Get(name, val): runs Find(name). When the found child has exactly one child node of
//    kind cdata / processing instruction / comment / document type, `val` is taken from the
//    found child's `data` with fixed offsets (+9/-12, +8/-10, +4/-7 and +9/-10 respectively);
//    in every other case `val` is the found child's `data`. Returns false only when Find() fails.
//  * Get(std::string, std::string): wrapper that copies the result into `val`.
//  * GetDataString(std::string...): returns `def` when the lookup fails.
//    NOTE(review): the default argument `std::string& def = std::string("")` binds a
//    temporary to a non-const reference — confirm the target compiler accepts this.
//  * GetDataString(char*...): copies at most `len` characters and NUL-terminates, or
//    strcpy()s `def` on failure (`len` is not part of the visible signature).
//  * Get(name, attr, val): runs Find(name), then scans m_pcurchild->attrs linearly and
//    compares keys with strnicmp() over the shorter of the two lengths.
//    NOTE(review): comparing only the shorter length lets a key match when one name is a
//    prefix of the other — verify this is intended.
if(GetName(strname,child)) { len = min(strname.second,len); strncpy(name,strname.first,len); name[len] = 0; } return name; } //get data bool Get(const strref& name,strref& val) { bool ret = false; if (Find(name)) { if (m_pcurchild->childs.size() == 1) { switch (m_pcurchild->childs[0]->type) { case node_cdata_section: // <![CDATA[...]]> cdata section val.first = m_pcurchild->data.first + 9; val.second = m_pcurchild->data.second - 12; ret = true; break; case node_processing_instruction: // <?target ...?> processing instruction val.first = m_pcurchild->data.first + 8; val.second = m_pcurchild->data.second - 10; ret = true; break; case node_comment: //<!--...--> comment val.first = m_pcurchild->data.first + 4; val.second = m_pcurchild->data.second - 7; ret = true; break; case node_document_type: // <!DOCTYPE ...> dtd val.first = m_pcurchild->data.first + 9; val.second = m_pcurchild->data.second - 10; ret = true; break; default: break; } } if (!ret) { val = m_pcurchild->data; ret = true; } } return ret; } bool Get(const std::string& name,std::string& val) { strref strname((char*)name.c_str(),name.size()); strref strval; if (Get(strname,strval)) { val.assign(strval.first,strval.second); return true; } return false; } std::string& GetDataString(const std::string& name,std::string& val,std::string& def = std::string("")) { if(!Get(name,val)) { val = def; } return val; } char* GetDataString(const char* name,char* val,char* def = "") { strref strname(name,strlen(name)); strref strval; if (Get(strname,strval)) { len = min(strval.second,len); strncpy(val,strval.first,len); val[len] = 0; } else { strcpy(val,def); } return val; } bool Get(const strref& name,const strref& attr,strref& val) { if (Find(name)) { strrefpairarray& attrs = m_pcurchild->attrs; for (size_t i = 0,j = attrs.size(); i < j; ++i) { strrefpair & keyvalue = attrs[i]; if (strnicmp(keyvalue.first.first,attr.first,min(keyvalue.first.second,attr.second)) == 0) { val = keyvalue.second; return true; } } } return 
// The next physical line continues inside class MarkupParserA:
//  * End of the strref attribute Get() (returns false when nothing matched).
//  * std::string attribute Get() (signature truncated in this text): on success the raw
//    attribute value is decoded into `val` through stresc2value().
//  * ResetPos(pelement): walks the children of `pelement` (or m_elements when it is null),
//    calls strelement() on every element node whose name length is still 0, then makes
//    `pelement` the current element and clears the current child.
//  * IntoElem(): ResetPos(m_pcurchild), i.e. the current child becomes the current element.
//  * OutofElem(): when there is a current element, ResetPos() on its parent.
//  * Start of Find(name): with an empty name the search result is simply m_pcurelement;
//    otherwise strnbrk() looks for a '\' or '/' separator in the name (the lookup itself is
//    on the following physical line).
false; } bool Get(const std::string& name,const std::string attr,name.size()); strref strattr((char*)attr.c_str(),attr.size()); strref strval; if (Get(strname,strattr,strval)) { stresc2value(strval.first,strval.second,val); //val.assign(strval.first,strval.second); return true; } return false; } void ResetPos(strrefelement* pelement = 0) { strrefelementarray& childs = pelement ? pelement->childs : m_elements; size_t i = 0,j = childs.size(); for (; i < j; ++i) { strrefelement & child = *childs[i]; if (child.type != node_element) { continue; } if (!child.name.second) { strelement(child); } } m_pcurelement = pelement; m_pcurchild = 0; } void IntoElem() { //assert(m_pcurchild); ResetPos(m_pcurchild); } void OutofElem() { if (m_pcurelement) { ResetPos(m_pcurelement->parent); } } bool Find(const strref& name) { strrefelement* ptempelement = m_pcurelement; if (!name.first || !name.second) { //return m_pcurelement; } else { char* str = strnbrk(name.first,name.second,"\\/"); if (!str) { //if no sec break find direct. 
// The next physical line continues inside class MarkupParserA:
//  * Rest of Find(name): without a separator the name is looked up with FindChild() under
//    the current element. With separators the name is treated as a path: each segment is
//    looked up with FindChild() under the previous result, stopping when a segment is
//    missing or no path remains. The result is stored in m_pcurchild and Find() returns
//    whether it is non-null.
//  * First() is Next(0). Next(cookie) uses `cookie` as an index into m_elements (no current
//    element) or into the current element's childs; it stores that entry in m_pcurchild and
//    returns cookie + 1, or returns 0 when the index is past the end.
//  * FirstAttribute()/NextAttribute() apply the same cookie scheme to
//    m_pcurelement->attrs and return 0 when there is no current element.
//  * FirstAttributeString()/NextAttributeString() copy the results into std::string or
//    char* buffers; several of these signatures and bodies are truncated in this text.
ptempelement = FindChild(name,ptempelement); } else { //else find child strref sec(name.first,str - name.first); strref next(str + 1,name.second - sec.second - 1); do { //assert(sec.first && sec.second); ptempelement = FindChild(sec,ptempelement); if (!ptempelement) { break; } if (!next.first || !next.second) { break; } sec.first = next.first; sec.second = next.second; str = strnbrk(next.first,next.second,"\\/"); if (str) { sec.second = str - sec.first; next.first = str + 1; next.second = next.second - sec.second - 1; } else { next.first = 0; next.second = 0; } } while (true); } } m_pcurchild = ptempelement; return (m_pcurchild!=0); } long First() { return Next(0); } long Next(long cookie) { if (m_pcurelement == 0) { if (cookie < m_elements.size()) { m_pcurchild = m_elements[cookie]; ++cookie; return cookie; } } else { if (cookie < m_pcurelement->childs.size()) { m_pcurchild = m_pcurelement->childs[cookie]; ++cookie; return cookie; } } return 0; } long FirstAttribute(strref& attr,strref& val) { return NextAttribute(attr,val,0); } long NextAttribute(strref& attr,strref& val,long cookie) { if (m_pcurelement) { if (cookie < m_pcurelement->attrs.size()) { attr = m_pcurelement->attrs[cookie].first; val = m_pcurelement->attrs[cookie].second; ++cookie; return cookie; } } return 0; } long FirstAttributeString(std::string& attr,std::string& val) { long cookie = 0; strref strattr,strval; if((cookie = FirstAttribute(strattr,strval))) { attr.assign(strattr.first,strattr.second); val.assign(strval.first,strval.second); return cookie; } return 0; } long NextAttributeString(std::string& attr,long cookie) { strref strattr,strval; if((cookie = NextAttribute(strattr,strval,cookie))) { attr.assign(strattr.first,strval.second); return cookie; } return 0; } long FirstAttributeString(char* attr,int attrlen,int vallen) { long cookie = 0; strref strattr,strval))) { attrlen = min(strattr.second,attrlen); strncpy(attr,strattr.first,attrlen); attr[attrlen] = 0; vallen = 
// The next physical line holds, in order:
//  * The rest of the char* attribute-string helpers and the GetAttributeString() overloads,
//    which return `def` when the attribute lookup fails (text truncated in several places).
//  * protected FindChild(name, parent): first tests m_pcurchild when its parent equals
//    `parent` (same name length and strnicmp() == 0); otherwise scans the children of
//    `parent` (or m_elements when it is null), skipping non-element nodes, calling
//    strelement() on entries whose name length is still 0, and returning the first entry
//    whose name matches, or 0 when none does.
//  * End of class MarkupParserA.
//  * Start of _tmain(): a demo that loads a sample document with two <Book> entries, finds
//    "Books", descends into it and writes tag names and child cookies into a stringstream.
min(strval.second,vallen); strncpy(val,vallen); val[vallen] = 0; return cookie; } return 0; } long NextAttributeString(char* attr,int vallen,cookie))) { attrlen = min(strattr.second,vallen); val[vallen] = 0; return cookie; } return 0; } std::string& GetAttributeString(std::string& name,std::string& attr,std::string& def = std::string("")) { if (!Get(name,attr,val)) { val = def; } return val; } char* GetAttributeString(char* name,char* attr,strlen(name)); strref strattr(attr,strlen(attr)); strref strval; if (Get(strname,def); } return val; } protected: strrefelement* FindChild(const strref& name,strrefelement* parent) { //first find cur child if (m_pcurchild && m_pcurchild->parent == parent) { if (m_pcurchild->name.second == name.second && !strnicmp(m_pcurchild->name.first,name.first,name.second)) { return m_pcurchild; } } //second find all child strrefelementarray& childs = parent ? parent->childs : m_elements; size_t i = 0,j = childs.size(); for (; i < j; ++i) { strrefelement & child = *childs[i]; if (child.type != node_element) { continue; } if (!child.name.second) { strelement(child); } if (child.name.second == name.second && !strnicmp(child.name.first,name.second)) { return childs[i]; } } return 0; } }; int _tmain(int argc,_TCHAR* argv[]) { std::string in = "<?xml version=\"1.0\" ?><Books><Book><Name Name = \"The C++ Programming Language\" Author=\"Bjarne Stroustrup\" /></Book><Book><Name Name = \"Effective C++\" Author = \"Scott Meyers\" /></Book></Books>"; //string out = "Books\r\n\tBook 1\r\n\t\tName:The C++ Programming Language\r\n\t\tAuthor:Bjarne Stroustrup\r\n\tBook 2\r\n\t\tName:Effective C++\r\n\t\tAuthor:Scott Meyers"; std::string str,out; MarkupParserA xml; xml.Set(in); std::stringstream ss; if(xml.Find("Books")) { xml.IntoElem(); ss << xml.GetTagName(str) << "\r\n\t"; for(long cookie1 = xml.First(); cookie1; cookie1 = xml.Next(cookie1)) { xml.IntoElem(); ss << xml.GetTagName(str) << cookie1 << "\r\n\t\t"; for(long cookie2 = xml.First(); cookie2; 
// The last physical line is the rest of _tmain(): for every grandchild it descends with
// IntoElem(), writes each attribute as name:value (separated by "\r\n\t\t", with "\r\n\t"
// after the last one) using FirstAttributeString()/NextAttributeString(), and balances every
// IntoElem() with OutofElem(). Finally it prints the buffer with printf(), calls
// system("pause") and returns 0.
cookie2 = xml.Next(cookie2)) { xml.IntoElem(); std::string name,val; for(long attrcookie2 = xml.FirstAttributeString(name,val); attrcookie2; ) { ss << name << ":" << val; attrcookie2 = xml.NextAttributeString(name,attrcookie2); if(attrcookie2) { ss << "\r\n\t\t"; } else { ss << "\r\n\t"; } } xml.OutofElem(); } xml.OutofElem(); } xml.OutofElem(); } printf("%s\n",ss.str().c_str()); system("pause"); return 0; }