pongo庞果：xml字符串文件的解析——为什么我的代码效率低？

最近正在学习 C++ STL，尝试用 C++ 解答庞果网上的在线编程挑战题目，结果总是提示运行时间超过 3 秒的限制。这是为什么呢？我只是用了 string 而已啊。

题目：http://hero.pongo.cn/Home/Index 蓝港在线：xml字符串文件的解析

代码贴出来，留待日后有能力了改进效率：

#include <iostream>
#include <sstream>
#include <string>
#include <boost/progress.hpp>

using namespace std;
using namespace boost;

// Converts the integer n to its decimal text representation by streaming it
// into an in-memory output stream and returning the accumulated characters.
string intToString(const int n )
{
   std::ostringstream formatter;
   formatter << n;
   return formatter.str();
}

// Strips trailing space characters (' ') from s in place.
// Despite its name, the left side of the string is never modified, and only
// the plain space character is removed (tabs and line breaks are kept).
// A string that is empty or consists solely of spaces ends up empty.
void ltrim(string& s)
{
   const string::size_type lastKept = s.find_last_not_of(' ');
   if (lastKept == string::npos)
   {
      // nothing but spaces (or nothing at all): drop everything
      s.clear();
      return;
   }
   // cut the string right after the last non-space character
   s.resize(lastKept + 1);
}

// LAB means Left Angle Brace: "<".
// Advances pos until it refers to the next '<' in str (or to an embedded
// '\0'), or until it equals str.length() when neither is found.
//   str - text being scanned (not modified)
//   pos - in/out scan position; left unchanged when it already refers to '<'
// A pos beyond the end of the string is clamped to str.length() and a negative
// pos is left untouched, so the string is never indexed out of range.
void jumpTilEndorLAB(string& str,int& pos)
{
   const int length = static_cast<int>(str.length());
   if (pos < 0)
   {
      return;   // invalid position: nothing sensible to scan
   }
   if (pos > length)
   {
      pos = length;   // already past the end: report "end of text"
      return;
   }
   // The explicit bound replaces the reliance on str[length] being '\0'.
   while ((pos < length) && (str[pos] != '\0') && (str[pos] != '<'))
   {
      ++pos;
   }
}

// Skips an XML declaration of the form "<?xml .... ?>".
//   str - text being parsed (not modified)
//   pos - in/out position. When str has "<?" at pos, pos is moved to the first
//         '<' that follows the terminating "?>", or to str.length() when the
//         declaration is unterminated or nothing follows it. When there is no
//         "<?" at pos, pos is left unchanged.
// The terminator is located by searching for the two-character sequence "?>",
// so a '<' or a lone '?' / '>' inside the declaration does not end it early.
void parseXmlHeader(string& str,int& pos)
{
   const int length = static_cast<int>(str.length());
   if ((pos < 0) || (pos + 1 >= length))
   {
      return;   // not even room for "<?"
   }
   if (('<' != str[pos]) || ('?' != str[pos+1]))
   {
      return;   // no declaration at this position
   }

   // Start after "<?" so the '?' of the opening marker cannot match.
   const string::size_type headerEnd = str.find("?>", pos + 2);
   if (headerEnd == string::npos)
   {
      pos = length;   // unterminated declaration: nothing usable follows
      return;
   }

   // Continue to the first tag after the declaration (or the end of the text).
   const string::size_type nextTag = str.find('<', headerEnd + 2);
   pos = (nextTag == string::npos) ? length : static_cast<int>(nextTag);
}

namespace
{
// Characters treated as insignificant whitespace between tokens.
const char* const kXmlBlanks = " \t\r\n";

// Returns the index of the first '<' at or after `from`, or text.length()
// when there is none.
string::size_type nextTagOrEnd(const string& text, string::size_type from)
{
   const string::size_type hit = text.find('<', from);
   return (hit == string::npos) ? text.length() : hit;
}

// Returns the index of the first non-blank character at or after `from`, or
// text.length() when only blanks remain.
string::size_type skipBlanks(const string& text, string::size_type from)
{
   const string::size_type hit = text.find_first_not_of(kXmlBlanks, from);
   return (hit == string::npos) ? text.length() : hit;
}

// Appends the decimal digits of a non-negative counter to `out` without
// creating a stream or a temporary string.
void appendCounter(string& out, int value)
{
   char digits[16];
   int count = 0;
   do
   {
      digits[count++] = static_cast<char>('0' + value % 10);
      value /= 10;
   } while (value > 0);
   while (count > 0)
   {
      out += digits[--count];
   }
}
}

// Parses the simplified XML text in str and returns a textual report.
//
// Expected body format (an optional "<?xml ... ?>" declaration may precede it):
//   <category1_name>
//       <entity1_name>
//             <attr1="..." attr2="..." />
//       </entity1_name>
//       <entity2...>
//   </category1_name>
//
// Report format, every line terminated by "\r\n":
//   category_name
//   \tentity_name <1-based index of the entity inside its category>
//   \t\tattr_name:attr_value
//
// Whitespace (space, tab, CR, LF) between tokens and around '=' is ignored.
// Every search is bounded by the enclosing closing tag (or by the end of the
// text when that tag is missing), so truncated or malformed input ends the
// scan instead of reading past the string. Empty text, or text holding only a
// declaration, yields an empty report.
string ParsingXML(string str)
{
   typedef string::size_type Index;

   string result;
   result.reserve(1024);
   const Index length = str.length();

   // Skip the optional "<?xml ... ?>" declaration.
   Index categoryStart = skipBlanks(str, 0);
   if (str.compare(categoryStart, 2, "<?") == 0)
   {
      const Index headerEnd = str.find("?>", categoryStart + 2);
      if (headerEnd == string::npos)
      {
         return result;   // unterminated declaration: no body to report
      }
      categoryStart = headerEnd + 2;
   }
   categoryStart = nextTagOrEnd(str, categoryStart);

   // parse all categories
   while (categoryStart < length)
   {
      const Index categoryNameEnd = str.find('>', categoryStart);
      if (categoryNameEnd == string::npos)
      {
         break;   // truncated opening tag
      }
      const string category(str, categoryStart + 1, categoryNameEnd - categoryStart - 1);
      result += category;
      result += "\r\n";

      const string categoryClose = "</" + category + ">";
      Index categoryEnd = str.find(categoryClose, categoryNameEnd + 1);
      const bool categoryClosed = (categoryEnd != string::npos);
      if (!categoryClosed)
      {
         categoryEnd = length;   // missing closing tag: category runs to the end
      }

      // parse the category's entities
      Index entityStart = nextTagOrEnd(str, categoryNameEnd + 1);
      int entityIndex = 1;
      while (entityStart < categoryEnd)
      {
         const Index entityNameEnd = str.find('>', entityStart);
         if ((entityNameEnd == string::npos) || (entityNameEnd >= categoryEnd))
         {
            break;   // truncated entity tag
         }
         const string entity(str, entityStart + 1, entityNameEnd - entityStart - 1);
         result += '\t';
         result += entity;
         result += ' ';
         appendCounter(result, entityIndex);
         result += "\r\n";
         ++entityIndex;

         // Locate the entity's closing tag explicitly instead of assuming a
         // fixed distance from the end of the attribute list.
         const string entityClose = "</" + entity + ">";
         Index entityEnd = str.find(entityClose, entityNameEnd + 1);
         const bool entityClosed = (entityEnd != string::npos) && (entityEnd < categoryEnd);
         if (!entityClosed)
         {
            entityEnd = categoryEnd;
         }

         // parse the attributes: "<name = "value" name2="value2" />"
         const Index attrTag = nextTagOrEnd(str, entityNameEnd + 1);
         if (attrTag < entityEnd)
         {
            Index attrStart = skipBlanks(str, attrTag + 1);
            // The list ends at the two-character sequence "/>".
            while ((attrStart < entityEnd) && (str.compare(attrStart, 2, "/>") != 0))
            {
               const Index equalsPos = str.find('=', attrStart);
               if ((equalsPos == string::npos) || (equalsPos >= entityEnd))
               {
                  break;
               }
               // attrStart refers to a non-blank character, so the backwards
               // search cannot move in front of it.
               Index nameLength = 0;
               if (equalsPos > attrStart)
               {
                  nameLength = str.find_last_not_of(kXmlBlanks, equalsPos - 1) - attrStart + 1;
               }

               const Index quoteOpen = str.find('"', equalsPos + 1);
               if ((quoteOpen == string::npos) || (quoteOpen >= entityEnd))
               {
                  break;
               }
               const Index valueBegin = quoteOpen + 1;
               // Search from valueBegin itself so an empty value ("") is handled.
               const Index valueEnd = str.find('"', valueBegin);
               if ((valueEnd == string::npos) || (valueEnd >= entityEnd))
               {
                  break;
               }

               // Append straight from the source text; no temporary strings.
               result += "\t\t";
               result.append(str, attrStart, nameLength);
               result += ':';
               result.append(str, valueBegin, valueEnd - valueBegin);
               result += "\r\n";

               attrStart = skipBlanks(str, valueEnd + 1);   //parse next attribute
            }
         }

         //parse next entity
         entityStart = entityClosed ? nextTagOrEnd(str, entityEnd + entityClose.length())
                                    : categoryEnd;
      }

      //parse next category
      categoryStart = categoryClosed ? nextTagOrEnd(str, categoryEnd + categoryClose.length())
                                     : length;
   }

   return result;
}

// Benchmark driver: parses a small sample document 1000 times while a Boost
// progress_timer is alive, then prints the report produced by the last run.
// Returns 0 so that a normal run is reported to the OS as success.
int main()
{
   // Sample document; the trailing backslashes join the lines into one literal.
   string str1(
"<?xml version=\"1.0\" ?>\
\r\n<Books>\
\r\n\t<Book>\
\r\n\t\t<Name = \"The C++ Programming Language\" \
\r\n\t\tAuthor=\"Bjarne Stroustrup\" />\
\r\n\t</Book>\r\n\t<Book>\
\r\n\t\t<Name = \"Effective C++\" \
\r\n\t\tAuthor = \"Scott Meyers\" />\
\r\n\t</Book>\
\r\n</Books>");

   //cout << str1 << endl;

   string result1;
   {
      // The timer lives only for this scope, so it measures the parsing loop
      // alone (Boost's progress_timer reports elapsed time when destroyed).
      progress_timer pt;
      for(int i = 0; i < 1000; i++)
      {
         result1 = ParsingXML(str1);
      }
   }
   cout << result1;

   return 0;

}

注：此代码效率低，需要改进！

pongo庞果：xml字符串文件的解析——为什么我的代码效率低？

猜你在找的XML相关文章