/// <summary>
/// Loads "ec-news-2009-ref.xml" (XML comments ignored), collects the trimmed
/// inner text of every <c>seg</c> element, one per line, and writes the result
/// as UTF-8 to the path held in <c>name1</c>.
/// </summary>
static void xml2plain()
{
    string trg = "ec-news-2009-ref.xml";

    XmlReaderSettings settings = new XmlReaderSettings();
    settings.IgnoreComments = true;

    XmlDocument xml = new XmlDocument();

    // Dispose the reader as soon as the document is loaded so the handle on
    // the input file is released even if Load throws.
    using (XmlReader reader = XmlReader.Create(trg, settings))
    {
        xml.Load(reader);
    }

    StringBuilder buf = new StringBuilder();
    foreach (XmlNode n in xml.GetElementsByTagName("seg"))
    {
        buf.AppendLine(n.InnerText.Trim());
    }

    // NOTE(review): name1 is not declared inside this method; presumably it is
    // a member declared elsewhere that holds the output path -- confirm.
    File.WriteAllText(name1, buf.ToString(), Encoding.UTF8);
}
这个函数比较简单:读取文件中所有名为 seg 的 XML 节点的文本值,对于格式简单的文件很方便。
下面这个函数从多个文件中读取数据,最终合并为一个文件。注意:默认读取文件夹中的文件时,是按文件名的字母顺序读取的;对于按数字编号排序的文件,需要自定义读取顺序。
/// <summary>
/// Reads the files "1.txt" through "11.txt" from the hard-coded source folder
/// in numeric order, trims every line, drops empty lines, and writes the
/// merged text as UTF-8 to "result.txt".
/// </summary>
static void trans2cor()
{
    string folder = @"D:\Documents\Projects\xml2plain\xml2plain\bin\Debug\trans\google-src\";
    StringBuilder buf = new StringBuilder();

    // Manual toggle: when true, a single space is inserted between every
    // character of each line before it is appended.
    bool s = false;

    // Number of input files; names are built from the index so the files are
    // read in numeric order (1, 2, ..., 11) rather than by name.
    int length = 11;

    for (int i = 0; i < length; i++)
    {
        string name = folder + (i + 1).ToString() + ".txt";
        string[] lines = File.ReadAllLines(name, Encoding.Default);

        foreach (string line in lines)
        {
            string tmp = line.Trim();

            if (s)
            {
                // Build the spaced-out line in a StringBuilder instead of
                // repeated string concatenation.
                StringBuilder spaced = new StringBuilder(tmp.Length * 2);
                foreach (char ch in tmp)
                {
                    spaced.Append(ch);
                    spaced.Append(' ');
                }
                tmp = spaced.ToString().Trim();
            }

            if (tmp != "")
            {
                buf.AppendLine(tmp);
            }
        }
    }

    // The original call passed the encoding in place of the contents, so the
    // merged text was never written; pass the buffer explicitly.
    File.WriteAllText("result.txt", buf.ToString(), Encoding.UTF8);
}