在项目中可能会出现这样的场景:需要从一段文本中解析出数据,
例如:需要从下面这段文本中找出标红的数据
FSI/*CX S KA 909Y22MAR PEK1630 2020HKG0X 333 S CX 806Y23MAR HKG1150 1315ORD0S 77W 01 YOW2+YX2 17758 CNY INCL TAX *SYSTEM DEFAULT-CHECK OPERATING CARRIER *INTERLINE AGREEMENT PRICING APPLIED *ATTN PRICED ON 21JAN14*1307 BJS XHKG YOW2 NVB NVA22MAR 2PC CHI YX2 NVB NVA22MAR 2PC FARE CNY 16480 TAX CNY 90CN CNY 94HK CNY 1094XT TOTAL CNY 17758 22MAR14BJS KA X/HKG563.99CX CHI Q4.25 2140.91NUC2709.15END R OE6.081590 XT CNY 106US CNY 31XA CNY 43XY CNY 34YC CNY 880YR ENDOS 02 *T1 *AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB RFSONLN/1E /EFEP_13/FCC=T/
通过下面这个解析类,可以实现我们的功能,主要用到了正则表达式的()捕获功能
package cn.test; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class QTaxParser1 { private static final String QTAX_PATTERN = "^[0-9]{1,2}(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC).*"; private static final String TAX_PATTERN = "^TAX.*"; private static final String NUM_PATTERN = "([0-9]+)([A-Z]+) *"; private static final String QNUM_PATTERN = "Q([0-9]+\\.{0,1}[0-9]*)"; private static final String QROE_PATTERN = "\\s+R\\s*O\\s*E\\s*(([0-9]\\s*)+(\\.\\s*){0,1}([0-9]\\s*)*)\\s+"; private static final String RATE_PATTERN = "=([0-9]+\\.{0,1}[0-9]*)"; private Map<String,Double> tax = new HashMap<String,Double>(); private List<Double> qTax = new ArrayList<Double>(); private Double roe; private static Logger log = LoggerFactory.getLogger(QTaxParser1.class); public Map<String,Double> getTax(String txt){ // 分解出TAX 行 List<String> taxLine = parase(txt,TAX_PATTERN); if ((taxLine != null) && (taxLine.size() > 0)) { // 处理TAX 行 List<String> taxItem = parase(taxLine.get(0),NUM_PATTERN); for (int i = 0; i < taxItem.size(); i += 2) { tax.put(taxItem.get(i + 1),Double.parseDouble(taxItem.get(i))); } } return tax; } public List<Double> getQTax(String txt){ // 分解出TAX 行 List<String> taxLine = parase(txt,TAX_PATTERN); if ((taxLine != null) && (taxLine.size() > 0)) { // 分解出Q行 List<String> qTaxLine = parase(txt,QTAX_PATTERN,false); if ((qTaxLine != null) && (qTaxLine.size() > 0)) { // 处理QTAX 行 List<String> qTaxItem = parase(qTaxLine.get(0),QNUM_PATTERN); // 提取Q值 for (int i = 0; i < qTaxItem.size(); i++) { qTax.add(Double.parseDouble(qTaxItem.get(i))); } } } return qTax; } public Double getROE(String txt) { // 分解出ROE行 List<String> roeItem = parase(txt,QROE_PATTERN); // 提取ROE值 if (roeItem.size() > 0) { roe = Double.parseDouble(roeItem.get(0).replaceAll("\\s*","")); } return roe; } public boolean 
isTaxPage(String txt) { Pattern ptn = Pattern.compile(QTAX_PATTERN,Pattern.MULTILINE); Matcher m = ptn.matcher(txt); if (m.find()) { log.debug("TAX Match:" + m.group()); return true; } return false; } public String getRateValue(String txt) { List<String> rates = parase(txt,RATE_PATTERN); if (rates.size() > 0) { return parase(txt,RATE_PATTERN).get(0); } else { return null; } } private List<String> parase(String txt,String pattern) { return parase(txt,pattern,true); } private static List<String> parase(String txt,String pattern,boolean grouped) { Pattern ptn = Pattern.compile(pattern,Pattern.MULTILINE); Matcher m = ptn.matcher(txt); List<String> matches = new ArrayList<String>(); if (!grouped || (m.groupCount() == 0)) { if (m.find()) { matches.add(m.group()); } } else { while (m.find()) { for (int i = 1; i <= m.groupCount(); i++) { matches.add(m.group(i)); } } } return matches; } }测试用例
package itour.cn.fare.gateway;

import cn.test.QTaxParser1;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

/**
 * Command-line demo that feeds a sample text to {@link QTaxParser1} and prints
 * the parsed tax map, Q values and ROE as JSON.
 */
public class QTaxParserTest {

    /** The sample text, one element per line; trailing and leading blanks are part of the sample. */
    private static final String[] SAMPLE_LINES = {
        " FSICH/*CX ",
        "S KA 909Y22MAR PEK1630 2020HKG0X 333 ",
        "S CX 806Y23MAR HKG1150 1315ORD0S 77W ",
        "01 YOW2+YX2 CH 13464 CNY INCL TAX",
        "*SYSTEM DEFAULT-CHECK OPERATING CARRIER ",
        "*INTERLINE AGREEMENT PRICING APPLIED",
        "*ACCOMPANIED VALIDATION-ALL PAX MUST BE TKTD AT SAME TIME ",
        "*VERIFY AGE REQUIREMENTS",
        "*ATTN PRICED ON 21JAN14*1158",
        "BJS",
        "XHKG YOW2 CH25 NVB NVA22MAR 2PC ",
        " CHI YX2 CH25 NVB NVA22MAR 2PC ",
        "FARE CNY 12370 ",
        "TAX EXEMPT CN CNY 106US CNY 988XT",
        "TOTAL CNY 13464 ",
        "22MAR14BJS KA X/HKG422.99CX CHI Q4.25 1605.68NUC2032.92END R",
        "OE6.081590 ",
        "XT CNY 31XA CNY 43XY CNY 34YC CNY 880YR ",
        "ENDOS 02 *T1",
        "*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB ",
        "RFSONLN/1E /EFEP_23/FCC=T/"
    };

    /**
     * Joins the sample lines with line feeds (no trailing line feed), runs the three
     * extraction methods on the result and prints each outcome on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        StringBuilder sample = new StringBuilder();
        for (int i = 0; i < SAMPLE_LINES.length; i++) {
            if (i > 0) {
                sample.append("\n");
            }
            sample.append(SAMPLE_LINES[i]);
        }
        String txt = sample.toString();

        QTaxParser1 parser = new QTaxParser1();

        JSONObject taxJson = JSONObject.fromObject(parser.getTax(txt));
        System.out.println(taxJson.toString());

        JSONArray qTaxJson = JSONArray.fromObject(parser.getQTax(txt));
        System.out.println(qTaxJson.toString());

        JSONArray roeJson = JSONArray.fromObject(parser.getROE(txt));
        System.out.println(roeJson.toString());
    }
}