我有一个地址类，它使用正则表达式从地址的第一行解析出门牌号、街道名称和街道类型。这段代码通常运行良好，但我还是把它发布在这里与社区分享，看看有没有人有改进的建议。
注意：STREETTYPES 和 QUADRANTS 常量分别包含所有相关的街道类型和象限。
我在这里添加了一个子集:
// Regex alternation fragments ('|'-separated) that Parse concatenates into its address pattern.
// STREETTYPES: street-type words and their abbreviations. NOTE(review): the trailing "..." appears to
// mark where this excerpt was truncated; if left in a real pattern it would match any three characters.
// QUADRANTS: compass-direction words and abbreviations (N, NORTH, ..., SW, SOUTHWEST).
private const string STREETTYPES = @"ALLEY|ALY|ANNEX|AX|ARCADE|ARC|AVENUE|AV|AVE|BAYOU|BYU|BEACH|..."; private const string QUADRANTS = "N|NORTH|S|SOUTH|E|EAST|W|WEST|NE|NORTHEAST|NW|NORTHWEST|SE|SOUTHEAST|SW|SOUTHWEST";
HouseNumber、Quadrant、StreetName 和 StreetType 都是该类的属性。
/// <summary>
/// Pattern for the first address line, built once and reused by <see cref="Parse"/>.
/// Written in ignore-case / ignore-pattern-whitespace mode (<c>(?ix)</c>), so the line
/// breaks and indentation inside the literal are not part of the pattern.
/// Two shapes are accepted: a numbered street address (house number followed by optional
/// quadrant, street name, quadrant, street type, qualifier, trailing quadrant and unit
/// suffix), or a post-office box.
/// </summary>
private static readonly Regex AddressLine1Regex = new Regex(
    @"(?ix)
    ^
    \s*
    (?:
        (?<housenumber>\d+)
        (?:(?:\s+|-)(?<quadrant>" + QUADRANTS + @"))?
        (?:(?:\s+|-)(?<streetname>\S+(?:\s+\S+)*?))??
        (?:(?:\s+|-)(?<quadrant>" + QUADRANTS + @"))?
        (?:(?:\s+|-)(?<streettype>" + STREETTYPES + @"))?
        (?:(?:\s+|-)(?<streettypequalifier>(?!(?:" + QUADRANTS + @"))(?:\d+|\S+)))?
        (?:(?:\s+|-)(?<streettypequadrant>(" + QUADRANTS + @")))??
        (?:(?:\s+|-)(?<suffix>(?:ste|suite|po\sBox|apt)\s*\S*))?
        |
        (?:(?:po|postoffice|post\s+office)\s+Box\s+(?<postofficeBox>\S+))
    )
    \s*
    $
    ",
    // CultureInvariant keeps the case-insensitive match independent of the current
    // thread culture (e.g. Turkish dotted/dotless 'i').
    RegexOptions.CultureInvariant | RegexOptions.Compiled);

/// <summary>
/// Parses the first line of an address into <c>HouseNumber</c>, <c>Quadrant</c>,
/// <c>StreetName</c> and <c>StreetType</c>.
/// </summary>
/// <param name="line1">
/// The first address line. When it is null, empty, or does not match the address
/// pattern, all four properties are left as empty strings.
/// </param>
private void Parse(string line1)
{
    // Always start from a clean slate so a failed parse never leaves stale values behind.
    HouseNumber = string.Empty;
    Quadrant = string.Empty;
    StreetName = string.Empty;
    StreetType = string.Empty;

    if (string.IsNullOrEmpty(line1))
    {
        return;
    }

    // Periods are dropped before matching ("N. Main St." -> "N Main St").
    // Replace already returns a new string, so no defensive copy is needed.
    Match match = AddressLine1Regex.Match(line1.Replace(".", string.Empty));
    if (!match.Success)
    {
        return;
    }

    HouseNumber = match.Groups["housenumber"].Value;

    // Prefer a quadrant found around the street name; otherwise use the one after the street type.
    string quadrant = match.Groups["quadrant"].Value;
    Quadrant = string.IsNullOrEmpty(quadrant)
        ? match.Groups["streettypequadrant"].Value
        : quadrant;

    // The streetname group occurs once in the pattern and is not inside a repeating
    // construct, so it holds at most one capture; its Value is the complete name.
    // When no name was captured, the street-type qualifier stands in for it.
    string streetName = match.Groups["streetname"].Value;
    StreetName = string.IsNullOrEmpty(streetName)
        ? match.Groups["streettypequalifier"].Value
        : streetName;

    StreetType = match.Groups["streettype"].Value;

    // If the matched street type is known, use the abbreviated version
    // (especially for credit bureau calls).
    // Upper-casing is culture-invariant so the lookup key does not vary with the thread culture.
    // NOTE(review): assumes the StreetTypes keys are upper-case invariant strings — confirm.
    string streetTypeAbbreviation;
    if (StreetTypes.TryGetValue(StreetType.ToUpperInvariant(), out streetTypeAbbreviation))
    {
        StreetType = streetTypeAbbreviation;
    }
}