如何使用 DIHtmlParser 解析标签中的 "Name:" 和 "Value" 文本?
我尝试过使用 Clever Components 的 TCLHtmlParser,但它失败了。第二个问题是:DIHtmlParser 能否解析单个标签,例如循环遍历其子标签?这么简单的问题却成了一场彻头彻尾的噩梦。
<div class="tvRow tvFirst hasLabel tvFirst" title="example1"> <label class="tvLabel">Name:</label> <span class="tvValue">Value</span> <div class="clear"></div></div> <div class="tvRow tvFirst hasLabel tvFirst" title="example2"> <label class="tvLabel">Name:</label> <span class="tvValue">Value</span> <div class="clear"></div></div>
解决方法
您可以使用
IHTMLDocument2
DOM从HTML解析您需要的任何元素:
uses
  ActiveX, MSHTML;

const
  // Sample markup: one "row" div holding a label, a value span and a clearing div.
  HTML =
    '<div class="tvRow tvFirst hasLabel tvFirst" title="example1">' +
    '<label class="tvLabel">Name:</label>' +
    '<span class="tvValue">Value</span>' +
    '<div class="clear"></div>' +
    '</div>';

{ Loads the HTML constant into an MSHTML document, shows the resulting body
  markup, then walks every element under <body> and shows the inner text of
  each <label class="tvLabel"> and <span class="tvValue"> it finds. }
procedure TForm1.Button1Click(Sender: TObject);
var
  htmlDoc: OleVariant;
  allElements: OleVariant;
  element: OleVariant;
  idx: Integer;
begin
  // Held as OleVariant so every member access below is late-bound.
  htmlDoc := coHTMLDocument.Create as IHTMLDocument2;
  htmlDoc.write(HTML);
  htmlDoc.close;

  ShowMessage(htmlDoc.body.innerHTML);

  allElements := htmlDoc.body.all;
  for idx := 0 to allElements.length - 1 do
  begin
    element := allElements.item(idx);
    // The comparisons use upper-case tag names ('LABEL', 'SPAN').
    if ((element.tagName = 'LABEL') and (element.className = 'tvLabel')) or
       ((element.tagName = 'SPAN') and (element.className = 'tvValue')) then
      ShowMessage(element.innerText);
  end;
end;
我想提到我今天发现的另一个非常好的HTML解析器:htmlp
(Delphi DOM HTML 解析器和转换器)。它不如 IHTMLDocument2 灵活,但易于使用、速度快、免费,并且在旧版 Delphi 中也支持 Unicode。
示例用法:
uses
  HtmlParser, DomCore;

{ Returns the first direct child of the document element whose node name is
  'body', or nil when no such child exists.
  NOTE(review): assumes HtmlDoc.documentElement is assigned; confirm for
  empty input. }
function GetDocBody(HtmlDoc: TDocument): TElement;
var
  idx: Integer;
  child: TNode;
begin
  Result := nil;
  for idx := 0 to HtmlDoc.documentElement.childNodes.length - 1 do
  begin
    child := HtmlDoc.documentElement.childNodes.item(idx);
    if child.nodeName = 'body' then
    begin
      Result := child as TElement;
      Break;
    end;
  end;
end;

{ Parses the HTML constant (declared elsewhere in the unit) and, for every
  direct child <div> of <body> whose class attribute is exactly
  'tvRow tvFirst hasLabel tvFirst', shows how many child nodes it has. }
procedure THTMLForm.Button2Click(Sender: TObject);
var
  parser: THtmlParser;
  parsedDoc: TDocument;
  idx: Integer;
  bodyElem, rowElem: TElement;
  child: TNode;
begin
  parser := THtmlParser.Create;
  try
    parsedDoc := parser.parseString(HTML);
    try
      bodyElem := GetDocBody(parsedDoc);
      if Assigned(bodyElem) then
        for idx := 0 to bodyElem.childNodes.length - 1 do
        begin
          child := bodyElem.childNodes.item(idx);
          // Only element nodes are cast and inspected; other nodes are skipped.
          if child is TElement then
          begin
            rowElem := child as TElement;
            if (rowElem.tagName = 'div') and
               (rowElem.GetAttribute('class') = 'tvRow tvFirst hasLabel tvFirst') then
            begin
              // iterate rowElem.childNodes here...
              ShowMessage(IntToStr(rowElem.childNodes.length));
            end;
          end;
        end;
    finally
      // The parsed document is freed here by the caller.
      parsedDoc.Free;
    end;
  finally
    parser.Free;
  end;
end;