Chilkat HOME ASP Visual Basic VB.NET C# Visual C++ C MFC Delphi FoxPro Java Perl PHP Python Ruby SQL Server VBScript
|
Extract HTML Form Information
Download an HTML page, parse the HTML, and extract information for the first form on the page.
uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, CHILKATXMLLib_TLB, HTMLTOXMLLib_TLB, CHILKATHTTPLib_TLB,
  OleCtrls;

...

// Downloads an HTML page, converts it to XML, and lists the first form on the
// page in Memo1: the form's name/method/action, followed by every "input",
// "select" and "textarea" element found beneath it.
//
// Any fatal problem (component unlock failure, failed download, no form on
// the page) is reported with ShowMessage and the handler returns immediately
// instead of continuing with unusable objects.
procedure TForm1.Button1Click(Sender: TObject);
var
  http: TChilkatHttp;
  success: Integer;
  html: String;
  htmlToXml: THtmlToXml;
  xml: CHILKATXMLLib_TLB.IChilkatXml;
  formRoot: CHILKATXMLLib_TLB.IChilkatXml;
  xmlNode: CHILKATXMLLib_TLB.IChilkatXml;

  // Adds one "<caption><name attribute>" line to Memo1 for every element
  // named tagName found under formRoot.
  procedure ListElementNames(const tagName, caption: String);
  var
    node: CHILKATXMLLib_TLB.IChilkatXml;
  begin
    // Passing nil starts the search at the beginning; passing the previous
    // match continues the search after that node.
    node := formRoot.SearchForTag(nil, tagName);
    while (node <> nil) do
    begin
      Memo1.Lines.Add(caption + node.GetAttrValue('name'));
      node := formRoot.SearchForTag(node, tagName);
    end;
  end;

begin
  // NOTE(review): the components are owned by the form (Create(Self)), so a
  // new set is created on every click and kept until the form is destroyed.
  http := TChilkatHttp.Create(Self);

  // Any string unlocks the component for the 1st 30-days.
  success := http.UnlockComponent('Anything for 30-day trial');
  if (success <> 1) then
  begin
    ShowMessage(http.LastErrorText);
    Exit;
  end;

  // Send the HTTP GET and return the content in a string.
  html := http.QuickGetStr('http://www.nature.com/register/');
  if (html = '') then
  begin
    // Nothing to parse. Presumably the request failed (an empty string is
    // what the ActiveX wrapper hands back on failure -- confirm against the
    // Chilkat reference); LastErrorText carries the details.
    ShowMessage(http.LastErrorText);
    Exit;
  end;

  // Convert the HTML to XML for parsing...
  htmlToXml := THtmlToXml.Create(Self);

  // Any string argument automatically begins the 30-day trial.
  success := htmlToXml.UnlockComponent('30-day trial');
  if (success <> 1) then
  begin
    ShowMessage('HtmlToXml component unlock failed');
    Exit;
  end;

  // Indicate the charset of the output XML we'll want.
  // The charset of the original HTML page can be anything.
  // Chilkat will convert and handle it properly...
  htmlToXml.XmlCharset := 'utf-8';

  // Set the HTML:
  htmlToXml.Html := html;

  // Convert and load into an XML object:
  xml := TChilkatXml.Create(Self).ControlInterface;
  xml.LoadXml(htmlToXml.ToXml());

  // Writing out.xml is only a debugging aid, so a failure here is reported
  // but does not stop the form from being listed.
  success := xml.SaveXml('out.xml');
  if (success <> 1) then
  begin
    ShowMessage(xml.LastErrorText);
  end;

  // Find the first form tag and navigate to it:
  formRoot := xml.SearchForTag(nil, 'form');
  if (formRoot = nil) then
  begin
    // Must stop here: every statement below dereferences formRoot.
    ShowMessage('No HTML form found!');
    Exit;
  end;

  // Show the form's name, method, and action:
  Memo1.Lines.Add('Form Name: ' + formRoot.GetAttrValue('name'));
  Memo1.Lines.Add('Form Method: ' + formRoot.GetAttrValue('method'));
  Memo1.Lines.Add('Form Action: ' + formRoot.GetAttrValue('action'));
  Memo1.Lines.Add('----');

  // Iterate over "input" tags within the form:
  xmlNode := formRoot.SearchForTag(nil, 'input');
  while (xmlNode <> nil) do
  begin
    // print the input field's name, type, and current value:
    // an empty input type indicates the default "text" type
    Memo1.Lines.Add('Input Name: ' + xmlNode.GetAttrValue('name'));
    Memo1.Lines.Add('Input Type: ' + xmlNode.GetAttrValue('type'));
    Memo1.Lines.Add('Current Value: ' + xmlNode.GetAttrValue('value'));
    Memo1.Lines.Add('----');

    xmlNode := formRoot.SearchForTag(xmlNode, 'input');
  end;

  // Iterate over "select" tags within the form:
  ListElementNames('select', 'Select Name: ');

  // Iterate over "textarea" tags within the form:
  ListElementNames('textarea', 'TextArea Name: ');
end;
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2007 Chilkat Software, Inc. All Rights Reserved.