Chilkat HOME ASP Visual Basic VB.NET C# Visual C++ C MFC Delphi FoxPro Java Perl PHP Python Ruby SQL Server VBScript
Extract HTML Form Information
Download an HTML page, parse the HTML, and extract information about the first form on the page.
// Needs #include <CkHttp.h> // Needs #include <CkHtmlToXml.h> // Needs #include <CkXml.h> CkString strOut; CkHttp http; bool success; // Any string unlocks the component for the 1st 30-days. success = http.UnlockComponent("Anything for 30-day trial"); if (success != true) { strOut.append(http.lastErrorText()); strOut.append("\r\n"); SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); return; } // Send the HTTP GET and return the content in a string. const char * html; html = http.quickGetStr("http://www.nature.com/register/"); // Convert the HTML to XML for parsing... CkHtmlToXml htmlToXml; // Any string argument automatically begins the 30-day trial. success = htmlToXml.UnlockComponent("30-day trial"); if (success != true) { strOut.append("HtmlToXml component unlock failed\r\n"); SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); return; } // Indicate the charset of the output XML we'll want. // The charset of the original HTML page can be anything. // Chilkat will convert and handle it properly... 
htmlToXml.put_XmlCharset("utf-8"); // Set the HTML: htmlToXml.put_Html(html); // Convert and load into an XML object: CkXml xml; xml.LoadXml(htmlToXml.toXml()); success = xml.SaveXml("out.xml"); if (success != true) { strOut.append(xml.lastErrorText()); strOut.append("\r\n"); SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); return; } // Find the first form tag and navigate to it: CkXml *formRoot = 0; formRoot = xml.SearchForTag(0,"form"); if (formRoot == 0 ) { strOut.append("No HTML form found!\r\n"); SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); return; } // Show the form's name, method, and action: strOut.append("Form Name: "); strOut.append(formRoot->getAttrValue("name")); strOut.append("\r\n"); strOut.append("Form Method: "); strOut.append(formRoot->getAttrValue("method")); strOut.append("\r\n"); strOut.append("Form Action: "); strOut.append(formRoot->getAttrValue("action")); strOut.append("\r\n"); strOut.append("----"); strOut.append("\r\n"); // Iterate over "input" tags within the form: CkXml *xmlTemp = 0; CkXml *xmlNode = 0; xmlNode = formRoot->SearchForTag(0,"input"); while ((!(xmlNode == 0 ))) { // print the input field's name, type, and current value: // an empty input type indicates the default "text" type strOut.append("Input Name: "); strOut.append(xmlNode->getAttrValue("name")); strOut.append("\r\n"); strOut.append("Input Type: "); strOut.append(xmlNode->getAttrValue("type")); strOut.append("\r\n"); strOut.append("Current Value: "); strOut.append(xmlNode->getAttrValue("value")); strOut.append("\r\n"); strOut.append("----"); strOut.append("\r\n"); xmlTemp = xmlNode; xmlNode = formRoot->SearchForTag(*xmlNode,"input"); delete xmlTemp; } // Iterate over "select" tags within the form: xmlNode = formRoot->SearchForTag(0,"select"); while ((!(xmlNode == 0 ))) { strOut.append("Select Name: "); strOut.append(xmlNode->getAttrValue("name")); strOut.append("\r\n"); xmlTemp = xmlNode; xmlNode = formRoot->SearchForTag(*xmlNode,"select"); delete xmlTemp; } // Iterate 
over "textarea" tags within the form: xmlNode = formRoot->SearchForTag(0,"textarea"); while ((!(xmlNode == 0 ))) { strOut.append("TextArea Name: "); strOut.append(xmlNode->getAttrValue("name")); strOut.append("\r\n"); xmlTemp = xmlNode; xmlNode = formRoot->SearchForTag(*xmlNode,"textarea"); delete xmlTemp; } delete formRoot; SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); |
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2008 Chilkat Software, Inc. All Rights Reserved.