Chilkat HOME ASP Visual Basic VB.NET C# C C++ MFC Delphi FoxPro Java Perl Python Ruby SQL Server VBScript
Extract HTML Form Information: Download an HTML page, parse the HTML, and extract information for the first form on the page.
<?php
// Download an HTML page, convert it to XML with the Chilkat components, and
// print details of the first <form> found: its name/method/action, followed by
// every input, select and textarea tag located beneath it.
//
// Chilkat status results are tested for truthiness (!$success), which is
// equivalent to the `!= true` comparison this example has always used. A
// strict `!== true` is deliberately avoided: the COM layer may return an
// integer rather than a PHP bool -- TODO confirm against the installed build.

// Collect every node beneath $root whose tag name is $tag, in search order.
function collectTags($root, $tag)
{
    $found = array();
    $node = $root->SearchForTag(null, $tag);
    while (!is_null($node)) {
        $found[] = $node;
        // Hand the previous match back in so the search continues past it.
        $node = $root->SearchForTag($node, $tag);
    }
    return $found;
}

$http = new COM("Chilkat.Http");

// Any string unlocks the component for the 1st 30-days.
$success = $http->UnlockComponent('Anything for 30-day trial');
if (!$success) {
    print $http->lastErrorText() . "\n";
    exit;
}

// Send the HTTP GET and return the content in a string.
$html = $http->quickGetStr('http://www.nature.com/register/');
if (is_null($html) || $html === '') {
    // Nothing was downloaded, so there is nothing to parse; report the
    // component's own error text instead of carrying on with empty input.
    print $http->lastErrorText() . "\n";
    exit;
}

// Convert the HTML to XML for parsing...
$htmlToXml = new COM("Chilkat.HtmlToXml");

// Any string argument automatically begins the 30-day trial.
$success = $htmlToXml->UnlockComponent('30-day trial');
if (!$success) {
    print 'HtmlToXml component unlock failed' . "\n";
    exit;
}

// Indicate the charset of the output XML we'll want.
// The charset of the original HTML page can be anything.
// Chilkat will convert and handle it properly...
$htmlToXml->XmlCharset = 'utf-8';

// Set the HTML:
$htmlToXml->Html = $html;

// Convert and load into an XML object:
$xml = new COM("Chilkat.Xml");
$success = $xml->LoadXml($htmlToXml->toXml());
if (!$success) {
    print $xml->lastErrorText() . "\n";
    exit;
}

// Keep a copy of the converted document on disk (handy for inspection).
$success = $xml->SaveXml('out.xml');
if (!$success) {
    print $xml->lastErrorText() . "\n";
    exit;
}

// Find the first form tag and navigate to it:
$formRoot = $xml->SearchForTag(null, 'form');
if (is_null($formRoot)) {
    print 'No HTML form found!' . "\n";
    exit;
}

// Show the form's name, method, and action:
print 'Form Name: ' . $formRoot->getAttrValue('name') . "\n";
print 'Form Method: ' . $formRoot->getAttrValue('method') . "\n";
print 'Form Action: ' . $formRoot->getAttrValue('action') . "\n";
print '----' . "\n";

// Iterate over "input" tags within the form:
foreach (collectTags($formRoot, 'input') as $inputNode) {
    // print the input field's name, type, and current value:
    // an empty input type indicates the default "text" type
    print 'Input Name: ' . $inputNode->getAttrValue('name') . "\n";
    print 'Input Type: ' . $inputNode->getAttrValue('type') . "\n";
    print 'Current Value: ' . $inputNode->getAttrValue('value') . "\n";
    print '----' . "\n";
}

// Iterate over "select" tags within the form:
foreach (collectTags($formRoot, 'select') as $selectNode) {
    print 'Select Name: ' . $selectNode->getAttrValue('name') . "\n";
}

// Iterate over "textarea" tags within the form:
foreach (collectTags($formRoot, 'textarea') as $textAreaNode) {
    print 'TextArea Name: ' . $textAreaNode->getAttrValue('name') . "\n";
}
?> |
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2010 Chilkat Software, Inc. All Rights Reserved.