Chilkat HOME ASP Visual Basic VB.NET C# Visual C++ C MFC Delphi FoxPro Java Perl PHP Python Ruby SQL Server VBScript
Extract all HTML Objects from a Web Page
Demonstrates how to download a web page (at a URL) and extract all of its HTML objects (e.g. images, links, CSS files, JavaScript files).
// Needs #include <CkMht.h> CkString strOut; CkMht mht; bool success; success = mht.UnlockComponent("30-day trial"); if (success != true) { strOut.append("Mht component unlock failed\r\n"); SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); return; } // Download a URL into an in-memory MHT web archive contained // in a string variable: const char * mhtDoc; mhtDoc = mht.getMHT("http://www.gopackaging.com/"); // On failure, the mhtDoc will be a zero-length string. // Check the LastErrorText property for error information. // Now extract the HTML and embedded objects: const char * unpackDir; unpackDir = "c:/temp/"; const char * htmlFilename; htmlFilename = "gopackaging.html"; const char * partsSubdir; partsSubdir = "objects"; // Extract to c:/temp/gopackaging.html. // images and other embedded objects are placed in // c:/temp/objects. Directories are automatically // created if they don't already exist. success = mht.UnpackMHTString(mhtDoc,unpackDir,htmlFilename,partsSubdir); if (success != true) { strOut.append(mht.lastErrorText()); strOut.append("\r\n"); } else { strOut.append("Unpacked!\r\n"); } SetDlgItemText(IDC_EDIT1,strOut.getUnicode()); |
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2008 Chilkat Software, Inc. All Rights Reserved.