Java

Extract all HTML Objects from a Web Page

Demonstrates how to download a web page from a URL and extract all of its HTML objects (e.g., images, links, CSS files, and JavaScript files).

Chilkat Java Downloads

Download Chilkat for Java

Java

import com.chilkatsoft.*;

/**
 * Example program: fetches a web page into an in-memory MHT archive and then
 * unpacks that archive to disk as an HTML file plus a subdirectory holding the
 * page's embedded objects.
 */
public class ChilkatExample {

  // Load the "chilkat" native library when this class is initialized.
  // If it cannot be linked, report the error on stderr and exit with status 1.
  static {
    try {
      System.loadLibrary("chilkat");
    } catch (UnsatisfiedLinkError linkError) {
      System.err.println("Native code library failed to load.\n" + linkError);
      System.exit(1);
    }
  }

  /**
   * Downloads a fixed URL as an MHT document and unpacks it to a fixed
   * directory, printing either "Unpacked!" or the Chilkat error text.
   *
   * <p>This example assumes the Chilkat API has already been unlocked
   * (see the Global Unlock Sample for sample code).
   *
   * @param argv command-line arguments (not used)
   */
  public static void main(String argv[])
  {
    CkMht archiver = new CkMht();

    // Step 1: pull the page into an MHT web archive held in a string.
    // The URL was picked at random and was valid when the example was written.
    String archive = archiver.getMHT("https://www.tetonlodge.com/");
    if (!archiver.get_LastMethodSuccess()) {
      System.out.println(archiver.lastErrorText());
      return;
    }

    // Step 2: choose where the HTML and its embedded objects should go.
    // The page is written to C:/AAWorkarea/mhtTesting/lodge.html, while images
    // and other embedded objects land in C:/AAWorkarea/mhtTesting/objects.
    // Per the original example, missing directories are created automatically.
    String targetDir = "C:/AAWorkarea/mhtTesting/";
    String pageName = "lodge.html";
    String objectsSubdir = "objects";

    // Step 3: unpack and report the outcome. The error text is only fetched
    // when unpacking fails, since the ternary evaluates one branch only.
    boolean unpacked = archiver.UnpackMHTString(archive, targetDir, pageName, objectsSubdir);
    System.out.println(unpacked ? "Unpacked!" : archiver.lastErrorText());
  }
}