Sample code for 30+ languages & platforms
Java

HTML Table to CSV

See more HTML-to-XML/Text Examples

Demonstrates a method for converting an HTML table to a CSV file.

Note: This example requires Chilkat v9.5.0.77 or greater.

Chilkat Java Downloads

Java
import com.chilkatsoft.*;

public class ChilkatExample {

  static {
    try {
        System.loadLibrary("chilkat");
    } catch (UnsatisfiedLinkError e) {
      System.err.println("Native code library failed to load.\n" + e);
      System.exit(1);
    }
  }

  public static void main(String argv[])
  {
    boolean success = false;

    // This example requires the Chilkat API to have been previously unlocked.
    // See Global Unlock Sample for sample code.

    // First download the HTML containing the table
    CkHttp http = new CkHttp();
    CkBinData bdHtml = new CkBinData();

    success = http.QuickGetBd("https://example-code.com/data/etf_table.html",bdHtml);
    if (success != true) {
        System.out.println(http.lastErrorText());
        return;
        }

    // Convert to XML.
    CkHtmlToXml htx = new CkHtmlToXml();
    htx.SetHtmlBd(bdHtml);

    CkStringBuilder sbXml = new CkStringBuilder();
    htx.ToXmlSb(sbXml);

    CkXml xml = new CkXml();
    xml.LoadSb(sbXml,true);

    // Remove attributes and sub-trees we don't need.
    // (In other words, we're getting rid of clutter...)
    int numRemoved = xml.PruneTag("thead");
    numRemoved = xml.PruneAttribute("style");
    numRemoved = xml.PruneAttribute("class");

    // Scrub the element and attribute content.
    xml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside");

    // Let's see what we have...
    System.out.println(xml.getXml());

    // We have the following XML.
    // Copy this XML into the online tool at Generate Parsing Code from XML
    // as a starting point for accessing the data..

    // <?xml version="1.0" encoding="utf-8"?>
    // <root>
    //     <html>
    //         <head>
    //             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
    //         </head>
    //         <body text="#000000" bgcolor="#FFFFFF">
    //             <div>
    //                 <div>
    //                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
    //                 </div>
    //             </div>
    //             <div>
    //                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
    //                     <tbody>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>ITUB4</text>
    //                             </td>
    //                             <td>
    //                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>10.94</text>
    //                             </td>
    //                             <td>
    //                                 <text>998,954,813.73</text>
    //                             </td>
    //                         </tr>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>BBDC4</text>
    //                             </td>
    //                             <td>
    //                                 <text>BANCO BRADESCO PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>9.01</text>
    //                             </td>
    //                             <td>
    //                                 <text>822,164,622.75</text>
    //                             </td>
    //                         </tr>
    // 			...
    // 			...
    // 			...
    //                     </tbody>
    //                 </table>
    //             </div>
    //         </body>
    //     </html>
    // </root>

    // 
    // This is the code generated by the online tool:
    // 
    int i;
    int count_i;
    String table_role;
    String table_data_scrollx;
    String table_data_sortdirection;
    String table_data_sorton;
    String table_id;
    int j;
    int count_j;
    String tr_role;
    int k;
    int count_k;
    String tagPath;
    String text;

    i = 0;
    count_i = xml.NumChildrenHavingTag("html|body|div");
    while (i < count_i) {
        xml.put_I(i);
        table_role = xml.chilkatPath("html|body|div[i]|div|table|(role)");
        table_data_scrollx = xml.chilkatPath("html|body|div[i]|div|table|(data-scrollx)");
        table_data_sortdirection = xml.chilkatPath("html|body|div[i]|div|table|(data-sortdirection)");
        table_data_sorton = xml.chilkatPath("html|body|div[i]|div|table|(data-sorton)");
        table_id = xml.chilkatPath("html|body|div[i]|table|(id)");
        table_role = xml.chilkatPath("html|body|div[i]|table|(role)");
        table_data_scrollx = xml.chilkatPath("html|body|div[i]|table|(data-scrollx)");
        table_data_sortdirection = xml.chilkatPath("html|body|div[i]|table|(data-sortdirection)");
        table_data_sorton = xml.chilkatPath("html|body|div[i]|table|(data-sorton)");
        j = 0;
        count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr");
        while (j < count_j) {
            xml.put_J(j);
            tr_role = xml.chilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)");
            k = 0;
            count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td");
            while (k < count_k) {
                xml.put_K(k);
                text = xml.getChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text");
                k = k+1;
                }

            j = j+1;
            }

        i = i+1;
        }

    // Let's modify the above code to build the CSV.
    CkCsv csv = new CkCsv();
    csv.SetColumnName(0,"Ticker");
    csv.SetColumnName(1,"Name");
    csv.SetColumnName(2,"Sector");
    csv.SetColumnName(3,"Country");
    csv.SetColumnName(4,"Weight");
    csv.SetColumnName(5,"Notional Vaue");

    i = 0;
    count_i = xml.NumChildrenHavingTag("html|body|div");
    while (i < count_i) {
        xml.put_I(i);
        j = 0;
        count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr");
        while (j < count_j) {
            xml.put_J(j);
            k = 0;
            count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td");
            while (k < count_k) {
                xml.put_K(k);
                csv.SetCell(j,k,xml.getChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"));
                k = k+1;
                }

            j = j+1;
            }

        i = i+1;
        }

    csv.SaveFile("qa_output/brasil_etf.csv");
    String csvStr = csv.saveToString();
    System.out.println(csvStr);

    // Our CSV looks like this:
    // Ticker,Name,Sector,Country,Weight,Notional Vaue
    // ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
    // BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
    // VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
    // PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
    // PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
    // B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
    // ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
    // BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
    // ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
    // LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
    // 
  }
}