Sample code for 30+ languages & platforms
C

Convert an HTML Table to Plain-Text

See more HTML-to-XML/Text Examples

Converts an HTML table to plain-text.

Chilkat C Downloads

C
#include <C_CkHtmlToXml.h>
#include <C_CkXml.h>
#include <C_CkStringBuilder.h>

void ChilkatSample(void)
    {
    BOOL success;
    HCkHtmlToXml h2x;
    HCkXml xml;
    HCkStringBuilder sbPlainText;
    int numRows;
    int i;

    success = FALSE;

    // This example assumes the Chilkat API to have been previously unlocked.
    // See Global Unlock Sample for sample code.

    // This example converts the following HTML fragment to plain-text showing just
    // the information desired.
    // This is the raw HTML to be converted:
    // 
    // <table><tr><th>
    // <p>Property</p></th><th scope="col"><p>Type</p></th><th 
    // scope="col"><p>R/W</p></th><th scope="col"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/jj164022.aspx#NavigationProperties">
    // Returned with resource</a></p></th><th 
    // scope="col"><p>Description</p></th></tr><tr><td 
    // data-th="Property"><p>Author</p></td><td data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn531432.aspx#bk_User">
    // SP.User</a></p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>No</p></td><td data-th="Description"><p>Gets a value that 
    // specifies the user who added the file.</p></td></tr><tr><td 
    // data-th="Property"><p>CheckedOutByUser</p></td><td data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn531432.aspx#bk_User">
    // SP.User</a></p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>No</p></td><td data-th="Description"><p>Gets a value that returns 
    // the user who has checked out the file.</p></td></tr><tr><td 
    // data-th="Property"><p>CheckInComment</p></td><td data-th="Type"><p><span 
    // class="input">String</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that returns the comment used when a 
    // document is checked in to a document library.</p></td></tr><tr><td 
    // data-th="Property"><p>CheckOutType</p></td><td data-th="Type"><p><span 
    // class="input">Int32</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that indicates how the file is checked 
    // out of a document library. Represents an <span 
    // class="input">SP.CheckOutType</span> value: Online = 0; Offline = 1; None = 
    // 2.</p><p>The checkout state of a file is independent of its locked 
    // state.</p></td></tr><tr><td data-th="Property"><p>ContentTag</p></td><td 
    // data-th="Type"><p><span class="input">String</span></p></td><td 
    // data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>Yes</p></td><td data-th="Description"><p>Returns internal version 
    // of content, used to validate document equality for read 
    // purposes.</p></td></tr><tr><td 
    // data-th="Property"><p>CustomizedPageStatus</p></td><td data-th="Type"><p><span 
    // class="input">Int32</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the customization status 
    // of the file. Represents an <span class="input">SP.CustomizedPageStatus</span> 
    // value: None = 0; Uncustomized = 1; Customized = 2.</p></td></tr><tr><td 
    // data-th="Property"><p>ETag</p></td><td data-th="Type"><p><span 
    // class="input">String</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the ETag 
    // value.</p></td></tr><tr><td data-th="Property"><p>Exists</p></td><td 
    // data-th="Type"><p><span class="input">Boolean</span></p></td><td 
    // data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>Yes</p></td><td data-th="Description"><p>Gets a value that 
    // specifies whether the file exists.</p></td></tr><tr><td 
    // data-th="Property"><p>Length</p></td><td data-th="Type"><p><span 
    // class="input">Int64</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets the size of the file in bytes, excluding the size 
    // of any Web Parts that are used in the file.</p></td></tr><tr><td 
    // data-th="Property"><p>Level</p></td><td data-th="Type"><p><span 
    // class="input">Byte</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the publishing level of 
    // the file. Represents an <span class="input">SP.FileLevel</span> value: 
    // Published = 1; Draft = 2; Checkout = 255.</p></td></tr><tr><td 
    // data-th="Property"><p>ListItemAllFields</p></td><td data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn531433.aspx#bk_ListItem">
    // SP.ListItem</a></p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned 
    // with resource"><p>No</p></td><td data-th="Description"><p>Gets a value that 
    // specifies the list item field values for the list item corresponding to the 
    // file.</p></td></tr><tr><td data-th="Property"><p>LockedByUser</p></td><td 
    // data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn531432.aspx#bk_User">
    // SP.User</a></p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>No</p></td><td data-th="Description"><p>Gets a value that returns 
    // the user that owns the current lock on the file.</p></td></tr><tr><td 
    // data-th="Property"><p>MajorVersion</p></td><td data-th="Type"><p><span 
    // class="input">Int32</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the major version of the 
    // file.</p></td></tr><tr><td data-th="Property"><p>MinorVersion</p></td><td 
    // data-th="Type"><p><span class="input">Int32</span></p></td><td 
    // data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>Yes</p></td><td data-th="Description"><p>Gets a value that 
    // specifies the minor version of the file.</p></td></tr><tr><td 
    // data-th="Property"><p>ModifiedBy</p></td><td data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn531432.aspx#bk_User">
    // SP.User</a></p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>No</p></td><td data-th="Description"><p>Gets a value that returns 
    // the user who last modified the file.</p></td></tr><tr><td 
    // data-th="Property"><p>Name</p></td><td data-th="Type"><p><span 
    // class="input">String</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets the name of the file including the 
    // extension.</p></td></tr><tr><td 
    // data-th="Property"><p>ServerRelativeUrl</p></td><td data-th="Type"><p><span 
    // class="input">String</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets the relative URL of the file based on the URL for 
    // the server.</p></td></tr><tr><td data-th="Property"><p>TimeCreated</p></td><td 
    // data-th="Type"><p><span class="input">DateTime</span></p></td><td 
    // data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>Yes</p></td><td data-th="Description"><p>Gets a value that 
    // specifies when the file was created.</p></td></tr><tr><td 
    // data-th="Property"><p>TimeLastModified</p></td><td data-th="Type"><p><span 
    // class="input">DateTime</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies when the file was last 
    // modified.</p></td></tr><tr><td data-th="Property"><p>Title</p></td><td 
    // data-th="Type"><p><span class="input">String</span></p></td><td 
    // data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>Yes</p></td><td data-th="Description"><p>Gets a value that 
    // specifies the display name of the file.</p></td></tr><tr><td 
    // data-th="Property"><p>UiVersion</p></td><td data-th="Type"><p><span 
    // class="input">Int32</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the 
    // implementation-specific version identifier of the file.</p></td></tr><tr><td 
    // data-th="Property"><p>UiVersionLabel</p></td><td data-th="Type"><p><span 
    // class="input">String</span></p></td><td data-th="R/W"><p>R</p></td><td 
    // data-th="Returned with resource"><p>Yes</p></td><td 
    // data-th="Description"><p>Gets a value that specifies the 
    // implementation-specific version identifier of the file.</p></td></tr><tr><td 
    // data-th="Property"><p>Versions</p></td><td data-th="Type"><p><a 
    // href="https://msdn.microsoft.com/en-us/library/office/dn450841.aspx#bk_FileVersionCollection">SP.FileVersionCollection</a>
    // </p></td><td data-th="R/W"><p>R</p></td><td data-th="Returned with 
    // resource"><p>No</p></td><td data-th="Description"><p>Gets a value that returns 
    // a collection of file version objects that represent the versions of the 
    // file.</p></td></tr></table>
    // 

    h2x = CkHtmlToXml_Create();
    success = CkHtmlToXml_SetHtmlFromFile(h2x,"qa_data/htmlToText/htmlTable.html");

    // First convert to well-formed XML that makes it easier to parse.
    xml = CkXml_Create();
    CkXml_LoadXml(xml,CkHtmlToXml_toXml(h2x));
    printf("%s\n",CkXml_getXml(xml));

    // The XML looks like this:

    // <?xml version="1.0" encoding="utf-8" ?>
    // <root>
    //     <table>
    //         <tr>
    //             <th>
    //                 <p>
    //                     <text>Property</text>
    //                 </p>
    //             </th>
    //             <th scope="col">
    //                 <p>
    //                     <text>Type</text>
    //                 </p>
    //             </th>
    //             <th scope="col">
    //                 <p>
    //                     <text>R/W</text>
    //                 </p>
    //             </th>
    //             <th scope="col">
    //                 <p>
    //                     <a href="https://msdn.microsoft.com/en-us/library/office/jj164022.aspx#NavigationProperties">
    //                         <text>Returned with resource</text>
    //                     </a>
    //                 </p>
    //             </th>
    //             <th scope="col">
    //                 <p>
    //                     <text>Description</text>
    //                 </p>
    //             </th>
    //         </tr>
    //         <tr>
    //             <td data-th="Property">
    //                 <p>
    //                     <text>Author</text>
    //                 </p>
    //             </td>
    //             <td data-th="Type">
    //                 <p>
    //                     <a href="https://msdn.microsoft.com/en-us/library/office/dn531432.aspx#bk_User">
    //                         <text>SP.User</text>
    //                     </a>
    //                 </p>
    //             </td>
    //             <td data-th="R/W">
    //                 <p>
    //                     <text>R</text>
    //                 </p>
    //             </td>
    //             <td data-th="Returned with resource">
    //                 <p>
    //                     <text>No</text>
    //                 </p>
    //             </td>
    //             <td data-th="Description">
    //                 <p>
    //                     <text>Gets a value that specifies the user who added the file.</text>
    //                 </p>
    //             </td>
    //         </tr>
    // ...
    // 

    printf("------------------------------------------------\n");

    // Iterate over the XML, skipping the 1st table row, and emit
    // the Property and Description for each row.

    sbPlainText = CkStringBuilder_Create();

    // First move to the "table" node.
    CkXml_FirstChild2(xml);

    // Get the number of rows.
    numRows = CkXml_getNumChildren(xml);

    // Indexing is 0-based, meaning the 1st row (i.e. the table headers) is at index 0.
    // Skip it by starting with index 1.
    i = 1;
    while (i < numRows) {
        CkXml_putI(xml,i);

        CkStringBuilder_Append(sbPlainText,CkXml_getChildContent(xml,"tr[i]|/A/td,data-th,Property|p|text"));
        CkStringBuilder_Append(sbPlainText,": ");
        CkStringBuilder_AppendLine(sbPlainText,CkXml_getChildContent(xml,"tr[i]|/A/td,data-th,Description|p|text"),TRUE);

        i = i + 1;
    }

    printf("%s\n",CkStringBuilder_getAsString(sbPlainText));

    // The output is:

    // Author: Gets a value that specifies the user who added the file.
    // CheckedOutByUser: Gets a value that returns the user who has checked out the file.
    // CheckInComment: Gets a value that returns the comment used when a document is checked in to a document library.
    // CheckOutType: Gets a value that indicates how the file is checked out of a document library. Represents an
    // ContentTag: Returns internal version of content, used to validate document equality for read purposes.
    // CustomizedPageStatus: Gets a value that specifies the customization status of the file. Represents an
    // ETag: Gets a value that specifies the ETag value.
    // Exists: Gets a value that specifies whether the file exists.
    // Length: Gets the size of the file in bytes, excluding the size of any Web Parts that are used in the file.
    // Level: Gets a value that specifies the publishing level of the file. Represents an
    // ListItemAllFields: Gets a value that specifies the list item field values for the list item corresponding to the file.
    // LockedByUser: Gets a value that returns the user that owns the current lock on the file.
    // MajorVersion: Gets a value that specifies the major version of the file.
    // MinorVersion: Gets a value that specifies the minor version of the file.
    // ModifiedBy: Gets a value that returns the user who last modified the file.
    // Name: Gets the name of the file including the extension.
    // ServerRelativeUrl: Gets the relative URL of the file based on the URL for the server.
    // TimeCreated: Gets a value that specifies when the file was created.
    // TimeLastModified: Gets a value that specifies when the file was last modified.
    // Title: Gets a value that specifies the display name of the file.
    // UiVersion: Gets a value that specifies the implementation-specific version identifier of the file.
    // UiVersionLabel: Gets a value that specifies the implementation-specific version identifier of the file.
    // Versions: Gets a value that returns a collection of file version objects that represent the versions of the file.
    // 


    CkHtmlToXml_Dispose(h2x);
    CkXml_Dispose(xml);
    CkStringBuilder_Dispose(sbPlainText);

    }