Chilkat HOME ASP Visual Basic VB.NET C# Visual C++ C MFC Delphi FoxPro Java Perl PHP Python Ruby SQL Server VBScript
|
Convert HTML Email Body to Plain Text
C# example code to load an email and convert the HTML body to plain text. This uses both the Chilkat Mail and Chilkat Html-to-Xml components.
/// <summary>
/// Returns true for tags that should be rendered as a CRLF in plain text.
/// </summary>
/// <param name="tag">The XML tag name of the current node.</param>
private static bool IsLineBreakTag(string tag)
{
    return tag == "br" || tag == "div" || tag == "tr" || tag == "p";
}

/// <summary>
/// Flattens an XML document into plain text by walking every descendant of
/// the root node in document order and concatenating each node's content.
/// A CRLF is emitted before the content of br, div, tr and p nodes.
/// The root node's own content is not included; only its descendants are visited.
/// </summary>
/// <param name="xmlStr">The XML text to flatten (here, the output of HtmlToXml.ToXml).</param>
/// <returns>The concatenated text content of all descendant nodes.</returns>
string XmlToText(string xmlStr)
{
    StringBuilder sb = new StringBuilder();

    Chilkat.Xml xml = new Chilkat.Xml();
    xml.LoadXml(xmlStr);

    // Two parallel stacks describe the path from the root to the current node:
    // for each level, the index of the next child to visit and the child count.
    Stack<int> curIndex = new Stack<int>(100);
    Stack<int> numChildren = new Stack<int>(100);

    // We are on the 1st child for the current node...
    curIndex.Push(0);
    numChildren.Push(xml.NumChildren);

    while (true)
    {
        int idx = curIndex.Peek();
        int nChildren = numChildren.Peek();

        // Have we finished with the children at this node?
        // If so, move back up to the parent.
        if (idx == nChildren)
        {
            curIndex.Pop();
            numChildren.Pop();

            if (curIndex.Count == 0)
            {
                // We're done: the root level has been popped.
                break;
            }

            xml.GetParent2();
            continue;
        }

        // Traverse to the Nth child (the xml object is repositioned in place).
        xml.GetChild2(idx);

        // Tags that should represent a CRLF in plain-text are caught here...
        if (IsLineBreakTag(xml.Tag))
        {
            sb.Append("\r\n");
        }

        sb.Append(xml.Content);

        // Increment the current index at the top of the stack so that the
        // next visit to this level moves on to the following sibling.
        int tempIdx = curIndex.Pop();
        curIndex.Push(tempIdx + 1);

        // Does this child have any children?
        int n = xml.NumChildren;
        if (n > 0)
        {
            // Descend: stay positioned on the child and start at its 1st child.
            curIndex.Push(0);
            numChildren.Push(n);
        }
        else
        {
            // Leaf node: just move back up.
            xml.GetParent2();
        }
    }

    return sb.ToString();
}

/// <summary>
/// Loads "myEmail.eml", converts its body (assumed to be HTML) to XML, flattens
/// the XML to plain text, and shows the result in textBox1. Any failure is
/// reported with a message box showing the component's LastErrorText.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    // Create a MailMan first for the purpose of unlocking the component.
    Chilkat.MailMan mailman = new Chilkat.MailMan();
    if (!mailman.UnlockComponent("anything for 30-day trial"))
    {
        MessageBox.Show(mailman.LastErrorText);
        return;
    }

    Chilkat.Email email = new Chilkat.Email();

    // Load an email from a .eml
    bool success = email.LoadEml("myEmail.eml");
    if (!success)
    {
        MessageBox.Show(email.LastErrorText);
        return;
    }

    // Get the body of the email, which may or may not be HTML.
    string bodyStr = email.Body;

    // Assume the body is HTML...
    // Convert it to XML:
    Chilkat.HtmlToXml htmlConv = new Chilkat.HtmlToXml();
    if (!htmlConv.UnlockComponent("Anything for 30-day trial"))
    {
        MessageBox.Show(htmlConv.LastErrorText);
        return;
    }

    // br tags are dropped by default, but we want them...
    htmlConv.UndropTagType("br");

    // Drop some tag types to simplify the resultant XML...
    htmlConv.DropTagType("font");
    htmlConv.DropTagType("b");
    htmlConv.DropTagType("i");

    // Remove scripts.
    htmlConv.DropTagType("script");

    // Exclude style sheets.
    htmlConv.DropTagType("style");

    htmlConv.Html = bodyStr;
    string xml = htmlConv.ToXml();

    string plainText = XmlToText(xml);

    // We don't want more than 2 CRLF's in a row. A single Replace pass can
    // leave new triples behind, so repeat until none remain.
    while (plainText.Contains("\r\n\r\n\r\n"))
    {
        plainText = plainText.Replace("\r\n\r\n\r\n", "\r\n\r\n");
    }

    textBox1.Text = plainText;
}
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2008 Chilkat Software, Inc. All Rights Reserved.