Chilkat HOME ASP Visual Basic VB.NET C# Visual C++ C MFC Delphi FoxPro Java Perl PHP Python Ruby SQL Server VBScript
|
Dropping Specific HTML Tags
Specific HTML tags may be dropped during the conversion from HTML to XML by calling DropTagType once for each tag to be dropped.
# file: DropTagType.py
#
# Demonstrates how specific HTML tags can be selected to be dropped
# during the HTML to XML conversion process.
import sys

import chilkat

htmlConv = chilkat.CkHtmlToXml()

# The component must be unlocked before it can be used.
success = htmlConv.UnlockComponent("anything for 30-day trial")
if not success:
    print("component is locked!")
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)

html = "<html><body><span>This <b>is</b> a <i>test</i><hr></span></body></html>"

# First, call UndropTextFormattingTags to prevent the text formatting tags
# from being dropped by default.
htmlConv.UndropTextFormattingTags()

# We'll want to drop <hr>, <i>, and <span> tags.  DropTagType is called
# once per tag name.
for tag in ("hr", "i", "span"):
    htmlConv.DropTagType(tag)

# To convert, set the HTML and get the XML:
htmlConv.put_Html(html)
xml = htmlConv.xml()
print(xml)

# The output is this:
#
# <?xml version="1.0" encoding="utf-8" ?>
#
# <root>
#     <html>
#         <body>
#             <text>This </text>
#             <b>
#                 <text>is</text>
#             </b>
#             <text>a test</text>
#         </body>
#     </html>
# </root>
|
Need a specific example? Send a request to support@chilkatsoft.com
© 2000-2008 Chilkat Software, Inc. All Rights Reserved.