Tcl
Tcl
XML Path Performance Optimizations
See more XML Examples
Discusses some important things to know about using Chilkat paths in the Chilkat XML API.
Chilkat Tcl Downloads
# Load the Chilkat extension and create the XML object used by the rest of this script.
load ./chilkat.dll
set success 0
set xml [new_CkXml]

# The file loaded below is expected to contain XML shaped like this:
#
#   <?xml version="1.0" encoding="utf-8"?>
#   <xyz>
#       <licenses>
#           <license>
#               <id>1234</id>
#           </license>
#           <license>
#               <id>1234</id>
#           </license>
#           ...
#           (the sample XML contains 64,000 "license" nodes)
#           ...
#           <license>
#               <id>1234</id>
#           </license>
#           <license>
#               <id>1234</id>
#           </license>
#       </licenses>
#   </xyz>
#
set success [CkXml_LoadXmlFile $xml {qa_output/large.xml}]
if {$success != 1} {
    # The load failed: print the error text, release the XML object, and stop.
    puts [CkXml_lastErrorText $xml]
    delete_CkXml $xml
    exit
}
# Iterating over the individual "license" nodes with a tag-qualified path
# ("license[i]") is extremely slow, as this snippet demonstrates.
set licCount [CkXml_NumChildrenHavingTag $xml "licenses|license"]
puts "license count = $licCount"

# NOTE: the path is written in braces so that the literal text "[i]" reaches
# Chilkat. Inside double quotes Tcl performs command substitution on [i] and
# would try to run a command named "i".
#
# If "10" is changed to $licCount, it becomes apparent that this loop gets
# slower with each iteration.
for {set i 0} {$i < 10} {incr i} {
    # Set the XML object's I property, which the "[i]" in the path refers to.
    CkXml_put_I $xml $i
    set s [CkXml_getChildContent $xml {licenses|license[i]|id}]
    puts "$i: $s"
}
# The reason it is extremely slow is that the "license[i]" part of the path passed to GetChildContent
# says: find the i'th child of "licenses" having the tag "license". Chilkat cannot assume that all
# children of an XML node have the same tag. Therefore it's not possible to directly access the i'th child.
# Internally, Chilkat must start at the 1st child and iterate until it reaches the i'th child having the
# tag "license".
# For example, imagine if the XML was like this:
# <?xml version="1.0" encoding="utf-8"?>
# <xyz>
# <licenses>
# <license>
# <id>1234</id>
# </license>
# <somethingElse>
# <a>abc</a>
# </somethingElse>
# <license>
# <id>1234</id>
# </license>
# ...
# In the above XML, the 1st "license" is the 1st child of "licenses", but the 2nd "license"
# is the 3rd child of "licenses".
# If you already know that all children have the same tag, there is a shortcut that allows
# for direct access to that child. Just leave off the tag name, like this:
# Index-only access: "[i]" with no tag name selects the i'th child directly.
#
# NOTE: the path is written in braces so that the literal text "[i]" reaches
# Chilkat. Inside double quotes Tcl performs command substitution on [i] and
# would try to run a command named "i".
#
# If "10" is changed to $licCount, then we can see the time for each loop
# iteration is the same, and it's fast.
for {set i 0} {$i < 10} {incr i} {
    # Set the XML object's I property, which the "[i]" in the path refers to.
    CkXml_put_I $xml $i
    set s [CkXml_getChildContent $xml {licenses|[i]|id}]
    puts "$i: $s"
}
# When we pass just the index "[i]", we're saying: Get the i'th child regardless of tag.
# This is extremely fast because internally we can just access the i'th child directly.
# A further performance improvement: call NumChildrenAt instead of
# NumChildrenHavingTag. For example:
set licCount [CkXml_NumChildrenAt $xml {licenses}]
puts "licCount = ${licCount}"
# NumChildrenAt returns the total number of children at the tag path. When all
# children are already known to have the same tag, that total is the count we need.

# Release the XML object now that the example is finished.
delete_CkXml $xml