(VB.NET) Unicode Escape and Unescape Text in StringBuilder

Demonstrates the options for Unicode-escaping non-US-ASCII characters and emojis.

Note: This example requires Chilkat v11.1.0 or greater.

For more information, see https://www.chilkatsoft.com/unicode_escape.asp

Chilkat .NET Downloads

Chilkat .NET Framework

Chilkat for .NET Core

' Load the UTF-16 sample file into a StringBuilder that later sections copy from.
Dim sbOriginal As Chilkat.StringBuilder = New Chilkat.StringBuilder()
Dim success As Boolean = sbOriginal.LoadFile("qa_data/txt/utf16_emojis_accented_jap.txt", "utf-16")
If Not success Then
    ' The load failed: report the error text and stop.
    Debug.WriteLine(sbOriginal.LastErrorText)
    Exit Sub
End If


' The file loaded above contains text that includes some emojis,
' Japanese chars, and accented chars.

' Work on a copy so the originally loaded text stays available in sbOriginal.
Dim sb As New Chilkat.StringBuilder
sb.AppendSb(sbOriginal)

' Charset is not used for unicode escaping.  Set it to "utf-8", but it means nothing.
Dim charsetNotUsed As String = "utf-8"

' Indicate the desired format/style of Unicode escaping.
' Choose JSON-style (JavaScript-style) Unicode escape sequences by using "unicodeescape"
Dim encoding As String = "unicodeescape"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' \ud83e\udde0
' \ud83d\udd10
' \u2705
' \u26a0\ufe0f
' \u274c
' \u2713
' \u4e2d
' \u00e9 xyz \u00e0
' abc \u79c1 \u306f \u3093 ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' Do the same, but use uppercase letters (A-F) in the hex values.
encoding = "unicodeescape-upper"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' \uD83E\uDDE0
' \uD83D\uDD10
' \u2705
' \u26A0\uFE0F
' \u274C
' \u2713
' \u4E2D
' \u00E9 xyz \u00E0
' abc \u79C1 \u306F \u3093 ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' ECMAScript (JavaScript) "code point escape" syntax

encoding = "unicodeescape-curly"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' \u{d83e}\u{dde0}
' \u{d83d}\u{dd10}
' \u{2705}
' \u{26a0}\u{fe0f}
' \u{274c}
' \u{2713}
' \u{4e2d}
' \u{00e9} xyz \u{00e0}
' abc \u{79c1} \u{306f} \u{3093} ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' Do the same, but use uppercase letters (A-F) in the hex values.
encoding = "unicodeescape-curly-upper"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' \u{D83E}\u{DDE0}
' \u{D83D}\u{DD10}
' \u{2705}
' \u{26A0}\u{FE0F}
' \u{274C}
' \u{2713}
' \u{4E2D}
' \u{00E9} xyz \u{00E0}
' abc \u{79C1} \u{306F} \u{3093} ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' HTML hexadecimal character reference

encoding = "unicodeescape-htmlhex"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' &#x1f9e0;
' &#x1f510;
' &#x2705;
' &#x26a0;&#xfe0f;
' &#x274c;
' &#x2713;
' &#x4e2d;
' &#xe9; xyz &#xe0;
' abc &#x79c1; &#x306f; &#x3093; ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' HTML decimal character reference

encoding = "unicodeescape-htmldec"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' &#129504;
' &#128272;
' &#9989;
' &#9888;&#65039;
' &#10060;
' &#10003;
' &#20013;
' &#233; xyz &#224;
' abc &#31169; &#12399; &#12435; ghi

' Revert back to the unescaped chars:
success = sb.Decode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' -----------------------------------------------------------------------------------------
' Unicode code point notation or U+ notation

encoding = "unicodeescape-plus"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' u+1f9e0
' u+1f510
' u+2705
' u+26a0u+fe0f
' u+274c
' u+2713
' u+4e2d
' u+00e9 xyz u+00e0
' abc u+79c1 u+306f u+3093 ghi

' Chilkat cannot unescape the Unicode code point notation or U+ notation.
' For this style, Chilkat only goes in one direction, which is to escape.

' To emit uppercase hex, specify unicodeescape-plus-upper
' (Shown for illustration only: no Encode call is made with this value here.)
encoding = "unicodeescape-plus-upper"
' ...
' ...

' The escaped text cannot be decoded, so restore sb from the original copy.
sb.Clear()
sb.AppendSb(sbOriginal)

' -----------------------------------------------------------------------------------------
' Hex in Angle Brackets

encoding = "unicodeescape-angle"

' Escape the contents of sb in place; stop and report if the call fails.
success = sb.Encode(encoding,charsetNotUsed)
If (success = False) Then
    Debug.WriteLine(sb.LastErrorText)
    Exit Sub
End If
Debug.WriteLine(sb.GetAsString())

' Output:
' <1f9e0>
' <1f510>
' <2705>
' <26a0><fe0f>
' <274c>
' <2713>
' <4e2d>
' <e9> xyz <e0>
' abc <79c1> <306f> <3093> ghi

' Chilkat cannot unescape the angle bracket notation.
' For this style, Chilkat only goes in one direction, which is to escape.

' The escaped text cannot be decoded, so restore sb from the original copy.
sb.Clear()
sb.AppendSb(sbOriginal)