(PowerBuilder) Unicode Escape and Unescape Text in StringBuilder

Demonstrates the options for Unicode-escaping non-US-ASCII characters and emojis.

Note: This example requires Chilkat v11.1.0 or greater.

For more information, see https://www.chilkatsoft.com/unicode_escape.asp

Chilkat ActiveX Downloads

ActiveX for 32-bit and 64-bit Windows

// Loads a UTF-16 text file into a Chilkat StringBuilder, then encodes (and, where
// supported, decodes) a working copy using each of the "unicodeescape*" styles,
// writing the result of every step to the debug output.
integer li_rc
integer li_Success
oleobject loo_SbOriginal
oleobject loo_Sb
string ls_CharsetNotUsed
string ls_Encoding

li_Success = 0

// loo_SbOriginal holds the untouched file contents for the whole script.
loo_SbOriginal = create oleobject
li_rc = loo_SbOriginal.ConnectToNewObject("Chilkat.StringBuilder")
if li_rc < 0 then
    destroy loo_SbOriginal
    MessageBox("Error","Connecting to COM object failed")
    return
end if
li_Success = loo_SbOriginal.LoadFile("qa_data/txt/utf16_emojis_accented_jap.txt","utf-16")
if li_Success = 0 then
    Write-Debug loo_SbOriginal.LastErrorText
    destroy loo_SbOriginal
    return
end if

// The file loaded above contains text that includes some emojis,
// Japanese chars, and accented chars.

// loo_Sb is the working copy that gets encoded/decoded in place.
loo_Sb = create oleobject
li_rc = loo_Sb.ConnectToNewObject("Chilkat.StringBuilder")
if li_rc < 0 then
    // Same failure handling as the first connection, but both objects
    // exist at this point, so both must be released before returning.
    destroy loo_SbOriginal
    destroy loo_Sb
    MessageBox("Error","Connecting to COM object failed")
    return
end if

loo_Sb.AppendSb(loo_SbOriginal)

// Charset is not used for unicode escaping.  Set it to "utf-8", but it means nothing.
ls_CharsetNotUsed = "utf-8"

// Indicate the desired format/style of Unicode escaping.
// Choose JSON-style (JavaScript-style) Unicode escape sequences by using "unicodeescape"
ls_Encoding = "unicodeescape"

loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \ud83e\udde0
// \ud83d\udd10
// \u2705
// \u26a0\ufe0f
// \u274c
// \u2713
// \u4e2d
// \u00e9 xyz \u00e0
// abc \u79c1 \u306f \u3093 ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Do the same, but use uppercase letters (A-F) in the hex values.
ls_Encoding = "unicodeescape-upper"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \uD83E\uDDE0
// \uD83D\uDD10
// \u2705
// \u26A0\uFE0F
// \u274C
// \u2713
// \u4E2D
// \u00E9 xyz \u00E0
// abc \u79C1 \u306F \u3093 ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
//  ECMAScript (JavaScript) "code point escape" syntax

ls_Encoding = "unicodeescape-curly"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \u{d83e}\u{dde0}
// \u{d83d}\u{dd10}
// \u{2705}
// \u{26a0}\u{fe0f}
// \u{274c}
// \u{2713}
// \u{4e2d}
// \u{00e9} xyz \u{00e0}
// abc \u{79c1} \u{306f} \u{3093} ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Do the same, but use uppercase letters (A-F) in the hex values.
ls_Encoding = "unicodeescape-curly-upper"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \u{D83E}\u{DDE0}
// \u{D83D}\u{DD10}
// \u{2705}
// \u{26A0}\u{FE0F}
// \u{274C}
// \u{2713}
// \u{4E2D}
// \u{00E9} xyz \u{00E0}
// abc \u{79C1} \u{306F} \u{3093} ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// HTML hexadecimal character reference

ls_Encoding = "unicodeescape-htmlhex"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// &#x1f9e0;
// &#x1f510;
// &#x2705;
// &#x26a0;&#xfe0f;
// &#x274c;
// &#x2713;
// &#x4e2d;
// &#xe9; xyz &#xe0;
// abc &#x79c1; &#x306f; &#x3093; ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// HTML decimal character reference

ls_Encoding = "unicodeescape-htmldec"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// &#129504;
// &#128272;
// &#9989;
// &#9888;&#65039;
// &#10060;
// &#10003;
// &#20013;
// &#233; xyz &#224;
// abc &#31169; &#12399; &#12435; ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Unicode code point notation or U+ notation

ls_Encoding = "unicodeescape-plus"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// u+1f9e0
// u+1f510
// u+2705
// u+26a0u+fe0f
// u+274c
// u+2713
// u+4e2d
// u+00e9 xyz u+00e0
// abc u+79c1 u+306f u+3093 ghi

// Chilkat cannot unescape the Unicode code point notation or U+ notation.
// For this style, Chilkat only goes in one direction, which is to escape.

// To emit uppercase hex, specify unicodeescape-plus-upper
ls_Encoding = "unicodeescape-plus-upper"
// ...
// ...

// No Decode is available for this style, so restore the working copy
// from the original text instead.
loo_Sb.Clear()
loo_Sb.AppendSb(loo_SbOriginal)

// -----------------------------------------------------------------------------------------
// Hex in Angle Brackets

ls_Encoding = "unicodeescape-angle"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// <1f9e0>
// <1f510>
// <2705>
// <26a0><fe0f>
// <274c>
// <2713>
// <4e2d>
// <e9> xyz <e0>
// abc <79c1> <306f> <3093> ghi

// Chilkat cannot unescape the angle bracket notation.
// For this style, Chilkat only goes in one direction, which is to escape.

// Restore the working copy from the original text, as above.
loo_Sb.Clear()
loo_Sb.AppendSb(loo_SbOriginal)


destroy loo_SbOriginal
destroy loo_Sb