![]() |
Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(PowerBuilder) Unicode Escape and Unescape Text in StringBuilder
Demonstrates options for Unicode-escaping non-US-ASCII characters and emojis. Note: This example requires Chilkat v11.1.0 or greater. For more information, see https://www.chilkatsoft.com/unicode_escape.asp
// Demonstrates the Unicode escape/unescape styles offered by
// Chilkat.StringBuilder's Encode and Decode methods.
//
// Flow: load a UTF-16 sample file into loo_SbOriginal, copy it into the
// working builder loo_Sb, then for each escape style call Encode, print the
// result, and (where the style is reversible) call Decode to restore the text.
//
// Write-Debug is the output placeholder used by this example; substitute
// whatever logging your application uses.

integer li_rc
integer li_Success
oleobject loo_SbOriginal
oleobject loo_Sb
string ls_CharsetNotUsed
string ls_Encoding

li_Success = 0

// Holds the pristine, unescaped sample text for the whole example.
loo_SbOriginal = create oleobject
li_rc = loo_SbOriginal.ConnectToNewObject("Chilkat.StringBuilder")
if li_rc < 0 then
    destroy loo_SbOriginal
    MessageBox("Error","Connecting to COM object failed")
    return
end if

li_Success = loo_SbOriginal.LoadFile("qa_data/txt/utf16_emojis_accented_jap.txt","utf-16")
if li_Success = 0 then
    Write-Debug loo_SbOriginal.LastErrorText
    destroy loo_SbOriginal
    return
end if

// The file loaded above contains text that includes some emojis,
// Japanese chars, and accented chars.

// Working copy that is escaped and unescaped below.
loo_Sb = create oleobject
li_rc = loo_Sb.ConnectToNewObject("Chilkat.StringBuilder")
if li_rc < 0 then
    // Same failure handling as for the first object; both OLE objects
    // have been created at this point, so both must be released.
    destroy loo_SbOriginal
    destroy loo_Sb
    MessageBox("Error","Connecting to COM object failed")
    return
end if

loo_Sb.AppendSb(loo_SbOriginal)

// Charset is not used for unicode escaping. Set it to "utf-8", but it means nothing.
ls_CharsetNotUsed = "utf-8"

// -----------------------------------------------------------------------------------------
// Indicate the desired format/style of Unicode escaping.
// Choose JSON-style (JavaScript-style) Unicode escape sequences by using "unicodeescape"
ls_Encoding = "unicodeescape"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \ud83e\udde0
// \ud83d\udd10
// \u2705
// \u26a0\ufe0f
// \u274c
// \u2713
// \u4e2d
// \u00e9 xyz \u00e0
// abc \u79c1 \u306f \u3093 ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Do the same, but use uppercase letters (A-F) in the hex values.
ls_Encoding = "unicodeescape-upper"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \uD83E\uDDE0
// \uD83D\uDD10
// \u2705
// \u26A0\uFE0F
// \u274C
// \u2713
// \u4E2D
// \u00E9 xyz \u00E0
// abc \u79C1 \u306F \u3093 ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// ECMAScript (JavaScript) "code point escape" syntax
ls_Encoding = "unicodeescape-curly"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \u{d83e}\u{dde0}
// \u{d83d}\u{dd10}
// \u{2705}
// \u{26a0}\u{fe0f}
// \u{274c}
// \u{2713}
// \u{4e2d}
// \u{00e9} xyz \u{00e0}
// abc \u{79c1} \u{306f} \u{3093} ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Do the same, but use uppercase letters (A-F) in the hex values.
ls_Encoding = "unicodeescape-curly-upper"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// \u{D83E}\u{DDE0}
// \u{D83D}\u{DD10}
// \u{2705}
// \u{26A0}\u{FE0F}
// \u{274C}
// \u{2713}
// \u{4E2D}
// \u{00E9} xyz \u{00E0}
// abc \u{79C1} \u{306F} \u{3093} ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// HTML hexadecimal character reference
ls_Encoding = "unicodeescape-htmlhex"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output (NOTE(review): reconstructed from the code points listed for the
// U+ and angle-bracket styles below; exact zero-padding of short values
// should be confirmed against a real run):
// &#x1f9e0;
// &#x1f510;
// &#x2705;
// &#x26a0;&#xfe0f;
// &#x274c;
// &#x2713;
// &#x4e2d;
// &#xe9; xyz &#xe0;
// abc &#x79c1; &#x306f; &#x3093; ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// HTML decimal character reference
ls_Encoding = "unicodeescape-htmldec"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output (NOTE(review): reconstructed as the decimal values of the same
// code points; confirm against a real run):
// &#129504;
// &#128272;
// &#9989;
// &#9888;&#65039;
// &#10060;
// &#10003;
// &#20013;
// &#233; xyz &#224;
// abc &#31169; &#12399; &#12435; ghi

// Revert back to the unescaped chars:
loo_Sb.Decode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// -----------------------------------------------------------------------------------------
// Unicode code point notation or U+ notation
ls_Encoding = "unicodeescape-plus"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// u+1f9e0
// u+1f510
// u+2705
// u+26a0u+fe0f
// u+274c
// u+2713
// u+4e2d
// u+00e9 xyz u+00e0
// abc u+79c1 u+306f u+3093 ghi

// Chilkat cannot unescape the Unicode code point notation or U+ notation.
// For this style, Chilkat only goes in one direction, which is to escape.

// To emit uppercase hex, pass "unicodeescape-plus-upper" as the encoding
// to Encode instead of "unicodeescape-plus".

// Because this style cannot be decoded, restore the working copy from the original.
loo_Sb.Clear()
loo_Sb.AppendSb(loo_SbOriginal)

// -----------------------------------------------------------------------------------------
// Hex in Angle Brackets
ls_Encoding = "unicodeescape-angle"
loo_Sb.Encode(ls_Encoding,ls_CharsetNotUsed)
Write-Debug loo_Sb.GetAsString()

// Output:
// <1f9e0>
// <1f510>
// <2705>
// <26a0><fe0f>
// <274c>
// <2713>
// <4e2d>
// <e9> xyz <e0>
// abc <79c1> <306f> <3093> ghi

// Chilkat cannot unescape the angle bracket notation.
// For this style, Chilkat only goes in one direction, which is to escape.

// Release both OLE objects.
destroy loo_SbOriginal
destroy loo_Sb
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.