DataFlex
Unicode Escape

Demonstrates the available options for Unicode-escaping non-US-ASCII characters and emoji.
Chilkat DataFlex Downloads

DataFlex
Use ChilkatAx-win32.pkg

Procedure Test
    Boolean iSuccess
    Handle hoSb
    String sOriginal
    Handle hoCrypt
    String sCharsetNotUsed
    String sEncoding
    String sEscaped
    String sUnescaped
    String sTemp1

    Move False To iSuccess

    Get Create (RefClass(cComChilkatStringBuilder)) To hoSb
    If (Not(IsComObjectCreated(hoSb))) Begin
        Send CreateComObject of hoSb
    End
    Get ComLoadFile Of hoSb "qa_data/txt/utf16_emojis_accented_jap.txt" "utf-16" To iSuccess
    If (iSuccess = False) Begin
        Get ComLastErrorText Of hoSb To sTemp1
        Showln sTemp1
        Procedure_Return
    End

    Get ComGetAsString Of hoSb To sOriginal

    // The above file contains the following text, which includes some emoji's,
    // Japanese chars, and accented chars.

    // 🧠
    // 🔐
    // ✅
    // ⚠️
    // ❌
    // ✓
    // 中
    // é xyz à
    // abc 私 は ん ghi

    Get Create (RefClass(cComChilkatCrypt2)) To hoCrypt
    If (Not(IsComObjectCreated(hoCrypt))) Begin
        Send CreateComObject of hoCrypt
    End

    // Charset is not used for unicode escaping.  Set it to "utf-8", but it means nothing.
    Move "utf-8" To sCharsetNotUsed

    // Indicate the desired format/style of Unicode escaping.
    // Choose JSON-style (JavaScript-style) Unicode escape sequences by using "unicodeescape"
    Move "unicodeescape" To sEncoding

    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // \ud83e\udde0
    // \ud83d\udd10
    // \u2705
    // \u26a0\ufe0f
    // \u274c
    // \u2713
    // \u4e2d
    // \u00e9 xyz \u00e0
    // abc \u79c1 \u306f \u3093 ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    // Do the same, but use uppercase letters (A-F) in the hex values.
    Move "unicodeescape-upper" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // \uD83E\uDDE0
    // \uD83D\uDD10
    // \u2705
    // \u26A0\uFE0F
    // \u274C
    // \u2713
    // \u4E2D
    // \u00E9 xyz \u00E0
    // abc \u79C1 \u306F \u3093 ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    //  ECMAScript (JavaScript) “code point escape” syntax

    Move "unicodeescape-curly" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // \u{d83e}\u{dde0}
    // \u{d83d}\u{dd10}
    // \u{2705}
    // \u{26a0}\u{fe0f}
    // \u{274c}
    // \u{2713}
    // \u{4e2d}
    // \u{00e9} xyz \u{00e0}
    // abc \u{79c1} \u{306f} \u{3093} ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    // Do the same, but use uppercase letters (A-F) in the hex values.
    Move "unicodeescape-curly-upper" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // \u{D83E}\u{DDE0}
    // \u{D83D}\u{DD10}
    // \u{2705}
    // \u{26A0}\u{FE0F}
    // \u{274C}
    // \u{2713}
    // \u{4E2D}
    // \u{00E9} xyz \u{00E0}
    // abc \u{79C1} \u{306F} \u{3093} ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    // Unicode code point notation or U+ notation

    Move "unicodeescape-plus" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // u+1f9e0
    // u+1f510
    // u+2705
    // u+26a0u+fe0f
    // u+274c
    // u+2713
    // u+4e2d
    // u+00e9 xyz u+00e0
    // abc u+79c1 u+306f u+3093 ghi

    // Chilkat cannot unescape the Unicode code point notation or U+ notation.
    // For this style, Chilkat only goes in one direction, which is to escape.

    // To emit uppercase hex, specify unicodeescape-plus-upper
    Move "unicodeescape-plus-upper" To sEncoding
    // ...
    // ...

    // -----------------------------------------------------------------------------------------
    // HTML hexadecimal character reference

    Move "unicodeescape-htmlhex" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // &#x1f9e0;
    // &#x1f510;
    // &#x2705;
    // &#x26a0;&#xfe0f;
    // &#x274c;
    // &#x2713;
    // &#x4e2d;
    // &#xe9; xyz &#xe0;
    // abc &#x79c1; &#x306f; &#x3093; ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    // HTML decimal character reference

    Move "unicodeescape-htmldec" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // &#129504;
    // &#128272;
    // &#9989;
    // &#9888;&#65039;
    // &#10060;
    // &#10003;
    // &#20013;
    // &#233; xyz &#224;
    // abc &#31169; &#12399; &#12435; ghi

    // Revert back to the unescaped chars:
    Get ComDecodeString Of hoCrypt sEscaped sCharsetNotUsed sEncoding To sUnescaped
    Showln sUnescaped

    // -----------------------------------------------------------------------------------------
    // Hex in Angle Brackets

    Move "unicodeescape-angle" To sEncoding
    Get ComEncodeString Of hoCrypt sOriginal sCharsetNotUsed sEncoding To sEscaped
    Showln sEscaped

    // Output:
    // <1f9e0>
    // <1f510>
    // <2705>
    // <26a0><fe0f>
    // <274c>
    // <2713>
    // <4e2d>
    // <e9> xyz <e0>
    // abc <79c1> <306f> <3093> ghi

    // Chilkat cannot unescape the angle bracket notation.
    // For this style, Chilkat only goes in one direction, which is to escape.


End_Procedure