Unicode Escape

Demonstrates the options for Unicode-escaping non-US-ASCII characters and emojis.
Chilkat Go Downloads

    success := false

    sb := chilkat.NewStringBuilder()
    success = sb.LoadFile("qa_data/txt/utf16_emojis_accented_jap.txt","utf-16")
    if success == false {
        fmt.Println(sb.LastErrorText())
        sb.DisposeStringBuilder()
        return
    }

    original := sb.GetAsString()

    // The file loaded above contains the following text, which includes some emojis,
    // Japanese characters, and accented characters.

    // 🧠
    // 🔐
    // ✅
    // ⚠️
    // ❌
    // ✓
    // 中
    // é xyz à
    // abc 私 は ん ghi

    crypt := chilkat.NewCrypt2()

    // The charset argument plays no part in Unicode escaping; "utf-8" is passed
    // only because the call requires a value.
    charsetNotUsed := "utf-8"

    // The encoding name selects the escape style. "unicodeescape" produces
    // JSON-style (JavaScript-style) \uXXXX sequences with lowercase hex digits.
    encoding := "unicodeescape"

    // The encode result is a pointer that is dereferenced when printing, so
    // check it for nil first and fail the same way the file load does:
    // print the error text, dispose, return.
    escaped := crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // \ud83e\udde0
    // \ud83d\udd10
    // \u2705
    // \u26a0\ufe0f
    // \u274c
    // \u2713
    // \u4e2d
    // \u00e9 xyz \u00e0
    // abc \u79c1 \u306f \u3093 ghi

    // Revert back to the unescaped chars (same nil guard before dereferencing):
    unescaped := crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // Same \uXXXX style, but with uppercase letters (A-F) in the hex values.
    encoding = "unicodeescape-upper"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // \uD83E\uDDE0
    // \uD83D\uDD10
    // \u2705
    // \u26A0\uFE0F
    // \u274C
    // \u2713
    // \u4E2D
    // \u00E9 xyz \u00E0
    // abc \u79C1 \u306F \u3093 ghi

    // Revert back to the unescaped chars:
    unescaped = crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // ECMAScript (JavaScript) “code point escape” syntax: \u{...} with lowercase hex.

    encoding = "unicodeescape-curly"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // \u{d83e}\u{dde0}
    // \u{d83d}\u{dd10}
    // \u{2705}
    // \u{26a0}\u{fe0f}
    // \u{274c}
    // \u{2713}
    // \u{4e2d}
    // \u{00e9} xyz \u{00e0}
    // abc \u{79c1} \u{306f} \u{3093} ghi

    // Revert back to the unescaped chars:
    unescaped = crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // Same \u{...} style, but with uppercase letters (A-F) in the hex values.
    encoding = "unicodeescape-curly-upper"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // \u{D83E}\u{DDE0}
    // \u{D83D}\u{DD10}
    // \u{2705}
    // \u{26A0}\u{FE0F}
    // \u{274C}
    // \u{2713}
    // \u{4E2D}
    // \u{00E9} xyz \u{00E0}
    // abc \u{79C1} \u{306F} \u{3093} ghi

    // Revert back to the unescaped chars:
    unescaped = crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // Unicode code point notation or U+ notation

    encoding = "unicodeescape-plus"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // u+1f9e0
    // u+1f510
    // u+2705
    // u+26a0u+fe0f
    // u+274c
    // u+2713
    // u+4e2d
    // u+00e9 xyz u+00e0
    // abc u+79c1 u+306f u+3093 ghi

    // Chilkat cannot unescape the Unicode code point notation or U+ notation.
    // For this style, Chilkat only goes in one direction, which is to escape.

    // To emit uppercase hex, specify "unicodeescape-plus-upper" as the encoding
    // and repeat the EncodeString call shown above. (It is only named here, not
    // assigned, because the value would be overwritten before being used.)

    // -----------------------------------------------------------------------------------------
    // HTML hexadecimal character reference (&#x...;)

    encoding = "unicodeescape-htmlhex"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // &#x1f9e0;
    // &#x1f510;
    // &#x2705;
    // &#x26a0;&#xfe0f;
    // &#x274c;
    // &#x2713;
    // &#x4e2d;
    // &#xe9; xyz &#xe0;
    // abc &#x79c1; &#x306f; &#x3093; ghi

    // Revert back to the unescaped chars:
    unescaped = crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // HTML decimal character reference (&#...;)

    encoding = "unicodeescape-htmldec"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // &#129504;
    // &#128272;
    // &#9989;
    // &#9888;&#65039;
    // &#10060;
    // &#10003;
    // &#20013;
    // &#233; xyz &#224;
    // abc &#31169; &#12399; &#12435; ghi

    // Revert back to the unescaped chars:
    unescaped = crypt.DecodeString(*escaped, charsetNotUsed, encoding)
    if unescaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*unescaped)

    // -----------------------------------------------------------------------------------------
    // Hex in Angle Brackets

    encoding = "unicodeescape-angle"

    // The encode result is dereferenced when printing; guard against nil and
    // clean up before returning.
    escaped = crypt.EncodeString(*original, charsetNotUsed, encoding)
    if escaped == nil {
        fmt.Println(crypt.LastErrorText())
        sb.DisposeStringBuilder()
        crypt.DisposeCrypt2()
        return
    }
    fmt.Println(*escaped)

    // Output:
    // <1f9e0>
    // <1f510>
    // <2705>
    // <26a0><fe0f>
    // <274c>
    // <2713>
    // <4e2d>
    // <e9> xyz <e0>
    // abc <79c1> <306f> <3093> ghi

    // Chilkat cannot unescape the angle bracket notation.
    // For this style, Chilkat only goes in one direction, which is to escape.

    // Release the Chilkat objects now that the demonstration is complete.
    sb.DisposeStringBuilder()
    crypt.DisposeCrypt2()