![]() |
Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(Chilkat2-Python) Unicode Escape — Demonstrates options for Unicode-escaping non-US-ASCII characters and emojis. Note: This example requires Chilkat v11.1.0 or greater. For more information, see https://www.chilkatsoft.com/unicode_escape.asp
"""Demonstrate the Unicode-escape encodings of Chilkat's ``Crypt2`` class.

The script loads a UTF-16 text file containing emojis, Japanese characters and
accented Latin characters, then prints that text escaped with each of the
"unicodeescape*" encoding names.  For every style that is decoded below, the
escaped text is also converted back and printed.

Per the accompanying example notes, this requires Chilkat v11.1.0 or greater.
"""
import sys

import chilkat2

sb = chilkat2.StringBuilder()
success = sb.LoadFile("qa_data/txt/utf16_emojis_accented_jap.txt", "utf-16")
if not success:
    print(sb.LastErrorText)
    # Exit with a non-zero status so a calling shell/script can tell that the
    # demo failed (a bare sys.exit() would report success).
    sys.exit(1)

original = sb.GetAsString()

# The above file contains the following text, which includes some emojis,
# Japanese characters, and accented characters.

# 🧠
# 🔐
# ✅
# ⚠️
# ❌
# ✓
# 中
# é xyz à
# abc 私 は ん ghi

crypt = chilkat2.Crypt2()

# Charset is not used for unicode escaping. Set it to "utf-8", but it means nothing.
charsetNotUsed = "utf-8"

# Indicate the desired format/style of Unicode escaping.
# Choose JSON-style (JavaScript-style) Unicode escape sequences by using "unicodeescape"
encoding = "unicodeescape"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# \ud83e\udde0
# \ud83d\udd10
# \u2705
# \u26a0\ufe0f
# \u274c
# \u2713
# \u4e2d
# \u00e9 xyz \u00e0
# abc \u79c1 \u306f \u3093 ghi

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# Do the same, but use uppercase letters (A-F) in the hex values.
encoding = "unicodeescape-upper"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# \uD83E\uDDE0
# \uD83D\uDD10
# \u2705
# \u26A0\uFE0F
# \u274C
# \u2713
# \u4E2D
# \u00E9 xyz \u00E0
# abc \u79C1 \u306F \u3093 ghi

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# ECMAScript (JavaScript) "code point escape" syntax
encoding = "unicodeescape-curly"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# \u{d83e}\u{dde0}
# \u{d83d}\u{dd10}
# \u{2705}
# \u{26a0}\u{fe0f}
# \u{274c}
# \u{2713}
# \u{4e2d}
# \u{00e9} xyz \u{00e0}
# abc \u{79c1} \u{306f} \u{3093} ghi

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# Do the same, but use uppercase letters (A-F) in the hex values.
encoding = "unicodeescape-curly-upper"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# \u{D83E}\u{DDE0}
# \u{D83D}\u{DD10}
# \u{2705}
# \u{26A0}\u{FE0F}
# \u{274C}
# \u{2713}
# \u{4E2D}
# \u{00E9} xyz \u{00E0}
# abc \u{79C1} \u{306F} \u{3093} ghi

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# Unicode code point notation or U+ notation
encoding = "unicodeescape-plus"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# u+1f9e0
# u+1f510
# u+2705
# u+26a0u+fe0f
# u+274c
# u+2713
# u+4e2d
# u+00e9 xyz u+00e0
# abc u+79c1 u+306f u+3093 ghi

# Chilkat cannot unescape the Unicode code point notation or U+ notation.
# For this style, Chilkat only goes in one direction, which is to escape.

# To emit uppercase hex, specify unicodeescape-plus-upper
# (The encode/print calls are the same as above and are elided here, so this
# value is overwritten below without being used.)
encoding = "unicodeescape-plus-upper"
# ...
# ...

# -----------------------------------------------------------------------------------------
# HTML hexadecimal character reference
encoding = "unicodeescape-htmlhex"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output: the same nine lines of text, with each non-us-ascii char written as
# an HTML hexadecimal character reference (the &#x...; form).
# NOTE(review): the literal sample output was lost when this page was
# extracted (the references were rendered back into the characters).
# Presumably the brain emoji (U+1F9E0) comes out as &#x1f9e0; -- confirm the
# exact digit case/padding against a real run.

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# HTML decimal character reference
encoding = "unicodeescape-htmldec"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output: the same nine lines of text, with each non-us-ascii char written as
# an HTML decimal character reference (the &#...; form).
# NOTE(review): the literal sample output was lost when this page was
# extracted.  Presumably the brain emoji (U+1F9E0, decimal 129504) comes out
# as &#129504; -- confirm against a real run.

# Revert back to the unescaped chars:
unescaped = crypt.DecodeString(escaped, charsetNotUsed, encoding)
print(unescaped)

# -----------------------------------------------------------------------------------------
# Hex in Angle Brackets
encoding = "unicodeescape-angle"

escaped = crypt.EncodeString(original, charsetNotUsed, encoding)
print(escaped)

# Output:

# <1f9e0>
# <1f510>
# <2705>
# <26a0><fe0f>
# <274c>
# <2713>
# <4e2d>
# <e9> xyz <e0>
# abc <79c1> <306f> <3093> ghi

# Chilkat cannot unescape the angle bracket notation.
# For this style, Chilkat only goes in one direction, which is to escape.
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.