How do you determine whether a string contains escaped Unicode, so that you know whether or not to run .decode("unicode-escape")?
For example:
test.py
# -*- coding: utf-8 -*-
# Plain (non-u) literal: under Python 2, which the print statement in this
# script implies, \u is not an escape in such a literal, so the six characters
# \u0026 are stored as-is rather than being turned into "&".
str_escaped = '"A\u0026B"'
# Already-readable Cyrillic text; contains characters outside ASCII.
str_unicode = '"Война́ и миръ"'
# Samples that are each checked, conditionally decoded, and printed.
arr_all_strings = [str_escaped, str_unicode]
# Placeholder from the question: meant to report whether the given string
# still holds escape sequences that need .decode("unicode-escape").
# As written it has no logic and implicitly returns None (falsy), so the
# caller never decodes anything.
# NOTE(review): the parameter name shadows the built-in `str`.
def is_escaped_unicode(str):
#how do I determine if this is escaped unicode?
pass
# Print every sample, decoding only those flagged as escaped.
# The loop variable is named `text` so the built-in `str` is not rebound at
# module level.
for text in arr_all_strings:
    if is_escaped_unicode(text):
        # Python 2 byte-string method: turns \uXXXX sequences into characters.
        text = text.decode("unicode-escape")
    # Parenthesised form prints the same on Python 2 and also parses on 3.
    print(text)
Current output:
"A\u0026B"
"Война́ и миръ"
Expected output:
"A&B"
"Война́ и миръ"
How do I define is_escaped_unicode(str)
to determine if the string that's passed is actually escaped unicode?
# NOTE(review): with the u prefix Python resolves \u0026 while parsing the
# literal, so this value is already "A&B" and holds no backslash escape --
# confirm this is the intended test input (the question used a plain literal).
str_escaped = u'"A\u0026B"'
# Sample containing characters outside the ASCII range (Cyrillic text).
str_unicode = '"Война́ и миръ"'
# Samples that are each checked, conditionally decoded, and printed.
arr_all_strings = [str_escaped, str_unicode]
def is_ascii(s):
    """Return True if every character of *s* has a code point below 128.

    An empty string counts as ASCII, because there is no character that
    violates the condition.
    """
    return all(ord(c) < 128 for c in s)
def is_escaped_unicode(str):
    r"""Return True if the string is ASCII-only text holding a Unicode escape.

    A string qualifies only when both conditions hold:

    * every character is below code point 128, so decoding it with
      "unicode-escape" cannot mangle real non-ASCII text, and
    * it contains at least one literal \uXXXX or \UXXXXXXXX sequence
      (a backslash, ``u`` or ``U``, then 4 or 8 hex digits).

    Plain ASCII text without such a sequence, including the empty string,
    returns False, so callers do not decode strings that have nothing to
    decode.

    The parameter keeps the name ``str`` for compatibility with existing
    callers; it shadows the built-in inside this function only.
    """
    import re

    # Non-ASCII content means the text is already readable, not escaped.
    if not all(ord(ch) < 128 for ch in str):
        return False
    escape_pattern = r'\\(?:u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})'
    return re.search(escape_pattern, str) is not None
# Walk the samples and print each one, decoding escaped entries first.
# `text` is used as the loop variable so the built-in `str` stays intact.
for text in arr_all_strings:
    if is_escaped_unicode(text):
        # Python 2 string method: replaces \uXXXX sequences with characters.
        text = text.decode("unicode-escape")
    # Parenthesised form prints the same on Python 2 and also parses on 3.
    print(text)
The following code will work for your case.
Explanation:
Every character in str_escaped is in the ASCII range.
str_unicode contains characters that are outside the ASCII range.