summary | refs | log | tree | commit | diff
path: root/bleach/encoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/encoding.py')
-rw-r--r-- bleach/encoding.py 60
1 files changed, 34 insertions, 26 deletions
diff --git a/bleach/encoding.py b/bleach/encoding.py
index b9a989d..707adaa 100644
--- a/bleach/encoding.py
+++ b/bleach/encoding.py
@@ -1,6 +1,7 @@
import datetime
from decimal import Decimal
import types
+import six
def is_protected_type(obj):
@@ -10,45 +11,52 @@ def is_protected_type(obj):
force_unicode(strings_only=True).
"""
return isinstance(obj, (
- types.NoneType,
- int, long,
- datetime.datetime, datetime.date, datetime.time,
- float, Decimal)
+ six.integer_types +
+ (types.NoneType,
+ datetime.datetime, datetime.date, datetime.time,
+ float, Decimal))
)
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
- Similar to smart_unicode, except that lazy instances are resolved to
+ Similar to smart_text, except that lazy instances are resolved to
strings, rather than kept as lazy objects.
If strings_only is True, don't convert (some) non-string-like objects.
"""
+ # Handle the common case first, saves 30-40% when s is an instance of
+ # six.text_type. This function gets called often in that setting.
+ if isinstance(s, six.text_type):
+ return s
if strings_only and is_protected_type(s):
return s
try:
- if not isinstance(s, basestring,):
+ if not isinstance(s, six.string_types):
if hasattr(s, '__unicode__'):
- s = unicode(s)
+ s = s.__unicode__()
else:
- try:
- s = unicode(str(s), encoding, errors)
- except UnicodeEncodeError:
- if not isinstance(s, Exception):
- raise
- # If we get to here, the caller has passed in an Exception
- # subclass populated with non-ASCII data without special
- # handling to display as a string. We need to handle this
- # without raising a further exception. We do an
- # approximation to what the Exception's standard str()
- # output should be.
- s = ' '.join([force_unicode(arg, encoding, strings_only,
- errors) for arg in s])
- elif not isinstance(s, unicode):
- # Note: We use .decode() here, instead of unicode(s, encoding,
- # errors), so that if s is a SafeString, it ends up being a
- # SafeUnicode at the end.
+ if six.PY3:
+ if isinstance(s, bytes):
+ s = six.text_type(s, encoding, errors)
+ else:
+ s = six.text_type(s)
+ else:
+ s = six.text_type(bytes(s), encoding, errors)
+ else:
+ # Note: We use .decode() here, instead of six.text_type(s,
+ # encoding, errors), so that if s is a SafeBytes, it ends up being
+ # a SafeText at the end.
s = s.decode(encoding, errors)
- except UnicodeDecodeError, e:
- raise UnicodeDecodeError(*e.args)
+ except UnicodeDecodeError as e:
+ if not isinstance(s, Exception):
+ raise UnicodeDecodeError(*e.args)
+ else:
+ # If we get to here, the caller has passed in an Exception
+ # subclass populated with non-ASCII bytestring data without a
+ # working unicode method. Try to handle this without raising a
+ # further exception by individually forcing the exception args
+ # to unicode.
+ s = ' '.join([force_unicode(arg, encoding, strings_only,
+ errors) for arg in s])
return s