diff options
Diffstat (limited to 'bleach/encoding.py')
-rw-r--r-- | bleach/encoding.py | 60 |
1 files changed, 34 insertions, 26 deletions
diff --git a/bleach/encoding.py b/bleach/encoding.py index b9a989d..707adaa 100644 --- a/bleach/encoding.py +++ b/bleach/encoding.py @@ -1,6 +1,7 @@ import datetime from decimal import Decimal import types +import six def is_protected_type(obj): @@ -10,45 +11,52 @@ def is_protected_type(obj): force_unicode(strings_only=True). """ return isinstance(obj, ( - types.NoneType, - int, long, - datetime.datetime, datetime.date, datetime.time, - float, Decimal) + six.integer_types + + (types.NoneType, + datetime.datetime, datetime.date, datetime.time, + float, Decimal)) ) def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): """ - Similar to smart_unicode, except that lazy instances are resolved to + Similar to smart_text, except that lazy instances are resolved to strings, rather than kept as lazy objects. If strings_only is True, don't convert (some) non-string-like objects. """ + # Handle the common case first, saves 30-40% when s is an instance of + # six.text_type. This function gets called often in that setting. + if isinstance(s, six.text_type): + return s if strings_only and is_protected_type(s): return s try: - if not isinstance(s, basestring,): + if not isinstance(s, six.string_types): if hasattr(s, '__unicode__'): - s = unicode(s) + s = s.__unicode__() else: - try: - s = unicode(str(s), encoding, errors) - except UnicodeEncodeError: - if not isinstance(s, Exception): - raise - # If we get to here, the caller has passed in an Exception - # subclass populated with non-ASCII data without special - # handling to display as a string. We need to handle this - # without raising a further exception. We do an - # approximation to what the Exception's standard str() - # output should be. - s = ' '.join([force_unicode(arg, encoding, strings_only, - errors) for arg in s]) - elif not isinstance(s, unicode): - # Note: We use .decode() here, instead of unicode(s, encoding, - # errors), so that if s is a SafeString, it ends up being a - # SafeUnicode at the end. + if six.PY3: + if isinstance(s, bytes): + s = six.text_type(s, encoding, errors) + else: + s = six.text_type(s) + else: + s = six.text_type(bytes(s), encoding, errors) + else: + # Note: We use .decode() here, instead of six.text_type(s, + # encoding, errors), so that if s is a SafeBytes, it ends up being + # a SafeText at the end. s = s.decode(encoding, errors) - except UnicodeDecodeError, e: - raise UnicodeDecodeError(*e.args) + except UnicodeDecodeError as e: + if not isinstance(s, Exception): + raise UnicodeDecodeError(*e.args) + else: + # If we get to here, the caller has passed in an Exception + # subclass populated with non-ASCII bytestring data without a + # working unicode method. Try to handle this without raising a + # further exception by individually forcing the exception args + # to unicode. + s = ' '.join([force_unicode(arg, encoding, strings_only, + errors) for arg in s]) return s |