diff options
Diffstat (limited to 'bleach/encoding.py')
-rw-r--r-- | bleach/encoding.py | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/bleach/encoding.py b/bleach/encoding.py new file mode 100644 index 0000000..b9a989d --- /dev/null +++ b/bleach/encoding.py @@ -0,0 +1,54 @@ +import datetime +from decimal import Decimal +import types + + +def is_protected_type(obj): + """Determine if the object instance is of a protected type. + + Objects of protected types are preserved as-is when passed to + force_unicode(strings_only=True). + """ + return isinstance(obj, ( + types.NoneType, + int, long, + datetime.datetime, datetime.date, datetime.time, + float, Decimal) + ) + + +def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Similar to smart_unicode, except that lazy instances are resolved to + strings, rather than kept as lazy objects. + + If strings_only is True, don't convert (some) non-string-like objects. + """ + if strings_only and is_protected_type(s): + return s + try: + if not isinstance(s, basestring,): + if hasattr(s, '__unicode__'): + s = unicode(s) + else: + try: + s = unicode(str(s), encoding, errors) + except UnicodeEncodeError: + if not isinstance(s, Exception): + raise + # If we get to here, the caller has passed in an Exception + # subclass populated with non-ASCII data without special + # handling to display as a string. We need to handle this + # without raising a further exception. We do an + # approximation to what the Exception's standard str() + # output should be. + s = ' '.join([force_unicode(arg, encoding, strings_only, + errors) for arg in s]) + elif not isinstance(s, unicode): + # Note: We use .decode() here, instead of unicode(s, encoding, + # errors), so that if s is a SafeString, it ends up being a + # SafeUnicode at the end. + s = s.decode(encoding, errors) + except UnicodeDecodeError, e: + raise UnicodeDecodeError(*e.args) + return s |