summaryrefslogtreecommitdiff
path: root/bleach/encoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/encoding.py')
-rw-r--r--bleach/encoding.py54
1 files changed, 54 insertions, 0 deletions
diff --git a/bleach/encoding.py b/bleach/encoding.py
new file mode 100644
index 0000000..b9a989d
--- /dev/null
+++ b/bleach/encoding.py
@@ -0,0 +1,54 @@
+import datetime
+from decimal import Decimal
+import types
+
+
+def is_protected_type(obj):
+ """Determine if the object instance is of a protected type.
+
+ Objects of protected types are preserved as-is when passed to
+ force_unicode(strings_only=True).
+ """
+ return isinstance(obj, (
+ types.NoneType,
+ int, long,
+ datetime.datetime, datetime.date, datetime.time,
+ float, Decimal)
+ )
+
+
+def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+ """
+ Similar to smart_unicode, except that lazy instances are resolved to
+ strings, rather than kept as lazy objects.
+
+ If strings_only is True, don't convert (some) non-string-like objects.
+ """
+ if strings_only and is_protected_type(s):
+ return s
+ try:
+ if not isinstance(s, basestring,):
+ if hasattr(s, '__unicode__'):
+ s = unicode(s)
+ else:
+ try:
+ s = unicode(str(s), encoding, errors)
+ except UnicodeEncodeError:
+ if not isinstance(s, Exception):
+ raise
+ # If we get to here, the caller has passed in an Exception
+ # subclass populated with non-ASCII data without special
+ # handling to display as a string. We need to handle this
+ # without raising a further exception. We do an
+ # approximation to what the Exception's standard str()
+ # output should be.
+ s = ' '.join([force_unicode(arg, encoding, strings_only,
+ errors) for arg in s])
+ elif not isinstance(s, unicode):
+ # Note: We use .decode() here, instead of unicode(s, encoding,
+ # errors), so that if s is a SafeString, it ends up being a
+ # SafeUnicode at the end.
+ s = s.decode(encoding, errors)
+ except UnicodeDecodeError, e:
+ raise UnicodeDecodeError(*e.args)
+ return s