about summary refs log tree commit diff
path: root/bleach/sanitizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/sanitizer.py')
-rw-r--r-- bleach/sanitizer.py 24
1 files changed, 12 insertions, 12 deletions
diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py
index 4640012..88246f8 100644
--- a/bleach/sanitizer.py
+++ b/bleach/sanitizer.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
import re
from xml.sax.saxutils import escape, unescape
@@ -14,8 +15,6 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
"""Mixin to replace sanitize_token() and sanitize_css()."""
allowed_svg_properties = []
- # TODO: When the next html5lib version comes out, nuke this.
- attr_val_is_uri = HTMLSanitizerMixin.attr_val_is_uri + ['poster']
def sanitize_token(self, token):
"""Sanitize a token either by HTML-encoding or dropping.
@@ -30,7 +29,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
"""
if (getattr(self, 'wildcard_attributes', None) is None and
- isinstance(self.allowed_attributes, dict)):
+ isinstance(self.allowed_attributes, dict)):
self.wildcard_attributes = self.allowed_attributes.get('*', [])
if token['type'] in (tokenTypes['StartTag'], tokenTypes['EndTag'],
@@ -56,7 +55,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
unescape(attrs[attr])).lower()
# Remove replacement characters from unescaped
# characters.
- val_unescaped = val_unescaped.replace(u"\ufffd", "")
+ val_unescaped = val_unescaped.replace("\ufffd", "")
if (re.match(r'^[a-z0-9][-+.a-z0-9]*:', val_unescaped)
and (val_unescaped.split(':')[0] not in
self.allowed_protocols)):
@@ -67,8 +66,8 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
' ',
unescape(attrs[attr]))
if (token['name'] in self.svg_allow_local_href and
- 'xlink:href' in attrs and
- re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
+ 'xlink:href' in attrs and
+ re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
del attrs['xlink:href']
if 'style' in attrs:
attrs['style'] = self.sanitize_css(attrs['style'])
@@ -79,13 +78,14 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
pass
else:
if token['type'] == tokenTypes['EndTag']:
- token['data'] = '</%s>' % token['name']
+ token['data'] = '</{0!s}>'.format(token['name'])
elif token['data']:
- attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in
+ attr = ' {0!s}="{1!s}"'
+ attrs = ''.join([attr.format(k, escape(v)) for k, v in
token['data']])
- token['data'] = '<%s%s>' % (token['name'], attrs)
+ token['data'] = '<{0!s}{1!s}>'.format(token['name'], attrs)
else:
- token['data'] = '<%s>' % token['name']
+ token['data'] = '<{0!s}>'.format(token['name'])
if token['selfClosing']:
token['data'] = token['data'][:-1] + '/>'
token['type'] = tokenTypes['Characters']
@@ -112,8 +112,8 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
# TODO: Make sure this does what it's meant to - I *think* it wants to
# validate style attribute contents.
parts = style.split(';')
- gauntlet = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'\s*"""
- """|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
+ gauntlet = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'"""
+ """\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
for part in parts:
if not gauntlet.match(part):
return ''