Diffstat (limited to 'bleach/sanitizer.py')
-rw-r--r-- bleach/sanitizer.py | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py
index 4640012..88246f8 100644
--- a/bleach/sanitizer.py
+++ b/bleach/sanitizer.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import re
 from xml.sax.saxutils import escape, unescape
 
@@ -14,8 +15,6 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
     """Mixin to replace sanitize_token() and sanitize_css()."""
 
     allowed_svg_properties = []
-    # TODO: When the next html5lib version comes out, nuke this.
-    attr_val_is_uri = HTMLSanitizerMixin.attr_val_is_uri + ['poster']
 
     def sanitize_token(self, token):
         """Sanitize a token either by HTML-encoding or dropping.
@@ -30,7 +29,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
         """
         if (getattr(self, 'wildcard_attributes', None) is None and
-            isinstance(self.allowed_attributes, dict)):
+                isinstance(self.allowed_attributes, dict)):
             self.wildcard_attributes = self.allowed_attributes.get('*', [])
 
         if token['type'] in (tokenTypes['StartTag'], tokenTypes['EndTag'],
                              tokenTypes['EmptyTag']):
@@ -56,7 +55,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
                                            unescape(attrs[attr])).lower()
                     # Remove replacement characters from unescaped
                     # characters.
-                    val_unescaped = val_unescaped.replace(u"\ufffd", "")
+                    val_unescaped = val_unescaped.replace("\ufffd", "")
                     if (re.match(r'^[a-z0-9][-+.a-z0-9]*:', val_unescaped)
                             and (val_unescaped.split(':')[0]
                                  not in self.allowed_protocols)):
@@ -67,8 +66,8 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
                                              ' ', unescape(attrs[attr]))
 
                 if (token['name'] in self.svg_allow_local_href and
-                    'xlink:href' in attrs and
-                    re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
+                        'xlink:href' in attrs and
+                        re.search(r'^\s*[^#\s].*', attrs['xlink:href'])):
                     del attrs['xlink:href']
                 if 'style' in attrs:
                     attrs['style'] = self.sanitize_css(attrs['style'])
@@ -79,13 +78,14 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
                 pass
             else:
                 if token['type'] == tokenTypes['EndTag']:
-                    token['data'] = '</%s>' % token['name']
+                    token['data'] = '</{0!s}>'.format(token['name'])
                 elif token['data']:
-                    attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in
+                    attr = ' {0!s}="{1!s}"'
+                    attrs = ''.join([attr.format(k, escape(v)) for k, v in
                                      token['data']])
-                    token['data'] = '<%s%s>' % (token['name'], attrs)
+                    token['data'] = '<{0!s}{1!s}>'.format(token['name'], attrs)
                 else:
-                    token['data'] = '<%s>' % token['name']
+                    token['data'] = '<{0!s}>'.format(token['name'])
                 if token['selfClosing']:
                     token['data'] = token['data'][:-1] + '/>'
                 token['type'] = tokenTypes['Characters']
@@ -112,8 +112,8 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
         # TODO: Make sure this does what it's meant to - I *think* it wants to
        # validate style attribute contents.
         parts = style.split(';')
-        gauntlet = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'\s*"""
-                              """|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
+        gauntlet = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'"""
+                              """\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
         for part in parts:
             if not gauntlet.match(part):
                 return ''
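
A note on the serialization hunk: moving from '%s' interpolation to str.format() with an explicit !s conversion keeps the string-conversion step explicit once unicode_literals makes every template a unicode string on Python 2. Below is a minimal, self-contained sketch of that re-encoding step, assuming a simplified dict-based token; the reencode_token name and the plain-string token types are hypothetical stand-ins for illustration, not bleach's API:

    from xml.sax.saxutils import escape

    def reencode_token(token):
        # Hypothetical stand-in for the rewritten branch: rebuild the tag
        # as a plain string, using the same .format() calls as the diff.
        if token['type'] == 'EndTag':
            data = '</{0!s}>'.format(token['name'])
        elif token['data']:
            attr = ' {0!s}="{1!s}"'
            attrs = ''.join([attr.format(k, escape(v))
                             for k, v in token['data']])
            data = '<{0!s}{1!s}>'.format(token['name'], attrs)
        else:
            data = '<{0!s}>'.format(token['name'])
        if token.get('selfClosing'):
            data = data[:-1] + '/>'
        return data

    print(reencode_token({'type': 'StartTag', 'name': 'script',
                          'data': [('src', 'evil.js')],
                          'selfClosing': False}))
    # -> <script src="evil.js">

In the real method the token's type is then switched to tokenTypes['Characters'], so this string is emitted as escaped text rather than live markup.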
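
The final hunk only moves \s* from the tail of the first string literal to the head of the second. Adjacent string literals are concatenated at compile time, so the compiled pattern is identical and the change is pure line-wrapping. A quick standalone check (not bleach code):

    import re

    old = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'\s*"""
                     """|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
    new = re.compile("""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'"""
                     """\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""")
    assert old.pattern == new.pattern  # same pattern after concatenation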