import six
import html5lib
from nose.tools import eq_
import bleach
from bleach.tests.tools import in_
def test_empty():
    """Check that cleaning the empty string gives back the empty string."""
    cleaned = bleach.clean('')
    eq_('', cleaned)
def test_nbsp():
    """Check that ``&nbsp;`` entities come out as U+00A0 characters."""
    if six.PY3:
        expected = '\xa0test string\xa0'
    else:
        # On Python 2 the escaped literal is turned into unicode via six.u().
        expected = six.u('\\xa0test string\\xa0')

    # The non-breaking spaces are spelled as entities so the input literal
    # stays pure ASCII and does not rely on invisible characters.
    eq_(expected, bleach.clean('&nbsp;test string&nbsp;'))
def test_comments_only():
    """Check comment-only input, both terminated and unterminated.

    By default the comment is expected to vanish; with
    ``strip_comments=False`` it is expected to be kept, and an unterminated
    comment is expected to be closed with ``-->``.
    """
    comment = '<!-- this is a comment -->'
    open_comment = '<!-- this is an open comment'

    # Default behaviour: comments are dropped entirely.
    eq_('', bleach.clean(comment))
    eq_('', bleach.clean(open_comment))

    # Comments explicitly preserved.
    eq_(comment, bleach.clean(comment, strip_comments=False))
    eq_('{0!s}-->'.format(open_comment),
        bleach.clean(open_comment, strip_comments=False))
def test_with_comments():
    """Check a comment followed by text, with and without comment stripping."""
    html = '<!-- comment -->Just text'
    # Default: the comment is removed and only the text remains.
    eq_('Just text', bleach.clean(html))
    # strip_comments=False: the input is expected back unchanged.
    eq_(html, bleach.clean(html, strip_comments=False))
def test_no_html():
    """Check that a string without markup is returned as-is by clean()."""
    plain = 'no html string'
    eq_(plain, bleach.clean(plain))
def test_allowed_html():
    """Check that tags allowed by default pass through clean() untouched."""
    # NOTE(review): assumes ``strong`` and ``em`` are in bleach's default
    # tag whitelist -- confirm against the installed version.
    for markup in ('an <strong>allowed</strong> tag',
                   'another <em>good</em> tag'):
        eq_(markup, bleach.clean(markup))
def test_bad_html():
    """Check that an unclosed allowed tag is closed in the output."""
    eq_('a <em>fixed tag</em>',
        bleach.clean('a <em>fixed tag'))
def test_function_arguments():
    """Check that positional-style ``tags``/``attributes`` overrides apply.

    ``span`` and ``br`` are allowed, and ``span`` may carry ``style``; no
    CSS properties are whitelisted, so the style value is expected to be
    emptied.
    """
    TAGS = ['span', 'br']
    ATTRS = {'span': ['style']}

    # NOTE(review): the expected ``<br>`` (no trailing slash) reflects the
    # serializer of the bleach version under test -- confirm.
    eq_('a <br><span style="">test</span>',
        bleach.clean('a <br/><span style="color:red">test</span>',
                     tags=TAGS, attributes=ATTRS))
def test_named_arguments():
    """Check that ``attributes=`` widens what is kept on ``<a>``.

    With defaults ``rel`` is expected to be dropped; with ``ATTRS`` both
    ``href`` and ``rel`` are expected to survive.
    """
    ATTRS = {'a': ['rel', 'href']}
    # Both attribute orders are listed; in_() accepts either serialization.
    s = ('<a href="http://xx.com" rel="alternate">xx.com</a>',
         '<a rel="alternate" href="http://xx.com">xx.com</a>')

    eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s[0]))
    in_(s, bleach.clean(s[0], attributes=ATTRS))
def test_disallowed_html():
    """Check that disallowed tags are escaped rather than passed through."""
    eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
        bleach.clean('a <script>safe()</script> test'))
    eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
        bleach.clean('a <style>body{}</style> test'))
def test_bad_href():
    """Check that ``href`` is removed from a tag that may not carry it."""
    eq_('<em>no link</em>',
        bleach.clean('<em href="fail">no link</em>'))
def test_bare_entities():
    """Check escaping of bare ``&`` and ``<`` characters in text."""
    eq_('an &amp; entity', bleach.clean('an & entity'))
    eq_('an &lt; entity', bleach.clean('an < entity'))
    # A stray '<' next to a real, allowed tag: only the stray one is escaped.
    eq_('tag &lt; <em>and</em> entity',
        bleach.clean('tag < <em>and</em> entity'))
    # An already-escaped ampersand must not be escaped a second time.
    eq_('&amp;', bleach.clean('&amp;'))
def test_escaped_entities():
    """Check that already-escaped markup is left escaped by clean()."""
    s = '&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, bleach.clean(s))
def test_serializer():
    """Check serialization of empty tables, misplaced text and paragraphs."""
    s = '<table></table>'
    eq_(s, bleach.clean(s, tags=['table']))

    # NOTE(review): text directly inside <table> is expected to be moved in
    # front of the table by the HTML parser -- confirm with the html5lib
    # version in use.
    eq_('test<table></table>', bleach.linkify('<table>test</table>'))
    eq_('<p>test</p>', bleach.clean('<p>test</p>', tags=['p']))
def test_no_href_links():
    """Check that linkify() leaves an anchor without ``href`` unchanged."""
    s = '<a name="anchor">x</a>'
    eq_(s, bleach.linkify(s))
def test_weird_strings():
    """Check that a bogus end tag such as ``</3`` cleans to an empty string."""
    s = '</3'
    eq_(bleach.clean(s), '')
def test_xml_render():
    """Check that rendering an empty parsed fragment gives an empty string."""
    fragment = html5lib.HTMLParser().parseFragment('')
    eq_(bleach._render(fragment), '')
def test_stripping():
    """Check ``strip=True``: disallowed tags are removed, not escaped."""
    # Only allowed tags present: nothing to strip.
    eq_('a test <em>with</em> <b>html</b> tags',
        bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
    # Removing the <img> leaves the spaces that surrounded it, hence the
    # double space in the expected value.
    eq_('a test <em>with</em>  <b>html</b> tags',
        bleach.clean('a test <em>with</em> <img src="http://example.com/"> '
                     '<b>html</b> tags', strip=True))

    # A stripped tag keeps its text content.
    s = '<p><a href="http://example.com/">link text</a></p>'
    eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))
    s = '<p><span>multiply <span>nested <span>text</span></span></span></p>'
    eq_('<p>multiply nested text</p>', bleach.clean(s, tags=['p'], strip=True))

    # A disallowed tag nested inside an allowed one is removed on its own.
    s = ('<p><a href="http://example.com/"><img src="http://example.com/">'
         '</a></p>')
    eq_('<p><a href="http://example.com/"></a></p>',
        bleach.clean(s, tags=['p', 'a'], strip=True))
def test_allowed_styles():
    """Check filtering of CSS properties inside an allowed ``style`` attribute.

    Without a ``styles`` whitelist the value is expected to be emptied;
    with ``styles=['color']`` only the ``color`` declaration should remain.
    """
    ATTR = ['style']
    STYLE = ['color']
    blank = '<b style=""></b>'
    s = '<b style="color: blue;"></b>'

    eq_(blank, bleach.clean('<b style="top:0"></b>', attributes=ATTR))
    eq_(s, bleach.clean(s, attributes=ATTR, styles=STYLE))
    eq_(s, bleach.clean('<b style="top: 0; color: blue;"></b>',
                        attributes=ATTR, styles=STYLE))
def test_idempotent():
    """Running clean() or linkify() on its own output must be a no-op."""
    dirty = 'invalid & < extra http://link.com'

    # Same check for both filters: second pass equals first pass.
    for apply_filter in (bleach.clean, bleach.linkify):
        first_pass = apply_filter(dirty)
        eq_(first_pass, apply_filter(first_pass))
def test_rel_already_there():
    """Make sure rel attribute is updated not replaced"""
    linked = ('Click <a href="http://example.com" rel="tooltip">'
              'here</a>.')
    # Both attribute orders are listed; in_() accepts either serialization.
    link_good = (('Click <a href="http://example.com" rel="tooltip nofollow">'
                  'here</a>.'),
                 ('Click <a rel="tooltip nofollow" href="http://example.com">'
                  'here</a>.'))

    in_(link_good, bleach.linkify(linked))
    # Linkifying an already-good link must not add ``nofollow`` twice.
    in_(link_good, bleach.linkify(link_good[0]))
def test_lowercase_html():
    """We should output lowercase HTML."""
    # Tag and attribute names are lowercased; the attribute value is not.
    dirty = '<EM CLASS="FOO">BAR</EM>'
    clean = '<em class="FOO">BAR</em>'
    eq_(clean, bleach.clean(dirty, attributes=['class']))
def test_wildcard_attributes():
    """Check that the ``'*'`` key allows an attribute on every tag.

    ``id`` is allowed everywhere and ``src`` only on ``img``; the ``style``
    attribute on ``em`` is expected to be dropped.
    """
    ATTR = {
        '*': ['id'],
        'img': ['src'],
    }
    TAG = ['img', 'em']
    dirty = ('both <em id="foo" style="color: black">can</em> have '
             '<img id="bar" src="foo"/>')
    # Both attribute orders are listed; in_() accepts either serialization.
    clean = ('both <em id="foo">can</em> have <img src="foo" id="bar">',
             'both <em id="foo">can</em> have <img id="bar" src="foo">')
    in_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))
def test_sarcasm():
    """Jokes should crash."""
    # An unknown, self-closing tag is expected to come out escaped.
    dirty = 'Yeah right <sarcasm/>'
    clean = 'Yeah right &lt;sarcasm/&gt;'
    eq_(clean, bleach.clean(dirty))