"""Basic behavioural tests for ``bleach.clean`` and ``bleach.linkify``."""

# NOTE(review): the copy of this module that was reviewed had been flattened
# onto a handful of physical lines and had lost the HTML markup, comments and
# character entities inside its string literals (for instance
# ``bleach.clean('a test')`` was being compared with
# ``'a <script>safe()</script> test'``).  The layout and the literals below
# were reconstructed from what each assertion implies -- confirm them against
# version control before relying on them.

import six
import html5lib  # NOTE(review): unused by the surviving tests; a test that
                 # used it may have been lost -- confirm before removing.
from nose.tools import eq_

import bleach
from bleach.tests.tools import in_


def test_empty():
    """Cleaning the empty string yields the empty string."""
    eq_('', bleach.clean(''))


def test_nbsp():
    """A non-breaking-space entity comes back as the U+00A0 character."""
    if six.PY3:
        expected = '\xa0test string\xa0'
    else:
        # On Python 2 the escape has to be decoded by six.u() to get unicode.
        expected = six.u('\\xa0test string\\xa0')

    # NOTE(review): input reconstructed; the entity had been decoded away.
    eq_(expected, bleach.clean('&nbsp;test string&nbsp;'))


def test_comments_only():
    """Comments are dropped by default and kept with strip_comments=False."""
    # NOTE(review): both literals and the first three assertions were
    # swallowed by the extraction; only the tail of the last assertion
    # survived.  Reconstructed -- confirm.
    comment = '<!-- this is a comment -->'
    open_comment = '<!-- this is an open comment'

    eq_('', bleach.clean(comment))
    eq_('', bleach.clean(open_comment))
    eq_(comment, bleach.clean(comment, strip_comments=False))
    # An unterminated comment is expected to come back with a closing marker.
    eq_('{0!s}-->'.format(open_comment),
        bleach.clean(open_comment, strip_comments=False))


def test_with_comments():
    """A comment next to text is removed unless strip_comments=False."""
    # NOTE(review): leading comment reconstructed -- confirm.
    html = '<!-- comment -->Just text'
    eq_('Just text', bleach.clean(html))
    eq_(html, bleach.clean(html, strip_comments=False))


def test_no_html():
    """Plain text without markup passes through unchanged."""
    eq_('no html string', bleach.clean('no html string'))


def test_allowed_html():
    """Tags on the default whitelist are left in place."""
    # NOTE(review): tags reconstructed -- confirm.
    eq_('an <strong>allowed</strong> tag',
        bleach.clean('an <strong>allowed</strong> tag'))
    eq_('another <em>good</em> tag',
        bleach.clean('another <em>good</em> tag'))


def test_bad_html():
    """An unclosed allowed tag is closed in the output."""
    # NOTE(review): tags reconstructed -- confirm.
    eq_('a <em>fixed tag</em>',
        bleach.clean('a <em>fixed tag'))


def test_function_arguments():
    """Custom ``tags`` and ``attributes`` arguments are honoured."""
    TAGS = ['span', 'br']
    ATTRS = {'span': ['style']}

    # NOTE(review): markup reconstructed -- confirm.  No styles are
    # whitelisted, so the style attribute is expected to be emptied.
    eq_('a <br><span style="">test</span>',
        bleach.clean('a <br/><span style="color:red">test</span>',
                     tags=TAGS, attributes=ATTRS))


def test_named_arguments():
    """``attributes`` passed by keyword widens what is kept on ``<a>``."""
    ATTRS = {'a': ['rel', 'href']}
    # Two candidates because attribute order in the output is not fixed.
    # NOTE(review): markup reconstructed -- confirm.
    s = ('<a href="http://xx.com" rel="alternate">xx.com</a>',
         '<a rel="alternate" href="http://xx.com">xx.com</a>')

    eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s[0]))
    in_(s, bleach.clean(s[0], attributes=ATTRS))


def test_disallowed_html():
    """Tags that are not whitelisted are escaped rather than removed."""
    # NOTE(review): inputs and entity escapes reconstructed -- confirm.
    eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
        bleach.clean('a <script>safe()</script> test'))
    eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
        bleach.clean('a <style>body{}</style> test'))


def test_bad_href():
    """An ``href`` on a tag that does not allow it is dropped."""
    # NOTE(review): markup reconstructed -- confirm.
    eq_('<em>no link</em>',
        bleach.clean('<em href="fail">no link</em>'))


def test_bare_entities():
    """Bare ``&`` and ``<`` are escaped; existing entities are kept."""
    # NOTE(review): entity escapes and tags reconstructed -- confirm.
    eq_('an &amp; entity', bleach.clean('an & entity'))
    eq_('an &lt; entity', bleach.clean('an < entity'))
    eq_('tag &lt; <em>and</em> entity',
        bleach.clean('tag < <em>and</em> entity'))
    eq_('&amp;', bleach.clean('&amp;'))


def test_escaped_entities():
    """Already-escaped markup is not unescaped or double-escaped."""
    # NOTE(review): entity escapes reconstructed -- confirm.
    s = '&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, bleach.clean(s))


def test_serializer():
    """Check the serialized form of table and paragraph fragments."""
    # NOTE(review): markup reconstructed -- confirm.
    s = '<table></table>'
    eq_(s, bleach.clean(s, tags=['table']))
    # Text placed directly inside <table> is expected to be moved in front
    # of the table by the HTML parser.
    eq_('test<table></table>', bleach.linkify('<table>test</table>'))
    eq_('<p>test</p>', bleach.clean('<p>test</p>', tags=['p']))


def test_no_href_links():
    """``linkify`` leaves an anchor without ``href`` untouched."""
    # NOTE(review): markup reconstructed -- confirm.
    s = '<a name="anchor">x</a>'
    eq_(s, bleach.linkify(s))


def test_weird_strings():
    """A malformed closing tag is escaped instead of breaking the parser."""
    # NOTE(review): this whole body was lost in extraction and is
    # reconstructed -- confirm.
    s = '</3'
    eq_(bleach.clean(s), '&lt;/3')


def test_stripping():
    """With ``strip=True`` disallowed tags are removed, not escaped."""
    # NOTE(review): the ``def`` line of this test was lost in extraction
    # (its assertions had been fused into test_weird_strings) and the markup
    # is reconstructed -- confirm name and literals.
    eq_('a test <em>with</em> <b>html</b> tags',
        bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
    # Removing the <img> leaves the spaces on both sides of it behind.
    eq_('a test <em>with</em>  <b>html</b> tags',
        bleach.clean('a test <em>with</em> <img src="http://example.com/"> '
                     '<b>html</b> tags', strip=True))

    s = '<p><a href="http://example.com/">link text</a></p>'
    eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))

    s = '<p><span>multiply <span>nested <span>text</span></span></span></p>'
    eq_('<p>multiply nested text</p>',
        bleach.clean(s, tags=['p'], strip=True))

    s = ('<p><a href="http://example.com/"><img src="http://example.com/">'
         '</a></p>')
    eq_('<p><a href="http://example.com/"></a></p>',
        bleach.clean(s, tags=['p', 'a'], strip=True))


def test_allowed_styles():
    """Only CSS properties listed in ``styles`` survive in ``style``."""
    ATTR = ['style']
    STYLE = ['color']
    # NOTE(review): markup reconstructed -- confirm.
    blank = '<b style=""></b>'
    s = '<b style="color: blue;"></b>'

    eq_(blank, bleach.clean('<b style="top:0"></b>', attributes=ATTR))
    eq_(s, bleach.clean(s, attributes=ATTR, styles=STYLE))
    eq_(s, bleach.clean('<b style="top: 0; color: blue;"></b>',
                        attributes=ATTR, styles=STYLE))


def test_idempotent():
    """Make sure that applying the filter twice doesn't change anything."""
    # NOTE(review): tags reconstructed -- confirm.
    dirty = '<span>invalid & </span> < extra http://link.com<em>'

    clean = bleach.clean(dirty)
    eq_(clean, bleach.clean(clean))

    linked = bleach.linkify(dirty)
    eq_(linked, bleach.linkify(linked))


def test_rel_already_there():
    """Make sure rel attribute is updated not replaced"""
    # NOTE(review): anchors reconstructed -- confirm.
    linked = ('Click <a href="http://example.com" rel="tooltip">'
              'here</a>.')
    # Two candidates because attribute order in the output is not fixed.
    link_good = (('Click <a href="http://example.com" rel="tooltip nofollow">'
                  'here</a>.'),
                 ('Click <a rel="tooltip nofollow" href="http://example.com">'
                  'here</a>.'))

    in_(link_good, bleach.linkify(linked))
    in_(link_good, bleach.linkify(link_good[0]))


def test_lowercase_html():
    """We should output lowercase HTML."""
    # NOTE(review): tags reconstructed -- confirm.
    dirty = '<EM CLASS="FOO">BAR</EM>'
    clean = '<em class="FOO">BAR</em>'
    eq_(clean, bleach.clean(dirty, attributes=['class']))


def test_wildcard_attributes():
    """Attributes listed under ``'*'`` are allowed on every tag."""
    ATTR = {
        '*': ['id'],
        'img': ['src'],
    }
    TAG = ['img', 'em']
    # NOTE(review): markup reconstructed -- confirm.
    dirty = ('both <em id="foo" style="color: black">can</em> have '
             '<img id="bar" src="foo"/>')
    # Two candidates because attribute order in the output is not fixed.
    clean = ('both <em id="foo">can</em> have <img src="foo" id="bar">',
             'both <em id="foo">can</em> have <img id="bar" src="foo">')
    in_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))


def test_sarcasm():
    """Jokes should crash."""
    # NOTE(review): input tag and entity escapes reconstructed -- confirm.
    dirty = 'Yeah right <sarcasm/>'
    clean = 'Yeah right &lt;sarcasm/&gt;'
    eq_(clean, bleach.clean(dirty))