"""Basic tests for ``bleach.clean`` and ``bleach.linkify``.

NOTE(review): the previous revision of this file had been passed through an
HTML-stripping step.  Tags and comments inside string literals were removed,
character entities were decoded, and all line structure was collapsed, which
left the module unparseable.  The markup in the literals below has been
reconstructed so that every assertion is meaningful again; the exact wording
of comments, URLs and attribute values should be confirmed against the
upstream bleach test-suite before relying on them.
"""

import html5lib
from nose.tools import eq_

import bleach


def test_empty():
    """Cleaning the empty string yields the empty string."""
    eq_('', bleach.clean(''))


def test_nbsp():
    """A ``&nbsp;`` entity comes back as the U+00A0 character."""
    eq_(u'\xa0test string\xa0', bleach.clean('&nbsp;test string&nbsp;'))


def test_comments_only():
    """Comments are dropped by default and kept with strip_comments=False."""
    comment = '<!-- this is a comment -->'
    open_comment = '<!-- this is an open comment'
    eq_('', bleach.clean(comment))
    eq_('', bleach.clean(open_comment))
    eq_(comment, bleach.clean(comment, strip_comments=False))
    # An unterminated comment is expected to come back closed.
    eq_('%s-->' % open_comment,
        bleach.clean(open_comment, strip_comments=False))


def test_with_comments():
    """A comment next to text is removed unless strip_comments=False."""
    html = '<!-- comment -->Just text'
    eq_('Just text', bleach.clean(html))
    eq_(html, bleach.clean(html, strip_comments=False))


def test_no_html():
    """Plain text passes through unchanged."""
    eq_('no html string', bleach.clean('no html string'))


def test_allowed_html():
    """Tags from the default whitelist are left in place."""
    eq_('an <strong>allowed</strong> tag',
        bleach.clean('an <strong>allowed</strong> tag'))
    eq_('another <em>good</em> tag',
        bleach.clean('another <em>good</em> tag'))


def test_bad_html():
    """An unclosed allowed tag is closed in the output."""
    eq_('a <em>fixed tag</em>', bleach.clean('a <em>fixed tag'))


def test_function_arguments():
    """Custom ``tags`` and ``attributes`` arguments are honoured."""
    TAGS = ['span', 'br']
    ATTRS = {'span': ['style']}

    # ``style`` is allowed on <span>, but no CSS properties are whitelisted,
    # so the attribute is expected to survive with an empty value.
    eq_('a <br><span style="">test</span>',
        bleach.clean('a <br/><span style="color:red">test</span>',
                     tags=TAGS, attributes=ATTRS))


def test_named_arguments():
    """``attributes`` passed by keyword overrides the default whitelist."""
    ATTRS = {'a': ['rel', 'href']}
    s = u'<a href="http://xx.com" rel="alternate">xx.com</a>'
    # With the defaults ``rel`` is expected to be dropped ...
    eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s))
    # ... and kept once it is explicitly allowed.
    eq_(s, bleach.clean(s, attributes=ATTRS))


def test_disallowed_html():
    """Disallowed tags are escaped rather than removed."""
    eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
        bleach.clean('a <script>safe()</script> test'))
    eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
        bleach.clean('a <style>body{}</style> test'))


def test_bad_href():
    """An ``href`` attribute on a tag that may not carry one is dropped."""
    eq_('<em>no link</em>', bleach.clean('<em href="fail">no link</em>'))


def test_bare_entities():
    """Bare ``&`` and ``<`` characters are escaped in the output."""
    eq_('an &amp; entity', bleach.clean('an & entity'))
    eq_('an &lt; entity', bleach.clean('an < entity'))
    eq_('tag &lt; <em>and</em> entity',
        bleach.clean('tag < <em>and</em> entity'))
    eq_('&amp;', bleach.clean('&amp;'))


def test_escaped_entities():
    """Already-escaped markup is returned unchanged."""
    s = u'&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, bleach.clean(s))


def test_serializer():
    """Output is serialized as HTML for both ``clean`` and ``linkify``."""
    s = u'<table></table>'
    eq_(s, bleach.clean(s, tags=['table']))
    # Text placed directly inside <table> is expected to be moved in front
    # of the table by the HTML parser.
    eq_(u'test<table></table>', bleach.linkify(u'<table>test</table>'))
    eq_(u'<p>test</p>', bleach.clean(u'<p>test</p>', tags=['p']))


def test_no_href_links():
    """``linkify`` leaves an anchor without ``href`` untouched."""
    s = u'<a name="anchor">x</a>'
    eq_(s, bleach.linkify(s))


def test_weird_strings():
    """A stray ``</3`` cleans to the empty string."""
    s = '</3'
    eq_(bleach.clean(s), '')


# NOTE(review): the damaged source had lost everything between the literal in
# test_weird_strings and the first assertion of the stripping test.  The
# otherwise unused ``html5lib`` import indicates a test such as the one below
# lived in that gap; confirm its body (and the private ``bleach._render``
# signature) against the bleach version under test.
def test_xml_render():
    """Rendering an empty parsed fragment yields the empty string."""
    parser = html5lib.HTMLParser()
    eq_(bleach._render(parser.parseFragment('')), '')


# NOTE(review): the ``def`` line of this test was lost as well; the name is
# reconstructed from the ``strip=True`` assertions it contains.
def test_stripping():
    """With ``strip=True`` disallowed tags are removed instead of escaped."""
    eq_('a test <em>with</em> <b>html</b> tags',
        bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
    # Removing the <img> leaves the spaces on both sides of it behind.
    eq_('a test <em>with</em>  <b>html</b> tags',
        bleach.clean('a test <em>with</em> <img src="http://example.com/"> '
                     '<b>html</b> tags', strip=True))

    s = '<p><a href="http://example.com/">link text</a></p>'
    eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))

    s = '<p><span>multiply <span>nested <span>text</span></span></span></p>'
    eq_('<p>multiply nested text</p>',
        bleach.clean(s, tags=['p'], strip=True))

    s = ('<p><a href="http://example.com/"><img src="http://example.com/">'
         '</a></p>')
    eq_('<p><a href="http://example.com/"></a></p>',
        bleach.clean(s, tags=['p', 'a'], strip=True))


def test_allowed_styles():
    """Only CSS properties listed in ``styles`` survive in ``style``."""
    ATTR = ['style']
    STYLE = ['color']
    blank = '<b style=""></b>'
    s = '<b style="color: blue;"></b>'
    eq_(blank, bleach.clean('<b style="top:0"></b>', attributes=ATTR))
    eq_(s, bleach.clean(s, attributes=ATTR, styles=STYLE))
    eq_(s, bleach.clean('<b style="top: 0; color: blue;"></b>',
                        attributes=ATTR, styles=STYLE))


def test_idempotent():
    """Make sure that applying the filter twice doesn't change anything."""
    dirty = u'<span>invalid & </span> < extra http://link.com<em>'

    clean = bleach.clean(dirty)
    eq_(clean, bleach.clean(clean))

    linked = bleach.linkify(dirty)
    eq_(linked, bleach.linkify(linked))


def test_lowercase_html():
    """We should output lowercase HTML."""
    dirty = u'<EM CLASS="FOO">BAR</EM>'
    clean = u'<em class="FOO">BAR</em>'
    eq_(clean, bleach.clean(dirty, attributes=['class']))


def test_wildcard_attributes():
    """Attributes listed under ``'*'`` are allowed on every tag."""
    ATTR = {
        '*': ['id'],
        'img': ['src'],
    }
    TAG = ['img', 'em']
    dirty = (u'both <em id="foo" style="color: black">can</em> have '
             u'<img id="bar" src="foo"/>')
    clean = u'both <em id="foo">can</em> have <img id="bar" src="foo">'
    eq_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))


def test_sarcasm():
    """Jokes should crash."""
    dirty = u'Yeah right <sarcasm/>'
    clean = u'Yeah right &lt;sarcasm/&gt;'
    eq_(clean, bleach.clean(dirty))