import html5lib
from nose.tools import eq_
import bleach
def test_empty():
    """Cleaning the empty string must return the empty string."""
    cleaned = bleach.clean('')
    eq_('', cleaned)
def test_nbsp():
    """``&nbsp;`` entities in the input come back as U+00A0 characters."""
    # The input has to spell the entity out; with ordinary spaces the
    # expected value (which starts and ends with U+00A0) could never match.
    # NOTE(review): assumes the bleach version this suite targets resolves
    # named entities to characters on output -- confirm.
    eq_(u'\xa0test string\xa0', bleach.clean('&nbsp;test string&nbsp;'))
def test_comments_only():
# Calls bleach.clean on comment fixtures, at least once with
# strip_comments=False.
# NOTE(review): this block does not parse as written -- `open_comment` is
# used on the right-hand side of its own first assignment, and the final
# closing parenthesis has no matching opener.  The comment fixtures and the
# eq_() calls around them appear to have been lost; restore them from
# version control before relying on this test.
comment = ''
open_comment = '' % open_comment, bleach.clean(open_comment,
strip_comments=False))
def test_with_comments():
    """Clean the fixture with default settings and with comments kept."""
    # NOTE(review): as written the fixture holds no comment markup, so both
    # assertions compare plain text -- confirm the literal is intact.
    text = 'Just text'

    default_result = bleach.clean(text)
    eq_('Just text', default_result)

    kept_result = bleach.clean(text, strip_comments=False)
    eq_(text, kept_result)
def test_no_html():
    """Text without any markup passes through ``clean`` unchanged."""
    plain = 'no html string'
    eq_(plain, bleach.clean(plain))
def test_allowed_html():
    """Each fixture is expected to come back from ``clean`` unchanged."""
    # NOTE(review): neither fixture contains a tag as written -- confirm the
    # literals still hold the markup this test is named for.
    for text in ('an allowed tag', 'another good tag'):
        eq_(text, bleach.clean(text))
def test_bad_html():
    """The fixture is expected to equal its own cleaned form."""
    # NOTE(review): the fixture contains no malformed markup as written --
    # confirm the literal is intact.
    text = 'a fixed tag'
    eq_(text, bleach.clean(text))
def test_function_arguments():
# Passes a custom tag list and a per-tag attribute map to bleach.clean
# through the `tags` and `attributes` keyword arguments.
# NOTE(review): both string literals below are split across physical lines,
# which is not valid inside single quotes; the markup they held (presumably
# the br/span tags named in TAGS) looks lost -- restore before running.
TAGS = ['span', 'br']
ATTRS = {'span': ['style']}
eq_('a
test',
bleach.clean('a
test',
tags=TAGS, attributes=ATTRS))
def test_named_arguments():
    """Clean the fixture with defaults, then with an ``attributes`` map."""
    # NOTE(review): the fixture is bare text as written; presumably it was a
    # link carrying rel/href attributes -- confirm the literal is intact.
    allowed_attrs = {'a': ['rel', 'href']}
    text = u'xx.com'

    default_result = bleach.clean(text)
    eq_('xx.com', default_result)

    custom_result = bleach.clean(text, attributes=allowed_attrs)
    eq_(text, custom_result)
def test_disallowed_html():
    """Tags that are not allowed are escaped, not passed through.

    The input carries the raw ``<script>`` / ``<style>`` element and the
    expected output carries the same element with its angle brackets escaped.
    Without the element in the input, the expected text could never be
    produced.
    """
    eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
        bleach.clean('a <script>safe()</script> test'))
    eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
        bleach.clean('a <style>body{}</style> test'))
def test_bad_href():
    """The fixture is expected to equal its own cleaned form."""
    # NOTE(review): no href is present in the fixture as written -- confirm
    # the literal is intact.
    text = 'no link'
    eq_(text, bleach.clean(text))
def test_bare_entities():
    """Bare ``&`` and ``<`` characters are escaped in the cleaned output."""
    # Expected values must be the escaped forms: cleaned output never
    # contains a bare '&' or a stray '<'.
    eq_('an &amp; entity', bleach.clean('an & entity'))
    eq_('an &lt; entity', bleach.clean('an < entity'))
    eq_('tag &lt; and entity',
        bleach.clean('tag < and entity'))
    # An entity that is already escaped must not be escaped a second time.
    eq_('&amp;', bleach.clean('&amp;'))
def test_escaped_entities():
    """Markup that is already entity-escaped passes through unchanged."""
    # The fixture must hold escaped entities, not a live <em> element;
    # otherwise this only checks that an allowed tag survives cleaning.
    s = u'&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, bleach.clean(s))
def test_serializer():
# Runs bleach.clean with tags=['table'] and tags=['p'], and bleach.linkify,
# comparing each result against a fixture.
# NOTE(review): the fixtures look damaged -- `s` is empty, the linkify call
# is given u'' yet expected to return u'test', and the last two literals are
# split across physical lines (invalid inside single quotes).  Restore the
# original markup before running.
s = u''
eq_(s, bleach.clean(s, tags=['table']))
eq_(u'test', bleach.linkify(u''))
eq_(u'test
', bleach.clean(u'test
', tags=['p']))
def test_no_href_links():
    """``linkify`` is expected to return the fixture unchanged."""
    # NOTE(review): the fixture is a single character as written; presumably
    # it was an anchor without an href -- confirm the literal is intact.
    text = u'x'
    linked = bleach.linkify(text)
    eq_(text, linked)
def test_weird_strings():
    """A malformed end-tag fragment is dropped entirely by ``clean``."""
    # A plain '3' could never clean to the empty string, so the fixture must
    # be something the parser discards.
    # NOTE(review): '</3' is the assumed original fixture; it relies on the
    # parser treating it as a bogus comment and on comments being stripped
    # by default -- confirm against version control.
    s = '</3'
    eq_(bleach.clean(s), '')
def test_xml_render():
    """Rendering an empty parsed fragment yields the empty string."""
    fragment = html5lib.HTMLParser().parseFragment('')
    rendered = bleach._render(fragment)
    eq_(rendered, '')
def test_stripping():
# Calls bleach.clean with strip=True on several fixtures, some with an
# explicit `tags` list, and compares each result with the expected text.
# NOTE(review): from the third fixture onward the string literals are split
# across physical lines (invalid inside single quotes) and the markup they
# held looks lost; the first two fixtures contain no tags as written.
# Restore the original literals before running.
eq_('a test with html tags',
bleach.clean('a test with html tags', strip=True))
eq_('a test with html tags',
bleach.clean('a test with '
'html tags', strip=True))
s = 'link text
'
eq_('link text
', bleach.clean(s, tags=['p'], strip=True))
s = 'multiply nested text
'
eq_('multiply nested text
', bleach.clean(s, tags=['p'], strip=True))
s = (''
'
')
eq_('
',
bleach.clean(s, tags=['p', 'a'], strip=True))
def test_allowed_styles():
    """Call ``clean`` with an attribute list, with and without ``styles``."""
    # NOTE(review): every fixture here is the empty string as written, so the
    # assertions only compare '' with '' -- confirm the literals are intact.
    allowed_attrs = ['style']
    allowed_styles = ['color']
    expected_blank = ''
    styled = ''

    without_styles = bleach.clean('', attributes=allowed_attrs)
    eq_(expected_blank, without_styles)

    round_trip = bleach.clean(styled, attributes=allowed_attrs,
                              styles=allowed_styles)
    eq_(styled, round_trip)

    filtered = bleach.clean('', attributes=allowed_attrs,
                            styles=allowed_styles)
    eq_(styled, filtered)
def test_idempotent():
    """Running ``clean`` or ``linkify`` on its own output changes nothing."""
    raw = u'invalid & < extra http://link.com'

    cleaned_once = bleach.clean(raw)
    cleaned_twice = bleach.clean(cleaned_once)
    eq_(cleaned_once, cleaned_twice)

    linked_once = bleach.linkify(raw)
    linked_twice = bleach.linkify(linked_once)
    eq_(linked_once, linked_twice)
def test_lowercase_html():
    """Cleaned markup is expected to be emitted in lowercase."""
    # NOTE(review): input and expectation are the same bare text as written;
    # presumably the input carried upper-case markup with a class attribute
    # -- confirm the literals are intact.
    source = u'BAR'
    expected = u'BAR'
    result = bleach.clean(source, attributes=['class'])
    eq_(expected, result)
def test_wildcard_attributes():
    """Clean with an attribute map mixing a ``'*'`` key and a per-tag key."""
    # NOTE(review): the fixtures contain no tags as written -- confirm the
    # literals still hold the img/em markup the allow-lists refer to.
    allowed_attrs = {'*': ['id'], 'img': ['src']}
    allowed_tags = ['img', 'em']
    source = (u'both can have '
              u'')
    expected = u'both can have '
    result = bleach.clean(source, tags=allowed_tags,
                          attributes=allowed_attrs)
    eq_(expected, result)
def test_sarcasm():
    """Jokes should crash.

    An unknown ``<sarcasm/>`` tag in the input must come back escaped rather
    than as live markup.
    """
    # The tag has to be present in the input, and escaped in the expected
    # output; otherwise cleaning would have to invent markup for the
    # comparison to hold.
    dirty = u'Yeah right <sarcasm/>'
    clean = u'Yeah right &lt;sarcasm/&gt;'
    eq_(clean, bleach.clean(dirty))