diff options
Diffstat (limited to 'bleach/tests')
-rw-r--r-- | bleach/tests/__init__.py | 0 | ||||
-rw-r--r-- | bleach/tests/test_basics.py | 170 | ||||
-rw-r--r-- | bleach/tests/test_css.py | 85 | ||||
-rw-r--r-- | bleach/tests/test_delinkify.py | 109 | ||||
-rw-r--r-- | bleach/tests/test_links.py | 312 | ||||
-rw-r--r-- | bleach/tests/test_security.py | 108 | ||||
-rw-r--r-- | bleach/tests/test_unicode.py | 54 |
7 files changed, 838 insertions, 0 deletions
diff --git a/bleach/tests/__init__.py b/bleach/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/bleach/tests/__init__.py diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py new file mode 100644 index 0000000..60be11d --- /dev/null +++ b/bleach/tests/test_basics.py @@ -0,0 +1,170 @@ +import html5lib +from nose.tools import eq_ + +import bleach + + +def test_empty(): + eq_('', bleach.clean('')) + + +def test_comments_only(): + comment = '<!-- this is a comment -->' + open_comment = '<!-- this is an open comment' + eq_('', bleach.clean(comment)) + eq_('', bleach.clean(open_comment)) + eq_(comment, bleach.clean(comment, strip_comments=False)) + eq_('%s-->' % open_comment, bleach.clean(open_comment, + strip_comments=False)) + + +def test_with_comments(): + html = '<!-- comment -->Just text' + eq_('Just text', bleach.clean(html)) + eq_(html, bleach.clean(html, strip_comments=False)) + + +def test_no_html(): + eq_('no html string', bleach.clean('no html string')) + + +def test_allowed_html(): + eq_('an <strong>allowed</strong> tag', + bleach.clean('an <strong>allowed</strong> tag')) + eq_('another <em>good</em> tag', + bleach.clean('another <em>good</em> tag')) + + +def test_bad_html(): + eq_('a <em>fixed tag</em>', + bleach.clean('a <em>fixed tag')) + + +def test_function_arguments(): + TAGS = ['span', 'br'] + ATTRS = {'span': ['style']} + + eq_('a <br><span style="">test</span>', + bleach.clean('a <br/><span style="color:red">test</span>', + tags=TAGS, attributes=ATTRS)) + + +def test_named_arguments(): + ATTRS = {'a': ['rel', 'href']} + s = u'<a href="http://xx.com" rel="alternate">xx.com</a>' + eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s)) + eq_(s, bleach.clean(s, attributes=ATTRS)) + + +def test_disallowed_html(): + eq_('a <script>safe()</script> test', + bleach.clean('a <script>safe()</script> test')) + eq_('a <style>body{}</style> test', + bleach.clean('a <style>body{}</style> test')) + + +def test_bad_href(): + eq_('<em>no link</em>', + bleach.clean('<em href="fail">no link</em>')) + + +def test_bare_entities(): + eq_('an & entity', bleach.clean('an & entity')) + eq_('an < entity', bleach.clean('an < entity')) + eq_('tag < <em>and</em> entity', + bleach.clean('tag < <em>and</em> entity')) + eq_('&', bleach.clean('&')) + + +def test_escaped_entities(): + s = u'<em>strong</em>' + eq_(s, bleach.clean(s)) + + +def test_serializer(): + s = u'<table></table>' + eq_(s, bleach.clean(s, tags=['table'])) + eq_(u'test<table></table>', bleach.linkify(u'<table>test</table>')) + eq_(u'<p>test</p>', bleach.clean(u'<p>test</p>', tags=['p'])) + + +def test_no_href_links(): + s = u'<a name="anchor">x</a>' + eq_(s, bleach.linkify(s)) + eq_(s, bleach.linkify(s, nofollow=False)) + + +def test_weird_strings(): + s = '</3' + eq_(bleach.clean(s), '') + + +def test_xml_render(): + parser = html5lib.HTMLParser() + eq_(bleach._render(parser.parseFragment('')), '') + + +def test_stripping(): + eq_('a test <em>with</em> <b>html</b> tags', + bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True)) + eq_('a test <em>with</em> <b>html</b> tags', + bleach.clean('a test <em>with</em> <img src="http://example.com/"> ' + '<b>html</b> tags', strip=True)) + + s = '<p><a href="http://example.com/">link text</a></p>' + eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True)) + s = '<p><span>multiply <span>nested <span>text</span></span></span></p>' + eq_('<p>multiply nested text</p>', bleach.clean(s, tags=['p'], strip=True)) + + s = ('<p><a href="http://example.com/"><img src="http://example.com/">' + '</a></p>') + eq_('<p><a href="http://example.com/"></a></p>', + bleach.clean(s, tags=['p', 'a'], strip=True)) + + +def test_allowed_styles(): + ATTR = ['style'] + STYLE = ['color'] + blank = '<b style=""></b>' + s = '<b style="color: blue;"></b>' + eq_(blank, bleach.clean('<b style="top:0"></b>', attributes=ATTR)) + eq_(s, bleach.clean(s, attributes=ATTR, styles=STYLE)) + eq_(s, bleach.clean('<b style="top: 0; color: blue;"></b>', + attributes=ATTR, styles=STYLE)) + + +def test_idempotent(): + """Make sure that applying the filter twice doesn't change anything.""" + dirty = u'<span>invalid & </span> < extra http://link.com<em>' + + clean = bleach.clean(dirty) + eq_(clean, bleach.clean(clean)) + + linked = bleach.linkify(dirty) + eq_(linked, bleach.linkify(linked)) + + +def test_lowercase_html(): + """We should output lowercase HTML.""" + dirty = u'<EM CLASS="FOO">BAR</EM>' + clean = u'<em class="FOO">BAR</em>' + eq_(clean, bleach.clean(dirty, attributes=['class'])) + + +def test_wildcard_attributes(): + ATTR = { + '*': ['id'], + 'img': ['src'], + } + TAG = ['img', 'em'] + dirty = (u'both <em id="foo" style="color: black">can</em> have ' + u'<img id="bar" src="foo"/>') + clean = u'both <em id="foo">can</em> have <img id="bar" src="foo">' + eq_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR)) + + +def test_sarcasm(): + """Jokes should crash.<sarcasm/>""" + dirty = u'Yeah right <sarcasm/>' + clean = u'Yeah right <sarcasm/>' + eq_(clean, bleach.clean(dirty)) diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py new file mode 100644 index 0000000..fdb3f65 --- /dev/null +++ b/bleach/tests/test_css.py @@ -0,0 +1,85 @@ +from functools import partial + +from nose.tools import eq_ + +from bleach import clean + + +clean = partial(clean, tags=['p'], attributes=['style']) + + +def test_allowed_css(): + tests = ( + ('font-family: Arial; color: red; float: left; ' + 'background-color: red;', 'color: red;', ['color']), + ('border: 1px solid blue; color: red; float: left;', 'color: red;', + ['color']), + ('border: 1px solid blue; color: red; float: left;', + 'color: red; float: left;', ['color', 'float']), + ('color: red; float: left; padding: 1em;', 'color: red; float: left;', + ['color', 'float']), + ('color: red; float: left; padding: 1em;', 'color: red;', ['color']), + ('cursor: -moz-grab;', 'cursor: -moz-grab;', ['cursor']), + ('color: hsl(30,100%,50%);', 'color: hsl(30,100%,50%);', ['color']), + ('color: rgba(255,0,0,0.4);', 'color: rgba(255,0,0,0.4);', ['color']), + ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", ['text-overflow']), + ) + + p = '<p style="%s">bar</p>' + + def check(input, output, styles): + eq_(p % output, clean(p % input, styles=styles)) + + for i, o, s in tests: + yield check, i, o, s + + +def test_valid_css(): + """The sanitizer should fix missing CSS values.""" + styles = ['color', 'float'] + eq_('<p style="float: left;">foo</p>', + clean('<p style="float: left; color: ">foo</p>', styles=styles)) + eq_('<p style="">foo</p>', + clean('<p style="color: float: left;">foo</p>', styles=styles)) + + +def test_style_hang(): + """The sanitizer should not hang on any inline styles""" + # TODO: Neaten this up. It's copypasta from MDN/Kuma to repro the bug + style = ("""margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; """ + """margin-left: 0px; padding-top: 15px; padding-right: 15px; """ + """padding-bottom: 15px; padding-left: 15px; border-top-width: """ + """1px; border-right-width: 1px; border-bottom-width: 1px; """ + """border-left-width: 1px; border-top-style: dotted; """ + """border-right-style: dotted; border-bottom-style: dotted; """ + """border-left-style: dotted; border-top-color: rgb(203, 200, """ + """185); border-right-color: rgb(203, 200, 185); """ + """border-bottom-color: rgb(203, 200, 185); border-left-color: """ + """rgb(203, 200, 185); background-image: initial; """ + """background-attachment: initial; background-origin: initial; """ + """background-clip: initial; background-color: """ + """rgb(246, 246, 242); overflow-x: auto; overflow-y: auto; """ + """font: normal normal normal 100%/normal 'Courier New', """ + """'Andale Mono', monospace; background-position: initial """ + """initial; background-repeat: initial initial;""") + html = '<p style="%s">Hello world</p>' % style + styles = [ + 'border', 'float', 'overflow', 'min-height', 'vertical-align', + 'white-space', + 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', + 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right', + 'background', + 'background-color', + 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', + ] + + expected = ("""<p style="margin-top: 0px; margin-right: 0px; """ + """margin-bottom: 1.286em; margin-left: 0px; padding-top: """ + """15px; padding-right: 15px; padding-bottom: 15px; """ + """padding-left: 15px; background-color: """ + """rgb(246, 246, 242); font: normal normal normal """ + """100%/normal 'Courier New', 'Andale Mono', monospace;">""" + """Hello world</p>""") + + result = clean(html, styles=styles) + eq_(expected, result) diff --git a/bleach/tests/test_delinkify.py b/bleach/tests/test_delinkify.py new file mode 100644 index 0000000..f216d2f --- /dev/null +++ b/bleach/tests/test_delinkify.py @@ -0,0 +1,109 @@ +from nose.tools import eq_ + +import bleach + + +def test_delinkify(): + eq_('test', bleach.delinkify('<a href="http://ex.mp">test</a>')) + eq_('footestbar', + bleach.delinkify('foo<a href="http://ex.mp">test</a>bar')) + + +def test_whitelist(): + html = '<a href="http://ex.mp">test</a>' + eq_(html, bleach.delinkify(html, allow_domains=['ex.mp'])) + eq_('test', bleach.delinkify(html, allow_domains=['ex2.mp'])) + # Allow a single domain as a special case. + eq_(html, bleach.delinkify(html, allow_domains='ex.mp')) + + +def test_nested_a(): + html = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>' + eq_('testtest', bleach.delinkify(html)) + eq_('<a href="http://ex.mp">test</a>test', + bleach.delinkify(html, allow_domains=['ex.mp'])) + + +def test_nested_tag(): + html = '<a href="http://ex.mp">test<span>test</span></a>' + eq_('test<span>test</span>', bleach.delinkify(html)) + + +def test_a_name(): + """Don't screw with non-link <a> tags.""" + html = '<a name="foo">bar</a>' + eq_(html, bleach.delinkify(html)) + + +def test_relative(): + """Relative links are optionally OK.""" + html = 'some <a href="/foo/bar">link</a>' + eq_('some link', bleach.delinkify(html)) + eq_(html, bleach.delinkify(html, allow_relative=True)) + + +def test_protocol_relative(): + """Protocol-relative links aren't relative.""" + html = 'bad <a href="//ex.mp">link</a>' + expect = 'bad link' + eq_(expect, bleach.delinkify(html)) + eq_(expect, bleach.delinkify(html, allow_relative=True)) + eq_(html, bleach.delinkify(html, allow_domains='ex.mp')) + + +def test_domain_match(): + tests = ( + ('ex.mp', 'ex.mp', True), + ('ex.mp', '*.ex.mp', True), + ('test.ex.mp', '*.ex.mp', True), + ('test.ex.mp', 'ex.mp', False), + ('test.test.ex.mp', '*.ex.mp', False), + ('test.test.ex.mp', '**.ex.mp', True), + ('wrong.mp', 'ex.mp', False), + ('wrong.mp', '*.ex.mp', False), + ('really.wrong.mp', 'ex.mp', False), + ('really.wrong.mp', '*.ex.mp', False), + ('really.very.wrong.mp', '*.ex.mp', False), + ('EX.mp', 'ex.mp', True), # Domains are case-insensitive. + ('ex.mp', 'an.ex.mp', False), + ('ex.mp', '*.an.ex.mp', False), + ('an.ex.am.pl', 'an.*.am.pl', True), + ('a.ex.am.pl', 'an.*.am.pl', False), + ('ex.am.pl', 'an.*.am.pl', False), + ) + + def _check(t, c, v): + eq_(v, bleach._domain_match(t, c)) + + for t, c, v in tests: + yield _check, t, c, v + + +def test_double_star(): + assert bleach._domain_match('ex.mp', '**.ex.mp') + try: + bleach._domain_match('ex.mp', 'an.**.ex.mp') + except bleach.ValidationError: + pass + else: + assert False, '_domain_match should not accept an.**.ex.mp' + + +def test_allow_subdomains(): + domains = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le') + html = ( + ('<a href="http://an.ex.mp">bad</a>', 'bad'), + ('<a href="http://exa.mp">good</a>', None), + ('<a href="http://an.exa.mp">good</a>', None), + ('<a href="http://an.exam.pl">good</a>', None), + ('<a href="http://another.exam.pl">bad</a>', 'bad'), + ('<a href="http://a.bad.examp.le">bad</a>', 'bad'), + ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'), + ) + + def _check(html, text): + output = bleach.delinkify(html, allow_domains=domains) + eq_(html if text is None else text, output) + + for t, o in html: + yield _check, t, o diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py new file mode 100644 index 0000000..7caf006 --- /dev/null +++ b/bleach/tests/test_links.py @@ -0,0 +1,312 @@ +import urllib + +from html5lib.tokenizer import HTMLTokenizer +from nose.tools import eq_ + +from bleach import linkify, url_re + + +def filter_url(url): + return u'http://bouncer/?u=%s' % urllib.quote_plus(url) + + +def test_url_re(): + def no_match(s): + match = url_re.search(s) + if match: + assert not match, 'matched %s' % s[slice(*match.span())] + yield no_match, 'just what i am looking for...it' + + +def test_empty(): + eq_('', linkify('')) + + +def test_simple_link(): + eq_('a <a href="http://example.com" rel="nofollow">http://example.com' + '</a> link', + linkify('a http://example.com link')) + eq_('a <a href="https://example.com" rel="nofollow">https://example.com' + '</a> link', + linkify('a https://example.com link')) + eq_('an <a href="http://example.com" rel="nofollow">example.com</a> link', + linkify('an example.com link')) + + +def test_trailing_slash(): + eq_('<a href="http://example.com/" rel="nofollow">http://example.com/</a>', + linkify('http://example.com/')) + eq_('<a href="http://example.com/foo/" rel="nofollow">' + 'http://example.com/foo/</a>', + linkify('http://example.com/foo/')) + eq_('<a href="http://example.com/foo/bar/" rel="nofollow">' + 'http://example.com/foo/bar/</a>', + linkify('http://example.com/foo/bar/')) + + +def test_mangle_link(): + eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">' + 'http://example.com</a>', + linkify('http://example.com', filter_url=filter_url)) + + +def test_email_link(): + eq_('a james@example.com mailto', + linkify('a james@example.com mailto')) + eq_('a james@example.com.au mailto', + linkify('a james@example.com.au mailto')) + eq_('a <a href="mailto:james@example.com" rel="nofollow">' + 'james@example.com</a> mailto', + linkify('a james@example.com mailto', parse_email=True)) + eq_('aussie <a href="mailto:james@example.com.au" rel="nofollow">' + 'james@example.com.au</a> mailto', + linkify('aussie james@example.com.au mailto', parse_email=True)) + eq_('email to <a href="james@example.com" rel="nofollow">' + 'james@example.com</a>', + linkify('email to <a href="james@example.com">' + 'james@example.com</a>', parse_email=True)) + + +def test_email_link_escaping(): + eq_('''<a href='mailto:"james"@example.com' rel="nofollow">''' + '''"james"@example.com</a>''', + linkify('"james"@example.com', parse_email=True)) + eq_('''<a href="mailto:"j'ames"@example.com" rel="nofollow">''' + '''"j'ames"@example.com</a>''', + linkify('"j\'ames"@example.com', parse_email=True)) + eq_('''<a href='mailto:"ja>mes"@example.com' rel="nofollow">''' + '''"ja>mes"@example.com</a>''', + linkify('"ja>mes"@example.com', parse_email=True)) + + +def test_tlds(): + eq_('<a href="http://example.com" rel="nofollow">example.com</a>', + linkify('example.com')) + eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>', + linkify('example.co.uk')) + eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>', + linkify('example.edu')) + eq_('example.xxx', linkify('example.xxx')) + eq_(' brie', linkify(' brie')) + eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>', + linkify('bit.ly/fun')) + + +def test_escaping(): + eq_('< unrelated', linkify('< unrelated')) + + +def test_nofollow_off(): + eq_('<a href="http://example.com">example.com</a>', + linkify(u'example.com', nofollow=False)) + + +def test_link_in_html(): + eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>', + linkify('<i>http://yy.com</i>')) + eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com</a>' + '</strong></em>', + linkify('<em><strong>http://xx.com</strong></em>')) + + +def test_links_https(): + eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>', + linkify('https://yy.com')) + + +def test_add_rel_nofollow(): + """Verify that rel="nofollow" is added to an existing link""" + eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>', + linkify('<a href="http://yy.com">http://yy.com</a>')) + + +def test_url_with_path(): + eq_('<a href="http://example.com/path/to/file" rel="nofollow">' + 'http://example.com/path/to/file</a>', + linkify('http://example.com/path/to/file')) + + +def test_link_ftp(): + eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">' + 'ftp://ftp.mozilla.org/some/file</a>', + linkify('ftp://ftp.mozilla.org/some/file')) + + +def test_link_query(): + eq_('<a href="http://xx.com/?test=win" rel="nofollow">' + 'http://xx.com/?test=win</a>', + linkify('http://xx.com/?test=win')) + eq_('<a href="http://xx.com/?test=win" rel="nofollow">' + 'xx.com/?test=win</a>', + linkify('xx.com/?test=win')) + eq_('<a href="http://xx.com?test=win" rel="nofollow">' + 'xx.com?test=win</a>', + linkify('xx.com?test=win')) + + +def test_link_fragment(): + eq_('<a href="http://xx.com/path#frag" rel="nofollow">' + 'http://xx.com/path#frag</a>', + linkify('http://xx.com/path#frag')) + + +def test_link_entities(): + eq_('<a href="http://xx.com/?a=1&b=2" rel="nofollow">' + 'http://xx.com/?a=1&b=2</a>', + linkify('http://xx.com/?a=1&b=2')) + + +def test_escaped_html(): + """If I pass in escaped HTML, it should probably come out escaped.""" + s = '<em>strong</em>' + eq_(s, linkify(s)) + + +def test_link_http_complete(): + eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d' + '&e#f" rel="nofollow">' + 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>', + linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f')) + + +def test_non_url(): + """document.vulnerable should absolutely not be linkified.""" + s = 'document.vulnerable' + eq_(s, linkify(s)) + + +def test_javascript_url(): + """javascript: urls should never be linkified.""" + s = 'javascript:document.vulnerable' + eq_(s, linkify(s)) + + +def test_unsafe_url(): + """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning.""" + eq_('All your{"<a href="http://xx.yy.com/grover.png" ' + 'rel="nofollow">xx.yy.com/grover.png</a>"}base are', + linkify('All your{"xx.yy.com/grover.png"}base are')) + + +def test_skip_pre(): + """Skip linkification in <pre> tags.""" + simple = 'http://xx.com <pre>http://xx.com</pre>' + linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' + '<pre>http://xx.com</pre>') + all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' + '<pre><a href="http://xx.com" rel="nofollow">http://xx.com' + '</a></pre>') + eq_(linked, linkify(simple, skip_pre=True)) + eq_(all_linked, linkify(simple)) + + already_linked = '<pre><a href="http://xx.com">xx</a></pre>' + nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>' + eq_(nofollowed, linkify(already_linked)) + eq_(nofollowed, linkify(already_linked, skip_pre=True)) + + +def test_libgl(): + """libgl.so.1 should not be linkified.""" + eq_('libgl.so.1', linkify('libgl.so.1')) + + +def test_end_of_sentence(): + """example.com. should match.""" + out = u'<a href="http://%s" rel="nofollow">%s</a>%s' + in_ = u'%s%s' + + def check(u, p): + eq_(out % (u, u, p), linkify(in_ % (u, p))) + + tests = ( + ('example.com', '.'), + ('example.com', '...'), + ('ex.com/foo', '.'), + ('ex.com/foo', '....'), + ) + + for u, p in tests: + yield check, u, p + + +def test_end_of_clause(): + """example.com/foo, shouldn't include the ,""" + eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar', + linkify('ex.com/foo, bar')) + + +def test_sarcasm(): + """Jokes should crash.<sarcasm/>""" + dirty = u'Yeah right <sarcasm/>' + clean = u'Yeah right <sarcasm/>' + eq_(clean, linkify(dirty)) + + +def test_wrapping_parentheses(): + """URLs wrapped in parantheses should not include them.""" + out = u'%s<a href="http://%s" rel="nofollow">%s</a>%s' + + tests = ( + ('(example.com)', out % ('(', 'example.com', 'example.com', ')')), + ('(example.com/)', out % ('(', 'example.com/', 'example.com/', ')')), + ('(example.com/foo)', out % ('(', 'example.com/foo', + 'example.com/foo', ')')), + ('(((example.com/))))', out % ('(((', 'example.com/)', + 'example.com/)', ')))')), + ('example.com/))', out % ('', 'example.com/))', + 'example.com/))', '')), + ('http://en.wikipedia.org/wiki/Test_(assessment)', + out % ('', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', '')), + ('(http://en.wikipedia.org/wiki/Test_(assessment))', + out % ('(', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')), + ('((http://en.wikipedia.org/wiki/Test_(assessment))', + out % ('((', 'en.wikipedia.org/wiki/Test_(assessment', + 'http://en.wikipedia.org/wiki/Test_(assessment', '))')), + ('(http://en.wikipedia.org/wiki/Test_(assessment)))', + out % ('(', 'en.wikipedia.org/wiki/Test_(assessment))', + 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')), + ('(http://en.wikipedia.org/wiki/)Test_(assessment', + out % ('(', 'en.wikipedia.org/wiki/)Test_(assessment', + 'http://en.wikipedia.org/wiki/)Test_(assessment', '')), + ) + + def check(test, expected_output): + eq_(expected_output, linkify(test)) + + for test, expected_output in tests: + yield check, test, expected_output + + +def test_ports(): + """URLs can contain port numbers.""" + tests = ( + ('http://foo.com:8000', ('http://foo.com:8000', '')), + ('http://foo.com:8000/', ('http://foo.com:8000/', '')), + ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')), + ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')), + ('http://foo.com:', ('http://foo.com', ':')), + ) + + def check(test, output): + eq_(u'<a href="{0}" rel="nofollow">{0}</a>{1}'.format(*output), + linkify(test)) + + for test, output in tests: + yield check, test, output + + +def test_target(): + eq_('<a href="http://example.com" rel="nofollow" ' + 'target="_blank">example.com</a>', + linkify(u'example.com', target='_blank')) + eq_('<a href="http://example.com" target="_blank">example.com</a>', + linkify(u'example.com', target='_blank', nofollow=False)) + + +def test_tokenizer(): + """Linkify doesn't always have to sanitize.""" + raw = '<em>test<x></x></em>' + eq_('<em>test<x></x></em>', linkify(raw)) + eq_(raw, linkify(raw, tokenizer=HTMLTokenizer)) diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py new file mode 100644 index 0000000..9e9bb7b --- /dev/null +++ b/bleach/tests/test_security.py @@ -0,0 +1,108 @@ +"""More advanced security tests""" + +from nose.tools import eq_ + +from bleach import clean + + +def test_nested_script_tag(): + eq_('<<script>script>evil()<</script>/script>', + clean('<<script>script>evil()<</script>/script>')) + eq_('<<x>script>evil()<</x>/script>', + clean('<<x>script>evil()<</x>/script>')) + + +def test_nested_script_tag_r(): + eq_('<script<script>>evil()</script<>>', + clean('<script<script>>evil()</script</script>>')) + + +def test_invalid_attr(): + IMG = ['img', ] + IMG_ATTR = ['src'] + + eq_('<a href="test">test</a>', + clean('<a onclick="evil" href="test">test</a>')) + eq_('<img src="test">', + clean('<img onclick="evil" src="test" />', + tags=IMG, attributes=IMG_ATTR)) + eq_('<img src="test">', + clean('<img href="invalid" src="test" />', + tags=IMG, attributes=IMG_ATTR)) + + +def test_unquoted_attr(): + eq_('<abbr title="mytitle">myabbr</abbr>', + clean('<abbr title=mytitle>myabbr</abbr>')) + + +def test_unquoted_event_handler(): + eq_('<a href="http://xx.com">xx.com</a>', + clean('<a href="http://xx.com" onclick=foo()>xx.com</a>')) + + +def test_invalid_attr_value(): + eq_('<img src="javascript:alert(\'XSS\');">', + clean('<img src="javascript:alert(\'XSS\');">')) + + +def test_invalid_href_attr(): + eq_('<a>xss</a>', + clean('<a href="javascript:alert(\'XSS\')">xss</a>')) + + +def test_invalid_filter_attr(): + IMG = ['img', ] + IMG_ATTR = {'img': lambda n, v: n == 'src' and v == "http://example.com/"} + + eq_('<img src="http://example.com/">', + clean('<img onclick="evil" src="http://example.com/" />', + tags=IMG, attributes=IMG_ATTR)) + + eq_('<img>', clean('<img onclick="evil" src="http://badhost.com/" />', + tags=IMG, attributes=IMG_ATTR)) + + +def test_invalid_tag_char(): + eq_('<script xss="" src="http://xx.com/xss.js"></script>', + clean('<script/xss src="http://xx.com/xss.js"></script>')) + eq_('<script src="http://xx.com/xss.js"></script>', + clean('<script/src="http://xx.com/xss.js"></script>')) + + +def test_unclosed_tag(): + eq_('<script src="http://xx.com/xss.js&lt;b">', + clean('<script src=http://xx.com/xss.js<b>')) + eq_('<script src="http://xx.com/xss.js" <b="">', + clean('<script src="http://xx.com/xss.js"<b>')) + eq_('<script src="http://xx.com/xss.js" <b="">', + clean('<script src="http://xx.com/xss.js" <b>')) + + +def test_strip(): + """Using strip=True shouldn't result in malicious content.""" + s = '<scri<script>pt>alert(1)</scr</script>ipt>' + eq_('pt>alert(1)ipt>', clean(s, strip=True)) + s = '<scri<scri<script>pt>pt>alert(1)</script>' + eq_('pt>pt>alert(1)', clean(s, strip=True)) + + +def test_nasty(): + """Nested, broken up, multiple tags, are still foiled!""" + test = ('<scr<script></script>ipt type="text/javascript">alert("foo");</' + '<script></script>script<del></del>>') + expect = (u'<scr<script></script>ipt type="text/javascript"' + u'>alert("foo");</script>script<del></del>' + u'>') + eq_(expect, clean(test)) + + +def test_poster_attribute(): + """Poster attributes should not allow javascript.""" + tags = ['video'] + attrs = {'video': ['poster']} + test = '<video poster="javascript:alert(1)"></video>' + expect = '<video></video>' + eq_(expect, clean(test, tags=tags, attributes=attrs)) + ok = '<video poster="/foo.png"></video>' + eq_(ok, clean(ok, tags=tags, attributes=attrs)) diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py new file mode 100644 index 0000000..67123cc --- /dev/null +++ b/bleach/tests/test_unicode.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- + +from nose.tools import eq_ + +from bleach import clean, linkify + + +def test_japanese_safe_simple(): + eq_(u'ヘルプとチュートリアル', clean(u'ヘルプとチュートリアル')) + eq_(u'ヘルプとチュートリアル', linkify(u'ヘルプとチュートリアル')) + + +def test_japanese_strip(): + eq_(u'<em>ヘルプとチュートリアル</em>', + clean(u'<em>ヘルプとチュートリアル</em>')) + eq_(u'<span>ヘルプとチュートリアル</span>', + clean(u'<span>ヘルプとチュートリアル</span>')) + + +def test_russian_simple(): + eq_(u'Домашняя', clean(u'Домашняя')) + eq_(u'Домашняя', linkify(u'Домашняя')) + + +def test_mixed(): + eq_(u'Домашняяヘルプとチュートリアル', + clean(u'Домашняяヘルプとチュートリアル')) + + +def test_mixed_linkify(): + eq_(u'Домашняя <a href="http://example.com" rel="nofollow">' + u'http://example.com</a> ヘルプとチュートリアル', + linkify(u'Домашняя http://example.com ヘルプとチュートリアル')) + + +def test_url_utf8(): + """Allow UTF8 characters in URLs themselves.""" + out = u'<a href="%(url)s" rel="nofollow">%(url)s</a>' + + tests = ( + ('http://éxámplé.com/', out % {'url': u'http://éxámplé.com/'}), + ('http://éxámplé.com/íàñá/', + out % {'url': u'http://éxámplé.com/íàñá/'}), + ('http://éxámplé.com/íàñá/?foo=bar', + out % {'url': u'http://éxámplé.com/íàñá/?foo=bar'}), + ('http://éxámplé.com/íàñá/?fóo=bár', + out % {'url': u'http://éxámplé.com/íàñá/?fóo=bár'}), + ) + + def check(test, expected_output): + eq_(expected_output, linkify(test)) + + for test, expected_output in tests: + yield check, test, expected_output |