about summary refs log tree commit diff
path: root/bleach/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/tests')
-rw-r--r-- bleach/tests/__init__.py 0
-rw-r--r-- bleach/tests/test_basics.py 170
-rw-r--r-- bleach/tests/test_css.py 85
-rw-r--r-- bleach/tests/test_delinkify.py 109
-rw-r--r-- bleach/tests/test_links.py 312
-rw-r--r-- bleach/tests/test_security.py 108
-rw-r--r-- bleach/tests/test_unicode.py 54
7 files changed, 838 insertions, 0 deletions
diff --git a/bleach/tests/__init__.py b/bleach/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bleach/tests/__init__.py
diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py
new file mode 100644
index 0000000..60be11d
--- /dev/null
+++ b/bleach/tests/test_basics.py
@@ -0,0 +1,170 @@
+import html5lib
+from nose.tools import eq_
+
+import bleach
+
+
+def test_empty():
+ eq_('', bleach.clean(''))
+
+
+def test_comments_only():
+ comment = '<!-- this is a comment -->'
+ open_comment = '<!-- this is an open comment'
+ eq_('', bleach.clean(comment))
+ eq_('', bleach.clean(open_comment))
+ eq_(comment, bleach.clean(comment, strip_comments=False))
+ eq_('%s-->' % open_comment, bleach.clean(open_comment,
+ strip_comments=False))
+
+
+def test_with_comments():
+ html = '<!-- comment -->Just text'
+ eq_('Just text', bleach.clean(html))
+ eq_(html, bleach.clean(html, strip_comments=False))
+
+
+def test_no_html():
+ eq_('no html string', bleach.clean('no html string'))
+
+
+def test_allowed_html():
+ eq_('an <strong>allowed</strong> tag',
+ bleach.clean('an <strong>allowed</strong> tag'))
+ eq_('another <em>good</em> tag',
+ bleach.clean('another <em>good</em> tag'))
+
+
+def test_bad_html():
+ eq_('a <em>fixed tag</em>',
+ bleach.clean('a <em>fixed tag'))
+
+
+def test_function_arguments():
+ TAGS = ['span', 'br']
+ ATTRS = {'span': ['style']}
+
+ eq_('a <br><span style="">test</span>',
+ bleach.clean('a <br/><span style="color:red">test</span>',
+ tags=TAGS, attributes=ATTRS))
+
+
+def test_named_arguments():
+ ATTRS = {'a': ['rel', 'href']}
+ s = u'<a href="http://xx.com" rel="alternate">xx.com</a>'
+ eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s))
+ eq_(s, bleach.clean(s, attributes=ATTRS))
+
+
+def test_disallowed_html():
+ eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
+ bleach.clean('a <script>safe()</script> test'))
+ eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
+ bleach.clean('a <style>body{}</style> test'))
+
+
+def test_bad_href():
+ eq_('<em>no link</em>',
+ bleach.clean('<em href="fail">no link</em>'))
+
+
+def test_bare_entities():
+ eq_('an &amp; entity', bleach.clean('an & entity'))
+ eq_('an &lt; entity', bleach.clean('an < entity'))
+ eq_('tag &lt; <em>and</em> entity',
+ bleach.clean('tag < <em>and</em> entity'))
+ eq_('&amp;', bleach.clean('&amp;'))
+
+
+def test_escaped_entities():
+ s = u'&lt;em&gt;strong&lt;/em&gt;'
+ eq_(s, bleach.clean(s))
+
+
+def test_serializer():
+ s = u'<table></table>'
+ eq_(s, bleach.clean(s, tags=['table']))
+ eq_(u'test<table></table>', bleach.linkify(u'<table>test</table>'))
+ eq_(u'<p>test</p>', bleach.clean(u'<p>test</p>', tags=['p']))
+
+
+def test_no_href_links():
+ s = u'<a name="anchor">x</a>'
+ eq_(s, bleach.linkify(s))
+ eq_(s, bleach.linkify(s, nofollow=False))
+
+
+def test_weird_strings():
+ s = '</3'
+ eq_(bleach.clean(s), '')
+
+
+def test_xml_render():
+ parser = html5lib.HTMLParser()
+ eq_(bleach._render(parser.parseFragment('')), '')
+
+
+def test_stripping():
+ eq_('a test <em>with</em> <b>html</b> tags',
+ bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
+ eq_('a test <em>with</em> <b>html</b> tags',
+ bleach.clean('a test <em>with</em> <img src="http://example.com/"> '
+ '<b>html</b> tags', strip=True))
+
+ s = '<p><a href="http://example.com/">link text</a></p>'
+ eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))
+ s = '<p><span>multiply <span>nested <span>text</span></span></span></p>'
+ eq_('<p>multiply nested text</p>', bleach.clean(s, tags=['p'], strip=True))
+
+ s = ('<p><a href="http://example.com/"><img src="http://example.com/">'
+ '</a></p>')
+ eq_('<p><a href="http://example.com/"></a></p>',
+ bleach.clean(s, tags=['p', 'a'], strip=True))
+
+
+def test_allowed_styles():
+ ATTR = ['style']
+ STYLE = ['color']
+ blank = '<b style=""></b>'
+ s = '<b style="color: blue;"></b>'
+ eq_(blank, bleach.clean('<b style="top:0"></b>', attributes=ATTR))
+ eq_(s, bleach.clean(s, attributes=ATTR, styles=STYLE))
+ eq_(s, bleach.clean('<b style="top: 0; color: blue;"></b>',
+ attributes=ATTR, styles=STYLE))
+
+
+def test_idempotent():
+ """Make sure that applying the filter twice doesn't change anything."""
+ dirty = u'<span>invalid & </span> < extra http://link.com<em>'
+
+ clean = bleach.clean(dirty)
+ eq_(clean, bleach.clean(clean))
+
+ linked = bleach.linkify(dirty)
+ eq_(linked, bleach.linkify(linked))
+
+
+def test_lowercase_html():
+ """We should output lowercase HTML."""
+ dirty = u'<EM CLASS="FOO">BAR</EM>'
+ clean = u'<em class="FOO">BAR</em>'
+ eq_(clean, bleach.clean(dirty, attributes=['class']))
+
+
+def test_wildcard_attributes():
+ ATTR = {
+ '*': ['id'],
+ 'img': ['src'],
+ }
+ TAG = ['img', 'em']
+ dirty = (u'both <em id="foo" style="color: black">can</em> have '
+ u'<img id="bar" src="foo"/>')
+ clean = u'both <em id="foo">can</em> have <img id="bar" src="foo">'
+ eq_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))
+
+
+def test_sarcasm():
+ """Jokes should crash.<sarcasm/>"""
+ dirty = u'Yeah right <sarcasm/>'
+ clean = u'Yeah right &lt;sarcasm/&gt;'
+ eq_(clean, bleach.clean(dirty))
diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py
new file mode 100644
index 0000000..fdb3f65
--- /dev/null
+++ b/bleach/tests/test_css.py
@@ -0,0 +1,85 @@
+from functools import partial
+
+from nose.tools import eq_
+
+from bleach import clean
+
+
+clean = partial(clean, tags=['p'], attributes=['style'])
+
+
+def test_allowed_css():
+ tests = (
+ ('font-family: Arial; color: red; float: left; '
+ 'background-color: red;', 'color: red;', ['color']),
+ ('border: 1px solid blue; color: red; float: left;', 'color: red;',
+ ['color']),
+ ('border: 1px solid blue; color: red; float: left;',
+ 'color: red; float: left;', ['color', 'float']),
+ ('color: red; float: left; padding: 1em;', 'color: red; float: left;',
+ ['color', 'float']),
+ ('color: red; float: left; padding: 1em;', 'color: red;', ['color']),
+ ('cursor: -moz-grab;', 'cursor: -moz-grab;', ['cursor']),
+ ('color: hsl(30,100%,50%);', 'color: hsl(30,100%,50%);', ['color']),
+ ('color: rgba(255,0,0,0.4);', 'color: rgba(255,0,0,0.4);', ['color']),
+ ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", ['text-overflow']),
+ )
+
+ p = '<p style="%s">bar</p>'
+
+ def check(input, output, styles):
+ eq_(p % output, clean(p % input, styles=styles))
+
+ for i, o, s in tests:
+ yield check, i, o, s
+
+
+def test_valid_css():
+ """The sanitizer should fix missing CSS values."""
+ styles = ['color', 'float']
+ eq_('<p style="float: left;">foo</p>',
+ clean('<p style="float: left; color: ">foo</p>', styles=styles))
+ eq_('<p style="">foo</p>',
+ clean('<p style="color: float: left;">foo</p>', styles=styles))
+
+
+def test_style_hang():
+ """The sanitizer should not hang on any inline styles"""
+ # TODO: Neaten this up. It's copypasta from MDN/Kuma to repro the bug
+ style = ("""margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; """
+ """margin-left: 0px; padding-top: 15px; padding-right: 15px; """
+ """padding-bottom: 15px; padding-left: 15px; border-top-width: """
+ """1px; border-right-width: 1px; border-bottom-width: 1px; """
+ """border-left-width: 1px; border-top-style: dotted; """
+ """border-right-style: dotted; border-bottom-style: dotted; """
+ """border-left-style: dotted; border-top-color: rgb(203, 200, """
+ """185); border-right-color: rgb(203, 200, 185); """
+ """border-bottom-color: rgb(203, 200, 185); border-left-color: """
+ """rgb(203, 200, 185); background-image: initial; """
+ """background-attachment: initial; background-origin: initial; """
+ """background-clip: initial; background-color: """
+ """rgb(246, 246, 242); overflow-x: auto; overflow-y: auto; """
+ """font: normal normal normal 100%/normal 'Courier New', """
+ """'Andale Mono', monospace; background-position: initial """
+ """initial; background-repeat: initial initial;""")
+ html = '<p style="%s">Hello world</p>' % style
+ styles = [
+ 'border', 'float', 'overflow', 'min-height', 'vertical-align',
+ 'white-space',
+ 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right',
+ 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right',
+ 'background',
+ 'background-color',
+ 'font', 'font-size', 'font-weight', 'text-align', 'text-transform',
+ ]
+
+ expected = ("""<p style="margin-top: 0px; margin-right: 0px; """
+ """margin-bottom: 1.286em; margin-left: 0px; padding-top: """
+ """15px; padding-right: 15px; padding-bottom: 15px; """
+ """padding-left: 15px; background-color: """
+ """rgb(246, 246, 242); font: normal normal normal """
+ """100%/normal 'Courier New', 'Andale Mono', monospace;">"""
+ """Hello world</p>""")
+
+ result = clean(html, styles=styles)
+ eq_(expected, result)
diff --git a/bleach/tests/test_delinkify.py b/bleach/tests/test_delinkify.py
new file mode 100644
index 0000000..f216d2f
--- /dev/null
+++ b/bleach/tests/test_delinkify.py
@@ -0,0 +1,109 @@
+from nose.tools import eq_
+
+import bleach
+
+
+def test_delinkify():
+ eq_('test', bleach.delinkify('<a href="http://ex.mp">test</a>'))
+ eq_('footestbar',
+ bleach.delinkify('foo<a href="http://ex.mp">test</a>bar'))
+
+
+def test_whitelist():
+ html = '<a href="http://ex.mp">test</a>'
+ eq_(html, bleach.delinkify(html, allow_domains=['ex.mp']))
+ eq_('test', bleach.delinkify(html, allow_domains=['ex2.mp']))
+ # Allow a single domain as a special case.
+ eq_(html, bleach.delinkify(html, allow_domains='ex.mp'))
+
+
+def test_nested_a():
+ html = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>'
+ eq_('testtest', bleach.delinkify(html))
+ eq_('<a href="http://ex.mp">test</a>test',
+ bleach.delinkify(html, allow_domains=['ex.mp']))
+
+
+def test_nested_tag():
+ html = '<a href="http://ex.mp">test<span>test</span></a>'
+ eq_('test<span>test</span>', bleach.delinkify(html))
+
+
+def test_a_name():
+ """Don't screw with non-link <a> tags."""
+ html = '<a name="foo">bar</a>'
+ eq_(html, bleach.delinkify(html))
+
+
+def test_relative():
+ """Relative links are optionally OK."""
+ html = 'some <a href="/foo/bar">link</a>'
+ eq_('some link', bleach.delinkify(html))
+ eq_(html, bleach.delinkify(html, allow_relative=True))
+
+
+def test_protocol_relative():
+ """Protocol-relative links aren't relative."""
+ html = 'bad <a href="//ex.mp">link</a>'
+ expect = 'bad link'
+ eq_(expect, bleach.delinkify(html))
+ eq_(expect, bleach.delinkify(html, allow_relative=True))
+ eq_(html, bleach.delinkify(html, allow_domains='ex.mp'))
+
+
+def test_domain_match():
+ tests = (
+ ('ex.mp', 'ex.mp', True),
+ ('ex.mp', '*.ex.mp', True),
+ ('test.ex.mp', '*.ex.mp', True),
+ ('test.ex.mp', 'ex.mp', False),
+ ('test.test.ex.mp', '*.ex.mp', False),
+ ('test.test.ex.mp', '**.ex.mp', True),
+ ('wrong.mp', 'ex.mp', False),
+ ('wrong.mp', '*.ex.mp', False),
+ ('really.wrong.mp', 'ex.mp', False),
+ ('really.wrong.mp', '*.ex.mp', False),
+ ('really.very.wrong.mp', '*.ex.mp', False),
+ ('EX.mp', 'ex.mp', True), # Domains are case-insensitive.
+ ('ex.mp', 'an.ex.mp', False),
+ ('ex.mp', '*.an.ex.mp', False),
+ ('an.ex.am.pl', 'an.*.am.pl', True),
+ ('a.ex.am.pl', 'an.*.am.pl', False),
+ ('ex.am.pl', 'an.*.am.pl', False),
+ )
+
+ def _check(t, c, v):
+ eq_(v, bleach._domain_match(t, c))
+
+ for t, c, v in tests:
+ yield _check, t, c, v
+
+
+def test_double_star():
+ assert bleach._domain_match('ex.mp', '**.ex.mp')
+ try:
+ bleach._domain_match('ex.mp', 'an.**.ex.mp')
+ except bleach.ValidationError:
+ pass
+ else:
+ assert False, '_domain_match should not accept an.**.ex.mp'
+
+
+def test_allow_subdomains():
+ domains = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le')
+ html = (
+ ('<a href="http://an.ex.mp">bad</a>', 'bad'),
+ ('<a href="http://exa.mp">good</a>', None),
+ ('<a href="http://an.exa.mp">good</a>', None),
+ ('<a href="http://an.exam.pl">good</a>', None),
+ ('<a href="http://another.exam.pl">bad</a>', 'bad'),
+ ('<a href="http://a.bad.examp.le">bad</a>', 'bad'),
+ ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'),
+ )
+
+ def _check(html, text):
+ output = bleach.delinkify(html, allow_domains=domains)
+ eq_(html if text is None else text, output)
+
+ for t, o in html:
+ yield _check, t, o
diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py
new file mode 100644
index 0000000..7caf006
--- /dev/null
+++ b/bleach/tests/test_links.py
@@ -0,0 +1,312 @@
+import urllib
+
+from html5lib.tokenizer import HTMLTokenizer
+from nose.tools import eq_
+
+from bleach import linkify, url_re
+
+
+def filter_url(url):
+ return u'http://bouncer/?u=%s' % urllib.quote_plus(url)
+
+
+def test_url_re():
+ def no_match(s):
+ match = url_re.search(s)
+ if match:
+ assert not match, 'matched %s' % s[slice(*match.span())]
+ yield no_match, 'just what i am looking for...it'
+
+
+def test_empty():
+ eq_('', linkify(''))
+
+
+def test_simple_link():
+ eq_('a <a href="http://example.com" rel="nofollow">http://example.com'
+ '</a> link',
+ linkify('a http://example.com link'))
+ eq_('a <a href="https://example.com" rel="nofollow">https://example.com'
+ '</a> link',
+ linkify('a https://example.com link'))
+ eq_('an <a href="http://example.com" rel="nofollow">example.com</a> link',
+ linkify('an example.com link'))
+
+
+def test_trailing_slash():
+ eq_('<a href="http://example.com/" rel="nofollow">http://example.com/</a>',
+ linkify('http://example.com/'))
+ eq_('<a href="http://example.com/foo/" rel="nofollow">'
+ 'http://example.com/foo/</a>',
+ linkify('http://example.com/foo/'))
+ eq_('<a href="http://example.com/foo/bar/" rel="nofollow">'
+ 'http://example.com/foo/bar/</a>',
+ linkify('http://example.com/foo/bar/'))
+
+
+def test_mangle_link():
+ eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
+ 'http://example.com</a>',
+ linkify('http://example.com', filter_url=filter_url))
+
+
+def test_email_link():
+ eq_('a james@example.com mailto',
+ linkify('a james@example.com mailto'))
+ eq_('a james@example.com.au mailto',
+ linkify('a james@example.com.au mailto'))
+ eq_('a <a href="mailto:james@example.com" rel="nofollow">'
+ 'james@example.com</a> mailto',
+ linkify('a james@example.com mailto', parse_email=True))
+ eq_('aussie <a href="mailto:james@example.com.au" rel="nofollow">'
+ 'james@example.com.au</a> mailto',
+ linkify('aussie james@example.com.au mailto', parse_email=True))
+ eq_('email to <a href="james@example.com" rel="nofollow">'
+ 'james@example.com</a>',
+ linkify('email to <a href="james@example.com">'
+ 'james@example.com</a>', parse_email=True))
+
+
+def test_email_link_escaping():
+ eq_('''<a href='mailto:"james"@example.com' rel="nofollow">'''
+ '''"james"@example.com</a>''',
+ linkify('"james"@example.com', parse_email=True))
+ eq_('''<a href="mailto:&quot;j'ames&quot;@example.com" rel="nofollow">'''
+ '''"j'ames"@example.com</a>''',
+ linkify('"j\'ames"@example.com', parse_email=True))
+ eq_('''<a href='mailto:"ja>mes"@example.com' rel="nofollow">'''
+ '''"ja&gt;mes"@example.com</a>''',
+ linkify('"ja>mes"@example.com', parse_email=True))
+
+
+def test_tlds():
+ eq_('<a href="http://example.com" rel="nofollow">example.com</a>',
+ linkify('example.com'))
+ eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
+ linkify('example.co.uk'))
+ eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>',
+ linkify('example.edu'))
+ eq_('example.xxx', linkify('example.xxx'))
+ eq_(' brie', linkify(' brie'))
+ eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
+ linkify('bit.ly/fun'))
+
+
+def test_escaping():
+ eq_('&lt; unrelated', linkify('< unrelated'))
+
+
+def test_nofollow_off():
+ eq_('<a href="http://example.com">example.com</a>',
+ linkify(u'example.com', nofollow=False))
+
+
+def test_link_in_html():
+ eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
+ linkify('<i>http://yy.com</i>'))
+ eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com</a>'
+ '</strong></em>',
+ linkify('<em><strong>http://xx.com</strong></em>'))
+
+
+def test_links_https():
+ eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
+ linkify('https://yy.com'))
+
+
+def test_add_rel_nofollow():
+ """Verify that rel="nofollow" is added to an existing link"""
+ eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
+ linkify('<a href="http://yy.com">http://yy.com</a>'))
+
+
+def test_url_with_path():
+ eq_('<a href="http://example.com/path/to/file" rel="nofollow">'
+ 'http://example.com/path/to/file</a>',
+ linkify('http://example.com/path/to/file'))
+
+
+def test_link_ftp():
+ eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
+ 'ftp://ftp.mozilla.org/some/file</a>',
+ linkify('ftp://ftp.mozilla.org/some/file'))
+
+
+def test_link_query():
+ eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
+ 'http://xx.com/?test=win</a>',
+ linkify('http://xx.com/?test=win'))
+ eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
+ 'xx.com/?test=win</a>',
+ linkify('xx.com/?test=win'))
+ eq_('<a href="http://xx.com?test=win" rel="nofollow">'
+ 'xx.com?test=win</a>',
+ linkify('xx.com?test=win'))
+
+
+def test_link_fragment():
+ eq_('<a href="http://xx.com/path#frag" rel="nofollow">'
+ 'http://xx.com/path#frag</a>',
+ linkify('http://xx.com/path#frag'))
+
+
+def test_link_entities():
+ eq_('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
+ 'http://xx.com/?a=1&amp;b=2</a>',
+ linkify('http://xx.com/?a=1&b=2'))
+
+
+def test_escaped_html():
+ """If I pass in escaped HTML, it should probably come out escaped."""
+ s = '&lt;em&gt;strong&lt;/em&gt;'
+ eq_(s, linkify(s))
+
+
+def test_link_http_complete():
+ eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d'
+ '&amp;e#f" rel="nofollow">'
+ 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>',
+ linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'))
+
+
+def test_non_url():
+ """document.vulnerable should absolutely not be linkified."""
+ s = 'document.vulnerable'
+ eq_(s, linkify(s))
+
+
+def test_javascript_url():
+ """javascript: urls should never be linkified."""
+ s = 'javascript:document.vulnerable'
+ eq_(s, linkify(s))
+
+
+def test_unsafe_url():
+ """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning."""
+ eq_('All your{"<a href="http://xx.yy.com/grover.png" '
+ 'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
+ linkify('All your{"xx.yy.com/grover.png"}base are'))
+
+
+def test_skip_pre():
+ """Skip linkification in <pre> tags."""
+ simple = 'http://xx.com <pre>http://xx.com</pre>'
+ linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
+ '<pre>http://xx.com</pre>')
+ all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
+ '<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
+ '</a></pre>')
+ eq_(linked, linkify(simple, skip_pre=True))
+ eq_(all_linked, linkify(simple))
+
+ already_linked = '<pre><a href="http://xx.com">xx</a></pre>'
+ nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>'
+ eq_(nofollowed, linkify(already_linked))
+ eq_(nofollowed, linkify(already_linked, skip_pre=True))
+
+
+def test_libgl():
+ """libgl.so.1 should not be linkified."""
+ eq_('libgl.so.1', linkify('libgl.so.1'))
+
+
+def test_end_of_sentence():
+ """example.com. should match."""
+ out = u'<a href="http://%s" rel="nofollow">%s</a>%s'
+ in_ = u'%s%s'
+
+ def check(u, p):
+ eq_(out % (u, u, p), linkify(in_ % (u, p)))
+
+ tests = (
+ ('example.com', '.'),
+ ('example.com', '...'),
+ ('ex.com/foo', '.'),
+ ('ex.com/foo', '....'),
+ )
+
+ for u, p in tests:
+ yield check, u, p
+
+
+def test_end_of_clause():
+ """example.com/foo, shouldn't include the ,"""
+ eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
+ linkify('ex.com/foo, bar'))
+
+
+def test_sarcasm():
+ """Jokes should crash.<sarcasm/>"""
+ dirty = u'Yeah right <sarcasm/>'
+ clean = u'Yeah right &lt;sarcasm/&gt;'
+ eq_(clean, linkify(dirty))
+
+
+def test_wrapping_parentheses():
+ """URLs wrapped in parentheses should not include them."""
+ out = u'%s<a href="http://%s" rel="nofollow">%s</a>%s'
+
+ tests = (
+ ('(example.com)', out % ('(', 'example.com', 'example.com', ')')),
+ ('(example.com/)', out % ('(', 'example.com/', 'example.com/', ')')),
+ ('(example.com/foo)', out % ('(', 'example.com/foo',
+ 'example.com/foo', ')')),
+ ('(((example.com/))))', out % ('(((', 'example.com/)',
+ 'example.com/)', ')))')),
+ ('example.com/))', out % ('', 'example.com/))',
+ 'example.com/))', '')),
+ ('http://en.wikipedia.org/wiki/Test_(assessment)',
+ out % ('', 'en.wikipedia.org/wiki/Test_(assessment)',
+ 'http://en.wikipedia.org/wiki/Test_(assessment)', '')),
+ ('(http://en.wikipedia.org/wiki/Test_(assessment))',
+ out % ('(', 'en.wikipedia.org/wiki/Test_(assessment)',
+ 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')),
+ ('((http://en.wikipedia.org/wiki/Test_(assessment))',
+ out % ('((', 'en.wikipedia.org/wiki/Test_(assessment',
+ 'http://en.wikipedia.org/wiki/Test_(assessment', '))')),
+ ('(http://en.wikipedia.org/wiki/Test_(assessment)))',
+ out % ('(', 'en.wikipedia.org/wiki/Test_(assessment))',
+ 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')),
+ ('(http://en.wikipedia.org/wiki/)Test_(assessment',
+ out % ('(', 'en.wikipedia.org/wiki/)Test_(assessment',
+ 'http://en.wikipedia.org/wiki/)Test_(assessment', '')),
+ )
+
+ def check(test, expected_output):
+ eq_(expected_output, linkify(test))
+
+ for test, expected_output in tests:
+ yield check, test, expected_output
+
+
+def test_ports():
+ """URLs can contain port numbers."""
+ tests = (
+ ('http://foo.com:8000', ('http://foo.com:8000', '')),
+ ('http://foo.com:8000/', ('http://foo.com:8000/', '')),
+ ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')),
+ ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')),
+ ('http://foo.com:', ('http://foo.com', ':')),
+ )
+
+ def check(test, output):
+ eq_(u'<a href="{0}" rel="nofollow">{0}</a>{1}'.format(*output),
+ linkify(test))
+
+ for test, output in tests:
+ yield check, test, output
+
+
+def test_target():
+ eq_('<a href="http://example.com" rel="nofollow" '
+ 'target="_blank">example.com</a>',
+ linkify(u'example.com', target='_blank'))
+ eq_('<a href="http://example.com" target="_blank">example.com</a>',
+ linkify(u'example.com', target='_blank', nofollow=False))
+
+
+def test_tokenizer():
+ """Linkify doesn't always have to sanitize."""
+ raw = '<em>test<x></x></em>'
+ eq_('<em>test&lt;x&gt;&lt;/x&gt;</em>', linkify(raw))
+ eq_(raw, linkify(raw, tokenizer=HTMLTokenizer))
diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py
new file mode 100644
index 0000000..9e9bb7b
--- /dev/null
+++ b/bleach/tests/test_security.py
@@ -0,0 +1,108 @@
+"""More advanced security tests"""
+
+from nose.tools import eq_
+
+from bleach import clean
+
+
+def test_nested_script_tag():
+ eq_('&lt;&lt;script&gt;script&gt;evil()&lt;&lt;/script&gt;/script&gt;',
+ clean('<<script>script>evil()<</script>/script>'))
+ eq_('&lt;&lt;x&gt;script&gt;evil()&lt;&lt;/x&gt;/script&gt;',
+ clean('<<x>script>evil()<</x>/script>'))
+
+
+def test_nested_script_tag_r():
+ eq_('&lt;script&lt;script&gt;&gt;evil()&lt;/script&lt;&gt;&gt;',
+ clean('<script<script>>evil()</script</script>>'))
+
+
+def test_invalid_attr():
+ IMG = ['img', ]
+ IMG_ATTR = ['src']
+
+ eq_('<a href="test">test</a>',
+ clean('<a onclick="evil" href="test">test</a>'))
+ eq_('<img src="test">',
+ clean('<img onclick="evil" src="test" />',
+ tags=IMG, attributes=IMG_ATTR))
+ eq_('<img src="test">',
+ clean('<img href="invalid" src="test" />',
+ tags=IMG, attributes=IMG_ATTR))
+
+
+def test_unquoted_attr():
+ eq_('<abbr title="mytitle">myabbr</abbr>',
+ clean('<abbr title=mytitle>myabbr</abbr>'))
+
+
+def test_unquoted_event_handler():
+ eq_('<a href="http://xx.com">xx.com</a>',
+ clean('<a href="http://xx.com" onclick=foo()>xx.com</a>'))
+
+
+def test_invalid_attr_value():
+ eq_('&lt;img src="javascript:alert(\'XSS\');"&gt;',
+ clean('<img src="javascript:alert(\'XSS\');">'))
+
+
+def test_invalid_href_attr():
+ eq_('<a>xss</a>',
+ clean('<a href="javascript:alert(\'XSS\')">xss</a>'))
+
+
+def test_invalid_filter_attr():
+ IMG = ['img', ]
+ IMG_ATTR = {'img': lambda n, v: n == 'src' and v == "http://example.com/"}
+
+ eq_('<img src="http://example.com/">',
+ clean('<img onclick="evil" src="http://example.com/" />',
+ tags=IMG, attributes=IMG_ATTR))
+
+ eq_('<img>', clean('<img onclick="evil" src="http://badhost.com/" />',
+ tags=IMG, attributes=IMG_ATTR))
+
+
+def test_invalid_tag_char():
+ eq_('&lt;script xss="" src="http://xx.com/xss.js"&gt;&lt;/script&gt;',
+ clean('<script/xss src="http://xx.com/xss.js"></script>'))
+ eq_('&lt;script src="http://xx.com/xss.js"&gt;&lt;/script&gt;',
+ clean('<script/src="http://xx.com/xss.js"></script>'))
+
+
+def test_unclosed_tag():
+ eq_('&lt;script src="http://xx.com/xss.js&amp;lt;b"&gt;',
+ clean('<script src=http://xx.com/xss.js<b>'))
+ eq_('&lt;script src="http://xx.com/xss.js" &lt;b=""&gt;',
+ clean('<script src="http://xx.com/xss.js"<b>'))
+ eq_('&lt;script src="http://xx.com/xss.js" &lt;b=""&gt;',
+ clean('<script src="http://xx.com/xss.js" <b>'))
+
+
+def test_strip():
+ """Using strip=True shouldn't result in malicious content."""
+ s = '<scri<script>pt>alert(1)</scr</script>ipt>'
+ eq_('pt&gt;alert(1)ipt&gt;', clean(s, strip=True))
+ s = '<scri<scri<script>pt>pt>alert(1)</script>'
+ eq_('pt&gt;pt&gt;alert(1)', clean(s, strip=True))
+
+
+def test_nasty():
+ """Nested, broken up, multiple tags, are still foiled!"""
+ test = ('<scr<script></script>ipt type="text/javascript">alert("foo");</'
+ '<script></script>script<del></del>>')
+ expect = (u'&lt;scr&lt;script&gt;&lt;/script&gt;ipt type="text/javascript"'
+ u'&gt;alert("foo");&lt;/script&gt;script&lt;del&gt;&lt;/del&gt;'
+ u'&gt;')
+ eq_(expect, clean(test))
+
+
+def test_poster_attribute():
+ """Poster attributes should not allow javascript."""
+ tags = ['video']
+ attrs = {'video': ['poster']}
+ test = '<video poster="javascript:alert(1)"></video>'
+ expect = '<video></video>'
+ eq_(expect, clean(test, tags=tags, attributes=attrs))
+ ok = '<video poster="/foo.png"></video>'
+ eq_(ok, clean(ok, tags=tags, attributes=attrs))
diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py
new file mode 100644
index 0000000..67123cc
--- /dev/null
+++ b/bleach/tests/test_unicode.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+from nose.tools import eq_
+
+from bleach import clean, linkify
+
+
+def test_japanese_safe_simple():
+ eq_(u'ヘルプとチュートリアル', clean(u'ヘルプとチュートリアル'))
+ eq_(u'ヘルプとチュートリアル', linkify(u'ヘルプとチュートリアル'))
+
+
+def test_japanese_strip():
+ eq_(u'<em>ヘルプとチュートリアル</em>',
+ clean(u'<em>ヘルプとチュートリアル</em>'))
+ eq_(u'&lt;span&gt;ヘルプとチュートリアル&lt;/span&gt;',
+ clean(u'<span>ヘルプとチュートリアル</span>'))
+
+
+def test_russian_simple():
+ eq_(u'Домашняя', clean(u'Домашняя'))
+ eq_(u'Домашняя', linkify(u'Домашняя'))
+
+
+def test_mixed():
+ eq_(u'Домашняяヘルプとチュートリアル',
+ clean(u'Домашняяヘルプとチュートリアル'))
+
+
+def test_mixed_linkify():
+ eq_(u'Домашняя <a href="http://example.com" rel="nofollow">'
+ u'http://example.com</a> ヘルプとチュートリアル',
+ linkify(u'Домашняя http://example.com ヘルプとチュートリアル'))
+
+
+def test_url_utf8():
+ """Allow UTF8 characters in URLs themselves."""
+ out = u'<a href="%(url)s" rel="nofollow">%(url)s</a>'
+
+ tests = (
+ ('http://éxámplé.com/', out % {'url': u'http://éxámplé.com/'}),
+ ('http://éxámplé.com/íàñá/',
+ out % {'url': u'http://éxámplé.com/íàñá/'}),
+ ('http://éxámplé.com/íàñá/?foo=bar',
+ out % {'url': u'http://éxámplé.com/íàñá/?foo=bar'}),
+ ('http://éxámplé.com/íàñá/?fóo=bár',
+ out % {'url': u'http://éxámplé.com/íàñá/?fóo=bár'}),
+ )
+
+ def check(test, expected_output):
+ eq_(expected_output, linkify(test))
+
+ for test, expected_output in tests:
+ yield check, test, expected_output