diff options
author | Per Andersson <avtobiff@gmail.com> | 2013-06-09 19:45:54 +0200 |
---|---|---|
committer | Per Andersson <avtobiff@gmail.com> | 2013-06-09 19:45:54 +0200 |
commit | fac84c6d90e0875e6c1b10c5ef02d577ee008af4 (patch) | |
tree | 4080efdb87c814d5dc409e9e87aa449f4b273ff3 /bleach/tests | |
parent | 38dc3b8f231cf36bcc771001318556d9e84c2889 (diff) | |
download | python-bleach-fac84c6d90e0875e6c1b10c5ef02d577ee008af4.tar python-bleach-fac84c6d90e0875e6c1b10c5ef02d577ee008af4.tar.gz |
Imported Upstream version 1.2.2 (tag: upstream/1.2.2)
Diffstat (limited to 'bleach/tests')
-rw-r--r-- | bleach/tests/test_basics.py | 5 | ||||
-rw-r--r-- | bleach/tests/test_css.py | 16 | ||||
-rw-r--r-- | bleach/tests/test_delinkify.py | 109 | ||||
-rw-r--r-- | bleach/tests/test_links.py | 200 | ||||
-rw-r--r-- | bleach/tests/test_security.py | 4 |
5 files changed, 184 insertions, 150 deletions
diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py index 60be11d..9eca687 100644 --- a/bleach/tests/test_basics.py +++ b/bleach/tests/test_basics.py @@ -8,6 +8,10 @@ def test_empty(): eq_('', bleach.clean('')) +def test_nbsp(): + eq_(u'\xa0test string\xa0', bleach.clean(' test string ')) + + def test_comments_only(): comment = '<!-- this is a comment -->' open_comment = '<!-- this is an open comment' @@ -91,7 +95,6 @@ def test_serializer(): def test_no_href_links(): s = u'<a name="anchor">x</a>' eq_(s, bleach.linkify(s)) - eq_(s, bleach.linkify(s, nofollow=False)) def test_weird_strings(): diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py index fdb3f65..588c8ce 100644 --- a/bleach/tests/test_css.py +++ b/bleach/tests/test_css.py @@ -22,13 +22,21 @@ def test_allowed_css(): ('cursor: -moz-grab;', 'cursor: -moz-grab;', ['cursor']), ('color: hsl(30,100%,50%);', 'color: hsl(30,100%,50%);', ['color']), ('color: rgba(255,0,0,0.4);', 'color: rgba(255,0,0,0.4);', ['color']), - ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", ['text-overflow']), + ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", + ['text-overflow']), + ('text-overflow: "," ellipsis;', 'text-overflow: "," ellipsis;', + ['text-overflow']), + ('font-family: "Arial";', 'font-family: "Arial";', ['font-family']), ) - p = '<p style="%s">bar</p>' + p_single = '<p style="%s">bar</p>' + p_double = "<p style='%s'>bar</p>" - def check(input, output, styles): - eq_(p % output, clean(p % input, styles=styles)) + def check(i, o, s): + if '"' in i: + eq_(p_double % o, clean(p_double % i, styles=s)) + else: + eq_(p_single % o, clean(p_single % i, styles=s)) for i, o, s in tests: yield check, i, o, s diff --git a/bleach/tests/test_delinkify.py b/bleach/tests/test_delinkify.py deleted file mode 100644 index f216d2f..0000000 --- a/bleach/tests/test_delinkify.py +++ /dev/null @@ -1,109 +0,0 @@ -from nose.tools import eq_ - -import bleach - - -def 
test_delinkify(): - eq_('test', bleach.delinkify('<a href="http://ex.mp">test</a>')) - eq_('footestbar', - bleach.delinkify('foo<a href="http://ex.mp">test</a>bar')) - - -def test_whitelist(): - html = '<a href="http://ex.mp">test</a>' - eq_(html, bleach.delinkify(html, allow_domains=['ex.mp'])) - eq_('test', bleach.delinkify(html, allow_domains=['ex2.mp'])) - # Allow a single domain as a special case. - eq_(html, bleach.delinkify(html, allow_domains='ex.mp')) - - -def test_nested_a(): - html = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>' - eq_('testtest', bleach.delinkify(html)) - eq_('<a href="http://ex.mp">test</a>test', - bleach.delinkify(html, allow_domains=['ex.mp'])) - - -def test_nested_tag(): - html = '<a href="http://ex.mp">test<span>test</span></a>' - eq_('test<span>test</span>', bleach.delinkify(html)) - - -def test_a_name(): - """Don't screw with non-link <a> tags.""" - html = '<a name="foo">bar</a>' - eq_(html, bleach.delinkify(html)) - - -def test_relative(): - """Relative links are optionally OK.""" - html = 'some <a href="/foo/bar">link</a>' - eq_('some link', bleach.delinkify(html)) - eq_(html, bleach.delinkify(html, allow_relative=True)) - - -def test_protocol_relative(): - """Protocol-relative links aren't relative.""" - html = 'bad <a href="//ex.mp">link</a>' - expect = 'bad link' - eq_(expect, bleach.delinkify(html)) - eq_(expect, bleach.delinkify(html, allow_relative=True)) - eq_(html, bleach.delinkify(html, allow_domains='ex.mp')) - - -def test_domain_match(): - tests = ( - ('ex.mp', 'ex.mp', True), - ('ex.mp', '*.ex.mp', True), - ('test.ex.mp', '*.ex.mp', True), - ('test.ex.mp', 'ex.mp', False), - ('test.test.ex.mp', '*.ex.mp', False), - ('test.test.ex.mp', '**.ex.mp', True), - ('wrong.mp', 'ex.mp', False), - ('wrong.mp', '*.ex.mp', False), - ('really.wrong.mp', 'ex.mp', False), - ('really.wrong.mp', '*.ex.mp', False), - ('really.very.wrong.mp', '*.ex.mp', False), - ('EX.mp', 'ex.mp', True), # Domains are 
case-insensitive. - ('ex.mp', 'an.ex.mp', False), - ('ex.mp', '*.an.ex.mp', False), - ('an.ex.am.pl', 'an.*.am.pl', True), - ('a.ex.am.pl', 'an.*.am.pl', False), - ('ex.am.pl', 'an.*.am.pl', False), - ) - - def _check(t, c, v): - eq_(v, bleach._domain_match(t, c)) - - for t, c, v in tests: - yield _check, t, c, v - - -def test_double_star(): - assert bleach._domain_match('ex.mp', '**.ex.mp') - try: - bleach._domain_match('ex.mp', 'an.**.ex.mp') - except bleach.ValidationError: - pass - else: - assert False, '_domain_match should not accept an.**.ex.mp' - - -def test_allow_subdomains(): - domains = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le') - html = ( - ('<a href="http://an.ex.mp">bad</a>', 'bad'), - ('<a href="http://exa.mp">good</a>', None), - ('<a href="http://an.exa.mp">good</a>', None), - ('<a href="http://an.exam.pl">good</a>', None), - ('<a href="http://another.exam.pl">bad</a>', 'bad'), - ('<a href="http://a.bad.examp.le">bad</a>', 'bad'), - ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'), - ) - - def _check(html, text): - output = bleach.delinkify(html, allow_domains=domains) - eq_(html if text is None else text, output) - - for t, o in html: - yield _check, t, o diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py index 7caf006..ac593c4 100644 --- a/bleach/tests/test_links.py +++ b/bleach/tests/test_links.py @@ -3,11 +3,9 @@ import urllib from html5lib.tokenizer import HTMLTokenizer from nose.tools import eq_ -from bleach import linkify, url_re +from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC -def filter_url(url): - return u'http://bouncer/?u=%s' % urllib.quote_plus(url) def test_url_re(): @@ -45,38 +43,139 @@ def test_trailing_slash(): def test_mangle_link(): + """We can muck with the href attribute of the link.""" + def filter_url(attrs, new=False): + attrs['href'] = (u'http://bouncer/?u=%s' % + urllib.quote_plus(attrs['href'])) + return attrs + eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" 
rel="nofollow">' 'http://example.com</a>', - linkify('http://example.com', filter_url=filter_url)) + linkify('http://example.com', DC + [filter_url])) + + +def test_mangle_text(): + """We can muck with the inner text of a link.""" + + def ft(attrs, new=False): + attrs['_text'] = 'bar' + return attrs + + eq_('<a href="http://ex.mp">bar</a> <a href="http://ex.mp/foo">bar</a>', + linkify('http://ex.mp <a href="http://ex.mp/foo">foo</a>', [ft])) def test_email_link(): - eq_('a james@example.com mailto', - linkify('a james@example.com mailto')) - eq_('a james@example.com.au mailto', - linkify('a james@example.com.au mailto')) - eq_('a <a href="mailto:james@example.com" rel="nofollow">' - 'james@example.com</a> mailto', - linkify('a james@example.com mailto', parse_email=True)) - eq_('aussie <a href="mailto:james@example.com.au" rel="nofollow">' - 'james@example.com.au</a> mailto', - linkify('aussie james@example.com.au mailto', parse_email=True)) - eq_('email to <a href="james@example.com" rel="nofollow">' - 'james@example.com</a>', - linkify('email to <a href="james@example.com">' - 'james@example.com</a>', parse_email=True)) + tests = ( + ('a james@example.com mailto', False, 'a james@example.com mailto'), + ('a james@example.com.au mailto', False, + 'a james@example.com.au mailto'), + ('a <a href="mailto:james@example.com">james@example.com</a> mailto', + True, 'a james@example.com mailto'), + ('aussie <a href="mailto:james@example.com.au">' + 'james@example.com.au</a> mailto', True, + 'aussie james@example.com.au mailto'), + # This is kind of a pathological case. I guess we do our best here. 
+ ('email to <a href="james@example.com" rel="nofollow">' + 'james@example.com</a>', True, + 'email to <a href="james@example.com">james@example.com</a>'), + ) + + def _check(o, p, i): + eq_(o, linkify(i, parse_email=p)) + + for (o, p, i) in tests: + yield _check, o, p, i def test_email_link_escaping(): - eq_('''<a href='mailto:"james"@example.com' rel="nofollow">''' - '''"james"@example.com</a>''', - linkify('"james"@example.com', parse_email=True)) - eq_('''<a href="mailto:"j'ames"@example.com" rel="nofollow">''' - '''"j'ames"@example.com</a>''', - linkify('"j\'ames"@example.com', parse_email=True)) - eq_('''<a href='mailto:"ja>mes"@example.com' rel="nofollow">''' - '''"ja>mes"@example.com</a>''', - linkify('"ja>mes"@example.com', parse_email=True)) + tests = ( + ('''<a href='mailto:"james"@example.com'>''' + '''"james"@example.com</a>''', + '"james"@example.com'), + ('''<a href="mailto:"j'ames"@example.com">''' + '''"j'ames"@example.com</a>''', + '"j\'ames"@example.com'), + ('''<a href='mailto:"ja>mes"@example.com'>''' + '''"ja>mes"@example.com</a>''', + '"ja>mes"@example.com'), + ) + + def _check(o, i): + eq_(o, linkify(i, parse_email=True)) + + for (o, i) in tests: + yield _check, o, i + + +def test_prevent_links(): + """Returning None from any callback should remove links or prevent them + from being created.""" + + def no_new_links(attrs, new=False): + if new: + return None + return attrs + + def no_old_links(attrs, new=False): + if not new: + return None + return attrs + + def noop(attrs, new=False): + return attrs + + in_text = 'a ex.mp <a href="http://example.com">example</a>' + out_text = 'a <a href="http://ex.mp">ex.mp</a> example' + tests = ( + ([noop], ('a <a href="http://ex.mp">ex.mp</a> ' + '<a href="http://example.com">example</a>'), 'noop'), + ([no_new_links, noop], in_text, 'no new, noop'), + ([noop, no_new_links], in_text, 'noop, no new'), + ([no_old_links, noop], out_text, 'no old, noop'), + ([noop, no_old_links], out_text, 'noop, no old'), + 
([no_old_links, no_new_links], 'a ex.mp example', 'no links'), + ) + + def _check(cb, o, msg): + eq_(o, linkify(in_text, cb), msg) + + for (cb, o, msg) in tests: + yield _check, cb, o, msg + + +def test_set_attrs(): + """We can set random attributes on links.""" + + def set_attr(attrs, new=False): + attrs['rev'] = 'canonical' + return attrs + + eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>', + linkify('ex.mp', [set_attr])) + + +def test_only_proto_links(): + """Only create links if there's a protocol.""" + def only_proto(attrs, new=False): + if new and not attrs['_text'].startswith(('http:', 'https:')): + return None + return attrs + + in_text = 'a ex.mp http://ex.mp <a href="/foo">bar</a>' + out_text = ('a ex.mp <a href="http://ex.mp">http://ex.mp</a> ' + '<a href="/foo">bar</a>') + eq_(out_text, linkify(in_text, [only_proto])) + + +def test_stop_email(): + """Returning None should prevent a link from being created.""" + def no_email(attrs, new=False): + if attrs['href'].startswith('mailto:'): + return None + return attrs + text = 'do not link james@example.com' + eq_(text, linkify(text, parse_email=True, callbacks=[no_email])) def test_tlds(): @@ -98,7 +197,7 @@ def test_escaping(): def test_nofollow_off(): eq_('<a href="http://example.com">example.com</a>', - linkify(u'example.com', nofollow=False)) + linkify(u'example.com', [])) def test_link_in_html(): @@ -297,16 +396,45 @@ def test_ports(): yield check, test, output -def test_target(): - eq_('<a href="http://example.com" rel="nofollow" ' - 'target="_blank">example.com</a>', - linkify(u'example.com', target='_blank')) - eq_('<a href="http://example.com" target="_blank">example.com</a>', - linkify(u'example.com', target='_blank', nofollow=False)) - - def test_tokenizer(): """Linkify doesn't always have to sanitize.""" raw = '<em>test<x></x></em>' eq_('<em>test<x></x></em>', linkify(raw)) eq_(raw, linkify(raw, tokenizer=HTMLTokenizer)) + + +def test_ignore_bad_protocols(): + eq_('foohttp://bar', + 
linkify('foohttp://bar')) + eq_('foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>', + linkify('foohttp://exampl.com')) + + +def test_max_recursion_depth(): + """If we hit the max recursion depth, just return the string.""" + test = '<em>' * 2000 + 'foo' + '</em>' * 2000 + eq_(test, linkify(test)) + + +def test_link_emails_and_urls(): + """parse_email=True shouldn't prevent URLs from getting linkified.""" + output = ('<a href="http://example.com" rel="nofollow">' + 'http://example.com</a> <a href="mailto:person@example.com">' + 'person@example.com</a>') + eq_(output, linkify('http://example.com person@example.com', + parse_email=True)) + + +def test_links_case_insensitive(): + """Protocols and domain names are case insensitive.""" + expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">' + 'HTTP://EXAMPLE.COM</a>') + eq_(expect, linkify('HTTP://EXAMPLE.COM')) + + +def test_elements_inside_links(): + eq_(u'<a href="#" rel="nofollow">hello<br></a>', + linkify('<a href="#">hello<br></a>')) + + eq_(u'<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>', + linkify('<a href="#"><strong>bold</strong> hello<br></a>')) diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py index 9e9bb7b..6c2b33f 100644 --- a/bleach/tests/test_security.py +++ b/bleach/tests/test_security.py @@ -106,3 +106,7 @@ def test_poster_attribute(): eq_(expect, clean(test, tags=tags, attributes=attrs)) ok = '<video poster="/foo.png"></video>' eq_(ok, clean(ok, tags=tags, attributes=attrs)) + + +def test_feed_protocol(): + eq_('<a>foo</a>', clean('<a href="feed:file:///tmp/foo">foo</a>')) |