summary | refs | log | tree | commit | diff
path: root/bleach/tests
diff options
context:
space:
mode:
author: Per Andersson <avtobiff@gmail.com> 2013-06-09 19:45:54 +0200
committer: Per Andersson <avtobiff@gmail.com> 2013-06-09 19:45:54 +0200
commit: fac84c6d90e0875e6c1b10c5ef02d577ee008af4 (patch)
tree: 4080efdb87c814d5dc409e9e87aa449f4b273ff3 /bleach/tests
parent: 38dc3b8f231cf36bcc771001318556d9e84c2889 (diff)
download: python-bleach-fac84c6d90e0875e6c1b10c5ef02d577ee008af4.tar
python-bleach-fac84c6d90e0875e6c1b10c5ef02d577ee008af4.tar.gz
Imported Upstream version 1.2.2 [upstream/1.2.2]
Diffstat (limited to 'bleach/tests')
-rw-r--r-- bleach/tests/test_basics.py 5
-rw-r--r-- bleach/tests/test_css.py 16
-rw-r--r-- bleach/tests/test_delinkify.py 109
-rw-r--r-- bleach/tests/test_links.py 200
-rw-r--r-- bleach/tests/test_security.py 4
5 files changed, 184 insertions, 150 deletions
diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py
index 60be11d..9eca687 100644
--- a/bleach/tests/test_basics.py
+++ b/bleach/tests/test_basics.py
@@ -8,6 +8,10 @@ def test_empty():
eq_('', bleach.clean(''))
+def test_nbsp():
+ eq_(u'\xa0test string\xa0', bleach.clean('&nbsp;test string&nbsp;'))
+
+
def test_comments_only():
comment = '<!-- this is a comment -->'
open_comment = '<!-- this is an open comment'
@@ -91,7 +95,6 @@ def test_serializer():
def test_no_href_links():
s = u'<a name="anchor">x</a>'
eq_(s, bleach.linkify(s))
- eq_(s, bleach.linkify(s, nofollow=False))
def test_weird_strings():
diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py
index fdb3f65..588c8ce 100644
--- a/bleach/tests/test_css.py
+++ b/bleach/tests/test_css.py
@@ -22,13 +22,21 @@ def test_allowed_css():
('cursor: -moz-grab;', 'cursor: -moz-grab;', ['cursor']),
('color: hsl(30,100%,50%);', 'color: hsl(30,100%,50%);', ['color']),
('color: rgba(255,0,0,0.4);', 'color: rgba(255,0,0,0.4);', ['color']),
- ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", ['text-overflow']),
+ ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;",
+ ['text-overflow']),
+ ('text-overflow: "," ellipsis;', 'text-overflow: "," ellipsis;',
+ ['text-overflow']),
+ ('font-family: "Arial";', 'font-family: "Arial";', ['font-family']),
)
- p = '<p style="%s">bar</p>'
+ p_single = '<p style="%s">bar</p>'
+ p_double = "<p style='%s'>bar</p>"
- def check(input, output, styles):
- eq_(p % output, clean(p % input, styles=styles))
+ def check(i, o, s):
+ if '"' in i:
+ eq_(p_double % o, clean(p_double % i, styles=s))
+ else:
+ eq_(p_single % o, clean(p_single % i, styles=s))
for i, o, s in tests:
yield check, i, o, s
diff --git a/bleach/tests/test_delinkify.py b/bleach/tests/test_delinkify.py
deleted file mode 100644
index f216d2f..0000000
--- a/bleach/tests/test_delinkify.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from nose.tools import eq_
-
-import bleach
-
-
-def test_delinkify():
- eq_('test', bleach.delinkify('<a href="http://ex.mp">test</a>'))
- eq_('footestbar',
- bleach.delinkify('foo<a href="http://ex.mp">test</a>bar'))
-
-
-def test_whitelist():
- html = '<a href="http://ex.mp">test</a>'
- eq_(html, bleach.delinkify(html, allow_domains=['ex.mp']))
- eq_('test', bleach.delinkify(html, allow_domains=['ex2.mp']))
- # Allow a single domain as a special case.
- eq_(html, bleach.delinkify(html, allow_domains='ex.mp'))
-
-
-def test_nested_a():
- html = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>'
- eq_('testtest', bleach.delinkify(html))
- eq_('<a href="http://ex.mp">test</a>test',
- bleach.delinkify(html, allow_domains=['ex.mp']))
-
-
-def test_nested_tag():
- html = '<a href="http://ex.mp">test<span>test</span></a>'
- eq_('test<span>test</span>', bleach.delinkify(html))
-
-
-def test_a_name():
- """Don't screw with non-link <a> tags."""
- html = '<a name="foo">bar</a>'
- eq_(html, bleach.delinkify(html))
-
-
-def test_relative():
- """Relative links are optionally OK."""
- html = 'some <a href="/foo/bar">link</a>'
- eq_('some link', bleach.delinkify(html))
- eq_(html, bleach.delinkify(html, allow_relative=True))
-
-
-def test_protocol_relative():
- """Protocol-relative links aren't relative."""
- html = 'bad <a href="//ex.mp">link</a>'
- expect = 'bad link'
- eq_(expect, bleach.delinkify(html))
- eq_(expect, bleach.delinkify(html, allow_relative=True))
- eq_(html, bleach.delinkify(html, allow_domains='ex.mp'))
-
-
-def test_domain_match():
- tests = (
- ('ex.mp', 'ex.mp', True),
- ('ex.mp', '*.ex.mp', True),
- ('test.ex.mp', '*.ex.mp', True),
- ('test.ex.mp', 'ex.mp', False),
- ('test.test.ex.mp', '*.ex.mp', False),
- ('test.test.ex.mp', '**.ex.mp', True),
- ('wrong.mp', 'ex.mp', False),
- ('wrong.mp', '*.ex.mp', False),
- ('really.wrong.mp', 'ex.mp', False),
- ('really.wrong.mp', '*.ex.mp', False),
- ('really.very.wrong.mp', '*.ex.mp', False),
- ('EX.mp', 'ex.mp', True), # Domains are case-insensitive.
- ('ex.mp', 'an.ex.mp', False),
- ('ex.mp', '*.an.ex.mp', False),
- ('an.ex.am.pl', 'an.*.am.pl', True),
- ('a.ex.am.pl', 'an.*.am.pl', False),
- ('ex.am.pl', 'an.*.am.pl', False),
- )
-
- def _check(t, c, v):
- eq_(v, bleach._domain_match(t, c))
-
- for t, c, v in tests:
- yield _check, t, c, v
-
-
-def test_double_star():
- assert bleach._domain_match('ex.mp', '**.ex.mp')
- try:
- bleach._domain_match('ex.mp', 'an.**.ex.mp')
- except bleach.ValidationError:
- pass
- else:
- assert False, '_domain_match should not accept an.**.ex.mp'
-
-
-def test_allow_subdomains():
- domains = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le')
- html = (
- ('<a href="http://an.ex.mp">bad</a>', 'bad'),
- ('<a href="http://exa.mp">good</a>', None),
- ('<a href="http://an.exa.mp">good</a>', None),
- ('<a href="http://an.exam.pl">good</a>', None),
- ('<a href="http://another.exam.pl">bad</a>', 'bad'),
- ('<a href="http://a.bad.examp.le">bad</a>', 'bad'),
- ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'),
- )
-
- def _check(html, text):
- output = bleach.delinkify(html, allow_domains=domains)
- eq_(html if text is None else text, output)
-
- for t, o in html:
- yield _check, t, o
diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py
index 7caf006..ac593c4 100644
--- a/bleach/tests/test_links.py
+++ b/bleach/tests/test_links.py
@@ -3,11 +3,9 @@ import urllib
from html5lib.tokenizer import HTMLTokenizer
from nose.tools import eq_
-from bleach import linkify, url_re
+from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC
-def filter_url(url):
- return u'http://bouncer/?u=%s' % urllib.quote_plus(url)
def test_url_re():
@@ -45,38 +43,139 @@ def test_trailing_slash():
def test_mangle_link():
+ """We can muck with the href attribute of the link."""
+ def filter_url(attrs, new=False):
+ attrs['href'] = (u'http://bouncer/?u=%s' %
+ urllib.quote_plus(attrs['href']))
+ return attrs
+
eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
'http://example.com</a>',
- linkify('http://example.com', filter_url=filter_url))
+ linkify('http://example.com', DC + [filter_url]))
+
+
+def test_mangle_text():
+ """We can muck with the inner text of a link."""
+
+ def ft(attrs, new=False):
+ attrs['_text'] = 'bar'
+ return attrs
+
+ eq_('<a href="http://ex.mp">bar</a> <a href="http://ex.mp/foo">bar</a>',
+ linkify('http://ex.mp <a href="http://ex.mp/foo">foo</a>', [ft]))
def test_email_link():
- eq_('a james@example.com mailto',
- linkify('a james@example.com mailto'))
- eq_('a james@example.com.au mailto',
- linkify('a james@example.com.au mailto'))
- eq_('a <a href="mailto:james@example.com" rel="nofollow">'
- 'james@example.com</a> mailto',
- linkify('a james@example.com mailto', parse_email=True))
- eq_('aussie <a href="mailto:james@example.com.au" rel="nofollow">'
- 'james@example.com.au</a> mailto',
- linkify('aussie james@example.com.au mailto', parse_email=True))
- eq_('email to <a href="james@example.com" rel="nofollow">'
- 'james@example.com</a>',
- linkify('email to <a href="james@example.com">'
- 'james@example.com</a>', parse_email=True))
+ tests = (
+ ('a james@example.com mailto', False, 'a james@example.com mailto'),
+ ('a james@example.com.au mailto', False,
+ 'a james@example.com.au mailto'),
+ ('a <a href="mailto:james@example.com">james@example.com</a> mailto',
+ True, 'a james@example.com mailto'),
+ ('aussie <a href="mailto:james@example.com.au">'
+ 'james@example.com.au</a> mailto', True,
+ 'aussie james@example.com.au mailto'),
+ # This is kind of a pathological case. I guess we do our best here.
+ ('email to <a href="james@example.com" rel="nofollow">'
+ 'james@example.com</a>', True,
+ 'email to <a href="james@example.com">james@example.com</a>'),
+ )
+
+ def _check(o, p, i):
+ eq_(o, linkify(i, parse_email=p))
+
+ for (o, p, i) in tests:
+ yield _check, o, p, i
def test_email_link_escaping():
- eq_('''<a href='mailto:"james"@example.com' rel="nofollow">'''
- '''"james"@example.com</a>''',
- linkify('"james"@example.com', parse_email=True))
- eq_('''<a href="mailto:&quot;j'ames&quot;@example.com" rel="nofollow">'''
- '''"j'ames"@example.com</a>''',
- linkify('"j\'ames"@example.com', parse_email=True))
- eq_('''<a href='mailto:"ja>mes"@example.com' rel="nofollow">'''
- '''"ja&gt;mes"@example.com</a>''',
- linkify('"ja>mes"@example.com', parse_email=True))
+ tests = (
+ ('''<a href='mailto:"james"@example.com'>'''
+ '''"james"@example.com</a>''',
+ '"james"@example.com'),
+ ('''<a href="mailto:&quot;j'ames&quot;@example.com">'''
+ '''"j'ames"@example.com</a>''',
+ '"j\'ames"@example.com'),
+ ('''<a href='mailto:"ja>mes"@example.com'>'''
+ '''"ja&gt;mes"@example.com</a>''',
+ '"ja>mes"@example.com'),
+ )
+
+ def _check(o, i):
+ eq_(o, linkify(i, parse_email=True))
+
+ for (o, i) in tests:
+ yield _check, o, i
+
+
+def test_prevent_links():
+ """Returning None from any callback should remove links or prevent them
+ from being created."""
+
+ def no_new_links(attrs, new=False):
+ if new:
+ return None
+ return attrs
+
+ def no_old_links(attrs, new=False):
+ if not new:
+ return None
+ return attrs
+
+ def noop(attrs, new=False):
+ return attrs
+
+ in_text = 'a ex.mp <a href="http://example.com">example</a>'
+ out_text = 'a <a href="http://ex.mp">ex.mp</a> example'
+ tests = (
+ ([noop], ('a <a href="http://ex.mp">ex.mp</a> '
+ '<a href="http://example.com">example</a>'), 'noop'),
+ ([no_new_links, noop], in_text, 'no new, noop'),
+ ([noop, no_new_links], in_text, 'noop, no new'),
+ ([no_old_links, noop], out_text, 'no old, noop'),
+ ([noop, no_old_links], out_text, 'noop, no old'),
+ ([no_old_links, no_new_links], 'a ex.mp example', 'no links'),
+ )
+
+ def _check(cb, o, msg):
+ eq_(o, linkify(in_text, cb), msg)
+
+ for (cb, o, msg) in tests:
+ yield _check, cb, o, msg
+
+
+def test_set_attrs():
+ """We can set random attributes on links."""
+
+ def set_attr(attrs, new=False):
+ attrs['rev'] = 'canonical'
+ return attrs
+
+ eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
+ linkify('ex.mp', [set_attr]))
+
+
+def test_only_proto_links():
+ """Only create links if there's a protocol."""
+ def only_proto(attrs, new=False):
+ if new and not attrs['_text'].startswith(('http:', 'https:')):
+ return None
+ return attrs
+
+ in_text = 'a ex.mp http://ex.mp <a href="/foo">bar</a>'
+ out_text = ('a ex.mp <a href="http://ex.mp">http://ex.mp</a> '
+ '<a href="/foo">bar</a>')
+ eq_(out_text, linkify(in_text, [only_proto]))
+
+
+def test_stop_email():
+ """Returning None should prevent a link from being created."""
+ def no_email(attrs, new=False):
+ if attrs['href'].startswith('mailto:'):
+ return None
+ return attrs
+ text = 'do not link james@example.com'
+ eq_(text, linkify(text, parse_email=True, callbacks=[no_email]))
def test_tlds():
@@ -98,7 +197,7 @@ def test_escaping():
def test_nofollow_off():
eq_('<a href="http://example.com">example.com</a>',
- linkify(u'example.com', nofollow=False))
+ linkify(u'example.com', []))
def test_link_in_html():
@@ -297,16 +396,45 @@ def test_ports():
yield check, test, output
-def test_target():
- eq_('<a href="http://example.com" rel="nofollow" '
- 'target="_blank">example.com</a>',
- linkify(u'example.com', target='_blank'))
- eq_('<a href="http://example.com" target="_blank">example.com</a>',
- linkify(u'example.com', target='_blank', nofollow=False))
-
-
def test_tokenizer():
"""Linkify doesn't always have to sanitize."""
raw = '<em>test<x></x></em>'
eq_('<em>test&lt;x&gt;&lt;/x&gt;</em>', linkify(raw))
eq_(raw, linkify(raw, tokenizer=HTMLTokenizer))
+
+
+def test_ignore_bad_protocols():
+ eq_('foohttp://bar',
+ linkify('foohttp://bar'))
+ eq_('foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
+ linkify('foohttp://exampl.com'))
+
+
+def test_max_recursion_depth():
+ """If we hit the max recursion depth, just return the string."""
+ test = '<em>' * 2000 + 'foo' + '</em>' * 2000
+ eq_(test, linkify(test))
+
+
+def test_link_emails_and_urls():
+ """parse_email=True shouldn't prevent URLs from getting linkified."""
+ output = ('<a href="http://example.com" rel="nofollow">'
+ 'http://example.com</a> <a href="mailto:person@example.com">'
+ 'person@example.com</a>')
+ eq_(output, linkify('http://example.com person@example.com',
+ parse_email=True))
+
+
+def test_links_case_insensitive():
+ """Protocols and domain names are case insensitive."""
+ expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
+ 'HTTP://EXAMPLE.COM</a>')
+ eq_(expect, linkify('HTTP://EXAMPLE.COM'))
+
+
+def test_elements_inside_links():
+ eq_(u'<a href="#" rel="nofollow">hello<br></a>',
+ linkify('<a href="#">hello<br></a>'))
+
+ eq_(u'<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
+ linkify('<a href="#"><strong>bold</strong> hello<br></a>'))
diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py
index 9e9bb7b..6c2b33f 100644
--- a/bleach/tests/test_security.py
+++ b/bleach/tests/test_security.py
@@ -106,3 +106,7 @@ def test_poster_attribute():
eq_(expect, clean(test, tags=tags, attributes=attrs))
ok = '<video poster="/foo.png"></video>'
eq_(ok, clean(ok, tags=tags, attributes=attrs))
+
+
+def test_feed_protocol():
+ eq_('<a>foo</a>', clean('<a href="feed:file:///tmp/foo">foo</a>'))