diff options
author | Per Andersson <avtobiff@gmail.com> | 2014-01-30 01:25:15 +0100 |
---|---|---|
committer | Per Andersson <avtobiff@gmail.com> | 2014-01-30 01:25:15 +0100 |
commit | c084fbcb2e2f2f167c8d65ca1ecfee406f14c24b (patch) | |
tree | 6c5ca20a48eb50ea000748abb5374247d6de1888 /bleach/tests | |
parent | 973d326d7847161827a8f75b5f589008a83a342d (diff) | |
parent | 6cff86ce6de27fbd4f9fc07716fb1205b14ffae4 (diff) | |
download | python-bleach-c084fbcb2e2f2f167c8d65ca1ecfee406f14c24b.tar python-bleach-c084fbcb2e2f2f167c8d65ca1ecfee406f14c24b.tar.gz |
Merge tag 'upstream/1.4'
Upstream version 1.4
# gpg: Signature made Thu 30 Jan 2014 01:25:12 AM CET using RSA key ID 79F492F8
# gpg: Good signature from "Per Andersson <avtobiff@fripost.org>"
# gpg: aka "Per Andersson <avtobiff@debian.org>"
# gpg: aka "Per Andersson <avtobiff@gmail.com>"
# gpg: aka "Per Andersson <avtobiff@foo.nu>"
Diffstat (limited to 'bleach/tests')
-rw-r--r-- | bleach/tests/test_basics.py | 65 | ||||
-rw-r--r-- | bleach/tests/test_css.py | 13 | ||||
-rw-r--r-- | bleach/tests/test_links.py | 241 | ||||
-rw-r--r-- | bleach/tests/test_security.py | 12 | ||||
-rw-r--r-- | bleach/tests/test_unicode.py | 47 | ||||
-rw-r--r-- | bleach/tests/tools.py | 7 |
6 files changed, 245 insertions, 140 deletions
diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py index 9eca687..822407f 100644 --- a/bleach/tests/test_basics.py +++ b/bleach/tests/test_basics.py @@ -1,7 +1,9 @@ +import six import html5lib from nose.tools import eq_ import bleach +from bleach.tests.tools import in_ def test_empty(): @@ -9,7 +11,12 @@ def test_empty(): def test_nbsp(): - eq_(u'\xa0test string\xa0', bleach.clean(' test string ')) + if six.PY3: + expected = '\xa0test string\xa0' + else: + expected = six.u('\\xa0test string\\xa0') + + eq_(expected, bleach.clean(' test string ')) def test_comments_only(): @@ -18,8 +25,8 @@ def test_comments_only(): eq_('', bleach.clean(comment)) eq_('', bleach.clean(open_comment)) eq_(comment, bleach.clean(comment, strip_comments=False)) - eq_('%s-->' % open_comment, bleach.clean(open_comment, - strip_comments=False)) + eq_('{0!s}-->'.format(open_comment), bleach.clean(open_comment, + strip_comments=False)) def test_with_comments(): @@ -55,9 +62,11 @@ def test_function_arguments(): def test_named_arguments(): ATTRS = {'a': ['rel', 'href']} - s = u'<a href="http://xx.com" rel="alternate">xx.com</a>' - eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s)) - eq_(s, bleach.clean(s, attributes=ATTRS)) + s = ('<a href="http://xx.com" rel="alternate">xx.com</a>', + '<a rel="alternate" href="http://xx.com">xx.com</a>') + + eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s[0])) + in_(s, bleach.clean(s[0], attributes=ATTRS)) def test_disallowed_html(): @@ -81,19 +90,19 @@ def test_bare_entities(): def test_escaped_entities(): - s = u'<em>strong</em>' + s = '<em>strong</em>' eq_(s, bleach.clean(s)) def test_serializer(): - s = u'<table></table>' + s = '<table></table>' eq_(s, bleach.clean(s, tags=['table'])) - eq_(u'test<table></table>', bleach.linkify(u'<table>test</table>')) - eq_(u'<p>test</p>', bleach.clean(u'<p>test</p>', tags=['p'])) + eq_('test<table></table>', bleach.linkify('<table>test</table>')) + eq_('<p>test</p>', bleach.clean('<p>test</p>', tags=['p'])) def test_no_href_links(): - s = u'<a name="anchor">x</a>' + s = '<a name="anchor">x</a>' eq_(s, bleach.linkify(s)) @@ -112,7 +121,7 @@ def test_stripping(): bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True)) eq_('a test <em>with</em> <b>html</b> tags', bleach.clean('a test <em>with</em> <img src="http://example.com/"> ' - '<b>html</b> tags', strip=True)) + '<b>html</b> tags', strip=True)) s = '<p><a href="http://example.com/">link text</a></p>' eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True)) @@ -138,7 +147,7 @@ def test_allowed_styles(): def test_idempotent(): """Make sure that applying the filter twice doesn't change anything.""" - dirty = u'<span>invalid & </span> < extra http://link.com<em>' + dirty = '<span>invalid & </span> < extra http://link.com<em>' clean = bleach.clean(dirty) eq_(clean, bleach.clean(clean)) @@ -147,10 +156,23 @@ def test_idempotent(): eq_(linked, bleach.linkify(linked)) +def test_rel_already_there(): + """Make sure rel attribute is updated not replaced""" + linked = ('Click <a href="http://example.com" rel="tooltip">' + 'here</a>.') + link_good = (('Click <a href="http://example.com" rel="tooltip nofollow">' + 'here</a>.'), + ('Click <a rel="tooltip nofollow" href="http://example.com">' + 'here</a>.')) + + in_(link_good, bleach.linkify(linked)) + in_(link_good, bleach.linkify(link_good[0])) + + def test_lowercase_html(): """We should output lowercase HTML.""" - dirty = u'<EM CLASS="FOO">BAR</EM>' - clean = u'<em class="FOO">BAR</em>' + dirty = '<EM CLASS="FOO">BAR</EM>' + clean = '<em class="FOO">BAR</em>' eq_(clean, bleach.clean(dirty, attributes=['class'])) @@ -160,14 +182,15 @@ def test_wildcard_attributes(): 'img': ['src'], } TAG = ['img', 'em'] - dirty = (u'both <em id="foo" style="color: black">can</em> have ' - u'<img id="bar" src="foo"/>') - clean = u'both <em id="foo">can</em> have <img id="bar" src="foo">' - eq_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR)) + dirty = ('both <em id="foo" style="color: black">can</em> have ' + '<img id="bar" src="foo"/>') + clean = ('both <em id="foo">can</em> have <img src="foo" id="bar">', + 'both <em id="foo">can</em> have <img id="bar" src="foo">') + in_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR)) def test_sarcasm(): """Jokes should crash.<sarcasm/>""" - dirty = u'Yeah right <sarcasm/>' - clean = u'Yeah right <sarcasm/>' + dirty = 'Yeah right <sarcasm/>' + clean = 'Yeah right <sarcasm/>' eq_(clean, bleach.clean(dirty)) diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py index 588c8ce..b40596f 100644 --- a/bleach/tests/test_css.py +++ b/bleach/tests/test_css.py @@ -29,14 +29,14 @@ def test_allowed_css(): ('font-family: "Arial";', 'font-family: "Arial";', ['font-family']), ) - p_single = '<p style="%s">bar</p>' - p_double = "<p style='%s'>bar</p>" + p_single = '<p style="{0!s}">bar</p>' + p_double = "<p style='{0!s}'>bar</p>" def check(i, o, s): if '"' in i: - eq_(p_double % o, clean(p_double % i, styles=s)) + eq_(p_double.format(o), clean(p_double.format(i), styles=s)) else: - eq_(p_single % o, clean(p_single % i, styles=s)) + eq_(p_single.format(o), clean(p_single.format(i), styles=s)) for i, o, s in tests: yield check, i, o, s @@ -70,12 +70,13 @@ def test_style_hang(): """font: normal normal normal 100%/normal 'Courier New', """ """'Andale Mono', monospace; background-position: initial """ """initial; background-repeat: initial initial;""") - html = '<p style="%s">Hello world</p>' % style + html = '<p style="{0!s}">Hello world</p>'.format(style) styles = [ 'border', 'float', 'overflow', 'min-height', 'vertical-align', 'white-space', 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', - 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right', + 'padding', 'padding-left', 'padding-top', 'padding-bottom', + 'padding-right', 'background', 'background-color', 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py index ac593c4..abf889d 100644 --- a/bleach/tests/test_links.py +++ b/bleach/tests/test_links.py @@ -1,18 +1,20 @@ -import urllib +try: + from urllib.parse import quote_plus +except ImportError: + from urllib import quote_plus from html5lib.tokenizer import HTMLTokenizer from nose.tools import eq_ from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC - - +from bleach.tests.tools import in_ def test_url_re(): def no_match(s): match = url_re.search(s) if match: - assert not match, 'matched %s' % s[slice(*match.span())] + assert not match, 'matched {0!s}'.format(s[slice(*match.span())]) yield no_match, 'just what i am looking for...it' @@ -21,36 +23,48 @@ def test_empty(): def test_simple_link(): - eq_('a <a href="http://example.com" rel="nofollow">http://example.com' + in_(('a <a href="http://example.com" rel="nofollow">http://example.com' '</a> link', + 'a <a rel="nofollow" href="http://example.com">http://example.com' + '</a> link'), linkify('a http://example.com link')) - eq_('a <a href="https://example.com" rel="nofollow">https://example.com' + in_(('a <a href="https://example.com" rel="nofollow">https://example.com' '</a> link', + 'a <a rel="nofollow" href="https://example.com">https://example.com' + '</a> link'), linkify('a https://example.com link')) - eq_('an <a href="http://example.com" rel="nofollow">example.com</a> link', - linkify('an example.com link')) + in_(('a <a href="http://example.com" rel="nofollow">example.com</a> link', + 'a <a rel="nofollow" href="http://example.com">example.com</a> link'), + linkify('a example.com link')) def test_trailing_slash(): - eq_('<a href="http://example.com/" rel="nofollow">http://example.com/</a>', - linkify('http://example.com/')) - eq_('<a href="http://example.com/foo/" rel="nofollow">' - 'http://example.com/foo/</a>', - linkify('http://example.com/foo/')) - eq_('<a href="http://example.com/foo/bar/" rel="nofollow">' - 'http://example.com/foo/bar/</a>', - linkify('http://example.com/foo/bar/')) + in_(('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>', + '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>'), + linkify('http://examp.com/')) + in_(('<a href="http://example.com/foo/" rel="nofollow">' + 'http://example.com/foo/</a>', + '<a rel="nofollow" href="http://example.com/foo/">' + 'http://example.com/foo/</a>'), + linkify('http://example.com/foo/')) + in_(('<a href="http://example.com/foo/bar/" rel="nofollow">' + 'http://example.com/foo/bar/</a>', + '<a rel="nofollow" href="http://example.com/foo/bar/">' + 'http://example.com/foo/bar/</a>'), + linkify('http://example.com/foo/bar/')) def test_mangle_link(): """We can muck with the href attribute of the link.""" def filter_url(attrs, new=False): - attrs['href'] = (u'http://bouncer/?u=%s' % - urllib.quote_plus(attrs['href'])) + quoted = quote_plus(attrs['href']) + attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted) return attrs - eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">' - 'http://example.com</a>', + in_(('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">' + 'http://example.com</a>', + '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">' + 'http://example.com</a>'), linkify('http://example.com', DC + [filter_url])) @@ -76,13 +90,19 @@ def test_email_link(): 'james@example.com.au</a> mailto', True, 'aussie james@example.com.au mailto'), # This is kind of a pathological case. I guess we do our best here. - ('email to <a href="james@example.com" rel="nofollow">' - 'james@example.com</a>', True, - 'email to <a href="james@example.com">james@example.com</a>'), + (('email to <a href="james@example.com" rel="nofollow">' + 'james@example.com</a>', + 'email to <a rel="nofollow" href="james@example.com">' + 'james@example.com</a>'), + True, + 'email to <a href="james@example.com">james@example.com</a>'), ) def _check(o, p, i): - eq_(o, linkify(i, parse_email=p)) + if isinstance(o, (list, tuple)): + in_(o, linkify(i, parse_email=p)) + else: + eq_(o, linkify(i, parse_email=p)) for (o, p, i) in tests: yield _check, o, p, i @@ -151,7 +171,8 @@ def test_set_attrs(): attrs['rev'] = 'canonical' return attrs - eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>', + in_(('<a href="http://ex.mp" rev="canonical">ex.mp</a>', + '<a rev="canonical" href="http://ex.mp">ex.mp</a>'), linkify('ex.mp', [set_attr])) @@ -179,15 +200,19 @@ def test_stop_email(): def test_tlds(): - eq_('<a href="http://example.com" rel="nofollow">example.com</a>', + in_(('<a href="http://example.com" rel="nofollow">example.com</a>', + '<a rel="nofollow" href="http://example.com">example.com</a>'), linkify('example.com')) - eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>', + in_(('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>', + '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>'), linkify('example.co.uk')) - eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>', + in_(('<a href="http://example.edu" rel="nofollow">example.edu</a>', + '<a rel="nofollow" href="http://example.edu">example.edu</a>'), linkify('example.edu')) eq_('example.xxx', linkify('example.xxx')) eq_(' brie', linkify(' brie')) - eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>', + in_(('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>', + '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>'), linkify('bit.ly/fun')) @@ -197,61 +222,81 @@ def test_escaping(): def test_nofollow_off(): eq_('<a href="http://example.com">example.com</a>', - linkify(u'example.com', [])) + linkify('example.com', [])) def test_link_in_html(): - eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>', + in_(('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>', + '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>'), linkify('<i>http://yy.com</i>')) - eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com</a>' - '</strong></em>', + + in_(('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com' + '</a></strong></em>', + '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com' + '</a></strong></em>'), linkify('<em><strong>http://xx.com</strong></em>')) def test_links_https(): - eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>', + in_(('<a href="https://yy.com" rel="nofollow">https://yy.com</a>', + '<a rel="nofollow" href="https://yy.com">https://yy.com</a>'), linkify('https://yy.com')) def test_add_rel_nofollow(): """Verify that rel="nofollow" is added to an existing link""" - eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>', + in_(('<a href="http://yy.com" rel="nofollow">http://yy.com</a>', + '<a rel="nofollow" href="http://yy.com">http://yy.com</a>'), linkify('<a href="http://yy.com">http://yy.com</a>')) def test_url_with_path(): - eq_('<a href="http://example.com/path/to/file" rel="nofollow">' - 'http://example.com/path/to/file</a>', + in_(('<a href="http://example.com/path/to/file" rel="nofollow">' + 'http://example.com/path/to/file</a>', + '<a rel="nofollow" href="http://example.com/path/to/file">' + 'http://example.com/path/to/file</a>'), linkify('http://example.com/path/to/file')) def test_link_ftp(): - eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">' - 'ftp://ftp.mozilla.org/some/file</a>', + in_(('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">' + 'ftp://ftp.mozilla.org/some/file</a>', + '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">' + 'ftp://ftp.mozilla.org/some/file</a>'), linkify('ftp://ftp.mozilla.org/some/file')) def test_link_query(): - eq_('<a href="http://xx.com/?test=win" rel="nofollow">' + in_(('<a href="http://xx.com/?test=win" rel="nofollow">' 'http://xx.com/?test=win</a>', + '<a rel="nofollow" href="http://xx.com/?test=win">' + 'http://xx.com/?test=win</a>'), linkify('http://xx.com/?test=win')) - eq_('<a href="http://xx.com/?test=win" rel="nofollow">' + in_(('<a href="http://xx.com/?test=win" rel="nofollow">' 'xx.com/?test=win</a>', + '<a rel="nofollow" href="http://xx.com/?test=win">' + 'xx.com/?test=win</a>'), linkify('xx.com/?test=win')) - eq_('<a href="http://xx.com?test=win" rel="nofollow">' + in_(('<a href="http://xx.com?test=win" rel="nofollow">' 'xx.com?test=win</a>', + '<a rel="nofollow" href="http://xx.com?test=win">' + 'xx.com?test=win</a>'), linkify('xx.com?test=win')) def test_link_fragment(): - eq_('<a href="http://xx.com/path#frag" rel="nofollow">' - 'http://xx.com/path#frag</a>', + in_(('<a href="http://xx.com/path#frag" rel="nofollow">' + 'http://xx.com/path#frag</a>', + '<a rel="nofollow" href="http://xx.com/path#frag">' + 'http://xx.com/path#frag</a>'), linkify('http://xx.com/path#frag')) def test_link_entities(): - eq_('<a href="http://xx.com/?a=1&b=2" rel="nofollow">' + in_(('<a href="http://xx.com/?a=1&b=2" rel="nofollow">' 'http://xx.com/?a=1&b=2</a>', + '<a rel="nofollow" href="http://xx.com/?a=1&b=2">' + 'http://xx.com/?a=1&b=2</a>'), linkify('http://xx.com/?a=1&b=2')) @@ -262,9 +307,12 @@ def test_escaped_html(): def test_link_http_complete(): - eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d' + in_(('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d' '&e#f" rel="nofollow">' 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>', + '<a rel="nofollow" href="https://user:pass@ftp.mozilla.org/x/' + 'y.exe?a=b&c=d&e#f">' + 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'), linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f')) @@ -282,8 +330,10 @@ def test_javascript_url(): def test_unsafe_url(): """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning.""" - eq_('All your{"<a href="http://xx.yy.com/grover.png" ' - 'rel="nofollow">xx.yy.com/grover.png</a>"}base are', + in_(('All your{"<a href="http://xx.yy.com/grover.png" ' + 'rel="nofollow">xx.yy.com/grover.png</a>"}base are', + 'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"' + '>xx.yy.com/grover.png</a>"}base are'), linkify('All your{"xx.yy.com/grover.png"}base are')) @@ -291,17 +341,23 @@ def test_skip_pre(): """Skip linkification in <pre> tags.""" simple = 'http://xx.com <pre>http://xx.com</pre>' linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' + '<pre>http://xx.com</pre>', + '<a rel="nofollow" href="http://xx.com">http://xx.com</a> ' '<pre>http://xx.com</pre>') all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' '<pre><a href="http://xx.com" rel="nofollow">http://xx.com' + '</a></pre>', + '<a rel="nofollow" href="http://xx.com">http://xx.com</a> ' + '<pre><a rel="nofollow" href="http://xx.com">http://xx.com' '</a></pre>') - eq_(linked, linkify(simple, skip_pre=True)) - eq_(all_linked, linkify(simple)) + in_(linked, linkify(simple, skip_pre=True)) + in_(all_linked, linkify(simple)) already_linked = '<pre><a href="http://xx.com">xx</a></pre>' - nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>' - eq_(nofollowed, linkify(already_linked)) - eq_(nofollowed, linkify(already_linked, skip_pre=True)) + nofollowed = ('<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>', + '<pre><a rel="nofollow" href="http://xx.com">xx</a></pre>') + in_(nofollowed, linkify(already_linked)) + in_(nofollowed, linkify(already_linked, skip_pre=True)) def test_libgl(): @@ -311,11 +367,13 @@ def test_libgl(): def test_end_of_sentence(): """example.com. should match.""" - out = u'<a href="http://%s" rel="nofollow">%s</a>%s' - in_ = u'%s%s' + outs = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}', + '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}') + intxt = '{0!s}{1!s}' def check(u, p): - eq_(out % (u, u, p), linkify(in_ % (u, p))) + in_([out.format(u, p) for out in outs], + linkify(intxt.format(u, p))) tests = ( ('example.com', '.'), @@ -330,49 +388,50 @@ def test_end_of_sentence(): def test_end_of_clause(): """example.com/foo, shouldn't include the ,""" - eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar', + in_(('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar', + '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar'), linkify('ex.com/foo, bar')) def test_sarcasm(): """Jokes should crash.<sarcasm/>""" - dirty = u'Yeah right <sarcasm/>' - clean = u'Yeah right <sarcasm/>' + dirty = 'Yeah right <sarcasm/>' + clean = 'Yeah right <sarcasm/>' eq_(clean, linkify(dirty)) def test_wrapping_parentheses(): """URLs wrapped in parantheses should not include them.""" - out = u'%s<a href="http://%s" rel="nofollow">%s</a>%s' + outs = ('{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}', + '{0!s}<a rel="nofollow" href="http://{1!s}">{2!s}</a>{3!s}') tests = ( - ('(example.com)', out % ('(', 'example.com', 'example.com', ')')), - ('(example.com/)', out % ('(', 'example.com/', 'example.com/', ')')), - ('(example.com/foo)', out % ('(', 'example.com/foo', - 'example.com/foo', ')')), - ('(((example.com/))))', out % ('(((', 'example.com/)', - 'example.com/)', ')))')), - ('example.com/))', out % ('', 'example.com/))', - 'example.com/))', '')), + ('(example.com)', ('(', 'example.com', 'example.com', ')')), + ('(example.com/)', ('(', 'example.com/', 'example.com/', ')')), + ('(example.com/foo)', ('(', 'example.com/foo', + 'example.com/foo', ')')), + ('(((example.com/))))', ('(((', 'example.com/)', + 'example.com/)', ')))')), + ('example.com/))', ('', 'example.com/))', 'example.com/))', '')), ('http://en.wikipedia.org/wiki/Test_(assessment)', - out % ('', 'en.wikipedia.org/wiki/Test_(assessment)', - 'http://en.wikipedia.org/wiki/Test_(assessment)', '')), + ('', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', '')), ('(http://en.wikipedia.org/wiki/Test_(assessment))', - out % ('(', 'en.wikipedia.org/wiki/Test_(assessment)', - 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')), + ('(', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')), ('((http://en.wikipedia.org/wiki/Test_(assessment))', - out % ('((', 'en.wikipedia.org/wiki/Test_(assessment', - 'http://en.wikipedia.org/wiki/Test_(assessment', '))')), + ('((', 'en.wikipedia.org/wiki/Test_(assessment', + 'http://en.wikipedia.org/wiki/Test_(assessment', '))')), ('(http://en.wikipedia.org/wiki/Test_(assessment)))', - out % ('(', 'en.wikipedia.org/wiki/Test_(assessment))', - 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')), + ('(', 'en.wikipedia.org/wiki/Test_(assessment))', + 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')), ('(http://en.wikipedia.org/wiki/)Test_(assessment', - out % ('(', 'en.wikipedia.org/wiki/)Test_(assessment', - 'http://en.wikipedia.org/wiki/)Test_(assessment', '')), + ('(', 'en.wikipedia.org/wiki/)Test_(assessment', + 'http://en.wikipedia.org/wiki/)Test_(assessment', '')), ) def check(test, expected_output): - eq_(expected_output, linkify(test)) + in_([o.format(*expected_output) for o in outs], linkify(test)) for test, expected_output in tests: yield check, test, expected_output @@ -389,7 +448,9 @@ def test_ports(): ) def check(test, output): - eq_(u'<a href="{0}" rel="nofollow">{0}</a>{1}'.format(*output), + outs = ('<a href="{0}" rel="nofollow">{0}</a>{1}', + '<a rel="nofollow" href="{0}">{0}</a>{1}') + in_([out.format(*output) for out in outs], linkify(test)) for test, output in tests: @@ -406,8 +467,9 @@ def test_tokenizer(): def test_ignore_bad_protocols(): eq_('foohttp://bar', linkify('foohttp://bar')) - eq_('foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>', - linkify('foohttp://exampl.com')) + in_(('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>', + 'fohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>'), + linkify('fohttp://exampl.com')) def test_max_recursion_depth(): @@ -420,21 +482,28 @@ def test_link_emails_and_urls(): """parse_email=True shouldn't prevent URLs from getting linkified.""" output = ('<a href="http://example.com" rel="nofollow">' 'http://example.com</a> <a href="mailto:person@example.com">' + 'person@example.com</a>', + '<a rel="nofollow" href="http://example.com">' + 'http://example.com</a> <a href="mailto:person@example.com">' 'person@example.com</a>') - eq_(output, linkify('http://example.com person@example.com', + in_(output, linkify('http://example.com person@example.com', parse_email=True)) def test_links_case_insensitive(): """Protocols and domain names are case insensitive.""" expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">' + 'HTTP://EXAMPLE.COM</a>', + '<a rel="nofollow" href="HTTP://EXAMPLE.COM">' 'HTTP://EXAMPLE.COM</a>') - eq_(expect, linkify('HTTP://EXAMPLE.COM')) + in_(expect, linkify('HTTP://EXAMPLE.COM')) def test_elements_inside_links(): - eq_(u'<a href="#" rel="nofollow">hello<br></a>', + in_(('<a href="#" rel="nofollow">hello<br></a>', + '<a rel="nofollow" href="#">hello<br></a>'), linkify('<a href="#">hello<br></a>')) - eq_(u'<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>', + in_(('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>', + '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>'), linkify('<a href="#"><strong>bold</strong> hello<br></a>')) diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py index 6c2b33f..6adab59 100644 --- a/bleach/tests/test_security.py +++ b/bleach/tests/test_security.py @@ -25,10 +25,10 @@ def test_invalid_attr(): clean('<a onclick="evil" href="test">test</a>')) eq_('<img src="test">', clean('<img onclick="evil" src="test" />', - tags=IMG, attributes=IMG_ATTR)) + tags=IMG, attributes=IMG_ATTR)) eq_('<img src="test">', clean('<img href="invalid" src="test" />', - tags=IMG, attributes=IMG_ATTR)) + tags=IMG, attributes=IMG_ATTR)) def test_unquoted_attr(): @@ -57,7 +57,7 @@ def test_invalid_filter_attr(): eq_('<img src="http://example.com/">', clean('<img onclick="evil" src="http://example.com/" />', - tags=IMG, attributes=IMG_ATTR)) + tags=IMG, attributes=IMG_ATTR)) eq_('<img>', clean('<img onclick="evil" src="http://badhost.com/" />', tags=IMG, attributes=IMG_ATTR)) @@ -91,9 +91,9 @@ def test_nasty(): """Nested, broken up, multiple tags, are still foiled!""" test = ('<scr<script></script>ipt type="text/javascript">alert("foo");</' '<script></script>script<del></del>>') - expect = (u'<scr<script></script>ipt type="text/javascript"' - u'>alert("foo");</script>script<del></del>' - u'>') + expect = ('<scr<script></script>ipt type="text/javascript"' + '>alert("foo");</script>script<del></del>' + '>') eq_(expect, clean(test)) diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py index 67123cc..796924d 100644 --- a/bleach/tests/test_unicode.py +++ b/bleach/tests/test_unicode.py @@ -1,54 +1,59 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals from nose.tools import eq_ from bleach import clean, linkify +from bleach.tests.tools import in_ def test_japanese_safe_simple(): - eq_(u'ヘルプとチュートリアル', clean(u'ヘルプとチュートリアル')) - eq_(u'ヘルプとチュートリアル', linkify(u'ヘルプとチュートリアル')) + eq_('ヘルプとチュートリアル', clean('ヘルプとチュートリアル')) + eq_('ヘルプとチュートリアル', linkify('ヘルプとチュートリアル')) def test_japanese_strip(): - eq_(u'<em>ヘルプとチュートリアル</em>', - clean(u'<em>ヘルプとチュートリアル</em>')) - eq_(u'<span>ヘルプとチュートリアル</span>', - clean(u'<span>ヘルプとチュートリアル</span>')) + eq_('<em>ヘルプとチュートリアル</em>', + clean('<em>ヘルプとチュートリアル</em>')) + eq_('<span>ヘルプとチュートリアル</span>', + clean('<span>ヘルプとチュートリアル</span>')) def test_russian_simple(): - eq_(u'Домашняя', clean(u'Домашняя')) - eq_(u'Домашняя', linkify(u'Домашняя')) + eq_('Домашняя', clean('Домашняя')) + eq_('Домашняя', linkify('Домашняя')) def test_mixed(): - eq_(u'Домашняяヘルプとチュートリアル', - clean(u'Домашняяヘルプとチュートリアル')) + eq_('Домашняяヘルプとチュートリアル', + clean('Домашняяヘルプとチュートリアル')) def test_mixed_linkify(): - eq_(u'Домашняя <a href="http://example.com" rel="nofollow">' - u'http://example.com</a> ヘルプとチュートリアル', - linkify(u'Домашняя http://example.com ヘルプとチュートリアル')) + in_(('Домашняя <a href="http://example.com" rel="nofollow">' + 'http://example.com</a> ヘルプとチュートリアル', + 'Домашняя <a rel="nofollow" href="http://example.com">' + 'http://example.com</a> ヘルプとチュートリアル'), + linkify('Домашняя http://example.com ヘルプとチュートリアル')) def test_url_utf8(): """Allow UTF8 characters in URLs themselves.""" - out = u'<a href="%(url)s" rel="nofollow">%(url)s</a>' + outs = ('<a href="{0!s}" rel="nofollow">{0!s}</a>', + '<a rel="nofollow" href="{0!s}">{0!s}</a>') + + out = lambda url: [x.format(url) for x in outs] tests = ( - ('http://éxámplé.com/', out % {'url': u'http://éxámplé.com/'}), - ('http://éxámplé.com/íàñá/', - out % {'url': u'http://éxámplé.com/íàñá/'}), + ('http://éxámplé.com/', out('http://éxámplé.com/')), + ('http://éxámplé.com/íàñá/', out('http://éxámplé.com/íàñá/')), ('http://éxámplé.com/íàñá/?foo=bar', - out % {'url': u'http://éxámplé.com/íàñá/?foo=bar'}), + out('http://éxámplé.com/íàñá/?foo=bar')), ('http://éxámplé.com/íàñá/?fóo=bár', - out % {'url': u'http://éxámplé.com/íàñá/?fóo=bár'}), + out('http://éxámplé.com/íàñá/?fóo=bár')), ) def check(test, expected_output): - eq_(expected_output, linkify(test)) + in_(expected_output, linkify(test)) for test, expected_output in tests: yield check, test, expected_output diff --git a/bleach/tests/tools.py b/bleach/tests/tools.py new file mode 100644 index 0000000..87f926c --- /dev/null +++ b/bleach/tests/tools.py @@ -0,0 +1,7 @@ + + +def in_(l, a, msg=None): + """Shorthand for 'assert a in l, "%r not in %r" % (a, l) + """ + if not a in l: + raise AssertionError(msg or "%r not in %r" % (a, l)) |