From 6cff86ce6de27fbd4f9fc07716fb1205b14ffae4 Mon Sep 17 00:00:00 2001 From: Per Andersson Date: Thu, 30 Jan 2014 01:25:11 +0100 Subject: Imported Upstream version 1.4 --- bleach/tests/test_links.py | 241 +++++++++++++++++++++++++++++---------------- 1 file changed, 155 insertions(+), 86 deletions(-) (limited to 'bleach/tests/test_links.py') diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py index ac593c4..abf889d 100644 --- a/bleach/tests/test_links.py +++ b/bleach/tests/test_links.py @@ -1,18 +1,20 @@ -import urllib +try: + from urllib.parse import quote_plus +except ImportError: + from urllib import quote_plus from html5lib.tokenizer import HTMLTokenizer from nose.tools import eq_ from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC - - +from bleach.tests.tools import in_ def test_url_re(): def no_match(s): match = url_re.search(s) if match: - assert not match, 'matched %s' % s[slice(*match.span())] + assert not match, 'matched {0!s}'.format(s[slice(*match.span())]) yield no_match, 'just what i am looking for...it' @@ -21,36 +23,48 @@ def test_empty(): def test_simple_link(): - eq_('a http://example.com' + in_(('a http://example.com' ' link', + 'a http://example.com' + ' link'), linkify('a http://example.com link')) - eq_('a https://example.com' + in_(('a https://example.com' ' link', + 'a https://example.com' + ' link'), linkify('a https://example.com link')) - eq_('an example.com link', - linkify('an example.com link')) + in_(('a example.com link', + 'a example.com link'), + linkify('a example.com link')) def test_trailing_slash(): - eq_('http://example.com/', - linkify('http://example.com/')) - eq_('' - 'http://example.com/foo/', - linkify('http://example.com/foo/')) - eq_('' - 'http://example.com/foo/bar/', - linkify('http://example.com/foo/bar/')) + in_(('http://examp.com/', + 'http://examp.com/'), + linkify('http://examp.com/')) + in_(('' + 'http://example.com/foo/', + '' + 'http://example.com/foo/'), + linkify('http://example.com/foo/')) + in_(('' + 'http://example.com/foo/bar/', + '' + 'http://example.com/foo/bar/'), + linkify('http://example.com/foo/bar/')) def test_mangle_link(): """We can muck with the href attribute of the link.""" def filter_url(attrs, new=False): - attrs['href'] = (u'http://bouncer/?u=%s' % - urllib.quote_plus(attrs['href'])) + quoted = quote_plus(attrs['href']) + attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted) return attrs - eq_('' - 'http://example.com', + in_(('' + 'http://example.com', + '' + 'http://example.com'), linkify('http://example.com', DC + [filter_url])) @@ -76,13 +90,19 @@ def test_email_link(): 'james@example.com.au mailto', True, 'aussie james@example.com.au mailto'), # This is kind of a pathological case. I guess we do our best here. - ('email to ' - 'james@example.com', True, - 'email to james@example.com'), + (('email to ' + 'james@example.com', + 'email to ' + 'james@example.com'), + True, + 'email to james@example.com'), ) def _check(o, p, i): - eq_(o, linkify(i, parse_email=p)) + if isinstance(o, (list, tuple)): + in_(o, linkify(i, parse_email=p)) + else: + eq_(o, linkify(i, parse_email=p)) for (o, p, i) in tests: yield _check, o, p, i @@ -151,7 +171,8 @@ def test_set_attrs(): attrs['rev'] = 'canonical' return attrs - eq_('ex.mp', + in_(('ex.mp', + 'ex.mp'), linkify('ex.mp', [set_attr])) @@ -179,15 +200,19 @@ def test_stop_email(): def test_tlds(): - eq_('example.com', + in_(('example.com', + 'example.com'), linkify('example.com')) - eq_('example.co.uk', + in_(('example.co.uk', + 'example.co.uk'), linkify('example.co.uk')) - eq_('example.edu', + in_(('example.edu', + 'example.edu'), linkify('example.edu')) eq_('example.xxx', linkify('example.xxx')) eq_(' brie', linkify(' brie')) - eq_('bit.ly/fun', + in_(('bit.ly/fun', + 'bit.ly/fun'), linkify('bit.ly/fun')) @@ -197,61 +222,81 @@ def test_escaping(): def test_nofollow_off(): eq_('example.com', - linkify(u'example.com', [])) + linkify('example.com', [])) def test_link_in_html(): - eq_('http://yy.com', + in_(('http://yy.com', + 'http://yy.com'), linkify('http://yy.com')) - eq_('http://xx.com' - '', + + in_(('http://xx.com' + '', + 'http://xx.com' + ''), linkify('http://xx.com')) def test_links_https(): - eq_('https://yy.com', + in_(('https://yy.com', + 'https://yy.com'), linkify('https://yy.com')) def test_add_rel_nofollow(): """Verify that rel="nofollow" is added to an existing link""" - eq_('http://yy.com', + in_(('http://yy.com', + 'http://yy.com'), linkify('http://yy.com')) def test_url_with_path(): - eq_('' - 'http://example.com/path/to/file', + in_(('' + 'http://example.com/path/to/file', + '' + 'http://example.com/path/to/file'), linkify('http://example.com/path/to/file')) def test_link_ftp(): - eq_('' - 'ftp://ftp.mozilla.org/some/file', + in_(('' + 'ftp://ftp.mozilla.org/some/file', + '' + 'ftp://ftp.mozilla.org/some/file'), linkify('ftp://ftp.mozilla.org/some/file')) def test_link_query(): - eq_('' + in_(('' 'http://xx.com/?test=win', + '' + 'http://xx.com/?test=win'), linkify('http://xx.com/?test=win')) - eq_('' + in_(('' 'xx.com/?test=win', + '' + 'xx.com/?test=win'), linkify('xx.com/?test=win')) - eq_('' + in_(('' 'xx.com?test=win', + '' + 'xx.com?test=win'), linkify('xx.com?test=win')) def test_link_fragment(): - eq_('' - 'http://xx.com/path#frag', + in_(('' + 'http://xx.com/path#frag', + '' + 'http://xx.com/path#frag'), linkify('http://xx.com/path#frag')) def test_link_entities(): - eq_('' + in_(('' 'http://xx.com/?a=1&b=2', + '' + 'http://xx.com/?a=1&b=2'), linkify('http://xx.com/?a=1&b=2')) @@ -262,9 +307,12 @@ def test_escaped_html(): def test_link_http_complete(): - eq_('' 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f', + '' + 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'), linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f')) @@ -282,8 +330,10 @@ def test_javascript_url(): def test_unsafe_url(): """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning.""" - eq_('All your{"xx.yy.com/grover.png"}base are', + in_(('All your{"xx.yy.com/grover.png"}base are', + 'All your{"xx.yy.com/grover.png"}base are'), linkify('All your{"xx.yy.com/grover.png"}base are')) @@ -291,17 +341,23 @@ def test_skip_pre(): """Skip linkification in
 tags."""
     simple = 'http://xx.com 
http://xx.com
' linked = ('http://xx.com ' + '
http://xx.com
', + 'http://xx.com ' '
http://xx.com
') all_linked = ('http://xx.com ' '
http://xx.com'
+                  '
', + 'http://xx.com ' + '
http://xx.com'
                   '
') - eq_(linked, linkify(simple, skip_pre=True)) - eq_(all_linked, linkify(simple)) + in_(linked, linkify(simple, skip_pre=True)) + in_(all_linked, linkify(simple)) already_linked = '
xx
' - nofollowed = '
xx
' - eq_(nofollowed, linkify(already_linked)) - eq_(nofollowed, linkify(already_linked, skip_pre=True)) + nofollowed = ('
xx
', + '
xx
') + in_(nofollowed, linkify(already_linked)) + in_(nofollowed, linkify(already_linked, skip_pre=True)) def test_libgl(): @@ -311,11 +367,13 @@ def test_libgl(): def test_end_of_sentence(): """example.com. should match.""" - out = u'%s%s' - in_ = u'%s%s' + outs = ('{0!s}{1!s}', + '{0!s}{1!s}') + intxt = '{0!s}{1!s}' def check(u, p): - eq_(out % (u, u, p), linkify(in_ % (u, p))) + in_([out.format(u, p) for out in outs], + linkify(intxt.format(u, p))) tests = ( ('example.com', '.'), @@ -330,49 +388,50 @@ def test_end_of_sentence(): def test_end_of_clause(): """example.com/foo, shouldn't include the ,""" - eq_('ex.com/foo, bar', + in_(('ex.com/foo, bar', + 'ex.com/foo, bar'), linkify('ex.com/foo, bar')) def test_sarcasm(): """Jokes should crash.""" - dirty = u'Yeah right ' - clean = u'Yeah right <sarcasm/>' + dirty = 'Yeah right ' + clean = 'Yeah right <sarcasm/>' eq_(clean, linkify(dirty)) def test_wrapping_parentheses(): """URLs wrapped in parantheses should not include them.""" - out = u'%s%s%s' + outs = ('{0!s}{2!s}{3!s}', + '{0!s}{2!s}{3!s}') tests = ( - ('(example.com)', out % ('(', 'example.com', 'example.com', ')')), - ('(example.com/)', out % ('(', 'example.com/', 'example.com/', ')')), - ('(example.com/foo)', out % ('(', 'example.com/foo', - 'example.com/foo', ')')), - ('(((example.com/))))', out % ('(((', 'example.com/)', - 'example.com/)', ')))')), - ('example.com/))', out % ('', 'example.com/))', - 'example.com/))', '')), + ('(example.com)', ('(', 'example.com', 'example.com', ')')), + ('(example.com/)', ('(', 'example.com/', 'example.com/', ')')), + ('(example.com/foo)', ('(', 'example.com/foo', + 'example.com/foo', ')')), + ('(((example.com/))))', ('(((', 'example.com/)', + 'example.com/)', ')))')), + ('example.com/))', ('', 'example.com/))', 'example.com/))', '')), ('http://en.wikipedia.org/wiki/Test_(assessment)', - out % ('', 'en.wikipedia.org/wiki/Test_(assessment)', - 'http://en.wikipedia.org/wiki/Test_(assessment)', '')), + ('', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', '')), ('(http://en.wikipedia.org/wiki/Test_(assessment))', - out % ('(', 'en.wikipedia.org/wiki/Test_(assessment)', - 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')), + ('(', 'en.wikipedia.org/wiki/Test_(assessment)', + 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')), ('((http://en.wikipedia.org/wiki/Test_(assessment))', - out % ('((', 'en.wikipedia.org/wiki/Test_(assessment', - 'http://en.wikipedia.org/wiki/Test_(assessment', '))')), + ('((', 'en.wikipedia.org/wiki/Test_(assessment', + 'http://en.wikipedia.org/wiki/Test_(assessment', '))')), ('(http://en.wikipedia.org/wiki/Test_(assessment)))', - out % ('(', 'en.wikipedia.org/wiki/Test_(assessment))', - 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')), + ('(', 'en.wikipedia.org/wiki/Test_(assessment))', + 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')), ('(http://en.wikipedia.org/wiki/)Test_(assessment', - out % ('(', 'en.wikipedia.org/wiki/)Test_(assessment', - 'http://en.wikipedia.org/wiki/)Test_(assessment', '')), + ('(', 'en.wikipedia.org/wiki/)Test_(assessment', + 'http://en.wikipedia.org/wiki/)Test_(assessment', '')), ) def check(test, expected_output): - eq_(expected_output, linkify(test)) + in_([o.format(*expected_output) for o in outs], linkify(test)) for test, expected_output in tests: yield check, test, expected_output @@ -389,7 +448,9 @@ def test_ports(): ) def check(test, output): - eq_(u'{0}{1}'.format(*output), + outs = ('{0}{1}', + '{0}{1}') + in_([out.format(*output) for out in outs], linkify(test)) for test, output in tests: @@ -406,8 +467,9 @@ def test_tokenizer(): def test_ignore_bad_protocols(): eq_('foohttp://bar', linkify('foohttp://bar')) - eq_('foohttp://exampl.com', - linkify('foohttp://exampl.com')) + in_(('fohttp://exampl.com', + 'fohttp://exampl.com'), + linkify('fohttp://exampl.com')) def test_max_recursion_depth(): @@ -419,22 +481,29 @@ def test_max_recursion_depth(): def test_link_emails_and_urls(): """parse_email=True shouldn't prevent URLs from getting linkified.""" output = ('' + 'http://example.com ' + 'person@example.com', + '' 'http://example.com ' 'person@example.com') - eq_(output, linkify('http://example.com person@example.com', + in_(output, linkify('http://example.com person@example.com', parse_email=True)) def test_links_case_insensitive(): """Protocols and domain names are case insensitive.""" expect = ('' + 'HTTP://EXAMPLE.COM', + '' 'HTTP://EXAMPLE.COM') - eq_(expect, linkify('HTTP://EXAMPLE.COM')) + in_(expect, linkify('HTTP://EXAMPLE.COM')) def test_elements_inside_links(): - eq_(u'hello
', + in_(('hello
', + 'hello
'), linkify('hello
')) - eq_(u'bold hello
', + in_(('bold hello
', + 'bold hello
'), linkify('bold hello
')) -- cgit v1.2.3