From 11b8160e584470439c8c0b3ab51012c9300f6788 Mon Sep 17 00:00:00 2001
From: Christopher Baines
Date: Sun, 6 Dec 2015 13:31:51 +0000
Subject: Imported Upstream version 1.4.2

---
 bleach/__init__.py           |  36 ++++----
 bleach/callbacks.py          |   2 +-
 bleach/sanitizer.py          |   2 +-
 bleach/tests/test_links.py   | 193 +++++++++++++++++--------------------------
 bleach/tests/test_unicode.py |   6 +-
 bleach/tests/tools.py        |   2 +-
 6 files changed, 101 insertions(+), 140 deletions(-)

(limited to 'bleach')

diff --git a/bleach/__init__.py b/bleach/__init__.py
index b110972..1d8caa2 100644
--- a/bleach/__init__.py
+++ b/bleach/__init__.py
@@ -13,8 +13,8 @@ from .encoding import force_unicode
 from .sanitizer import BleachSanitizer
 
 
-VERSION = (1, 4, 0)
-__version__ = '1.4'
+VERSION = (1, 4, 2)
+__version__ = '.'.join([str(n) for n in VERSION])
 
 __all__ = ['clean', 'linkify']
 
@@ -51,16 +51,17 @@ TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
     im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
     kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil
     mk ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
-    net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn pr pro
-    ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl sm sn so
-    sr st su sv sy sz tc td tel tf tg th tj tk tl tm tn to tp tr travel tt
-    tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xn ye yt yu za zm
-    zw""".split()
-
-PROTOCOLS = HTMLSanitizer.acceptable_protocols
+    net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
+    pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
+    sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
+    tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
+    xn xxx ye yt yu za zm zw""".split()
 
+# Make sure that .com doesn't get matched by .co first
 TLDS.reverse()
 
+PROTOCOLS = HTMLSanitizer.acceptable_protocols
+
 url_re = re.compile(
     r"""\(* # Match any opening parentheses.
     \b(?
http://example.com'
+    eq_('a <a href="http://example.com" rel="nofollow">http://example.com'
         '</a> link',
-         'a <a rel="nofollow" href="http://example.com">http://example.com'
-         '</a> link'),
         linkify('a http://example.com link'))
-    in_(('a <a href="https://example.com" rel="nofollow">https://example.com'
+    eq_('a <a href="https://example.com" rel="nofollow">https://example.com'
         '</a> link',
-         'a <a rel="nofollow" href="https://example.com">https://example.com'
-         '</a> link'),
         linkify('a https://example.com link'))
-    in_(('a <a href="http://example.com" rel="nofollow">example.com</a> link',
-         'a <a rel="nofollow" href="http://example.com">example.com</a> link'),
+    eq_('a <a href="http://example.com" rel="nofollow">example.com</a> link',
        linkify('a example.com link'))


 def test_trailing_slash():
-    in_(('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
-         '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>'),
+    eq_('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
        linkify('http://examp.com/'))
-    in_(('<a href="http://example.com/foo/" rel="nofollow">'
-         'http://example.com/foo/</a>',
-         '<a rel="nofollow" href="http://example.com/foo/">'
-         'http://example.com/foo/</a>'),
+    eq_('<a href="http://example.com/foo/" rel="nofollow">'
+        'http://example.com/foo/</a>',
        linkify('http://example.com/foo/'))
-    in_(('<a href="http://example.com/foo/bar/" rel="nofollow">'
-         'http://example.com/foo/bar/</a>',
-         '<a rel="nofollow" href="http://example.com/foo/bar/">'
-         'http://example.com/foo/bar/</a>'),
+    eq_('<a href="http://example.com/foo/bar/" rel="nofollow">'
+        'http://example.com/foo/bar/</a>',
        linkify('http://example.com/foo/bar/'))


@@ -61,10 +50,8 @@ def test_mangle_link():
         attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted)
         return attrs

-    in_(('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
-         'http://example.com</a>',
-         '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">'
-         'http://example.com</a>'),
+    eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
+        'http://example.com</a>',
        linkify('http://example.com', DC + [filter_url]))


@@ -90,19 +77,18 @@ def test_email_link():
          'james@example.com.au</a> mailto', True,
          'aussie james@example.com.au mailto'),
         # This is kind of a pathological case. I guess we do our best here.
-        (('email to <a href="james@example.com" rel="nofollow">'
-          'james@example.com</a>',
-          'email to <a rel="nofollow" href="james@example.com">'
-          'james@example.com</a>'),
+        ('email to <a href="james@example.com" rel="nofollow">'
+         'james@example.com</a>',
          True,
          'email to <a href="james@example.com">james@example.com</a>'),
+        ('<br><a href="mailto:jinkyun@example.com">'
+         'jinkyun@example.com</a>',
+         True,
+         '<br>jinkyun@example.com'),
     )

     def _check(o, p, i):
-        if isinstance(o, (list, tuple)):
-            in_(o, linkify(i, parse_email=p))
-        else:
-            eq_(o, linkify(i, parse_email=p))
+        eq_(o, linkify(i, parse_email=p))

     for (o, p, i) in tests:
         yield _check, o, p, i
@@ -171,8 +157,7 @@ def test_set_attrs():
         attrs['rev'] = 'canonical'
         return attrs

-    in_(('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
-         '<a rev="canonical" href="http://ex.mp">ex.mp</a>'),
+    eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
        linkify('ex.mp', [set_attr]))


@@ -200,19 +185,19 @@ def test_stop_email():


 def test_tlds():
-    in_(('<a href="http://example.com" rel="nofollow">example.com</a>',
-         '<a rel="nofollow" href="http://example.com">example.com</a>'),
+    eq_('<a href="http://example.com" rel="nofollow">example.com</a>',
        linkify('example.com'))
-    in_(('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
-         '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>'),
+    eq_('<a href="http://example.co" rel="nofollow">example.co</a>',
+        linkify('example.co'))
+    eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
        linkify('example.co.uk'))
-    in_(('<a href="http://example.edu" rel="nofollow">example.edu</a>',
-         '<a rel="nofollow" href="http://example.edu">example.edu</a>'),
+    eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>',
        linkify('example.edu'))
-    eq_('example.xxx', linkify('example.xxx'))
+    eq_('<a href="http://example.xxx" rel="nofollow">example.xxx</a>',
+        linkify('example.xxx'))
+    eq_('example.yyy', linkify('example.yyy'))
     eq_(' brie', linkify(' brie'))
-    in_(('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
-         '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>'),
+    eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
        linkify('bit.ly/fun'))


@@ -226,77 +211,58 @@ def test_nofollow_off():


 def test_link_in_html():
-    in_(('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
-         '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>'),
+    eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
        linkify('<i>http://yy.com</i>'))

-    in_(('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
-         '</a></strong></em>',
-         '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com'
-         '</a></strong></em>'),
+    eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
+        '</a></strong></em>',
        linkify('<em><strong>http://xx.com</strong></em>'))


 def test_links_https():
-    in_(('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
-         '<a rel="nofollow" href="https://yy.com">https://yy.com</a>'),
+    eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
        linkify('https://yy.com'))


 def test_add_rel_nofollow():
     """Verify that rel="nofollow" is added to an existing link"""
-    in_(('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
-         '<a rel="nofollow" href="http://yy.com">http://yy.com</a>'),
+    eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
        linkify('<a href="http://yy.com">http://yy.com</a>'))


 def test_url_with_path():
-    in_(('<a href="http://example.com/path/to/file" rel="nofollow">'
-         'http://example.com/path/to/file</a>',
-         '<a rel="nofollow" href="http://example.com/path/to/file">'
-         'http://example.com/path/to/file</a>'),
+    eq_('<a href="http://example.com/path/to/file" rel="nofollow">'
+        'http://example.com/path/to/file</a>',
        linkify('http://example.com/path/to/file'))


 def test_link_ftp():
-    in_(('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
-         'ftp://ftp.mozilla.org/some/file</a>',
-         '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">'
-         'ftp://ftp.mozilla.org/some/file</a>'),
+    eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
+        'ftp://ftp.mozilla.org/some/file</a>',
        linkify('ftp://ftp.mozilla.org/some/file'))


 def test_link_query():
-    in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
+    eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
         'http://xx.com/?test=win</a>',
-         '<a rel="nofollow" href="http://xx.com/?test=win">'
-         'http://xx.com/?test=win</a>'),
         linkify('http://xx.com/?test=win'))
-    in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
+    eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
         'xx.com/?test=win</a>',
-         '<a rel="nofollow" href="http://xx.com/?test=win">'
-         'xx.com/?test=win</a>'),
         linkify('xx.com/?test=win'))
-    in_(('<a href="http://xx.com?test=win" rel="nofollow">'
+    eq_('<a href="http://xx.com?test=win" rel="nofollow">'
         'xx.com?test=win</a>',
-         '<a rel="nofollow" href="http://xx.com?test=win">'
-         'xx.com?test=win</a>'),
         linkify('xx.com?test=win'))


 def test_link_fragment():
-    in_(('<a href="http://xx.com/path#frag" rel="nofollow">'
-         'http://xx.com/path#frag</a>',
-         '<a rel="nofollow" href="http://xx.com/path#frag">'
-         'http://xx.com/path#frag</a>'),
+    eq_('<a href="http://xx.com/path#frag" rel="nofollow">'
+        'http://xx.com/path#frag</a>',
        linkify('http://xx.com/path#frag'))


 def test_link_entities():
-    in_(('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
+    eq_('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
         'http://xx.com/?a=1&amp;b=2</a>',
-         '<a rel="nofollow" href="http://xx.com/?a=1&amp;b=2">'
-         'http://xx.com/?a=1&amp;b=2</a>'),
         linkify('http://xx.com/?a=1&b=2'))


@@ -307,12 +273,9 @@ def test_escaped_html():


 def test_link_http_complete():
-    in_(('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f" rel="nofollow">'
-         'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>',
-         '<a rel="nofollow" href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f">'
-         'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>'),
+    eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f" rel="nofollow">'
+        'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>',
         linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'))


@@ -330,10 +293,8 @@ def test_javascript_url():

 def test_unsafe_url():
     """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning."""
-    in_(('All your{"<a href="http://xx.yy.com/grover.png" rel="nofollow">xx.yy.com/grover.png</a>"}base are',
-         'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png">xx.yy.com/grover.png</a>"}base are'),
+    eq_('All your{"<a href="http://xx.yy.com/grover.png" rel="nofollow">xx.yy.com/grover.png</a>"}base are',
        linkify('All your{"xx.yy.com/grover.png"}base are'))


@@ -341,23 +302,17 @@ def test_skip_pre():
     """Skip linkification in <pre> tags."""
     simple = 'http://xx.com <pre>http://xx.com</pre>'
     linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
-              '<pre>http://xx.com</pre>',
-              '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
               '<pre>http://xx.com</pre>')
     all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
                   '<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
-                  '</a></pre>',
-                  '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
-                  '<pre><a rel="nofollow" href="http://xx.com">http://xx.com'
                   '</a></pre>')
-    in_(linked, linkify(simple, skip_pre=True))
-    in_(all_linked, linkify(simple))
+    eq_(linked, linkify(simple, skip_pre=True))
+    eq_(all_linked, linkify(simple))

     already_linked = '<pre><a href="/foo">xx</a></pre>'
-    nofollowed = ('<pre><a href="/foo" rel="nofollow">xx</a></pre>',
-                  '<pre><a rel="nofollow" href="/foo">xx</a></pre>')
-    in_(nofollowed, linkify(already_linked))
-    in_(nofollowed, linkify(already_linked, skip_pre=True))
+    nofollowed = '<pre><a href="/foo" rel="nofollow">xx</a></pre>'
+    eq_(nofollowed, linkify(already_linked))
+    eq_(nofollowed, linkify(already_linked, skip_pre=True))


 def test_libgl():
@@ -367,12 +322,11 @@ def test_libgl():

 def test_end_of_sentence():
     """example.com. should match."""
-    outs = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}',
-            '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}')
+    out = '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}'
     intxt = '{0!s}{1!s}'

     def check(u, p):
-        in_([out.format(u, p) for out in outs],
+        eq_(out.format(u, p),
             linkify(intxt.format(u, p)))

     tests = (
@@ -388,8 +342,7 @@ def test_end_of_sentence():

 def test_end_of_clause():
     """example.com/foo, shouldn't include the ,"""
-    in_(('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
-         '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar'),
+    eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
        linkify('ex.com/foo, bar'))


@@ -402,8 +355,7 @@ def test_sarcasm():

 def test_wrapping_parentheses():
     """URLs wrapped in parantheses should not include them."""
-    outs = ('{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}',
-            '{0!s}<a rel="nofollow" href="http://{1!s}">{2!s}</a>{3!s}')
+    out = '{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}'

     tests = (
         ('(example.com)', ('(', 'example.com', 'example.com', ')')),
@@ -431,12 +383,17 @@ def test_wrapping_parentheses():
     )

     def check(test, expected_output):
-        in_([o.format(*expected_output) for o in outs], linkify(test))
+        eq_(out.format(*expected_output), linkify(test))

     for test, expected_output in tests:
         yield check, test, expected_output


+def test_parentheses_with_removing():
+    expect = '(test.py)'
+    eq_(expect, linkify(expect, callbacks=[lambda *a: None]))
+
+
 def test_ports():
     """URLs can contain port numbers."""
     tests = (
@@ -448,9 +405,8 @@ def test_ports():
     )

     def check(test, output):
-        outs = ('<a href="{0}" rel="nofollow">{0}</a>{1}',
-                '<a rel="nofollow" href="{0}">{0}</a>{1}')
-        in_([out.format(*output) for out in outs],
+        out = '<a href="{0}" rel="nofollow">{0}</a>{1}'
+        eq_(out.format(*output),
             linkify(test))

     for test, output in tests:
@@ -467,8 +423,7 @@ def test_tokenizer():

 def test_ignore_bad_protocols():
     eq_('foohttp://bar', linkify('foohttp://bar'))
-    in_(('fo<a href="http://exampl.com" rel="nofollow">http://exampl.com</a>',
-         'fo<a rel="nofollow" href="http://exampl.com">http://exampl.com</a>'),
+    eq_('fo<a href="http://exampl.com" rel="nofollow">http://exampl.com</a>',
        linkify('fohttp://exampl.com'))


@@ -481,29 +436,29 @@ def test_max_recursion_depth():
 def test_link_emails_and_urls():
     """parse_email=True shouldn't prevent URLs from getting linkified."""
     output = ('<a href="http://example.com" rel="nofollow">'
-              'http://example.com</a> <a href="mailto:person@example.com">'
-              'person@example.com</a>',
-              '<a rel="nofollow" href="http://example.com">'
               'http://example.com</a> <a href="mailto:person@example.com">'
               'person@example.com</a>')
-    in_(output, linkify('http://example.com person@example.com',
+    eq_(output, linkify('http://example.com person@example.com',
                         parse_email=True))


 def test_links_case_insensitive():
     """Protocols and domain names are case insensitive."""
     expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
-              'HTTP://EXAMPLE.COM</a>',
-              '<a rel="nofollow" href="HTTP://EXAMPLE.COM">'
               'HTTP://EXAMPLE.COM</a>')
-    in_(expect, linkify('HTTP://EXAMPLE.COM'))
+    eq_(expect, linkify('HTTP://EXAMPLE.COM'))


 def test_elements_inside_links():
-    in_(('<a href="#" rel="nofollow">hello<br></a>',
-         '<a rel="nofollow" href="#">hello<br></a>'),
+    eq_('<a href="#" rel="nofollow">hello<br></a>',
         linkify('<a href="#">hello<br></a>'))

-    in_(('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
-         '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>'),
+    eq_('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
         linkify('<a href="#"><strong>bold</strong> hello<br></a>'))
+
+
+def test_remove_first_childlink():
+    expect = '<p>something</p>'
+    callbacks = [lambda *a: None]
+    eq_(expect,
+        linkify('<p><a href="/foo">something</a></p>', callbacks=callbacks))
diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py
index 796924d..723df5f 100644
--- a/bleach/tests/test_unicode.py
+++ b/bleach/tests/test_unicode.py
@@ -30,9 +30,9 @@ def test_mixed():

 def test_mixed_linkify():
     in_(('Домашняя '
-         'http://example.com ヘルプとチュートリアル',
-         'Домашняя '
+         'http://example.com ヘルプとチュートリアル',
+         'Домашняя '
          'http://example.com ヘルプとチュートリアル'),
         linkify('Домашняя http://example.com ヘルプとチュートリアル'))


diff --git a/bleach/tests/tools.py b/bleach/tests/tools.py
index 87f926c..3ae047e 100644
--- a/bleach/tests/tools.py
+++ b/bleach/tests/tools.py
@@ -3,5 +3,5 @@ def in_(l, a, msg=None):
     """Shorthand for 'assert a in l, "%r not in %r" % (a, l)
     """
-    if not a in l:
+    if a not in l:
         raise AssertionError(msg or "%r not in %r" % (a, l))
-- 
cgit v1.2.3