diff options
author | Christopher Baines <mail@cbaines.net> | 2015-12-06 13:31:51 +0000 |
---|---|---|
committer | Christopher Baines <mail@cbaines.net> | 2015-12-06 13:31:51 +0000 |
commit | 4cf150e98a62a0bcc307065a050f7d3a592289a2 (patch) | |
tree | 584483fb7e5e200b91b8f4a09a61d8253a5b597c | |
parent | 25893d8e7894f3e77f3f8ce9a6b84132968c15a9 (diff) | |
parent | 11b8160e584470439c8c0b3ab51012c9300f6788 (diff) | |
download | python-bleach-4cf150e98a62a0bcc307065a050f7d3a592289a2.tar python-bleach-4cf150e98a62a0bcc307065a050f7d3a592289a2.tar.gz |
Merge tag 'upstream/1.4.2'
Upstream version 1.4.2
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | .travis.yml | 14 | ||||
-rw-r--r-- | CHANGES | 18 | ||||
-rw-r--r-- | CONTRIBUTING.rst | 7 | ||||
-rw-r--r-- | README.rst | 19 | ||||
-rw-r--r-- | bleach/__init__.py | 36 | ||||
-rw-r--r-- | bleach/callbacks.py | 2 | ||||
-rw-r--r-- | bleach/sanitizer.py | 2 | ||||
-rw-r--r-- | bleach/tests/test_links.py | 193 | ||||
-rw-r--r-- | bleach/tests/test_unicode.py | 6 | ||||
-rw-r--r-- | bleach/tests/tools.py | 2 | ||||
-rw-r--r-- | docs/conf.py | 6 | ||||
-rw-r--r-- | docs/goals.rst | 3 | ||||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | setup.cfg | 2 | ||||
-rw-r--r-- | setup.py | 25 | ||||
-rw-r--r-- | tox.ini | 4 |
17 files changed, 198 insertions, 151 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96e22b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +*.pyo +*.pyc +pip-log.txt +.coverage +dist +*.egg-info +.noseids +build +.tox diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..193f70a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +sudo: false +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "pypy" +install: + - "pip install -r requirements.txt" +script: + - nosetests + - flake8 bleach/ @@ -1,10 +1,26 @@ Bleach Changes ============== +Version 1.4.2 +------------- + +- Fix hang in linkify with parse_email=True. #124 +- Fix crash in linkify when removing a link that is a first-child. #136 +- Updated TLDs. +- Don't remove exterior brackets when linkifying. #146 + + +Version 1.4.1 +------------- + +- Consistent order of attributes in output. +- Python 3.4. + + Version 1.4 ----------- -- Update linkify to use etree type Treeewalker instead of simpletree. +- Update linkify to use etree type Treewalker instead of simpletree. - Updated html5lib to version >= 0.999. - Update all code to be compatible with Python 3 and 2 using six. - Switch to Apache License. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..015ceb8 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,7 @@ +Reporting Security Issues +========================= + +If you believe you have found an exploit in a patched version of Bleach, +master or the latest released version on PyPI, **please do not post it +in a GitHub issue**. Please contact me privately, at +`me+bleach@jamessocol.com <mailto:me+bleach@jamessocol.com>`. @@ -2,6 +2,12 @@ Bleach ====== +.. image:: https://travis-ci.org/jsocol/bleach.png?branch=master + :target: https://travis-ci.org/jsocol/bleach + +.. 
image:: https://badge.fury.io/py/Bleach.svg + :target: http://badge.fury.io/py/Bleach + Bleach is an HTML sanitizing library that escapes or strips markup and attributes based on a white list. Bleach can also linkify text safely, applying filters that Django's ``urlize`` filter cannot, and optionally setting ``rel`` @@ -20,10 +26,21 @@ The version on GitHub_ is the most up-to-date and contains the latest bug fixes. You can find full documentation on `ReadTheDocs`_. +Reporting Security Issues +========================= + +If you believe you have found an exploit in a patched version of Bleach, +master or the latest released version on PyPI, **please do not post it +in a GitHub issue**. Please contact me privately, at +`me+bleach@jamessocol.com <mailto:me+bleach@jamessocol.com>`. + + Basic Use ========= -The simplest way to use Bleach is:: +The simplest way to use Bleach is: + +.. code-block:: python >>> import bleach diff --git a/bleach/__init__.py b/bleach/__init__.py index b110972..1d8caa2 100644 --- a/bleach/__init__.py +++ b/bleach/__init__.py @@ -13,8 +13,8 @@ from .encoding import force_unicode from .sanitizer import BleachSanitizer -VERSION = (1, 4, 0) -__version__ = '1.4' +VERSION = (1, 4, 2) +__version__ = '.'.join([str(n) for n in VERSION]) __all__ = ['clean', 'linkify'] @@ -51,16 +51,17 @@ TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne - net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn pr pro - ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl sm sn so - sr st su sv sy sz tc td tel tf tg th tj tk tl tm tn to tp tr travel tt - tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xn ye yt yu za zm - zw""".split() - -PROTOCOLS = HTMLSanitizer.acceptable_protocols + net nf ng ni nl no np nr nu 
nz om org pa pe pf pg ph pk pl pm pn post + pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl + sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to + tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws + xn xxx ye yt yu za zm zw""".split() +# Make sure that .com doesn't get matched by .co first TLDS.reverse() +PROTOCOLS = HTMLSanitizer.acceptable_protocols + url_re = re.compile( r"""\(* # Match any opening parentheses. \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http:// @@ -145,14 +146,16 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False, # capture any non-tag text at the start of the fragment if new_tree.text: if index == 0: + tree.text = tree.text or '' tree.text += new_tree.text else: - tree[index-1].tail += new_tree.text + tree[index - 1].tail = tree[index - 1].tail or '' + tree[index - 1].tail += new_tree.text # the put in the tagged elements into the old tree for n in new_tree: if n.tag == ETREE_TAG('a'): _seen.add(n) - tree.insert(index+count, n) + tree.insert(index + count, n) count += 1 # if we got a node to remove... if node is not None: @@ -252,15 +255,17 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False, if new_tail != node.tail: node.tail = '' adj = replace_nodes(tree, new_tail, None, - current_child+1) - #insert the new nodes made from my tail into + current_child + 1) + # Insert the new nodes made from my tail into # the tree right after me. 
current_child+1 children += adj + continue new_tail = re.sub(url_re, link_repl, new_tail) if new_tail != old_tail: node.tail = '' - adj = replace_nodes(tree, new_tail, None, current_child+1) + adj = replace_nodes(tree, new_tail, None, + current_child + 1) children += adj if node.tag == ETREE_TAG('a') and not (node in _seen): @@ -342,7 +347,7 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False, link = apply_callbacks(link, True) if link is None: - return url + return '(' * open_brackets + url + ')' * close_brackets _text = link.pop('_text') _href = link.pop('href') @@ -373,5 +378,6 @@ def _serialize(domtree): walker = html5lib.treewalkers.getTreeWalker('etree') stream = walker(domtree) serializer = HTMLSerializer(quote_attr_values=True, + alphabetical_attributes=True, omit_optional_tags=False) return serializer.render(stream) diff --git a/bleach/callbacks.py b/bleach/callbacks.py index 227f089..3cb82c2 100644 --- a/bleach/callbacks.py +++ b/bleach/callbacks.py @@ -6,7 +6,7 @@ def nofollow(attrs, new=False): if attrs['href'].startswith('mailto:'): return attrs rel = [x for x in attrs.get('rel', '').split(' ') if x] - if not 'nofollow' in [x.lower() for x in rel]: + if 'nofollow' not in [x.lower() for x in rel]: rel.append('nofollow') attrs['rel'] = ' '.join(rel) diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py index 88246f8..eec6659 100644 --- a/bleach/sanitizer.py +++ b/bleach/sanitizer.py @@ -49,7 +49,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin): if callable(allowed_attributes) else name in allowed_attributes)]) for attr in self.attr_val_is_uri: - if not attr in attrs: + if attr not in attrs: continue val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescape(attrs[attr])).lower() diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py index abf889d..62da8d1 100644 --- a/bleach/tests/test_links.py +++ b/bleach/tests/test_links.py @@ -7,7 +7,6 @@ from html5lib.tokenizer import HTMLTokenizer from nose.tools import eq_ from 
bleach import linkify, url_re, DEFAULT_CALLBACKS as DC -from bleach.tests.tools import in_ def test_url_re(): @@ -23,34 +22,24 @@ def test_empty(): def test_simple_link(): - in_(('a <a href="http://example.com" rel="nofollow">http://example.com' + eq_('a <a href="http://example.com" rel="nofollow">http://example.com' '</a> link', - 'a <a rel="nofollow" href="http://example.com">http://example.com' - '</a> link'), linkify('a http://example.com link')) - in_(('a <a href="https://example.com" rel="nofollow">https://example.com' + eq_('a <a href="https://example.com" rel="nofollow">https://example.com' '</a> link', - 'a <a rel="nofollow" href="https://example.com">https://example.com' - '</a> link'), linkify('a https://example.com link')) - in_(('a <a href="http://example.com" rel="nofollow">example.com</a> link', - 'a <a rel="nofollow" href="http://example.com">example.com</a> link'), + eq_('a <a href="http://example.com" rel="nofollow">example.com</a> link', linkify('a example.com link')) def test_trailing_slash(): - in_(('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>', - '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>'), + eq_('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>', linkify('http://examp.com/')) - in_(('<a href="http://example.com/foo/" rel="nofollow">' - 'http://example.com/foo/</a>', - '<a rel="nofollow" href="http://example.com/foo/">' - 'http://example.com/foo/</a>'), + eq_('<a href="http://example.com/foo/" rel="nofollow">' + 'http://example.com/foo/</a>', linkify('http://example.com/foo/')) - in_(('<a href="http://example.com/foo/bar/" rel="nofollow">' - 'http://example.com/foo/bar/</a>', - '<a rel="nofollow" href="http://example.com/foo/bar/">' - 'http://example.com/foo/bar/</a>'), + eq_('<a href="http://example.com/foo/bar/" rel="nofollow">' + 'http://example.com/foo/bar/</a>', linkify('http://example.com/foo/bar/')) @@ -61,10 +50,8 @@ def test_mangle_link(): attrs['href'] = 
'http://bouncer/?u={0!s}'.format(quoted) return attrs - in_(('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">' - 'http://example.com</a>', - '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">' - 'http://example.com</a>'), + eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">' + 'http://example.com</a>', linkify('http://example.com', DC + [filter_url])) @@ -90,19 +77,18 @@ def test_email_link(): 'james@example.com.au</a> mailto', True, 'aussie james@example.com.au mailto'), # This is kind of a pathological case. I guess we do our best here. - (('email to <a href="james@example.com" rel="nofollow">' - 'james@example.com</a>', - 'email to <a rel="nofollow" href="james@example.com">' - 'james@example.com</a>'), + ('email to <a href="james@example.com" rel="nofollow">' + 'james@example.com</a>', True, 'email to <a href="james@example.com">james@example.com</a>'), + ('<br><a href="mailto:jinkyun@example.com">' + 'jinkyun@example.com</a>', + True, + '<br>jinkyun@example.com'), ) def _check(o, p, i): - if isinstance(o, (list, tuple)): - in_(o, linkify(i, parse_email=p)) - else: - eq_(o, linkify(i, parse_email=p)) + eq_(o, linkify(i, parse_email=p)) for (o, p, i) in tests: yield _check, o, p, i @@ -171,8 +157,7 @@ def test_set_attrs(): attrs['rev'] = 'canonical' return attrs - in_(('<a href="http://ex.mp" rev="canonical">ex.mp</a>', - '<a rev="canonical" href="http://ex.mp">ex.mp</a>'), + eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>', linkify('ex.mp', [set_attr])) @@ -200,19 +185,19 @@ def test_stop_email(): def test_tlds(): - in_(('<a href="http://example.com" rel="nofollow">example.com</a>', - '<a rel="nofollow" href="http://example.com">example.com</a>'), + eq_('<a href="http://example.com" rel="nofollow">example.com</a>', linkify('example.com')) - in_(('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>', - '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>'), + eq_('<a 
href="http://example.co" rel="nofollow">example.co</a>', + linkify('example.co')) + eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>', linkify('example.co.uk')) - in_(('<a href="http://example.edu" rel="nofollow">example.edu</a>', - '<a rel="nofollow" href="http://example.edu">example.edu</a>'), + eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>', linkify('example.edu')) - eq_('example.xxx', linkify('example.xxx')) + eq_('<a href="http://example.xxx" rel="nofollow">example.xxx</a>', + linkify('example.xxx')) + eq_('example.yyy', linkify('example.yyy')) eq_(' brie', linkify(' brie')) - in_(('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>', - '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>'), + eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>', linkify('bit.ly/fun')) @@ -226,77 +211,58 @@ def test_nofollow_off(): def test_link_in_html(): - in_(('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>', - '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>'), + eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>', linkify('<i>http://yy.com</i>')) - in_(('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com' - '</a></strong></em>', - '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com' - '</a></strong></em>'), + eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com' + '</a></strong></em>', linkify('<em><strong>http://xx.com</strong></em>')) def test_links_https(): - in_(('<a href="https://yy.com" rel="nofollow">https://yy.com</a>', - '<a rel="nofollow" href="https://yy.com">https://yy.com</a>'), + eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>', linkify('https://yy.com')) def test_add_rel_nofollow(): """Verify that rel="nofollow" is added to an existing link""" - in_(('<a href="http://yy.com" rel="nofollow">http://yy.com</a>', - '<a rel="nofollow" href="http://yy.com">http://yy.com</a>'), + eq_('<a 
href="http://yy.com" rel="nofollow">http://yy.com</a>', linkify('<a href="http://yy.com">http://yy.com</a>')) def test_url_with_path(): - in_(('<a href="http://example.com/path/to/file" rel="nofollow">' - 'http://example.com/path/to/file</a>', - '<a rel="nofollow" href="http://example.com/path/to/file">' - 'http://example.com/path/to/file</a>'), + eq_('<a href="http://example.com/path/to/file" rel="nofollow">' + 'http://example.com/path/to/file</a>', linkify('http://example.com/path/to/file')) def test_link_ftp(): - in_(('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">' - 'ftp://ftp.mozilla.org/some/file</a>', - '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">' - 'ftp://ftp.mozilla.org/some/file</a>'), + eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">' + 'ftp://ftp.mozilla.org/some/file</a>', linkify('ftp://ftp.mozilla.org/some/file')) def test_link_query(): - in_(('<a href="http://xx.com/?test=win" rel="nofollow">' + eq_('<a href="http://xx.com/?test=win" rel="nofollow">' 'http://xx.com/?test=win</a>', - '<a rel="nofollow" href="http://xx.com/?test=win">' - 'http://xx.com/?test=win</a>'), linkify('http://xx.com/?test=win')) - in_(('<a href="http://xx.com/?test=win" rel="nofollow">' + eq_('<a href="http://xx.com/?test=win" rel="nofollow">' 'xx.com/?test=win</a>', - '<a rel="nofollow" href="http://xx.com/?test=win">' - 'xx.com/?test=win</a>'), linkify('xx.com/?test=win')) - in_(('<a href="http://xx.com?test=win" rel="nofollow">' + eq_('<a href="http://xx.com?test=win" rel="nofollow">' 'xx.com?test=win</a>', - '<a rel="nofollow" href="http://xx.com?test=win">' - 'xx.com?test=win</a>'), linkify('xx.com?test=win')) def test_link_fragment(): - in_(('<a href="http://xx.com/path#frag" rel="nofollow">' - 'http://xx.com/path#frag</a>', - '<a rel="nofollow" href="http://xx.com/path#frag">' - 'http://xx.com/path#frag</a>'), + eq_('<a href="http://xx.com/path#frag" rel="nofollow">' + 'http://xx.com/path#frag</a>', 
linkify('http://xx.com/path#frag')) def test_link_entities(): - in_(('<a href="http://xx.com/?a=1&b=2" rel="nofollow">' + eq_('<a href="http://xx.com/?a=1&b=2" rel="nofollow">' 'http://xx.com/?a=1&b=2</a>', - '<a rel="nofollow" href="http://xx.com/?a=1&b=2">' - 'http://xx.com/?a=1&b=2</a>'), linkify('http://xx.com/?a=1&b=2')) @@ -307,12 +273,9 @@ def test_escaped_html(): def test_link_http_complete(): - in_(('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d' + eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d' '&e#f" rel="nofollow">' 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>', - '<a rel="nofollow" href="https://user:pass@ftp.mozilla.org/x/' - 'y.exe?a=b&c=d&e#f">' - 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'), linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f')) @@ -330,10 +293,8 @@ def test_javascript_url(): def test_unsafe_url(): """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning.""" - in_(('All your{"<a href="http://xx.yy.com/grover.png" ' - 'rel="nofollow">xx.yy.com/grover.png</a>"}base are', - 'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"' - '>xx.yy.com/grover.png</a>"}base are'), + eq_('All your{"<a href="http://xx.yy.com/grover.png" ' + 'rel="nofollow">xx.yy.com/grover.png</a>"}base are', linkify('All your{"xx.yy.com/grover.png"}base are')) @@ -341,23 +302,17 @@ def test_skip_pre(): """Skip linkification in <pre> tags.""" simple = 'http://xx.com <pre>http://xx.com</pre>' linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' - '<pre>http://xx.com</pre>', - '<a rel="nofollow" href="http://xx.com">http://xx.com</a> ' '<pre>http://xx.com</pre>') all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> ' '<pre><a href="http://xx.com" rel="nofollow">http://xx.com' - '</a></pre>', - '<a rel="nofollow" href="http://xx.com">http://xx.com</a> ' - '<pre><a rel="nofollow" href="http://xx.com">http://xx.com' '</a></pre>') - 
in_(linked, linkify(simple, skip_pre=True)) - in_(all_linked, linkify(simple)) + eq_(linked, linkify(simple, skip_pre=True)) + eq_(all_linked, linkify(simple)) already_linked = '<pre><a href="http://xx.com">xx</a></pre>' - nofollowed = ('<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>', - '<pre><a rel="nofollow" href="http://xx.com">xx</a></pre>') - in_(nofollowed, linkify(already_linked)) - in_(nofollowed, linkify(already_linked, skip_pre=True)) + nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>' + eq_(nofollowed, linkify(already_linked)) + eq_(nofollowed, linkify(already_linked, skip_pre=True)) def test_libgl(): @@ -367,12 +322,11 @@ def test_libgl(): def test_end_of_sentence(): """example.com. should match.""" - outs = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}', - '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}') + out = '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}' intxt = '{0!s}{1!s}' def check(u, p): - in_([out.format(u, p) for out in outs], + eq_(out.format(u, p), linkify(intxt.format(u, p))) tests = ( @@ -388,8 +342,7 @@ def test_end_of_sentence(): def test_end_of_clause(): """example.com/foo, shouldn't include the ,""" - in_(('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar', - '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar'), + eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar', linkify('ex.com/foo, bar')) @@ -402,8 +355,7 @@ def test_sarcasm(): def test_wrapping_parentheses(): """URLs wrapped in parantheses should not include them.""" - outs = ('{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}', - '{0!s}<a rel="nofollow" href="http://{1!s}">{2!s}</a>{3!s}') + out = '{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}' tests = ( ('(example.com)', ('(', 'example.com', 'example.com', ')')), @@ -431,12 +383,17 @@ def test_wrapping_parentheses(): ) def check(test, expected_output): - in_([o.format(*expected_output) for o in outs], 
linkify(test)) + eq_(out.format(*expected_output), linkify(test)) for test, expected_output in tests: yield check, test, expected_output +def test_parentheses_with_removing(): + expect = '(test.py)' + eq_(expect, linkify(expect, callbacks=[lambda *a: None])) + + def test_ports(): """URLs can contain port numbers.""" tests = ( @@ -448,9 +405,8 @@ def test_ports(): ) def check(test, output): - outs = ('<a href="{0}" rel="nofollow">{0}</a>{1}', - '<a rel="nofollow" href="{0}">{0}</a>{1}') - in_([out.format(*output) for out in outs], + out = '<a href="{0}" rel="nofollow">{0}</a>{1}' + eq_(out.format(*output), linkify(test)) for test, output in tests: @@ -467,8 +423,7 @@ def test_tokenizer(): def test_ignore_bad_protocols(): eq_('foohttp://bar', linkify('foohttp://bar')) - in_(('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>', - 'fohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>'), + eq_('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>', linkify('fohttp://exampl.com')) @@ -482,28 +437,28 @@ def test_link_emails_and_urls(): """parse_email=True shouldn't prevent URLs from getting linkified.""" output = ('<a href="http://example.com" rel="nofollow">' 'http://example.com</a> <a href="mailto:person@example.com">' - 'person@example.com</a>', - '<a rel="nofollow" href="http://example.com">' - 'http://example.com</a> <a href="mailto:person@example.com">' 'person@example.com</a>') - in_(output, linkify('http://example.com person@example.com', + eq_(output, linkify('http://example.com person@example.com', parse_email=True)) def test_links_case_insensitive(): """Protocols and domain names are case insensitive.""" expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">' - 'HTTP://EXAMPLE.COM</a>', - '<a rel="nofollow" href="HTTP://EXAMPLE.COM">' 'HTTP://EXAMPLE.COM</a>') - in_(expect, linkify('HTTP://EXAMPLE.COM')) + eq_(expect, linkify('HTTP://EXAMPLE.COM')) def test_elements_inside_links(): - in_(('<a href="#" 
rel="nofollow">hello<br></a>', - '<a rel="nofollow" href="#">hello<br></a>'), + eq_('<a href="#" rel="nofollow">hello<br></a>', linkify('<a href="#">hello<br></a>')) - in_(('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>', - '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>'), + eq_('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>', linkify('<a href="#"><strong>bold</strong> hello<br></a>')) + + +def test_remove_first_childlink(): + expect = '<p>something</p>' + callbacks = [lambda *a: None] + eq_(expect, + linkify('<p><a href="/foo">something</a></p>', callbacks=callbacks)) diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py index 796924d..723df5f 100644 --- a/bleach/tests/test_unicode.py +++ b/bleach/tests/test_unicode.py @@ -30,9 +30,9 @@ def test_mixed(): def test_mixed_linkify(): in_(('Домашняя <a href="http://example.com" rel="nofollow">' - 'http://example.com</a> ヘルプとチュートリアル', - 'Домашняя <a rel="nofollow" href="http://example.com">' - 'http://example.com</a> ヘルプとチュートリアル'), + 'http://example.com</a> ヘルプとチュートリアル', + 'Домашняя <a rel="nofollow" href="http://example.com">' + 'http://example.com</a> ヘルプとチュートリアル'), linkify('Домашняя http://example.com ヘルプとチュートリアル')) diff --git a/bleach/tests/tools.py b/bleach/tests/tools.py index 87f926c..3ae047e 100644 --- a/bleach/tests/tools.py +++ b/bleach/tests/tools.py @@ -3,5 +3,5 @@ def in_(l, a, msg=None): """Shorthand for 'assert a in l, "%r not in %r" % (a, l) """ - if not a in l: + if a not in l: raise AssertionError(msg or "%r not in %r" % (a, l)) diff --git a/docs/conf.py b/docs/conf.py index 96b2fc8..78bee32 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -41,16 +41,16 @@ master_doc = 'index' # General information about the project. 
project = u'Bleach' -copyright = u'2012, James Socol' +copyright = u'2012-2014, James Socol' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '1.3' +version = '1.4' # The full version, including alpha/beta/rc tags. -release = '1.3.1' +release = '1.4.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/goals.rst b/docs/goals.rst index 5477f9c..d62d54b 100644 --- a/docs/goals.rst +++ b/docs/goals.rst @@ -66,6 +66,9 @@ non-goal use cases include: you have to allow so many tags that a blacklist approach (e.g. forbidding ``<script>`` or ``<object>``) may be more appropriate. +* **Removing *all* HTML.** There are much faster tools available if you want to + remove or escape all HTML from a document. + * **Cleaning up after trusted users.** Bleach is powerful but it is not fast. If you trust your users, trust them and don't rely on Bleach to clean up their mess. 
diff --git a/requirements.txt b/requirements.txt index d6e9357..a4c0b99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +ordereddict six html5lib>=0.999 # Requirements to run the test suite: diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..81cd366 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +ignore = E731,W503 @@ -1,8 +1,22 @@ from setuptools import setup, find_packages +install_requires = [ + 'six', + 'html5lib>=0.999', +] + +try: + from collections import OrderedDict # noqa +except ImportError: + # We don't use ordereddict, but html5lib does when you request + # alpha-sorted attributes and on Python 2.6 and it doesn't specify it + # as a dependency (see + # https://github.com/html5lib/html5lib-python/pull/177) + install_requires.append('ordereddict') + setup( name='bleach', - version='1.4', + version='1.4.2', description='An easy whitelist-based HTML-sanitizing tool.', long_description=open('README.rst').read(), author='James Socol', @@ -13,14 +27,14 @@ setup( include_package_data=True, package_data={'': ['README.rst']}, zip_safe=False, - install_requires=[ - 'six', - 'html5lib>=0.999', + install_requires=install_requires, + tests_require=[ + 'nose>=1.3', ], + test_suite='nose.collector', classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', - 'Environment :: Web Environment :: Mozilla', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', @@ -31,6 +45,7 @@ setup( 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', 'Topic :: Software Development :: Libraries :: Python Modules', ] ) @@ -4,9 +4,11 @@ # and then run "tox" from this directory. 
[tox] -envlist = py26, py27, py32, py33, pypy +envlist = py26, py27, py32, py33, py34, pypy [testenv] commands = nosetests {posargs:-v} deps = + six + html5lib==0.999 nose |