aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Christopher Baines <mail@cbaines.net> 2015-12-06 13:31:51 +0000
committer: Christopher Baines <mail@cbaines.net> 2015-12-06 13:31:51 +0000
commit: 4cf150e98a62a0bcc307065a050f7d3a592289a2 (patch)
tree: 584483fb7e5e200b91b8f4a09a61d8253a5b597c
parent: 25893d8e7894f3e77f3f8ce9a6b84132968c15a9 (diff)
parent: 11b8160e584470439c8c0b3ab51012c9300f6788 (diff)
download: python-bleach-4cf150e98a62a0bcc307065a050f7d3a592289a2.tar
python-bleach-4cf150e98a62a0bcc307065a050f7d3a592289a2.tar.gz
Merge tag 'upstream/1.4.2'
Upstream version 1.4.2
-rw-r--r--.gitignore9
-rw-r--r--.travis.yml14
-rw-r--r--CHANGES18
-rw-r--r--CONTRIBUTING.rst7
-rw-r--r--README.rst19
-rw-r--r--bleach/__init__.py36
-rw-r--r--bleach/callbacks.py2
-rw-r--r--bleach/sanitizer.py2
-rw-r--r--bleach/tests/test_links.py193
-rw-r--r--bleach/tests/test_unicode.py6
-rw-r--r--bleach/tests/tools.py2
-rw-r--r--docs/conf.py6
-rw-r--r--docs/goals.rst3
-rw-r--r--requirements.txt1
-rw-r--r--setup.cfg2
-rw-r--r--setup.py25
-rw-r--r--tox.ini4
17 files changed, 198 insertions, 151 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96e22b0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+*.pyo
+*.pyc
+pip-log.txt
+.coverage
+dist
+*.egg-info
+.noseids
+build
+.tox
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..193f70a
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,14 @@
+sudo: false
+language: python
+python:
+ - "2.6"
+ - "2.7"
+ - "3.2"
+ - "3.3"
+ - "3.4"
+ - "pypy"
+install:
+ - "pip install -r requirements.txt"
+script:
+ - nosetests
+ - flake8 bleach/
diff --git a/CHANGES b/CHANGES
index 1def1a2..00ed505 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,10 +1,26 @@
Bleach Changes
==============
+Version 1.4.2
+-------------
+
+- Fix hang in linkify with parse_email=True. #124
+- Fix crash in linkify when removing a link that is a first-child. #136
+- Updated TLDs.
+- Don't remove exterior brackets when linkifying. #146
+
+
+Version 1.4.1
+-------------
+
+- Consistent order of attributes in output.
+- Python 3.4.
+
+
Version 1.4
-----------
-- Update linkify to use etree type Treeewalker instead of simpletree.
+- Update linkify to use etree type Treewalker instead of simpletree.
- Updated html5lib to version >= 0.999.
- Update all code to be compatible with Python 3 and 2 using six.
- Switch to Apache License.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
new file mode 100644
index 0000000..015ceb8
--- /dev/null
+++ b/CONTRIBUTING.rst
@@ -0,0 +1,7 @@
+Reporting Security Issues
+=========================
+
+If you believe you have found an exploit in a patched version of Bleach,
+master or the latest released version on PyPI, **please do not post it
+in a GitHub issue**. Please contact me privately, at
+`me+bleach@jamessocol.com <mailto:me+bleach@jamessocol.com>`_.
diff --git a/README.rst b/README.rst
index 5e52cae..2dc7420 100644
--- a/README.rst
+++ b/README.rst
@@ -2,6 +2,12 @@
Bleach
======
+.. image:: https://travis-ci.org/jsocol/bleach.png?branch=master
+ :target: https://travis-ci.org/jsocol/bleach
+
+.. image:: https://badge.fury.io/py/Bleach.svg
+ :target: http://badge.fury.io/py/Bleach
+
Bleach is an HTML sanitizing library that escapes or strips markup and
attributes based on a white list. Bleach can also linkify text safely, applying
filters that Django's ``urlize`` filter cannot, and optionally setting ``rel``
@@ -20,10 +26,21 @@ The version on GitHub_ is the most up-to-date and contains the latest bug
fixes. You can find full documentation on `ReadTheDocs`_.
+Reporting Security Issues
+=========================
+
+If you believe you have found an exploit in a patched version of Bleach,
+master or the latest released version on PyPI, **please do not post it
+in a GitHub issue**. Please contact me privately, at
+`me+bleach@jamessocol.com <mailto:me+bleach@jamessocol.com>`_.
+
+
Basic Use
=========
-The simplest way to use Bleach is::
+The simplest way to use Bleach is:
+
+.. code-block:: python
>>> import bleach
diff --git a/bleach/__init__.py b/bleach/__init__.py
index b110972..1d8caa2 100644
--- a/bleach/__init__.py
+++ b/bleach/__init__.py
@@ -13,8 +13,8 @@ from .encoding import force_unicode
from .sanitizer import BleachSanitizer
-VERSION = (1, 4, 0)
-__version__ = '1.4'
+VERSION = (1, 4, 2)
+__version__ = '.'.join([str(n) for n in VERSION])
__all__ = ['clean', 'linkify']
@@ -51,16 +51,17 @@ TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk
ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
- net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn pr pro
- ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl sm sn so
- sr st su sv sy sz tc td tel tf tg th tj tk tl tm tn to tp tr travel tt
- tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xn ye yt yu za zm
- zw""".split()
-
-PROTOCOLS = HTMLSanitizer.acceptable_protocols
+ net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
+ pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
+ sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
+ tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
+ xn xxx ye yt yu za zm zw""".split()
+# Make sure that .com doesn't get matched by .co first
TLDS.reverse()
+PROTOCOLS = HTMLSanitizer.acceptable_protocols
+
url_re = re.compile(
r"""\(* # Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
@@ -145,14 +146,16 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
# capture any non-tag text at the start of the fragment
if new_tree.text:
if index == 0:
+ tree.text = tree.text or ''
tree.text += new_tree.text
else:
- tree[index-1].tail += new_tree.text
+ tree[index - 1].tail = tree[index - 1].tail or ''
+ tree[index - 1].tail += new_tree.text
# the put in the tagged elements into the old tree
for n in new_tree:
if n.tag == ETREE_TAG('a'):
_seen.add(n)
- tree.insert(index+count, n)
+ tree.insert(index + count, n)
count += 1
# if we got a node to remove...
if node is not None:
@@ -252,15 +255,17 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
if new_tail != node.tail:
node.tail = ''
adj = replace_nodes(tree, new_tail, None,
- current_child+1)
- #insert the new nodes made from my tail into
+ current_child + 1)
+ # Insert the new nodes made from my tail into
# the tree right after me. current_child+1
children += adj
+ continue
new_tail = re.sub(url_re, link_repl, new_tail)
if new_tail != old_tail:
node.tail = ''
- adj = replace_nodes(tree, new_tail, None, current_child+1)
+ adj = replace_nodes(tree, new_tail, None,
+ current_child + 1)
children += adj
if node.tag == ETREE_TAG('a') and not (node in _seen):
@@ -342,7 +347,7 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
link = apply_callbacks(link, True)
if link is None:
- return url
+ return '(' * open_brackets + url + ')' * close_brackets
_text = link.pop('_text')
_href = link.pop('href')
@@ -373,5 +378,6 @@ def _serialize(domtree):
walker = html5lib.treewalkers.getTreeWalker('etree')
stream = walker(domtree)
serializer = HTMLSerializer(quote_attr_values=True,
+ alphabetical_attributes=True,
omit_optional_tags=False)
return serializer.render(stream)
diff --git a/bleach/callbacks.py b/bleach/callbacks.py
index 227f089..3cb82c2 100644
--- a/bleach/callbacks.py
+++ b/bleach/callbacks.py
@@ -6,7 +6,7 @@ def nofollow(attrs, new=False):
if attrs['href'].startswith('mailto:'):
return attrs
rel = [x for x in attrs.get('rel', '').split(' ') if x]
- if not 'nofollow' in [x.lower() for x in rel]:
+ if 'nofollow' not in [x.lower() for x in rel]:
rel.append('nofollow')
attrs['rel'] = ' '.join(rel)
diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py
index 88246f8..eec6659 100644
--- a/bleach/sanitizer.py
+++ b/bleach/sanitizer.py
@@ -49,7 +49,7 @@ class BleachSanitizerMixin(HTMLSanitizerMixin):
if callable(allowed_attributes)
else name in allowed_attributes)])
for attr in self.attr_val_is_uri:
- if not attr in attrs:
+ if attr not in attrs:
continue
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py
index abf889d..62da8d1 100644
--- a/bleach/tests/test_links.py
+++ b/bleach/tests/test_links.py
@@ -7,7 +7,6 @@ from html5lib.tokenizer import HTMLTokenizer
from nose.tools import eq_
from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC
-from bleach.tests.tools import in_
def test_url_re():
@@ -23,34 +22,24 @@ def test_empty():
def test_simple_link():
- in_(('a <a href="http://example.com" rel="nofollow">http://example.com'
+ eq_('a <a href="http://example.com" rel="nofollow">http://example.com'
'</a> link',
- 'a <a rel="nofollow" href="http://example.com">http://example.com'
- '</a> link'),
linkify('a http://example.com link'))
- in_(('a <a href="https://example.com" rel="nofollow">https://example.com'
+ eq_('a <a href="https://example.com" rel="nofollow">https://example.com'
'</a> link',
- 'a <a rel="nofollow" href="https://example.com">https://example.com'
- '</a> link'),
linkify('a https://example.com link'))
- in_(('a <a href="http://example.com" rel="nofollow">example.com</a> link',
- 'a <a rel="nofollow" href="http://example.com">example.com</a> link'),
+ eq_('a <a href="http://example.com" rel="nofollow">example.com</a> link',
linkify('a example.com link'))
def test_trailing_slash():
- in_(('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
- '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>'),
+ eq_('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
linkify('http://examp.com/'))
- in_(('<a href="http://example.com/foo/" rel="nofollow">'
- 'http://example.com/foo/</a>',
- '<a rel="nofollow" href="http://example.com/foo/">'
- 'http://example.com/foo/</a>'),
+ eq_('<a href="http://example.com/foo/" rel="nofollow">'
+ 'http://example.com/foo/</a>',
linkify('http://example.com/foo/'))
- in_(('<a href="http://example.com/foo/bar/" rel="nofollow">'
- 'http://example.com/foo/bar/</a>',
- '<a rel="nofollow" href="http://example.com/foo/bar/">'
- 'http://example.com/foo/bar/</a>'),
+ eq_('<a href="http://example.com/foo/bar/" rel="nofollow">'
+ 'http://example.com/foo/bar/</a>',
linkify('http://example.com/foo/bar/'))
@@ -61,10 +50,8 @@ def test_mangle_link():
attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted)
return attrs
- in_(('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
- 'http://example.com</a>',
- '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">'
- 'http://example.com</a>'),
+ eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
+ 'http://example.com</a>',
linkify('http://example.com', DC + [filter_url]))
@@ -90,19 +77,18 @@ def test_email_link():
'james@example.com.au</a> mailto', True,
'aussie james@example.com.au mailto'),
# This is kind of a pathological case. I guess we do our best here.
- (('email to <a href="james@example.com" rel="nofollow">'
- 'james@example.com</a>',
- 'email to <a rel="nofollow" href="james@example.com">'
- 'james@example.com</a>'),
+ ('email to <a href="james@example.com" rel="nofollow">'
+ 'james@example.com</a>',
True,
'email to <a href="james@example.com">james@example.com</a>'),
+ ('<br><a href="mailto:jinkyun@example.com">'
+ 'jinkyun@example.com</a>',
+ True,
+ '<br>jinkyun@example.com'),
)
def _check(o, p, i):
- if isinstance(o, (list, tuple)):
- in_(o, linkify(i, parse_email=p))
- else:
- eq_(o, linkify(i, parse_email=p))
+ eq_(o, linkify(i, parse_email=p))
for (o, p, i) in tests:
yield _check, o, p, i
@@ -171,8 +157,7 @@ def test_set_attrs():
attrs['rev'] = 'canonical'
return attrs
- in_(('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
- '<a rev="canonical" href="http://ex.mp">ex.mp</a>'),
+ eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
linkify('ex.mp', [set_attr]))
@@ -200,19 +185,19 @@ def test_stop_email():
def test_tlds():
- in_(('<a href="http://example.com" rel="nofollow">example.com</a>',
- '<a rel="nofollow" href="http://example.com">example.com</a>'),
+ eq_('<a href="http://example.com" rel="nofollow">example.com</a>',
linkify('example.com'))
- in_(('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
- '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>'),
+ eq_('<a href="http://example.co" rel="nofollow">example.co</a>',
+ linkify('example.co'))
+ eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
linkify('example.co.uk'))
- in_(('<a href="http://example.edu" rel="nofollow">example.edu</a>',
- '<a rel="nofollow" href="http://example.edu">example.edu</a>'),
+ eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>',
linkify('example.edu'))
- eq_('example.xxx', linkify('example.xxx'))
+ eq_('<a href="http://example.xxx" rel="nofollow">example.xxx</a>',
+ linkify('example.xxx'))
+ eq_('example.yyy', linkify('example.yyy'))
eq_(' brie', linkify(' brie'))
- in_(('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
- '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>'),
+ eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
linkify('bit.ly/fun'))
@@ -226,77 +211,58 @@ def test_nofollow_off():
def test_link_in_html():
- in_(('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
- '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>'),
+ eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
linkify('<i>http://yy.com</i>'))
- in_(('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
- '</a></strong></em>',
- '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com'
- '</a></strong></em>'),
+ eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
+ '</a></strong></em>',
linkify('<em><strong>http://xx.com</strong></em>'))
def test_links_https():
- in_(('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
- '<a rel="nofollow" href="https://yy.com">https://yy.com</a>'),
+ eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
linkify('https://yy.com'))
def test_add_rel_nofollow():
"""Verify that rel="nofollow" is added to an existing link"""
- in_(('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
- '<a rel="nofollow" href="http://yy.com">http://yy.com</a>'),
+ eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
linkify('<a href="http://yy.com">http://yy.com</a>'))
def test_url_with_path():
- in_(('<a href="http://example.com/path/to/file" rel="nofollow">'
- 'http://example.com/path/to/file</a>',
- '<a rel="nofollow" href="http://example.com/path/to/file">'
- 'http://example.com/path/to/file</a>'),
+ eq_('<a href="http://example.com/path/to/file" rel="nofollow">'
+ 'http://example.com/path/to/file</a>',
linkify('http://example.com/path/to/file'))
def test_link_ftp():
- in_(('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
- 'ftp://ftp.mozilla.org/some/file</a>',
- '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">'
- 'ftp://ftp.mozilla.org/some/file</a>'),
+ eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
+ 'ftp://ftp.mozilla.org/some/file</a>',
linkify('ftp://ftp.mozilla.org/some/file'))
def test_link_query():
- in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
+ eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
'http://xx.com/?test=win</a>',
- '<a rel="nofollow" href="http://xx.com/?test=win">'
- 'http://xx.com/?test=win</a>'),
linkify('http://xx.com/?test=win'))
- in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
+ eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
'xx.com/?test=win</a>',
- '<a rel="nofollow" href="http://xx.com/?test=win">'
- 'xx.com/?test=win</a>'),
linkify('xx.com/?test=win'))
- in_(('<a href="http://xx.com?test=win" rel="nofollow">'
+ eq_('<a href="http://xx.com?test=win" rel="nofollow">'
'xx.com?test=win</a>',
- '<a rel="nofollow" href="http://xx.com?test=win">'
- 'xx.com?test=win</a>'),
linkify('xx.com?test=win'))
def test_link_fragment():
- in_(('<a href="http://xx.com/path#frag" rel="nofollow">'
- 'http://xx.com/path#frag</a>',
- '<a rel="nofollow" href="http://xx.com/path#frag">'
- 'http://xx.com/path#frag</a>'),
+ eq_('<a href="http://xx.com/path#frag" rel="nofollow">'
+ 'http://xx.com/path#frag</a>',
linkify('http://xx.com/path#frag'))
def test_link_entities():
- in_(('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
+ eq_('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
'http://xx.com/?a=1&amp;b=2</a>',
- '<a rel="nofollow" href="http://xx.com/?a=1&amp;b=2">'
- 'http://xx.com/?a=1&amp;b=2</a>'),
linkify('http://xx.com/?a=1&b=2'))
@@ -307,12 +273,9 @@ def test_escaped_html():
def test_link_http_complete():
- in_(('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d'
+ eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d'
'&amp;e#f" rel="nofollow">'
'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>',
- '<a rel="nofollow" href="https://user:pass@ftp.mozilla.org/x/'
- 'y.exe?a=b&amp;c=d&amp;e#f">'
- 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>'),
linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'))
@@ -330,10 +293,8 @@ def test_javascript_url():
def test_unsafe_url():
"""Any unsafe char ({}[]<>, etc.) in the path should end URL scanning."""
- in_(('All your{"<a href="http://xx.yy.com/grover.png" '
- 'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
- 'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"'
- '>xx.yy.com/grover.png</a>"}base are'),
+ eq_('All your{"<a href="http://xx.yy.com/grover.png" '
+ 'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
linkify('All your{"xx.yy.com/grover.png"}base are'))
@@ -341,23 +302,17 @@ def test_skip_pre():
"""Skip linkification in <pre> tags."""
simple = 'http://xx.com <pre>http://xx.com</pre>'
linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
- '<pre>http://xx.com</pre>',
- '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
'<pre>http://xx.com</pre>')
all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
'<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
- '</a></pre>',
- '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
- '<pre><a rel="nofollow" href="http://xx.com">http://xx.com'
'</a></pre>')
- in_(linked, linkify(simple, skip_pre=True))
- in_(all_linked, linkify(simple))
+ eq_(linked, linkify(simple, skip_pre=True))
+ eq_(all_linked, linkify(simple))
already_linked = '<pre><a href="http://xx.com">xx</a></pre>'
- nofollowed = ('<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>',
- '<pre><a rel="nofollow" href="http://xx.com">xx</a></pre>')
- in_(nofollowed, linkify(already_linked))
- in_(nofollowed, linkify(already_linked, skip_pre=True))
+ nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>'
+ eq_(nofollowed, linkify(already_linked))
+ eq_(nofollowed, linkify(already_linked, skip_pre=True))
def test_libgl():
@@ -367,12 +322,11 @@ def test_libgl():
def test_end_of_sentence():
"""example.com. should match."""
- outs = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}',
- '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}')
+ out = '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}'
intxt = '{0!s}{1!s}'
def check(u, p):
- in_([out.format(u, p) for out in outs],
+ eq_(out.format(u, p),
linkify(intxt.format(u, p)))
tests = (
@@ -388,8 +342,7 @@ def test_end_of_sentence():
def test_end_of_clause():
"""example.com/foo, shouldn't include the ,"""
- in_(('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
- '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar'),
+ eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
linkify('ex.com/foo, bar'))
@@ -402,8 +355,7 @@ def test_sarcasm():
def test_wrapping_parentheses():
"""URLs wrapped in parantheses should not include them."""
- outs = ('{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}',
- '{0!s}<a rel="nofollow" href="http://{1!s}">{2!s}</a>{3!s}')
+ out = '{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}'
tests = (
('(example.com)', ('(', 'example.com', 'example.com', ')')),
@@ -431,12 +383,17 @@ def test_wrapping_parentheses():
)
def check(test, expected_output):
- in_([o.format(*expected_output) for o in outs], linkify(test))
+ eq_(out.format(*expected_output), linkify(test))
for test, expected_output in tests:
yield check, test, expected_output
+def test_parentheses_with_removing():
+ expect = '(test.py)'
+ eq_(expect, linkify(expect, callbacks=[lambda *a: None]))
+
+
def test_ports():
"""URLs can contain port numbers."""
tests = (
@@ -448,9 +405,8 @@ def test_ports():
)
def check(test, output):
- outs = ('<a href="{0}" rel="nofollow">{0}</a>{1}',
- '<a rel="nofollow" href="{0}">{0}</a>{1}')
- in_([out.format(*output) for out in outs],
+ out = '<a href="{0}" rel="nofollow">{0}</a>{1}'
+ eq_(out.format(*output),
linkify(test))
for test, output in tests:
@@ -467,8 +423,7 @@ def test_tokenizer():
def test_ignore_bad_protocols():
eq_('foohttp://bar',
linkify('foohttp://bar'))
- in_(('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
- 'fohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>'),
+ eq_('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
linkify('fohttp://exampl.com'))
@@ -482,28 +437,28 @@ def test_link_emails_and_urls():
"""parse_email=True shouldn't prevent URLs from getting linkified."""
output = ('<a href="http://example.com" rel="nofollow">'
'http://example.com</a> <a href="mailto:person@example.com">'
- 'person@example.com</a>',
- '<a rel="nofollow" href="http://example.com">'
- 'http://example.com</a> <a href="mailto:person@example.com">'
'person@example.com</a>')
- in_(output, linkify('http://example.com person@example.com',
+ eq_(output, linkify('http://example.com person@example.com',
parse_email=True))
def test_links_case_insensitive():
"""Protocols and domain names are case insensitive."""
expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
- 'HTTP://EXAMPLE.COM</a>',
- '<a rel="nofollow" href="HTTP://EXAMPLE.COM">'
'HTTP://EXAMPLE.COM</a>')
- in_(expect, linkify('HTTP://EXAMPLE.COM'))
+ eq_(expect, linkify('HTTP://EXAMPLE.COM'))
def test_elements_inside_links():
- in_(('<a href="#" rel="nofollow">hello<br></a>',
- '<a rel="nofollow" href="#">hello<br></a>'),
+ eq_('<a href="#" rel="nofollow">hello<br></a>',
linkify('<a href="#">hello<br></a>'))
- in_(('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
- '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>'),
+ eq_('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
linkify('<a href="#"><strong>bold</strong> hello<br></a>'))
+
+
+def test_remove_first_childlink():
+ expect = '<p>something</p>'
+ callbacks = [lambda *a: None]
+ eq_(expect,
+ linkify('<p><a href="/foo">something</a></p>', callbacks=callbacks))
diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py
index 796924d..723df5f 100644
--- a/bleach/tests/test_unicode.py
+++ b/bleach/tests/test_unicode.py
@@ -30,9 +30,9 @@ def test_mixed():
def test_mixed_linkify():
in_(('Домашняя <a href="http://example.com" rel="nofollow">'
- 'http://example.com</a> ヘルプとチュートリアル',
- 'Домашняя <a rel="nofollow" href="http://example.com">'
- 'http://example.com</a> ヘルプとチュートリアル'),
+ 'http://example.com</a> ヘルプとチュートリアル',
+ 'Домашняя <a rel="nofollow" href="http://example.com">'
+ 'http://example.com</a> ヘルプとチュートリアル'),
linkify('Домашняя http://example.com ヘルプとチュートリアル'))
diff --git a/bleach/tests/tools.py b/bleach/tests/tools.py
index 87f926c..3ae047e 100644
--- a/bleach/tests/tools.py
+++ b/bleach/tests/tools.py
@@ -3,5 +3,5 @@
def in_(l, a, msg=None):
"""Shorthand for 'assert a in l, "%r not in %r" % (a, l)
"""
- if not a in l:
+ if a not in l:
raise AssertionError(msg or "%r not in %r" % (a, l))
diff --git a/docs/conf.py b/docs/conf.py
index 96b2fc8..78bee32 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -41,16 +41,16 @@ master_doc = 'index'
# General information about the project.
project = u'Bleach'
-copyright = u'2012, James Socol'
+copyright = u'2012-2014, James Socol'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = '1.3'
+version = '1.4'
# The full version, including alpha/beta/rc tags.
-release = '1.3.1'
+release = '1.4.1'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/goals.rst b/docs/goals.rst
index 5477f9c..d62d54b 100644
--- a/docs/goals.rst
+++ b/docs/goals.rst
@@ -66,6 +66,9 @@ non-goal use cases include:
you have to allow so many tags that a blacklist approach (e.g. forbidding
``<script>`` or ``<object>``) may be more appropriate.
+* **Removing *all* HTML.** There are much faster tools available if you want to
+ remove or escape all HTML from a document.
+
* **Cleaning up after trusted users.** Bleach is powerful but it is not fast.
If you trust your users, trust them and don't rely on Bleach to clean up
their mess.
diff --git a/requirements.txt b/requirements.txt
index d6e9357..a4c0b99 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+ordereddict
six
html5lib>=0.999
# Requirements to run the test suite:
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..81cd366
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E731,W503
diff --git a/setup.py b/setup.py
index 6d5cfb4..e1efc93 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,22 @@
from setuptools import setup, find_packages
+install_requires = [
+ 'six',
+ 'html5lib>=0.999',
+]
+
+try:
+ from collections import OrderedDict # noqa
+except ImportError:
+ # We don't use ordereddict ourselves, but html5lib needs it on
+ # Python 2.6 when alpha-sorted attributes are requested, and it
+ # doesn't declare it as a dependency (see
+ # https://github.com/html5lib/html5lib-python/pull/177).
+ install_requires.append('ordereddict')
+
setup(
name='bleach',
- version='1.4',
+ version='1.4.2',
description='An easy whitelist-based HTML-sanitizing tool.',
long_description=open('README.rst').read(),
author='James Socol',
@@ -13,14 +27,14 @@ setup(
include_package_data=True,
package_data={'': ['README.rst']},
zip_safe=False,
- install_requires=[
- 'six',
- 'html5lib>=0.999',
+ install_requires=install_requires,
+ tests_require=[
+ 'nose>=1.3',
],
+ test_suite='nose.collector',
classifiers=[
'Development Status :: 5 - Production/Stable',
'Environment :: Web Environment',
- 'Environment :: Web Environment :: Mozilla',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Operating System :: OS Independent',
@@ -31,6 +45,7 @@ setup(
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
'Topic :: Software Development :: Libraries :: Python Modules',
]
)
diff --git a/tox.ini b/tox.ini
index 4d8e5f6..5d4fe51 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,9 +4,11 @@
# and then run "tox" from this directory.
[tox]
-envlist = py26, py27, py32, py33, pypy
+envlist = py26, py27, py32, py33, py34, pypy
[testenv]
commands = nosetests {posargs:-v}
deps =
+ six
+ html5lib==0.999
nose