summary | refs | log | tree | commit | diff
path: root/bleach/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/tests')
-rw-r--r--  bleach/tests/test_basics.py    65
-rw-r--r--  bleach/tests/test_css.py       13
-rw-r--r--  bleach/tests/test_links.py    241
-rw-r--r--  bleach/tests/test_security.py  12
-rw-r--r--  bleach/tests/test_unicode.py   47
-rw-r--r--  bleach/tests/tools.py           7
6 files changed, 245 insertions, 140 deletions
diff --git a/bleach/tests/test_basics.py b/bleach/tests/test_basics.py
index 9eca687..822407f 100644
--- a/bleach/tests/test_basics.py
+++ b/bleach/tests/test_basics.py
@@ -1,7 +1,9 @@
+import six
import html5lib
from nose.tools import eq_
import bleach
+from bleach.tests.tools import in_
def test_empty():
@@ -9,7 +11,12 @@ def test_empty():
def test_nbsp():
- eq_(u'\xa0test string\xa0', bleach.clean(' test string '))
+ if six.PY3:
+ expected = '\xa0test string\xa0'
+ else:
+ expected = six.u('\\xa0test string\\xa0')
+
+ eq_(expected, bleach.clean(' test string '))
def test_comments_only():
@@ -18,8 +25,8 @@ def test_comments_only():
eq_('', bleach.clean(comment))
eq_('', bleach.clean(open_comment))
eq_(comment, bleach.clean(comment, strip_comments=False))
- eq_('%s-->' % open_comment, bleach.clean(open_comment,
- strip_comments=False))
+ eq_('{0!s}-->'.format(open_comment), bleach.clean(open_comment,
+ strip_comments=False))
def test_with_comments():
@@ -55,9 +62,11 @@ def test_function_arguments():
def test_named_arguments():
ATTRS = {'a': ['rel', 'href']}
- s = u'<a href="http://xx.com" rel="alternate">xx.com</a>'
- eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s))
- eq_(s, bleach.clean(s, attributes=ATTRS))
+ s = ('<a href="http://xx.com" rel="alternate">xx.com</a>',
+ '<a rel="alternate" href="http://xx.com">xx.com</a>')
+
+ eq_('<a href="http://xx.com">xx.com</a>', bleach.clean(s[0]))
+ in_(s, bleach.clean(s[0], attributes=ATTRS))
def test_disallowed_html():
@@ -81,19 +90,19 @@ def test_bare_entities():
def test_escaped_entities():
- s = u'&lt;em&gt;strong&lt;/em&gt;'
+ s = '&lt;em&gt;strong&lt;/em&gt;'
eq_(s, bleach.clean(s))
def test_serializer():
- s = u'<table></table>'
+ s = '<table></table>'
eq_(s, bleach.clean(s, tags=['table']))
- eq_(u'test<table></table>', bleach.linkify(u'<table>test</table>'))
- eq_(u'<p>test</p>', bleach.clean(u'<p>test</p>', tags=['p']))
+ eq_('test<table></table>', bleach.linkify('<table>test</table>'))
+ eq_('<p>test</p>', bleach.clean('<p>test</p>', tags=['p']))
def test_no_href_links():
- s = u'<a name="anchor">x</a>'
+ s = '<a name="anchor">x</a>'
eq_(s, bleach.linkify(s))
@@ -112,7 +121,7 @@ def test_stripping():
bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
eq_('a test <em>with</em> <b>html</b> tags',
bleach.clean('a test <em>with</em> <img src="http://example.com/"> '
- '<b>html</b> tags', strip=True))
+ '<b>html</b> tags', strip=True))
s = '<p><a href="http://example.com/">link text</a></p>'
eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))
@@ -138,7 +147,7 @@ def test_allowed_styles():
def test_idempotent():
"""Make sure that applying the filter twice doesn't change anything."""
- dirty = u'<span>invalid & </span> < extra http://link.com<em>'
+ dirty = '<span>invalid & </span> < extra http://link.com<em>'
clean = bleach.clean(dirty)
eq_(clean, bleach.clean(clean))
@@ -147,10 +156,23 @@ def test_idempotent():
eq_(linked, bleach.linkify(linked))
+def test_rel_already_there():
+ """Make sure rel attribute is updated not replaced"""
+ linked = ('Click <a href="http://example.com" rel="tooltip">'
+ 'here</a>.')
+ link_good = (('Click <a href="http://example.com" rel="tooltip nofollow">'
+ 'here</a>.'),
+ ('Click <a rel="tooltip nofollow" href="http://example.com">'
+ 'here</a>.'))
+
+ in_(link_good, bleach.linkify(linked))
+ in_(link_good, bleach.linkify(link_good[0]))
+
+
def test_lowercase_html():
"""We should output lowercase HTML."""
- dirty = u'<EM CLASS="FOO">BAR</EM>'
- clean = u'<em class="FOO">BAR</em>'
+ dirty = '<EM CLASS="FOO">BAR</EM>'
+ clean = '<em class="FOO">BAR</em>'
eq_(clean, bleach.clean(dirty, attributes=['class']))
@@ -160,14 +182,15 @@ def test_wildcard_attributes():
'img': ['src'],
}
TAG = ['img', 'em']
- dirty = (u'both <em id="foo" style="color: black">can</em> have '
- u'<img id="bar" src="foo"/>')
- clean = u'both <em id="foo">can</em> have <img id="bar" src="foo">'
- eq_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))
+ dirty = ('both <em id="foo" style="color: black">can</em> have '
+ '<img id="bar" src="foo"/>')
+ clean = ('both <em id="foo">can</em> have <img src="foo" id="bar">',
+ 'both <em id="foo">can</em> have <img id="bar" src="foo">')
+ in_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))
def test_sarcasm():
"""Jokes should crash.<sarcasm/>"""
- dirty = u'Yeah right <sarcasm/>'
- clean = u'Yeah right &lt;sarcasm/&gt;'
+ dirty = 'Yeah right <sarcasm/>'
+ clean = 'Yeah right &lt;sarcasm/&gt;'
eq_(clean, bleach.clean(dirty))
diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py
index 588c8ce..b40596f 100644
--- a/bleach/tests/test_css.py
+++ b/bleach/tests/test_css.py
@@ -29,14 +29,14 @@ def test_allowed_css():
('font-family: "Arial";', 'font-family: "Arial";', ['font-family']),
)
- p_single = '<p style="%s">bar</p>'
- p_double = "<p style='%s'>bar</p>"
+ p_single = '<p style="{0!s}">bar</p>'
+ p_double = "<p style='{0!s}'>bar</p>"
def check(i, o, s):
if '"' in i:
- eq_(p_double % o, clean(p_double % i, styles=s))
+ eq_(p_double.format(o), clean(p_double.format(i), styles=s))
else:
- eq_(p_single % o, clean(p_single % i, styles=s))
+ eq_(p_single.format(o), clean(p_single.format(i), styles=s))
for i, o, s in tests:
yield check, i, o, s
@@ -70,12 +70,13 @@ def test_style_hang():
"""font: normal normal normal 100%/normal 'Courier New', """
"""'Andale Mono', monospace; background-position: initial """
"""initial; background-repeat: initial initial;""")
- html = '<p style="%s">Hello world</p>' % style
+ html = '<p style="{0!s}">Hello world</p>'.format(style)
styles = [
'border', 'float', 'overflow', 'min-height', 'vertical-align',
'white-space',
'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right',
- 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right',
+ 'padding', 'padding-left', 'padding-top', 'padding-bottom',
+ 'padding-right',
'background',
'background-color',
'font', 'font-size', 'font-weight', 'text-align', 'text-transform',
diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py
index ac593c4..abf889d 100644
--- a/bleach/tests/test_links.py
+++ b/bleach/tests/test_links.py
@@ -1,18 +1,20 @@
-import urllib
+try:
+ from urllib.parse import quote_plus
+except ImportError:
+ from urllib import quote_plus
from html5lib.tokenizer import HTMLTokenizer
from nose.tools import eq_
from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC
-
-
+from bleach.tests.tools import in_
def test_url_re():
def no_match(s):
match = url_re.search(s)
if match:
- assert not match, 'matched %s' % s[slice(*match.span())]
+ assert not match, 'matched {0!s}'.format(s[slice(*match.span())])
yield no_match, 'just what i am looking for...it'
@@ -21,36 +23,48 @@ def test_empty():
def test_simple_link():
- eq_('a <a href="http://example.com" rel="nofollow">http://example.com'
+ in_(('a <a href="http://example.com" rel="nofollow">http://example.com'
'</a> link',
+ 'a <a rel="nofollow" href="http://example.com">http://example.com'
+ '</a> link'),
linkify('a http://example.com link'))
- eq_('a <a href="https://example.com" rel="nofollow">https://example.com'
+ in_(('a <a href="https://example.com" rel="nofollow">https://example.com'
'</a> link',
+ 'a <a rel="nofollow" href="https://example.com">https://example.com'
+ '</a> link'),
linkify('a https://example.com link'))
- eq_('an <a href="http://example.com" rel="nofollow">example.com</a> link',
- linkify('an example.com link'))
+ in_(('a <a href="http://example.com" rel="nofollow">example.com</a> link',
+ 'a <a rel="nofollow" href="http://example.com">example.com</a> link'),
+ linkify('a example.com link'))
def test_trailing_slash():
- eq_('<a href="http://example.com/" rel="nofollow">http://example.com/</a>',
- linkify('http://example.com/'))
- eq_('<a href="http://example.com/foo/" rel="nofollow">'
- 'http://example.com/foo/</a>',
- linkify('http://example.com/foo/'))
- eq_('<a href="http://example.com/foo/bar/" rel="nofollow">'
- 'http://example.com/foo/bar/</a>',
- linkify('http://example.com/foo/bar/'))
+ in_(('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
+ '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>'),
+ linkify('http://examp.com/'))
+ in_(('<a href="http://example.com/foo/" rel="nofollow">'
+ 'http://example.com/foo/</a>',
+ '<a rel="nofollow" href="http://example.com/foo/">'
+ 'http://example.com/foo/</a>'),
+ linkify('http://example.com/foo/'))
+ in_(('<a href="http://example.com/foo/bar/" rel="nofollow">'
+ 'http://example.com/foo/bar/</a>',
+ '<a rel="nofollow" href="http://example.com/foo/bar/">'
+ 'http://example.com/foo/bar/</a>'),
+ linkify('http://example.com/foo/bar/'))
def test_mangle_link():
"""We can muck with the href attribute of the link."""
def filter_url(attrs, new=False):
- attrs['href'] = (u'http://bouncer/?u=%s' %
- urllib.quote_plus(attrs['href']))
+ quoted = quote_plus(attrs['href'])
+ attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted)
return attrs
- eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
- 'http://example.com</a>',
+ in_(('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
+ 'http://example.com</a>',
+ '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">'
+ 'http://example.com</a>'),
linkify('http://example.com', DC + [filter_url]))
@@ -76,13 +90,19 @@ def test_email_link():
'james@example.com.au</a> mailto', True,
'aussie james@example.com.au mailto'),
# This is kind of a pathological case. I guess we do our best here.
- ('email to <a href="james@example.com" rel="nofollow">'
- 'james@example.com</a>', True,
- 'email to <a href="james@example.com">james@example.com</a>'),
+ (('email to <a href="james@example.com" rel="nofollow">'
+ 'james@example.com</a>',
+ 'email to <a rel="nofollow" href="james@example.com">'
+ 'james@example.com</a>'),
+ True,
+ 'email to <a href="james@example.com">james@example.com</a>'),
)
def _check(o, p, i):
- eq_(o, linkify(i, parse_email=p))
+ if isinstance(o, (list, tuple)):
+ in_(o, linkify(i, parse_email=p))
+ else:
+ eq_(o, linkify(i, parse_email=p))
for (o, p, i) in tests:
yield _check, o, p, i
@@ -151,7 +171,8 @@ def test_set_attrs():
attrs['rev'] = 'canonical'
return attrs
- eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
+ in_(('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
+ '<a rev="canonical" href="http://ex.mp">ex.mp</a>'),
linkify('ex.mp', [set_attr]))
@@ -179,15 +200,19 @@ def test_stop_email():
def test_tlds():
- eq_('<a href="http://example.com" rel="nofollow">example.com</a>',
+ in_(('<a href="http://example.com" rel="nofollow">example.com</a>',
+ '<a rel="nofollow" href="http://example.com">example.com</a>'),
linkify('example.com'))
- eq_('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
+ in_(('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
+ '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>'),
linkify('example.co.uk'))
- eq_('<a href="http://example.edu" rel="nofollow">example.edu</a>',
+ in_(('<a href="http://example.edu" rel="nofollow">example.edu</a>',
+ '<a rel="nofollow" href="http://example.edu">example.edu</a>'),
linkify('example.edu'))
eq_('example.xxx', linkify('example.xxx'))
eq_(' brie', linkify(' brie'))
- eq_('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
+ in_(('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
+ '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>'),
linkify('bit.ly/fun'))
@@ -197,61 +222,81 @@ def test_escaping():
def test_nofollow_off():
eq_('<a href="http://example.com">example.com</a>',
- linkify(u'example.com', []))
+ linkify('example.com', []))
def test_link_in_html():
- eq_('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
+ in_(('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
+ '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>'),
linkify('<i>http://yy.com</i>'))
- eq_('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com</a>'
- '</strong></em>',
+
+ in_(('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
+ '</a></strong></em>',
+ '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com'
+ '</a></strong></em>'),
linkify('<em><strong>http://xx.com</strong></em>'))
def test_links_https():
- eq_('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
+ in_(('<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
+ '<a rel="nofollow" href="https://yy.com">https://yy.com</a>'),
linkify('https://yy.com'))
def test_add_rel_nofollow():
"""Verify that rel="nofollow" is added to an existing link"""
- eq_('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
+ in_(('<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
+ '<a rel="nofollow" href="http://yy.com">http://yy.com</a>'),
linkify('<a href="http://yy.com">http://yy.com</a>'))
def test_url_with_path():
- eq_('<a href="http://example.com/path/to/file" rel="nofollow">'
- 'http://example.com/path/to/file</a>',
+ in_(('<a href="http://example.com/path/to/file" rel="nofollow">'
+ 'http://example.com/path/to/file</a>',
+ '<a rel="nofollow" href="http://example.com/path/to/file">'
+ 'http://example.com/path/to/file</a>'),
linkify('http://example.com/path/to/file'))
def test_link_ftp():
- eq_('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
- 'ftp://ftp.mozilla.org/some/file</a>',
+ in_(('<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
+ 'ftp://ftp.mozilla.org/some/file</a>',
+ '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">'
+ 'ftp://ftp.mozilla.org/some/file</a>'),
linkify('ftp://ftp.mozilla.org/some/file'))
def test_link_query():
- eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
+ in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
'http://xx.com/?test=win</a>',
+ '<a rel="nofollow" href="http://xx.com/?test=win">'
+ 'http://xx.com/?test=win</a>'),
linkify('http://xx.com/?test=win'))
- eq_('<a href="http://xx.com/?test=win" rel="nofollow">'
+ in_(('<a href="http://xx.com/?test=win" rel="nofollow">'
'xx.com/?test=win</a>',
+ '<a rel="nofollow" href="http://xx.com/?test=win">'
+ 'xx.com/?test=win</a>'),
linkify('xx.com/?test=win'))
- eq_('<a href="http://xx.com?test=win" rel="nofollow">'
+ in_(('<a href="http://xx.com?test=win" rel="nofollow">'
'xx.com?test=win</a>',
+ '<a rel="nofollow" href="http://xx.com?test=win">'
+ 'xx.com?test=win</a>'),
linkify('xx.com?test=win'))
def test_link_fragment():
- eq_('<a href="http://xx.com/path#frag" rel="nofollow">'
- 'http://xx.com/path#frag</a>',
+ in_(('<a href="http://xx.com/path#frag" rel="nofollow">'
+ 'http://xx.com/path#frag</a>',
+ '<a rel="nofollow" href="http://xx.com/path#frag">'
+ 'http://xx.com/path#frag</a>'),
linkify('http://xx.com/path#frag'))
def test_link_entities():
- eq_('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
+ in_(('<a href="http://xx.com/?a=1&amp;b=2" rel="nofollow">'
'http://xx.com/?a=1&amp;b=2</a>',
+ '<a rel="nofollow" href="http://xx.com/?a=1&amp;b=2">'
+ 'http://xx.com/?a=1&amp;b=2</a>'),
linkify('http://xx.com/?a=1&b=2'))
@@ -262,9 +307,12 @@ def test_escaped_html():
def test_link_http_complete():
- eq_('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d'
+ in_(('<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d'
'&amp;e#f" rel="nofollow">'
'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>',
+ '<a rel="nofollow" href="https://user:pass@ftp.mozilla.org/x/'
+ 'y.exe?a=b&amp;c=d&amp;e#f">'
+ 'https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>'),
linkify('https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'))
@@ -282,8 +330,10 @@ def test_javascript_url():
def test_unsafe_url():
"""Any unsafe char ({}[]<>, etc.) in the path should end URL scanning."""
- eq_('All your{"<a href="http://xx.yy.com/grover.png" '
- 'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
+ in_(('All your{"<a href="http://xx.yy.com/grover.png" '
+ 'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
+ 'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"'
+ '>xx.yy.com/grover.png</a>"}base are'),
linkify('All your{"xx.yy.com/grover.png"}base are'))
@@ -291,17 +341,23 @@ def test_skip_pre():
"""Skip linkification in <pre> tags."""
simple = 'http://xx.com <pre>http://xx.com</pre>'
linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
+ '<pre>http://xx.com</pre>',
+ '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
'<pre>http://xx.com</pre>')
all_linked = ('<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
'<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
+ '</a></pre>',
+ '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
+ '<pre><a rel="nofollow" href="http://xx.com">http://xx.com'
'</a></pre>')
- eq_(linked, linkify(simple, skip_pre=True))
- eq_(all_linked, linkify(simple))
+ in_(linked, linkify(simple, skip_pre=True))
+ in_(all_linked, linkify(simple))
already_linked = '<pre><a href="http://xx.com">xx</a></pre>'
- nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>'
- eq_(nofollowed, linkify(already_linked))
- eq_(nofollowed, linkify(already_linked, skip_pre=True))
+ nofollowed = ('<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>',
+ '<pre><a rel="nofollow" href="http://xx.com">xx</a></pre>')
+ in_(nofollowed, linkify(already_linked))
+ in_(nofollowed, linkify(already_linked, skip_pre=True))
def test_libgl():
@@ -311,11 +367,13 @@ def test_libgl():
def test_end_of_sentence():
"""example.com. should match."""
- out = u'<a href="http://%s" rel="nofollow">%s</a>%s'
- in_ = u'%s%s'
+ outs = ('<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}',
+ '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}')
+ intxt = '{0!s}{1!s}'
def check(u, p):
- eq_(out % (u, u, p), linkify(in_ % (u, p)))
+ in_([out.format(u, p) for out in outs],
+ linkify(intxt.format(u, p)))
tests = (
('example.com', '.'),
@@ -330,49 +388,50 @@ def test_end_of_sentence():
def test_end_of_clause():
"""example.com/foo, shouldn't include the ,"""
- eq_('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
+ in_(('<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
+ '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar'),
linkify('ex.com/foo, bar'))
def test_sarcasm():
"""Jokes should crash.<sarcasm/>"""
- dirty = u'Yeah right <sarcasm/>'
- clean = u'Yeah right &lt;sarcasm/&gt;'
+ dirty = 'Yeah right <sarcasm/>'
+ clean = 'Yeah right &lt;sarcasm/&gt;'
eq_(clean, linkify(dirty))
def test_wrapping_parentheses():
"""URLs wrapped in parantheses should not include them."""
- out = u'%s<a href="http://%s" rel="nofollow">%s</a>%s'
+ outs = ('{0!s}<a href="http://{1!s}" rel="nofollow">{2!s}</a>{3!s}',
+ '{0!s}<a rel="nofollow" href="http://{1!s}">{2!s}</a>{3!s}')
tests = (
- ('(example.com)', out % ('(', 'example.com', 'example.com', ')')),
- ('(example.com/)', out % ('(', 'example.com/', 'example.com/', ')')),
- ('(example.com/foo)', out % ('(', 'example.com/foo',
- 'example.com/foo', ')')),
- ('(((example.com/))))', out % ('(((', 'example.com/)',
- 'example.com/)', ')))')),
- ('example.com/))', out % ('', 'example.com/))',
- 'example.com/))', '')),
+ ('(example.com)', ('(', 'example.com', 'example.com', ')')),
+ ('(example.com/)', ('(', 'example.com/', 'example.com/', ')')),
+ ('(example.com/foo)', ('(', 'example.com/foo',
+ 'example.com/foo', ')')),
+ ('(((example.com/))))', ('(((', 'example.com/)',
+ 'example.com/)', ')))')),
+ ('example.com/))', ('', 'example.com/))', 'example.com/))', '')),
('http://en.wikipedia.org/wiki/Test_(assessment)',
- out % ('', 'en.wikipedia.org/wiki/Test_(assessment)',
- 'http://en.wikipedia.org/wiki/Test_(assessment)', '')),
+ ('', 'en.wikipedia.org/wiki/Test_(assessment)',
+ 'http://en.wikipedia.org/wiki/Test_(assessment)', '')),
('(http://en.wikipedia.org/wiki/Test_(assessment))',
- out % ('(', 'en.wikipedia.org/wiki/Test_(assessment)',
- 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')),
+ ('(', 'en.wikipedia.org/wiki/Test_(assessment)',
+ 'http://en.wikipedia.org/wiki/Test_(assessment)', ')')),
('((http://en.wikipedia.org/wiki/Test_(assessment))',
- out % ('((', 'en.wikipedia.org/wiki/Test_(assessment',
- 'http://en.wikipedia.org/wiki/Test_(assessment', '))')),
+ ('((', 'en.wikipedia.org/wiki/Test_(assessment',
+ 'http://en.wikipedia.org/wiki/Test_(assessment', '))')),
('(http://en.wikipedia.org/wiki/Test_(assessment)))',
- out % ('(', 'en.wikipedia.org/wiki/Test_(assessment))',
- 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')),
+ ('(', 'en.wikipedia.org/wiki/Test_(assessment))',
+ 'http://en.wikipedia.org/wiki/Test_(assessment))', ')')),
('(http://en.wikipedia.org/wiki/)Test_(assessment',
- out % ('(', 'en.wikipedia.org/wiki/)Test_(assessment',
- 'http://en.wikipedia.org/wiki/)Test_(assessment', '')),
+ ('(', 'en.wikipedia.org/wiki/)Test_(assessment',
+ 'http://en.wikipedia.org/wiki/)Test_(assessment', '')),
)
def check(test, expected_output):
- eq_(expected_output, linkify(test))
+ in_([o.format(*expected_output) for o in outs], linkify(test))
for test, expected_output in tests:
yield check, test, expected_output
@@ -389,7 +448,9 @@ def test_ports():
)
def check(test, output):
- eq_(u'<a href="{0}" rel="nofollow">{0}</a>{1}'.format(*output),
+ outs = ('<a href="{0}" rel="nofollow">{0}</a>{1}',
+ '<a rel="nofollow" href="{0}">{0}</a>{1}')
+ in_([out.format(*output) for out in outs],
linkify(test))
for test, output in tests:
@@ -406,8 +467,9 @@ def test_tokenizer():
def test_ignore_bad_protocols():
eq_('foohttp://bar',
linkify('foohttp://bar'))
- eq_('foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
- linkify('foohttp://exampl.com'))
+ in_(('fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
+ 'fohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>'),
+ linkify('fohttp://exampl.com'))
def test_max_recursion_depth():
@@ -420,21 +482,28 @@ def test_link_emails_and_urls():
"""parse_email=True shouldn't prevent URLs from getting linkified."""
output = ('<a href="http://example.com" rel="nofollow">'
'http://example.com</a> <a href="mailto:person@example.com">'
+ 'person@example.com</a>',
+ '<a rel="nofollow" href="http://example.com">'
+ 'http://example.com</a> <a href="mailto:person@example.com">'
'person@example.com</a>')
- eq_(output, linkify('http://example.com person@example.com',
+ in_(output, linkify('http://example.com person@example.com',
parse_email=True))
def test_links_case_insensitive():
"""Protocols and domain names are case insensitive."""
expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
+ 'HTTP://EXAMPLE.COM</a>',
+ '<a rel="nofollow" href="HTTP://EXAMPLE.COM">'
'HTTP://EXAMPLE.COM</a>')
- eq_(expect, linkify('HTTP://EXAMPLE.COM'))
+ in_(expect, linkify('HTTP://EXAMPLE.COM'))
def test_elements_inside_links():
- eq_(u'<a href="#" rel="nofollow">hello<br></a>',
+ in_(('<a href="#" rel="nofollow">hello<br></a>',
+ '<a rel="nofollow" href="#">hello<br></a>'),
linkify('<a href="#">hello<br></a>'))
- eq_(u'<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
+ in_(('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
+ '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>'),
linkify('<a href="#"><strong>bold</strong> hello<br></a>'))
diff --git a/bleach/tests/test_security.py b/bleach/tests/test_security.py
index 6c2b33f..6adab59 100644
--- a/bleach/tests/test_security.py
+++ b/bleach/tests/test_security.py
@@ -25,10 +25,10 @@ def test_invalid_attr():
clean('<a onclick="evil" href="test">test</a>'))
eq_('<img src="test">',
clean('<img onclick="evil" src="test" />',
- tags=IMG, attributes=IMG_ATTR))
+ tags=IMG, attributes=IMG_ATTR))
eq_('<img src="test">',
clean('<img href="invalid" src="test" />',
- tags=IMG, attributes=IMG_ATTR))
+ tags=IMG, attributes=IMG_ATTR))
def test_unquoted_attr():
@@ -57,7 +57,7 @@ def test_invalid_filter_attr():
eq_('<img src="http://example.com/">',
clean('<img onclick="evil" src="http://example.com/" />',
- tags=IMG, attributes=IMG_ATTR))
+ tags=IMG, attributes=IMG_ATTR))
eq_('<img>', clean('<img onclick="evil" src="http://badhost.com/" />',
tags=IMG, attributes=IMG_ATTR))
@@ -91,9 +91,9 @@ def test_nasty():
"""Nested, broken up, multiple tags, are still foiled!"""
test = ('<scr<script></script>ipt type="text/javascript">alert("foo");</'
'<script></script>script<del></del>>')
- expect = (u'&lt;scr&lt;script&gt;&lt;/script&gt;ipt type="text/javascript"'
- u'&gt;alert("foo");&lt;/script&gt;script&lt;del&gt;&lt;/del&gt;'
- u'&gt;')
+ expect = ('&lt;scr&lt;script&gt;&lt;/script&gt;ipt type="text/javascript"'
+ '&gt;alert("foo");&lt;/script&gt;script&lt;del&gt;&lt;/del&gt;'
+ '&gt;')
eq_(expect, clean(test))
diff --git a/bleach/tests/test_unicode.py b/bleach/tests/test_unicode.py
index 67123cc..796924d 100644
--- a/bleach/tests/test_unicode.py
+++ b/bleach/tests/test_unicode.py
@@ -1,54 +1,59 @@
# -*- coding: utf-8 -*-
-
+from __future__ import unicode_literals
from nose.tools import eq_
from bleach import clean, linkify
+from bleach.tests.tools import in_
def test_japanese_safe_simple():
- eq_(u'ヘルプとチュートリアル', clean(u'ヘルプとチュートリアル'))
- eq_(u'ヘルプとチュートリアル', linkify(u'ヘルプとチュートリアル'))
+ eq_('ヘルプとチュートリアル', clean('ヘルプとチュートリアル'))
+ eq_('ヘルプとチュートリアル', linkify('ヘルプとチュートリアル'))
def test_japanese_strip():
- eq_(u'<em>ヘルプとチュートリアル</em>',
- clean(u'<em>ヘルプとチュートリアル</em>'))
- eq_(u'&lt;span&gt;ヘルプとチュートリアル&lt;/span&gt;',
- clean(u'<span>ヘルプとチュートリアル</span>'))
+ eq_('<em>ヘルプとチュートリアル</em>',
+ clean('<em>ヘルプとチュートリアル</em>'))
+ eq_('&lt;span&gt;ヘルプとチュートリアル&lt;/span&gt;',
+ clean('<span>ヘルプとチュートリアル</span>'))
def test_russian_simple():
- eq_(u'Домашняя', clean(u'Домашняя'))
- eq_(u'Домашняя', linkify(u'Домашняя'))
+ eq_('Домашняя', clean('Домашняя'))
+ eq_('Домашняя', linkify('Домашняя'))
def test_mixed():
- eq_(u'Домашняяヘルプとチュートリアル',
- clean(u'Домашняяヘルプとチュートリアル'))
+ eq_('Домашняяヘルプとチュートリアル',
+ clean('Домашняяヘルプとチュートリアル'))
def test_mixed_linkify():
- eq_(u'Домашняя <a href="http://example.com" rel="nofollow">'
- u'http://example.com</a> ヘルプとチュートリアル',
- linkify(u'Домашняя http://example.com ヘルプとチュートリアル'))
+ in_(('Домашняя <a href="http://example.com" rel="nofollow">'
+ 'http://example.com</a> ヘルプとチュートリアル',
+ 'Домашняя <a rel="nofollow" href="http://example.com">'
+ 'http://example.com</a> ヘルプとチュートリアル'),
+ linkify('Домашняя http://example.com ヘルプとチュートリアル'))
def test_url_utf8():
"""Allow UTF8 characters in URLs themselves."""
- out = u'<a href="%(url)s" rel="nofollow">%(url)s</a>'
+ outs = ('<a href="{0!s}" rel="nofollow">{0!s}</a>',
+ '<a rel="nofollow" href="{0!s}">{0!s}</a>')
+
+ out = lambda url: [x.format(url) for x in outs]
tests = (
- ('http://éxámplé.com/', out % {'url': u'http://éxámplé.com/'}),
- ('http://éxámplé.com/íàñá/',
- out % {'url': u'http://éxámplé.com/íàñá/'}),
+ ('http://éxámplé.com/', out('http://éxámplé.com/')),
+ ('http://éxámplé.com/íàñá/', out('http://éxámplé.com/íàñá/')),
('http://éxámplé.com/íàñá/?foo=bar',
- out % {'url': u'http://éxámplé.com/íàñá/?foo=bar'}),
+ out('http://éxámplé.com/íàñá/?foo=bar')),
('http://éxámplé.com/íàñá/?fóo=bár',
- out % {'url': u'http://éxámplé.com/íàñá/?fóo=bár'}),
+ out('http://éxámplé.com/íàñá/?fóo=bár')),
)
def check(test, expected_output):
- eq_(expected_output, linkify(test))
+ in_(expected_output, linkify(test))
for test, expected_output in tests:
yield check, test, expected_output
diff --git a/bleach/tests/tools.py b/bleach/tests/tools.py
new file mode 100644
index 0000000..87f926c
--- /dev/null
+++ b/bleach/tests/tools.py
@@ -0,0 +1,7 @@
+
+
+def in_(l, a, msg=None):
+ """Shorthand for 'assert a in l, "%r not in %r" % (a, l)
+ """
+ if not a in l:
+ raise AssertionError(msg or "%r not in %r" % (a, l))