1 files changed, 164 insertions, 36 deletions
diff --git a/bleach/tests/test_links.py b/bleach/tests/test_links.py
index 7caf006..ac593c4 100644
--- a/bleach/tests/test_links.py
+++ b/bleach/tests/test_links.py
@@ -3,11 +3,9 @@ import urllib
 from html5lib.tokenizer import HTMLTokenizer
 from nose.tools import eq_
 
-from bleach import linkify, url_re
+from bleach import linkify, url_re, DEFAULT_CALLBACKS as DC
 
 
-def filter_url(url):
-    return u'http://bouncer/?u=%s' % urllib.quote_plus(url)
 
 
 def test_url_re():
@@ -45,38 +43,139 @@ def test_trailing_slash():
 
 
 def test_mangle_link():
+    """We can muck with the href attribute of the link."""
+    def filter_url(attrs, new=False):
+        attrs['href'] = (u'http://bouncer/?u=%s' %
+                         urllib.quote_plus(attrs['href']))
+        return attrs
+
     eq_('<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
         'http://example.com</a>',
-        linkify('http://example.com', filter_url=filter_url))
+        linkify('http://example.com', DC + [filter_url]))
+
+
+def test_mangle_text():
+    """We can muck with the inner text of a link."""
+
+    def ft(attrs, new=False):
+        attrs['_text'] = 'bar'
+        return attrs
+
+    eq_('<a href="http://ex.mp">bar</a> <a href="http://ex.mp/foo">bar</a>',
+        linkify('http://ex.mp <a href="http://ex.mp/foo">foo</a>', [ft]))
 
 
 def test_email_link():
-    eq_('a james@example.com mailto',
-        linkify('a james@example.com mailto'))
-    eq_('a james@example.com.au mailto',
-        linkify('a james@example.com.au mailto'))
-    eq_('a <a href="mailto:james@example.com" rel="nofollow">'
-        'james@example.com</a> mailto',
-        linkify('a james@example.com mailto', parse_email=True))
-    eq_('aussie <a href="mailto:james@example.com.au" rel="nofollow">'
-        'james@example.com.au</a> mailto',
-        linkify('aussie james@example.com.au mailto', parse_email=True))
-    eq_('email to <a href="james@example.com" rel="nofollow">'
-        'james@example.com</a>',
-        linkify('email to <a href="james@example.com">'
-        'james@example.com</a>', parse_email=True))
+    tests = (
+        ('a james@example.com mailto', False, 'a james@example.com mailto'),
+        ('a james@example.com.au mailto', False,
+            'a james@example.com.au mailto'),
+        ('a <a href="mailto:james@example.com">james@example.com</a> mailto',
+            True, 'a james@example.com mailto'),
+        ('aussie <a href="mailto:james@example.com.au">'
+            'james@example.com.au</a> mailto', True,
+            'aussie james@example.com.au mailto'),
+        # Pathological: the existing href lacks mailto: and is kept unchanged.
+        ('email to <a href="james@example.com" rel="nofollow">'
+            'james@example.com</a>', True,
+            'email to <a href="james@example.com">james@example.com</a>'),
+    )
+
+    def _check(o, p, i):
+        eq_(o, linkify(i, parse_email=p))
+
+    for (o, p, i) in tests:
+        yield _check, o, p, i
 
 
 def test_email_link_escaping():
-    eq_('''<a href='mailto:"james"@example.com' rel="nofollow">'''
-        '''"james"@example.com</a>''',
-        linkify('"james"@example.com', parse_email=True))
-    eq_('''<a href="mailto:&quot;j'ames&quot;@example.com" rel="nofollow">'''
-        '''"j'ames"@example.com</a>''',
-        linkify('"j\'ames"@example.com', parse_email=True))
-    eq_('''<a href='mailto:"ja>mes"@example.com' rel="nofollow">'''
-        '''"ja&gt;mes"@example.com</a>''',
-        linkify('"ja>mes"@example.com', parse_email=True))
+    tests = (
+        ('''<a href='mailto:"james"@example.com'>'''
+            '''"james"@example.com</a>''',
+            '"james"@example.com'),
+        ('''<a href="mailto:&quot;j'ames&quot;@example.com">'''
+            '''"j'ames"@example.com</a>''',
+            '"j\'ames"@example.com'),
+        ('''<a href='mailto:"ja>mes"@example.com'>'''
+            '''"ja&gt;mes"@example.com</a>''',
+            '"ja>mes"@example.com'),
+    )
+
+    def _check(o, i):
+        eq_(o, linkify(i, parse_email=True))
+
+    for (o, i) in tests:
+        yield _check, o, i
+
+
+def test_prevent_links():
+    """Returning None from any callback should remove links or prevent them
+    from being created."""
+
+    def no_new_links(attrs, new=False):
+        if new:
+            return None
+        return attrs
+
+    def no_old_links(attrs, new=False):
+        if not new:
+            return None
+        return attrs
+
+    def noop(attrs, new=False):
+        return attrs
+
+    in_text = 'a ex.mp <a href="http://example.com">example</a>'
+    out_text = 'a <a href="http://ex.mp">ex.mp</a> example'
+    tests = (
+        ([noop], ('a <a href="http://ex.mp">ex.mp</a> '
+                  '<a href="http://example.com">example</a>'), 'noop'),
+        ([no_new_links, noop], in_text, 'no new, noop'),
+        ([noop, no_new_links], in_text, 'noop, no new'),
+        ([no_old_links, noop], out_text, 'no old, noop'),
+        ([noop, no_old_links], out_text, 'noop, no old'),
+        ([no_old_links, no_new_links], 'a ex.mp example', 'no links'),
+    )
+
+    def _check(cb, o, msg):
+        eq_(o, linkify(in_text, cb), msg)
+
+    for (cb, o, msg) in tests:
+        yield _check, cb, o, msg
+
+
+def test_set_attrs():
+    """We can set arbitrary attributes on links."""
+
+    def set_attr(attrs, new=False):
+        attrs['rev'] = 'canonical'
+        return attrs
+
+    eq_('<a href="http://ex.mp" rev="canonical">ex.mp</a>',
+        linkify('ex.mp', [set_attr]))
+
+
+def test_only_proto_links():
+    """Only create links if there's a protocol."""
+    def only_proto(attrs, new=False):
+        if new and not attrs['_text'].startswith(('http:', 'https:')):
+            return None
+        return attrs
+
+    in_text = 'a ex.mp http://ex.mp <a href="/foo">bar</a>'
+    out_text = ('a ex.mp <a href="http://ex.mp">http://ex.mp</a> '
+                '<a href="/foo">bar</a>')
+    eq_(out_text, linkify(in_text, [only_proto]))
+
+
+def test_stop_email():
+    """Returning None should prevent a link from being created."""
+    def no_email(attrs, new=False):
+        if attrs['href'].startswith('mailto:'):
+            return None
+        return attrs
+    text = 'do not link james@example.com'
+    eq_(text, linkify(text, parse_email=True, callbacks=[no_email]))
 
 
 def test_tlds():
@@ -98,7 +197,7 @@ def test_escaping():
 
 def test_nofollow_off():
     eq_('<a href="http://example.com">example.com</a>',
-        linkify(u'example.com', nofollow=False))
+        linkify(u'example.com', []))
 
 
 def test_link_in_html():
@@ -297,16 +396,45 @@ def test_ports():
         yield check, test, output
 
 
-def test_target():
-    eq_('<a href="http://example.com" rel="nofollow" '
-        'target="_blank">example.com</a>',
-        linkify(u'example.com', target='_blank'))
-    eq_('<a href="http://example.com" target="_blank">example.com</a>',
-        linkify(u'example.com', target='_blank', nofollow=False))
-
-
 def test_tokenizer():
     """Linkify doesn't always have to sanitize."""
     raw = '<em>test<x></x></em>'
     eq_('<em>test&lt;x&gt;&lt;/x&gt;</em>', linkify(raw))
     eq_(raw, linkify(raw, tokenizer=HTMLTokenizer))
+
+
+def test_ignore_bad_protocols():
+    eq_('foohttp://bar',
+        linkify('foohttp://bar'))
+    eq_('foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
+        linkify('foohttp://exampl.com'))
+
+
+def test_max_recursion_depth():
+    """If we hit the max recursion depth, just return the string."""
+    test = '<em>' * 2000 + 'foo' + '</em>' * 2000
+    eq_(test, linkify(test))
+
+
+def test_link_emails_and_urls():
+    """parse_email=True shouldn't prevent URLs from getting linkified."""
+    output = ('<a href="http://example.com" rel="nofollow">'
+              'http://example.com</a> <a href="mailto:person@example.com">'
+              'person@example.com</a>')
+    eq_(output, linkify('http://example.com person@example.com',
+                        parse_email=True))
+
+
+def test_links_case_insensitive():
+    """Protocols and domain names are case insensitive."""
+    expect = ('<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
+              'HTTP://EXAMPLE.COM</a>')
+    eq_(expect, linkify('HTTP://EXAMPLE.COM'))
+
+
+def test_elements_inside_links():
+    eq_(u'<a href="#" rel="nofollow">hello<br></a>',
+        linkify('<a href="#">hello<br></a>'))
+
+    eq_(u'<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
+        linkify('<a href="#"><strong>bold</strong> hello<br></a>'))