aboutsummaryrefslogtreecommitdiff
path: root/bleach/tests/test_delinkify.py
blob: f216d2f6f829a43f299cd1b246a2a6a561bd1ac7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from nose.tools import eq_

import bleach


def test_delinkify():
    """With no whitelist, a link is replaced by its text content."""
    lone_link = '<a href="http://ex.mp">test</a>'
    eq_('test', bleach.delinkify(lone_link))

    # Text on either side of the link is expected to survive untouched.
    embedded_link = 'foo<a href="http://ex.mp">test</a>bar'
    eq_('footestbar', bleach.delinkify(embedded_link))


def test_whitelist():
    """Links survive only when their domain is listed in allow_domains."""
    link = '<a href="http://ex.mp">test</a>'

    kept = bleach.delinkify(link, allow_domains=['ex.mp'])
    eq_(link, kept)

    unwrapped = bleach.delinkify(link, allow_domains=['ex2.mp'])
    eq_('test', unwrapped)

    # A bare string is accepted in place of a one-element list.
    kept_from_str = bleach.delinkify(link, allow_domains='ex.mp')
    eq_(link, kept_from_str)


def test_nested_a():
    """An <a> nested inside another <a> is handled separately from it."""
    doc = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>'

    # Nothing whitelisted: both links collapse to their text.
    eq_('testtest', bleach.delinkify(doc))

    # Only ex.mp whitelisted: the ex.mp link is kept around the first
    # "test" and the foo.bar link is reduced to its text.
    outer_kept = '<a href="http://ex.mp">test</a>test'
    eq_(outer_kept, bleach.delinkify(doc, allow_domains=['ex.mp']))


def test_nested_tag():
    """Unwrapping a link leaves the non-link markup inside it in place."""
    source = '<a href="http://ex.mp">test<span>test</span></a>'
    expected = 'test<span>test</span>'
    eq_(expected, bleach.delinkify(source))


def test_a_name():
    """An <a> carrying only a name attribute passes through unchanged."""
    anchor = '<a name="foo">bar</a>'
    result = bleach.delinkify(anchor)
    eq_(anchor, result)


def test_relative():
    """Relative links are stripped unless allow_relative=True is given."""
    markup = 'some <a href="/foo/bar">link</a>'

    default_result = bleach.delinkify(markup)
    eq_('some link', default_result)

    relative_ok_result = bleach.delinkify(markup, allow_relative=True)
    eq_(markup, relative_ok_result)


def test_protocol_relative():
    """Protocol-relative links are not covered by allow_relative."""
    markup = 'bad <a href="//ex.mp">link</a>'
    stripped = 'bad link'

    # Same expectation with default options and with allow_relative=True.
    for options in ({}, {'allow_relative': True}):
        eq_(stripped, bleach.delinkify(markup, **options))

    # Whitelisting the host is what lets a //host link through.
    eq_(markup, bleach.delinkify(markup, allow_domains='ex.mp'))


def test_domain_match():
    """Generate one _domain_match check per row of the table below.

    Each row is (first argument, second argument, expected result); going
    by the wildcards, the second argument is the allowed-domain pattern.
    """
    cases = (
        ('ex.mp', 'ex.mp', True),
        ('ex.mp', '*.ex.mp', True),
        ('test.ex.mp', '*.ex.mp', True),
        ('test.ex.mp', 'ex.mp', False),
        ('test.test.ex.mp', '*.ex.mp', False),
        ('test.test.ex.mp', '**.ex.mp', True),
        ('wrong.mp', 'ex.mp', False),
        ('wrong.mp', '*.ex.mp', False),
        ('really.wrong.mp', 'ex.mp', False),
        ('really.wrong.mp', '*.ex.mp', False),
        ('really.very.wrong.mp', '*.ex.mp', False),
        ('EX.mp', 'ex.mp', True),  # Domains are case-insensitive.
        ('ex.mp', 'an.ex.mp', False),
        ('ex.mp', '*.an.ex.mp', False),
        ('an.ex.am.pl', 'an.*.am.pl', True),
        ('a.ex.am.pl', 'an.*.am.pl', False),
        ('ex.am.pl', 'an.*.am.pl', False),
    )

    def _assert_match(domain, pattern, expected):
        actual = bleach._domain_match(domain, pattern)
        eq_(expected, actual)

    # nose runs each yielded (callable, *args) tuple as its own test.
    for case in cases:
        yield (_assert_match,) + case


def test_double_star():
    """A leading '**' matches; a '**' in mid-pattern raises ValidationError."""
    assert bleach._domain_match('ex.mp', '**.ex.mp')

    rejected = False
    try:
        bleach._domain_match('ex.mp', 'an.**.ex.mp')
    except bleach.ValidationError:
        rejected = True
    assert rejected, '_domain_match should not accept an.**.ex.mp'


def test_allow_subdomains():
    """Generate one delinkify check per link against a mixed whitelist."""
    whitelist = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le')
    # Rows are (input markup, expected output); None stands for "the input
    # comes back unchanged".
    cases = (
        ('<a href="http://an.ex.mp">bad</a>', 'bad'),
        ('<a href="http://exa.mp">good</a>', None),
        ('<a href="http://an.exa.mp">good</a>', None),
        ('<a href="http://an.exam.pl">good</a>', None),
        ('<a href="http://another.exam.pl">bad</a>', 'bad'),
        ('<a href="http://a.bad.examp.le">bad</a>', 'bad'),
        ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'),
    )

    def _assert_output(markup, stripped):
        result = bleach.delinkify(markup, allow_domains=whitelist)
        if stripped is None:
            expected = markup
        else:
            expected = stripped
        eq_(expected, result)

    # nose runs each yielded (callable, *args) tuple as its own test.
    for markup, stripped in cases:
        yield _assert_output, markup, stripped