1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
from nose.tools import eq_
import bleach
def test_delinkify():
    # With default arguments the <a> wrapper is dropped and only the link
    # text is kept, whether the link stands alone or sits inside other text.
    lone_link = bleach.delinkify('<a href="http://ex.mp">test</a>')
    eq_('test', lone_link)

    embedded_link = bleach.delinkify('foo<a href="http://ex.mp">test</a>bar')
    eq_('footestbar', embedded_link)
def test_whitelist():
    # A link whose domain is listed in allow_domains is returned unchanged;
    # a link to any other domain is reduced to its text.
    link = '<a href="http://ex.mp">test</a>'

    listed = bleach.delinkify(link, allow_domains=['ex.mp'])
    eq_(link, listed)

    unlisted = bleach.delinkify(link, allow_domains=['ex2.mp'])
    eq_('test', unlisted)

    # Allow a single domain as a special case.
    single = bleach.delinkify(link, allow_domains='ex.mp')
    eq_(link, single)
def test_nested_a():
    # With no allowed domains both nested links collapse to plain text;
    # allowing ex.mp keeps only the ex.mp link around the first "test".
    markup = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>'

    all_stripped = bleach.delinkify(markup)
    eq_('testtest', all_stripped)

    outer_kept = bleach.delinkify(markup, allow_domains=['ex.mp'])
    eq_('<a href="http://ex.mp">test</a>test', outer_kept)
def test_nested_tag():
    # Only the <a> wrapper is removed; the <span> inside the link is kept.
    markup = '<a href="http://ex.mp">test<span>test</span></a>'
    result = bleach.delinkify(markup)
    eq_('test<span>test</span>', result)
def test_a_name():
    """Leave non-link <a> tags (here, a named anchor) untouched."""
    anchor = '<a name="foo">bar</a>'
    result = bleach.delinkify(anchor)
    eq_(anchor, result)
def test_relative():
    """Relative links are stripped by default, kept with allow_relative."""
    snippet = 'some <a href="/foo/bar">link</a>'

    default_result = bleach.delinkify(snippet)
    eq_('some link', default_result)

    relative_ok = bleach.delinkify(snippet, allow_relative=True)
    eq_(snippet, relative_ok)
def test_protocol_relative():
    """Protocol-relative links do not count as relative links."""
    snippet = 'bad <a href="//ex.mp">link</a>'
    stripped = 'bad link'

    # Neither the defaults nor allow_relative=True lets a "//host" link
    # through.
    for options in ({}, {'allow_relative': True}):
        eq_(stripped, bleach.delinkify(snippet, **options))

    # The link is kept once its domain is explicitly allowed.
    eq_(snippet, bleach.delinkify(snippet, allow_domains='ex.mp'))
def test_domain_match():
    # Test generator: yields one check per table row. Kept as a comment
    # (not a docstring) so each yielded case keeps its own description.

    def _check(domain, pattern, expected):
        eq_(expected, bleach._domain_match(domain, pattern))

    # Rows are (domain, pattern, expected result of _domain_match).
    # As this table expects it, a leading '*' stands for at most one label
    # and a leading '**' also covers deeper subdomains.
    cases = (
        ('ex.mp', 'ex.mp', True),
        ('ex.mp', '*.ex.mp', True),
        ('test.ex.mp', '*.ex.mp', True),
        ('test.ex.mp', 'ex.mp', False),
        ('test.test.ex.mp', '*.ex.mp', False),
        ('test.test.ex.mp', '**.ex.mp', True),
        ('wrong.mp', 'ex.mp', False),
        ('wrong.mp', '*.ex.mp', False),
        ('really.wrong.mp', 'ex.mp', False),
        ('really.wrong.mp', '*.ex.mp', False),
        ('really.very.wrong.mp', '*.ex.mp', False),
        ('EX.mp', 'ex.mp', True),  # Domains are case-insensitive.
        ('ex.mp', 'an.ex.mp', False),
        ('ex.mp', '*.an.ex.mp', False),
        ('an.ex.am.pl', 'an.*.am.pl', True),
        ('a.ex.am.pl', 'an.*.am.pl', False),
        ('ex.am.pl', 'an.*.am.pl', False),
    )

    for domain, pattern, expected in cases:
        yield _check, domain, pattern, expected
def test_double_star():
    # '**' is accepted as the leading label of a pattern, but must be
    # rejected with ValidationError when it appears in the middle.
    #
    # Failures are raised explicitly instead of via `assert`: assert
    # statements are removed under `python -O`, which would let this test
    # pass without checking anything.
    if not bleach._domain_match('ex.mp', '**.ex.mp'):
        raise AssertionError('_domain_match should accept **.ex.mp')

    try:
        bleach._domain_match('ex.mp', 'an.**.ex.mp')
    except bleach.ValidationError:
        pass
    else:
        raise AssertionError('_domain_match should not accept an.**.ex.mp')
def test_allow_subdomains():
    # Test generator: runs delinkify with a mixed list of exact and
    # wildcard domains. Kept as a comment (not a docstring) so each yielded
    # case keeps its own description.
    allowed = ('ex.mp', '*.exa.mp', 'an.exam.pl', '*.my.examp.le')

    # Rows are (input markup, expected output); None as the expected output
    # means the input must come back unchanged.
    cases = (
        ('<a href="http://an.ex.mp">bad</a>', 'bad'),
        ('<a href="http://exa.mp">good</a>', None),
        ('<a href="http://an.exa.mp">good</a>', None),
        ('<a href="http://an.exam.pl">good</a>', None),
        ('<a href="http://another.exam.pl">bad</a>', 'bad'),
        ('<a href="http://a.bad.examp.le">bad</a>', 'bad'),
        ('<a href="http://a.very.bad.examp.le">bad</a>', 'bad'),
    )

    def _check(markup, stripped):
        result = bleach.delinkify(markup, allow_domains=allowed)
        if stripped is None:
            eq_(markup, result)
        else:
            eq_(stripped, result)

    for markup, stripped in cases:
        yield _check, markup, stripped
|