aboutsummaryrefslogtreecommitdiff
path: root/bleach/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bleach/__init__.py')
-rw-r--r--bleach/__init__.py36
1 files changed, 21 insertions, 15 deletions
diff --git a/bleach/__init__.py b/bleach/__init__.py
index b110972..1d8caa2 100644
--- a/bleach/__init__.py
+++ b/bleach/__init__.py
@@ -13,8 +13,8 @@ from .encoding import force_unicode
from .sanitizer import BleachSanitizer
-VERSION = (1, 4, 0)
-__version__ = '1.4'
+VERSION = (1, 4, 2)
+__version__ = '.'.join([str(n) for n in VERSION])
__all__ = ['clean', 'linkify']
@@ -51,16 +51,17 @@ TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk
ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
- net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn pr pro
- ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl sm sn so
- sr st su sv sy sz tc td tel tf tg th tj tk tl tm tn to tp tr travel tt
- tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xn ye yt yu za zm
- zw""".split()
-
-PROTOCOLS = HTMLSanitizer.acceptable_protocols
+ net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
+ pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
+ sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
+ tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
+ xn xxx ye yt yu za zm zw""".split()
+# Make sure that .com doesn't get matched by .co first
TLDS.reverse()
+PROTOCOLS = HTMLSanitizer.acceptable_protocols
+
url_re = re.compile(
r"""\(* # Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
@@ -145,14 +146,16 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
# capture any non-tag text at the start of the fragment
if new_tree.text:
if index == 0:
+ tree.text = tree.text or ''
tree.text += new_tree.text
else:
- tree[index-1].tail += new_tree.text
+ tree[index - 1].tail = tree[index - 1].tail or ''
+ tree[index - 1].tail += new_tree.text
# the put in the tagged elements into the old tree
for n in new_tree:
if n.tag == ETREE_TAG('a'):
_seen.add(n)
- tree.insert(index+count, n)
+ tree.insert(index + count, n)
count += 1
# if we got a node to remove...
if node is not None:
@@ -252,15 +255,17 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
if new_tail != node.tail:
node.tail = ''
adj = replace_nodes(tree, new_tail, None,
- current_child+1)
- #insert the new nodes made from my tail into
+ current_child + 1)
+ # Insert the new nodes made from my tail into
# the tree right after me. current_child+1
children += adj
+ continue
new_tail = re.sub(url_re, link_repl, new_tail)
if new_tail != old_tail:
node.tail = ''
- adj = replace_nodes(tree, new_tail, None, current_child+1)
+ adj = replace_nodes(tree, new_tail, None,
+ current_child + 1)
children += adj
if node.tag == ETREE_TAG('a') and not (node in _seen):
@@ -342,7 +347,7 @@ def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_pre=False,
link = apply_callbacks(link, True)
if link is None:
- return url
+ return '(' * open_brackets + url + ')' * close_brackets
_text = link.pop('_text')
_href = link.pop('href')
@@ -373,5 +378,6 @@ def _serialize(domtree):
walker = html5lib.treewalkers.getTreeWalker('etree')
stream = walker(domtree)
serializer = HTMLSerializer(quote_attr_values=True,
+ alphabetical_attributes=True,
omit_optional_tags=False)
return serializer.render(stream)