From fac84c6d90e0875e6c1b10c5ef02d577ee008af4 Mon Sep 17 00:00:00 2001 From: Per Andersson Date: Sun, 9 Jun 2013 19:45:54 +0200 Subject: Imported Upstream version 1.2.2 --- bleach/__init__.py | 230 ++++++++++++++++++----------------------- bleach/callbacks.py | 15 +++ bleach/sanitizer.py | 6 +- bleach/tests/test_basics.py | 5 +- bleach/tests/test_css.py | 16 ++- bleach/tests/test_delinkify.py | 109 ------------------- bleach/tests/test_links.py | 200 ++++++++++++++++++++++++++++------- bleach/tests/test_security.py | 4 + 8 files changed, 306 insertions(+), 279 deletions(-) create mode 100644 bleach/callbacks.py delete mode 100644 bleach/tests/test_delinkify.py (limited to 'bleach') diff --git a/bleach/__init__.py b/bleach/__init__.py index bc8e49c..af75d0f 100644 --- a/bleach/__init__.py +++ b/bleach/__init__.py @@ -1,19 +1,18 @@ -import itertools import logging import re import sys -import urlparse import html5lib from html5lib.sanitizer import HTMLSanitizer from html5lib.serializer.htmlserializer import HTMLSerializer -from encoding import force_unicode -from sanitizer import BleachSanitizer +from . import callbacks as linkify_callbacks +from .encoding import force_unicode +from .sanitizer import BleachSanitizer -VERSION = (1, 1, 5) -__version__ = '.'.join(map(str, VERSION)) +VERSION = (1, 2, 1) +__version__ = '1.2.1' __all__ = ['clean', 'linkify'] @@ -56,18 +55,21 @@ TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws xn ye yt yu za zm zw""".split() +PROTOCOLS = HTMLSanitizer.acceptable_protocols + TLDS.reverse() url_re = re.compile( r"""\(* # Match any opening parentheses. - \b(?"]*)? # /path/zz (excluding "unsafe" chars from RFC 1738, # except for # and ~, which happen in practice) - """ % u'|'.join(TLDS), re.VERBOSE | re.UNICODE) + """ % (u'|'.join(PROTOCOLS), u'|'.join(TLDS)), + re.IGNORECASE | re.VERBOSE | re.UNICODE) -proto_re = re.compile(r'^[\w-]+:/{0,3}') +proto_re = re.compile(r'^[\w-]+:/{0,3}', re.IGNORECASE) punct_re = re.compile(r'([\.,]+)$') @@ -83,7 +85,10 @@ email_re = re.compile( NODE_TEXT = 4 # The numeric ID of a text node in simpletree. -identity = lambda x: x # The identity function. +DEFAULT_CALLBACKS = [linkify_callbacks.nofollow] + +PY_26 = (sys.version_info < (2, 7)) +RECURSION_EXCEPTION = RuntimeError if not PY_26 else AttributeError def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, @@ -93,8 +98,6 @@ def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, return u'' text = force_unicode(text) - if text.startswith(u'' open_comment = '