| author | SVN-Git Migration <python-modules-team@lists.alioth.debian.org> | 2015-10-08 13:41:29 -0700 |
|---|---|---|
| committer | SVN-Git Migration <python-modules-team@lists.alioth.debian.org> | 2015-10-08 13:41:29 -0700 |
| commit | 224200a9815f792f93632d03a38e4f0763ae69ef (patch) | |
| tree | 161977259a7d8aa262aab60d7c8fce757ad3bb0f /requests/utils.py | |
| parent | 653256249d44c67a0852d57a166948a9dc712ef4 (diff) | |
| download | python-requests-224200a9815f792f93632d03a38e4f0763ae69ef.tar, python-requests-224200a9815f792f93632d03a38e4f0763ae69ef.tar.gz | |
Imported Upstream version 2.0.0
Diffstat (limited to 'requests/utils.py')
-rw-r--r-- | requests/utils.py | 77 |
1 file changed, 49 insertions, 28 deletions
```diff
diff --git a/requests/utils.py b/requests/utils.py
index b21bf8f..3ec6131 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -21,9 +21,11 @@ from netrc import netrc, NetrcParseError
 from . import __version__
 from . import certs
 from .compat import parse_http_list as _parse_list_header
-from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse
+from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse,
+                     is_py2, is_py3, builtin_str, getproxies, proxy_bypass)
 from .cookies import RequestsCookieJar, cookiejar_from_dict
 from .structures import CaseInsensitiveDict
+from .exceptions import MissingSchema, InvalidURL
 
 _hush_pyflakes = (RequestsCookieJar,)
 
@@ -264,8 +266,12 @@ def get_encodings_from_content(content):
     """
 
     charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
+    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
+    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
 
-    return charset_re.findall(content)
+    return (charset_re.findall(content) +
+            pragma_re.findall(content) +
+            xml_re.findall(content))
 
 
 def get_encoding_from_headers(headers):
@@ -301,7 +307,7 @@ def stream_decode_response_unicode(iterator, r):
         rv = decoder.decode(chunk)
         if rv:
             yield rv
-    rv = decoder.decode('', final=True)
+    rv = decoder.decode(b'', final=True)
     if rv:
         yield rv
 
@@ -361,7 +367,11 @@ def unquote_unreserved(uri):
     for i in range(1, len(parts)):
         h = parts[i][0:2]
         if len(h) == 2 and h.isalnum():
-            c = chr(int(h, 16))
+            try:
+                c = chr(int(h, 16))
+            except ValueError:
+                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
+
             if c in UNRESERVED_SET:
                 parts[i] = c + parts[i][2:]
             else:
@@ -386,25 +396,17 @@ def requote_uri(uri):
 def get_environ_proxies(url):
     """Return a dict of environment proxies."""
-
-    proxy_keys = [
-        'all',
-        'http',
-        'https',
-        'ftp',
-        'socks'
-    ]
 
     get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
 
     # First check whether no_proxy is defined. If it is, check that the URL
     # we're getting isn't in the no_proxy list.
     no_proxy = get_proxy('no_proxy')
+    netloc = urlparse(url).netloc
 
     if no_proxy:
         # We need to check whether we match here. We need to see if we match
         # the end of the netloc, both with and without the port.
-        no_proxy = no_proxy.split(',')
-        netloc = urlparse(url).netloc
+        no_proxy = no_proxy.replace(' ', '').split(',')
 
         for host in no_proxy:
             if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
@@ -412,10 +414,15 @@ def get_environ_proxies(url):
                 # to apply the proxies on this URL.
                 return {}
 
+    # If the system proxy settings indicate that this URL should be bypassed,
+    # don't proxy.
+    if proxy_bypass(netloc):
+        return {}
+
     # If we get here, we either didn't have no_proxy set or we're not going
-    # anywhere that no_proxy applies to.
-    proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys]
-    return dict([(key, val) for (key, val) in proxies if val])
+    # anywhere that no_proxy applies to, and the system settings don't require
+    # bypassing the proxy for the current URL.
+    return getproxies()
 
 
 def default_user_agent():
@@ -526,18 +533,13 @@ def guess_json_utf(data):
     return None
 
 
-def prepend_scheme_if_needed(url, new_scheme):
-    '''Given a URL that may or may not have a scheme, prepend the given scheme.
-    Does not replace a present scheme with the one provided as an argument.'''
-    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
-
-    # urlparse is a finicky beast, and sometimes decides that there isn't a
-    # netloc present. Assume that it's being over-cautious, and switch netloc
-    # and path if urlparse decided there was no netloc.
-    if not netloc:
-        netloc, path = path, netloc
+def except_on_missing_scheme(url):
+    """Given a URL, raise a MissingSchema exception if the scheme is missing.
+    """
+    scheme, netloc, path, params, query, fragment = urlparse(url)
 
-    return urlunparse((scheme, netloc, path, params, query, fragment))
+    if not scheme:
+        raise MissingSchema('Proxy URLs must have explicit schemes.')
 
 
 def get_auth_from_url(url):
@@ -548,3 +550,22 @@ def get_auth_from_url(url):
         return (parsed.username, parsed.password)
     else:
         return ('', '')
+
+
+def to_native_string(string, encoding='ascii'):
+    """
+    Given a string object, regardless of type, returns a representation of that
+    string in the native string type, encoding and decoding where necessary.
+    This assumes ASCII unless told otherwise.
+    """
+    out = None
+
+    if isinstance(string, builtin_str):
+        out = string
+    else:
+        if is_py2:
+            out = string.encode(encoding)
+        else:
+            out = string.decode(encoding)
+
+    return out
```
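Two of the behavioral changes above are easy to exercise in isolation. First, a minimal Python 3 sketch of the new `to_native_string` helper: on Python 3, `requests.compat` defines `builtin_str` as `str` and `is_py2` as false, so only the decode branch remains. The function below is an illustrative stand-in, not the library code itself.

```python
def to_native_string(string, encoding='ascii'):
    """Return `string` as the native str type (Python 3 sketch).

    Mirrors the helper added in this diff: native strings pass through,
    byte strings are decoded (ASCII by default).
    """
    if isinstance(string, str):      # builtin_str is str on Python 3
        return string
    return string.decode(encoding)   # the is_py2 encode branch is omitted here

print(to_native_string('already native'))  # -> already native
print(to_native_string(b'from bytes'))     # -> from bytes
```

Second, the new `InvalidURL` guard in `unquote_unreserved` can be observed against an installed requests (2.0.0 or later); the `%zz` URL here is just a deliberately malformed example:

```python
from requests.exceptions import InvalidURL
from requests.utils import unquote_unreserved

try:
    unquote_unreserved('http://example.com/%zz')  # 'zz' is not valid hex
except InvalidURL as exc:
    print(exc)  # Invalid percent-escape sequence: 'zz'
```

Raising `InvalidURL` here turns what used to be an unhandled `ValueError` deep inside URL re-quoting into a requests-specific exception that callers can catch.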