From e75853fc04102c7f72f2e955b63f9692c472f64a Mon Sep 17 00:00:00 2001
From: SVN-Git Migration
Date: Thu, 8 Oct 2015 13:41:21 -0700
Subject: Imported Upstream version 0.10.8

---
 requests/utils.py | 140 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 98 insertions(+), 42 deletions(-)

diff --git a/requests/utils.py b/requests/utils.py
index 0e0f69e..6952a99 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -14,10 +14,47 @@ import codecs
 import os
 import random
 import re
+import traceback
 import zlib
 
+from netrc import netrc, NetrcParseError
 from .compat import parse_http_list as _parse_list_header
-from .compat import quote, unquote, cookielib, SimpleCookie, is_py2
+from .compat import quote, cookielib, SimpleCookie, is_py2, urlparse
+from .compat import basestring, bytes, str
+
+
+NETRC_FILES = ('.netrc', '_netrc')
+
+
+def get_netrc_auth(url):
+    """Returns the Requests tuple auth for a given url from netrc."""
+
+    locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES)
+    netrc_path = None
+
+    for loc in locations:
+        if os.path.exists(loc) and not netrc_path:
+            netrc_path = loc
+
+    # Abort early if there isn't one.
+    if netrc_path is None:
+        return netrc_path
+
+    ri = urlparse(url)
+
+    # Strip port numbers from netloc
+    host = ri.netloc.split(':')[0]
+
+    try:
+        _netrc = netrc(netrc_path).authenticators(host)
+        if _netrc:
+            # Return with login / password
+            login_i = (0 if _netrc[0] else 1)
+            return (_netrc[login_i], _netrc[2])
+    except (NetrcParseError, IOError):
+        # If there was a parsing error or a permissions issue reading the file,
+        # we'll just skip netrc auth
+        pass
 
 
 def dict_from_string(s):
@@ -25,20 +62,26 @@ def dict_from_string(s):
 
     cookies = dict()
 
-    c = SimpleCookie()
-    c.load(s)
+    try:
+        c = SimpleCookie()
+        c.load(s)
 
-    for k,v in list(c.items()):
-        cookies.update({k: v.value})
+        for k, v in list(c.items()):
+            cookies.update({k: v.value})
+    # This stuff is not to be trusted.
+    except Exception:
+        pass
 
     return cookies
 
+
 def guess_filename(obj):
     """Tries to guess the filename of the given object."""
     name = getattr(obj, 'name', None)
     if name and name[0] != '<' and name[-1] != '>':
         return name
 
+
 # From mitsuhiko/werkzeug (used with permission).
 def parse_list_header(value):
     """Parse lists as described by RFC 2068 Section 2.
@@ -145,8 +188,14 @@ def header_expand(headers):
 
     if isinstance(headers, dict):
         headers = list(headers.items())
-
+    elif isinstance(headers, basestring):
+        return headers
     elif isinstance(headers, str):
+        # As discussed in https://github.com/kennethreitz/requests/issues/400
+        # latin-1 is the most conservative encoding used on the web. Anyone
+        # who needs more can encode to a byte-string before calling
+        return headers.encode("latin-1")
+    elif headers is None:
         return headers
 
     for i, (value, params) in enumerate(headers):
@@ -164,10 +213,9 @@
 
             collector.append('; '.join(_params))
 
-        if not len(headers) == i+1:
+        if not len(headers) == i + 1:
             collector.append(', ')
 
-
     # Remove trailing separators.
     if collector[-1] in (', ', '; '):
         del collector[-1]
@@ -175,7 +223,6 @@
 
     return ''.join(collector)
 
-
 def randombytes(n):
     """Return n random bytes."""
     if is_py2:
@@ -286,23 +333,6 @@ def get_encoding_from_headers(headers):
         return 'ISO-8859-1'
 
 
-def unicode_from_html(content):
-    """Attempts to decode an HTML string into unicode.
-    If unsuccessful, the original content is returned.
- """ - - encodings = get_encodings_from_content(content) - - for encoding in encodings: - - try: - return str(content, encoding) - except (UnicodeError, TypeError): - pass - - return content - - def stream_decode_response_unicode(iterator, r): """Stream decodes a iterator.""" @@ -354,15 +384,6 @@ def get_unicode_from_response(r): return r.content -def decode_gzip(content): - """Return gzip-decoded string. - - :param content: bytestring to gzip-decode. - """ - - return zlib.decompress(content, 16 + zlib.MAX_WBITS) - - def stream_decompress(iterator, mode='gzip'): """ Stream decodes an iterator over compressed data @@ -390,18 +411,53 @@ def stream_decompress(iterator, mode='gzip'): yield chunk else: # Make sure everything has been returned from the decompression object - buf = dec.decompress('') + buf = dec.decompress(bytes()) rv = buf + dec.flush() if rv: yield rv -def requote_path(path): - """Re-quote the given URL path component. +def stream_untransfer(gen, resp): + if 'gzip' in resp.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='gzip') + elif 'deflate' in resp.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='deflate') + + return gen - This function passes the given path through an unquote/quote cycle to + +# The unreserved URI characters (RFC 3986) +UNRESERVED_SET = frozenset( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + + "0123456789-._~") + + +def unquote_unreserved(uri): + """Un-escape any percent-escape sequences in a URI that are unreserved + characters. + This leaves all reserved, illegal and non-ASCII bytes encoded. + """ + parts = uri.split('%') + for i in range(1, len(parts)): + h = parts[i][0:2] + if len(h) == 2: + c = chr(int(h, 16)) + if c in UNRESERVED_SET: + parts[i] = c + parts[i][2:] + else: + parts[i] = '%' + parts[i] + else: + parts[i] = '%' + parts[i] + return ''.join(parts) + + +def requote_uri(uri): + """Re-quote the given URI. + + This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ - parts = path.split(b"/") - parts = (quote(unquote(part), safe=b"") for part in parts) - return b"/".join(parts) + # Unquote only the unreserved characters + # Then quote only illegal characters (do not quote reserved, unreserved, + # or '%') + return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") -- cgit v1.2.3