Diffstat (limited to 'requests/utils.py')
-rw-r--r--  requests/utils.py | 140
1 file changed, 98 insertions(+), 42 deletions(-)
diff --git a/requests/utils.py b/requests/utils.py
index 0e0f69e..6952a99 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -14,10 +14,47 @@ import codecs
import os
import random
import re
+import traceback
import zlib
+from netrc import netrc, NetrcParseError
from .compat import parse_http_list as _parse_list_header
-from .compat import quote, unquote, cookielib, SimpleCookie, is_py2
+from .compat import quote, cookielib, SimpleCookie, is_py2, urlparse
+from .compat import basestring, bytes, str
+
+
+NETRC_FILES = ('.netrc', '_netrc')
+
+
+def get_netrc_auth(url):
+ """Returns the Requests tuple auth for a given url from netrc."""
+
+ locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES)
+ netrc_path = None
+
+ for loc in locations:
+ if os.path.exists(loc) and not netrc_path:
+ netrc_path = loc
+
+ # Abort early if there isn't one.
+ if netrc_path is None:
+ return netrc_path
+
+ ri = urlparse(url)
+
+ # Strip port numbers from netloc
+ host = ri.netloc.split(':')[0]
+
+ try:
+ _netrc = netrc(netrc_path).authenticators(host)
+ if _netrc:
+ # Return with login / password
+ login_i = (0 if _netrc[0] else 1)
+ return (_netrc[login_i], _netrc[2])
+ except (NetrcParseError, IOError):
+ # If there was a parsing error or a permissions issue reading the file,
+ # we'll just skip netrc auth
+ pass
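
As a standalone sketch of the lookup the new get_netrc_auth performs (the netrc file, hostname, and credentials below are invented for illustration):

    import os
    import tempfile
    from netrc import netrc
    from urllib.parse import urlparse  # what requests' .compat resolves urlparse to on py3

    # Write a throwaway netrc file with one hypothetical machine entry.
    with tempfile.NamedTemporaryFile('w', suffix='_netrc', delete=False) as f:
        f.write('machine example.com login alice password s3cret\n')
        path = f.name

    # Same steps as the diff: strip the port, then ask netrc about the host.
    host = urlparse('http://example.com:8080/data').netloc.split(':')[0]
    entry = netrc(path).authenticators(host)
    if entry:
        # authenticators() returns (login, account, password); fall back to
        # the account field when login is empty, as get_netrc_auth does.
        print((entry[0] or entry[1], entry[2]))  # -> ('alice', 's3cret')
    os.unlink(path)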
def dict_from_string(s):
@@ -25,20 +62,26 @@ def dict_from_string(s):
cookies = dict()
- c = SimpleCookie()
- c.load(s)
+ try:
+ c = SimpleCookie()
+ c.load(s)
- for k,v in list(c.items()):
- cookies.update({k: v.value})
+ for k, v in list(c.items()):
+ cookies.update({k: v.value})
+ # This stuff is not to be trusted.
+ except Exception:
+ pass
return cookies
+
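
For reference, the SimpleCookie round-trip that dict_from_string now wraps in a blanket try/except, sketched with an invented cookie string:

    from http.cookies import SimpleCookie  # requests' .compat source on py3

    cookies = {}
    try:
        c = SimpleCookie()
        c.load('sessionid=abc123; theme=dark')
        for k, v in list(c.items()):
            cookies[k] = v.value
    except Exception:
        # Malformed cookie data now degrades to an empty dict instead of raising.
        pass
    print(cookies)  # -> {'sessionid': 'abc123', 'theme': 'dark'}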
def guess_filename(obj):
"""Tries to guess the filename of the given object."""
name = getattr(obj, 'name', None)
if name and name[0] != '<' and name[-1] != '>':
return name
+
# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
"""Parse lists as described by RFC 2068 Section 2.
@@ -145,8 +188,14 @@ def header_expand(headers):
if isinstance(headers, dict):
headers = list(headers.items())
-
+ elif isinstance(headers, basestring):
+ return headers
elif isinstance(headers, str):
+ # As discussed in https://github.com/kennethreitz/requests/issues/400
+ # latin-1 is the most conservative encoding used on the web. Anyone
+ # who needs more can encode to a byte-string before calling
+ return headers.encode("latin-1")
+ elif headers is None:
return headers
for i, (value, params) in enumerate(headers):
@@ -164,10 +213,9 @@ def header_expand(headers):
collector.append('; '.join(_params))
- if not len(headers) == i+1:
+ if not len(headers) == i + 1:
collector.append(', ')
-
# Remove trailing separators.
if collector[-1] in (', ', '; '):
del collector[-1]
@@ -175,7 +223,6 @@ def header_expand(headers):
return ''.join(collector)
-
def randombytes(n):
"""Return n random bytes."""
if is_py2:
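
The encoding rule header_expand adopts above, in isolation: text header values are encoded as latin-1, and anything beyond that charset is the caller's job (the sample values are invented):

    value = 'text/plain; charset=utf-8'
    print(value.encode('latin-1'))  # -> b'text/plain; charset=utf-8'

    # A value outside latin-1 must be pre-encoded by the caller:
    try:
        'note: café ∞'.encode('latin-1')
    except UnicodeEncodeError:
        print('encode to a byte-string yourself before passing it in')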
@@ -286,23 +333,6 @@ def get_encoding_from_headers(headers):
return 'ISO-8859-1'
-def unicode_from_html(content):
- """Attempts to decode an HTML string into unicode.
- If unsuccessful, the original content is returned.
- """
-
- encodings = get_encodings_from_content(content)
-
- for encoding in encodings:
-
- try:
- return str(content, encoding)
- except (UnicodeError, TypeError):
- pass
-
- return content
-
-
def stream_decode_response_unicode(iterator, r):
"""Stream decodes a iterator."""
@@ -354,15 +384,6 @@ def get_unicode_from_response(r):
return r.content
-def decode_gzip(content):
- """Return gzip-decoded string.
-
- :param content: bytestring to gzip-decode.
- """
-
- return zlib.decompress(content, 16 + zlib.MAX_WBITS)
-
-
def stream_decompress(iterator, mode='gzip'):
"""
Stream decodes an iterator over compressed data
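
Both the removed decode_gzip helper and stream_decompress lean on zlib's window-bits convention: 16 + MAX_WBITS selects gzip framing, while -MAX_WBITS selects raw deflate. A standalone sketch of that mechanism, with the payload built inline and the content-encoding value assumed:

    import gzip
    import zlib

    payload = gzip.compress(b'hello world')
    content_encoding = 'gzip'  # what stream_untransfer reads from resp.headers

    # Choose the window size the way stream_decompress's mode argument implies.
    wbits = 16 + zlib.MAX_WBITS if content_encoding == 'gzip' else -zlib.MAX_WBITS
    dec = zlib.decompressobj(wbits)

    out = dec.decompress(payload)
    # Drain the decompressor; the diff switches to the py3-safe bytes() here.
    out += dec.decompress(bytes()) + dec.flush()
    print(out)  # -> b'hello world'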
@@ -390,18 +411,53 @@ def stream_decompress(iterator, mode='gzip'):
yield chunk
else:
# Make sure everything has been returned from the decompression object
- buf = dec.decompress('')
+ buf = dec.decompress(bytes())
rv = buf + dec.flush()
if rv:
yield rv
-def requote_path(path):
- """Re-quote the given URL path component.
+def stream_untransfer(gen, resp):
+ if 'gzip' in resp.headers.get('content-encoding', ''):
+ gen = stream_decompress(gen, mode='gzip')
+ elif 'deflate' in resp.headers.get('content-encoding', ''):
+ gen = stream_decompress(gen, mode='deflate')
+
+ return gen
- This function passes the given path through an unquote/quote cycle to
+
+# The unreserved URI characters (RFC 3986)
+UNRESERVED_SET = frozenset(
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ + "0123456789-._~")
+
+
+def unquote_unreserved(uri):
+ """Un-escape any percent-escape sequences in a URI that are unreserved
+ characters.
+ This leaves all reserved, illegal and non-ASCII bytes encoded.
+ """
+ parts = uri.split('%')
+ for i in range(1, len(parts)):
+ h = parts[i][0:2]
+ if len(h) == 2:
+ c = chr(int(h, 16))
+ if c in UNRESERVED_SET:
+ parts[i] = c + parts[i][2:]
+ else:
+ parts[i] = '%' + parts[i]
+ else:
+ parts[i] = '%' + parts[i]
+ return ''.join(parts)
+
+
+def requote_uri(uri):
+ """Re-quote the given URI.
+
+ This function passes the given URI through an unquote/quote cycle to
ensure that it is fully and consistently quoted.
"""
- parts = path.split(b"/")
- parts = (quote(unquote(part), safe=b"") for part in parts)
- return b"/".join(parts)
+ # Unquote only the unreserved characters
+ # Then quote only illegal characters (do not quote reserved, unreserved,
+ # or '%')
+ return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~")
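
A quick check of the new quoting behaviour (assumes requests is importable; the URL is invented, and the expected output follows from the RFC 3986 rules above):

    from requests.utils import requote_uri

    print(requote_uri('http://example.com/a%7Eb/c d?q=1'))
    # %7E is '~', an unreserved character, so it is un-escaped, while the raw
    # space is not in the safe set and gets quoted:
    # -> http://example.com/a~b/c%20d?q=1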