From e75853fc04102c7f72f2e955b63f9692c472f64a Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:41:21 -0700 Subject: Imported Upstream version 0.10.8 --- requests/__init__.py | 5 +- requests/api.py | 2 +- requests/async.py | 36 +++-- requests/auth.py | 4 +- requests/compat.py | 5 +- requests/defaults.py | 8 + requests/exceptions.py | 3 +- requests/models.py | 232 +++++++++++++++------------- requests/packages/oreos/monkeys.py | 5 +- requests/packages/oreos/structures.py | 10 +- requests/packages/urllib3/__init__.py | 2 +- requests/packages/urllib3/connectionpool.py | 55 +++++-- requests/packages/urllib3/exceptions.py | 21 ++- requests/packages/urllib3/poolmanager.py | 10 ++ requests/packages/urllib3/response.py | 7 +- requests/sessions.py | 32 ++-- requests/utils.py | 140 ++++++++++++----- 17 files changed, 365 insertions(+), 212 deletions(-) (limited to 'requests') diff --git a/requests/__init__.py b/requests/__init__.py index 48fb389..73d81f6 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -15,14 +15,13 @@ requests """ __title__ = 'requests' -__version__ = '0.10.1' -__build__ = 0x001001 +__version__ = '0.10.8' +__build__ = 0x001008 __author__ = 'Kenneth Reitz' __license__ = 'ISC' __copyright__ = 'Copyright 2012 Kenneth Reitz' - from . import utils from .models import Request, Response from .api import request, get, head, post, patch, put, delete, options diff --git a/requests/api.py b/requests/api.py index b7d4158..b079eed 100644 --- a/requests/api.py +++ b/requests/api.py @@ -69,7 +69,7 @@ def head(url, **kwargs): :param **kwargs: Optional arguments that ``request`` takes. """ - kwargs.setdefault('allow_redirects', True) + kwargs.setdefault('allow_redirects', False) return request('head', url, **kwargs) diff --git a/requests/async.py b/requests/async.py index 9488447..f2dad69 100644 --- a/requests/async.py +++ b/requests/async.py @@ -23,7 +23,7 @@ from . import api __all__ = ( - 'map', + 'map', 'imap', 'get', 'options', 'head', 'post', 'put', 'patch', 'delete', 'request' ) @@ -46,15 +46,15 @@ def patched(f): return wrapped -def send(r, pool=None): - """Sends the request object using the specified pool. If a pool isn't +def send(r, pool=None, prefetch=False): + """Sends the request object using the specified pool. If a pool isn't specified this method blocks. Pools are useful because you can specify size and can hence limit concurrency.""" if pool != None: - return pool.spawn(r.send) + return pool.spawn(r.send, prefetch=prefetch) - return gevent.spawn(r.send) + return gevent.spawn(r.send, prefetch=prefetch) # Patched requests.api functions. @@ -79,10 +79,28 @@ def map(requests, prefetch=True, size=None): requests = list(requests) pool = Pool(size) if size else None - jobs = [send(r, pool) for r in requests] + jobs = [send(r, pool, prefetch=prefetch) for r in requests] gevent.joinall(jobs) - if prefetch: - [r.response.content for r in requests] + return [r.response for r in requests] - return [r.response for r in requests] \ No newline at end of file + +def imap(requests, prefetch=True, size=2): + """Concurrently converts a generator object of Requests to + a generator of Responses. + + :param requests: a generator of Request objects. + :param prefetch: If False, the content will not be downloaded immediately. + :param size: Specifies the number of requests to make at a time. default is 2 + """ + + pool = Pool(size) + + def send(r): + r.send(prefetch) + return r.response + + for r in pool.imap_unordered(send, requests): + yield r + + pool.join() \ No newline at end of file diff --git a/requests/auth.py b/requests/auth.py index 183731b..2e2bebc 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -7,8 +7,6 @@ requests.auth This module contains the authentication handlers for Requests. """ -from __future__ import unicode_literals - import time import hashlib @@ -21,7 +19,7 @@ from .utils import randombytes, parse_dict_header def _basic_auth_str(username, password): """Returns a Basic Auth string.""" - return 'Basic ' + b64encode(("%s:%s" % (username, password)).encode('utf-8')).strip().decode('utf-8') + return 'Basic ' + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip().decode('latin1') class AuthBase(object): diff --git a/requests/compat.py b/requests/compat.py index 224bfd0..fec7a01 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -86,8 +86,10 @@ if is_py2: from .packages.oreos.monkeys import SimpleCookie from StringIO import StringIO - str = unicode bytes = str + str = unicode + basestring = basestring + elif is_py3: @@ -99,4 +101,5 @@ elif is_py3: str = str bytes = bytes + basestring = (str,bytes) diff --git a/requests/defaults.py b/requests/defaults.py index 424d373..9af9773 100644 --- a/requests/defaults.py +++ b/requests/defaults.py @@ -17,8 +17,12 @@ Configurations: :safe_mode: If true, Requests will catch all errors. :pool_maxsize: The maximium size of an HTTP connection pool. :pool_connections: The number of active HTTP connection pools to use. +:encode_uri: If true, URIs will automatically be percent-encoded. +:trust_env: If true, the surrouding environment will be trusted (environ, netrc). """ +SCHEMAS = ['http', 'https'] + from . import __version__ defaults = dict() @@ -38,3 +42,7 @@ defaults['max_retries'] = 0 defaults['danger_mode'] = False defaults['safe_mode'] = False defaults['keep_alive'] = True +defaults['encode_uri'] = True +defaults['trust_env'] = True + + diff --git a/requests/exceptions.py b/requests/exceptions.py index c7b98e6..d5b2ab1 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -8,12 +8,13 @@ This module contains the set of Requests' exceptions. """ -class RequestException(Exception): +class RequestException(RuntimeError): """There was an ambiguous exception that occurred while handling your request.""" class HTTPError(RequestException): """An HTTP error occurred.""" + response = None class ConnectionError(RequestException): """A Connection error occurred.""" diff --git a/requests/models.py b/requests/models.py index c200896..753e83a 100644 --- a/requests/models.py +++ b/requests/models.py @@ -21,14 +21,16 @@ from .packages.urllib3.exceptions import SSLError as _SSLError from .packages.urllib3.exceptions import HTTPError as _HTTPError from .packages.urllib3 import connectionpool, poolmanager from .packages.urllib3.filepost import encode_multipart_formdata +from .defaults import SCHEMAS from .exceptions import ( ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects, URLRequired, SSLError) from .utils import ( - get_encoding_from_headers, stream_decode_response_unicode, - stream_decompress, guess_filename, requote_path, dict_from_string) - -from .compat import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, str, bytes, SimpleCookie, is_py3, is_py2 + get_encoding_from_headers, stream_untransfer, guess_filename, requote_uri, + dict_from_string, stream_decode_response_unicode, get_netrc_auth) +from .compat import ( + urlparse, urlunparse, urljoin, urlsplit, urlencode, str, bytes, + SimpleCookie, is_py2) # Import chardet if it is available. try: @@ -39,7 +41,6 @@ except ImportError: REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) - class Request(object): """The :class:`Request ` object. It carries out all functionality of Requests. Recommended interface is with the Requests functions. @@ -64,16 +65,14 @@ class Request(object): verify=None, session=None): + #: Dictionary of configurations for this request. + self.config = dict(config or []) + #: Float describes the timeout of the request. # (Use socket.setdefaulttimeout() as fallback) self.timeout = timeout #: Request URL. - - # if isinstance(url, str): - # url = url.encode('utf-8') - # print(dir(url)) - self.url = url #: Dictionary of HTTP Headers to attach to the :class:`Request `. @@ -103,6 +102,14 @@ class Request(object): # Dictionary mapping protocol to the URL of the proxy (e.g. {'http': 'foo.bar:3128'}) self.proxies = dict(proxies or []) + # If no proxies are given, allow configuration by environment variables + # HTTP_PROXY and HTTPS_PROXY. + if not self.proxies and self.config.get('trust_env'): + if 'HTTP_PROXY' in os.environ: + self.proxies['http'] = os.environ['HTTP_PROXY'] + if 'HTTPS_PROXY' in os.environ: + self.proxies['https'] = os.environ['HTTPS_PROXY'] + self.data, self._enc_data = self._encode_params(data) self.params, self._enc_params = self._encode_params(params) @@ -116,9 +123,6 @@ class Request(object): #: CookieJar to attach to :class:`Request `. self.cookies = dict(cookies or []) - #: Dictionary of configurations for this request. - self.config = dict(config or []) - #: True if Request has been sent. self.sent = False @@ -152,15 +156,9 @@ class Request(object): self.headers = headers self._poolmanager = _poolmanager - # Pre-request hook. - r = dispatch_hook('pre_request', hooks, self) - self.__dict__.update(r.__dict__) - - def __repr__(self): return '' % (self.method) - def _build_response(self, resp): """Build internal :class:`Response ` object from given response. @@ -200,26 +198,31 @@ class Request(object): # Save original response for later. response.raw = resp - response.url = self.full_url + if isinstance(self.full_url, bytes): + response.url = self.full_url.decode('utf-8') + else: + response.url = self.full_url return response history = [] r = build(resp) - cookies = self.cookies + self.cookies.update(r.cookies) if r.status_code in REDIRECT_STATI and not self.redirect: + while (('location' in r.headers) and + ((r.status_code is codes.see_other) or (self.allow_redirects))): - while ( - ('location' in r.headers) and - ((r.status_code is codes.see_other) or (self.allow_redirects)) - ): + r.content # Consume socket so it can be released if not len(history) < self.config.get('max_redirects'): raise TooManyRedirects() + # Release the connection back into the pool. + r.raw.release_conn() + history.append(r) url = r.headers['location'] @@ -232,7 +235,10 @@ class Request(object): # Facilitate non-RFC2616-compliant 'location' headers # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') if not urlparse(url).netloc: - url = urljoin(r.url, url) + url = urljoin(r.url, + # Compliant with RFC3986, we percent + # encode the url. + requote_uri(url)) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if r.status_code is codes.see_other: @@ -254,18 +260,17 @@ class Request(object): method=method, params=self.session.params, auth=self.auth, - cookies=cookies, + cookies=self.cookies, redirect=True, config=self.config, timeout=self.timeout, _poolmanager=self._poolmanager, - proxies = self.proxies, - verify = self.verify, - session = self.session + proxies=self.proxies, + verify=self.verify, + session=self.session ) request.send() - cookies.update(request.response.cookies) r = request.response self.cookies.update(r.cookies) @@ -275,7 +280,6 @@ class Request(object): self.response.request = self self.response.cookies.update(self.cookies) - @staticmethod def _encode_params(data): """Encode parameters in a piece of data. @@ -288,10 +292,12 @@ class Request(object): returns it twice. """ + if isinstance(data, bytes): + return data, data + if hasattr(data, '__iter__') and not isinstance(data, str): data = dict(data) - if hasattr(data, 'items'): result = [] for k, vs in list(data.items()): @@ -314,30 +320,44 @@ class Request(object): # Support for unicode domain names and paths. scheme, netloc, path, params, query, fragment = urlparse(url) - if not scheme: raise ValueError("Invalid URL %r: No schema supplied" % url) + if not scheme in SCHEMAS: + raise ValueError("Invalid scheme %r" % scheme) + netloc = netloc.encode('idna').decode('utf-8') + if not path: + path = '/' + + if is_py2: + if isinstance(scheme, str): + scheme = scheme.encode('utf-8') + if isinstance(netloc, str): + netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') + if isinstance(params, str): + params = params.encode('utf-8') + if isinstance(query, str): + query = query.encode('utf-8') + if isinstance(fragment, str): + fragment = fragment.encode('utf-8') - path = requote_path(path) - - # print([ scheme, netloc, path, params, query, fragment ]) - # print('---------------------') - - url = (urlunparse([ scheme, netloc, path, params, query, fragment ])) + url = (urlunparse([scheme, netloc, path, params, query, fragment])) if self._enc_params: if urlparse(url).query: - return '%s&%s' % (url, self._enc_params) + url = '%s&%s' % (url, self._enc_params) else: - return '%s?%s' % (url, self._enc_params) - else: - return url + url = '%s?%s' % (url, self._enc_params) + + if self.config.get('encode_uri', True): + url = requote_uri(url) + + return url @property def path_url(self): @@ -355,9 +375,6 @@ class Request(object): if not path: path = '/' - # if is_py3: - path = quote(path.encode('utf-8')) - url.append(path) query = p.query @@ -365,19 +382,15 @@ class Request(object): url.append('?') url.append(query) - # print(url) - return ''.join(url) - def register_hook(self, event, hook): """Properly register a hook.""" return self.hooks[event].append(hook) - def send(self, anyway=False, prefetch=False): - """Sends the request. Returns True of successful, false if not. + """Sends the request. Returns True of successful, False if not. If there was an HTTPError during transmission, self.response.status_code will contain the HTTPError code. @@ -435,6 +448,10 @@ class Request(object): if (content_type) and (not 'content-type' in self.headers): self.headers['Content-Type'] = content_type + # Use .netrc auth if none was provided. + if not self.auth and self.config.get('trust_env'): + self.auth = get_netrc_auth(url) + if self.auth: if isinstance(self.auth, tuple) and len(self.auth) == 2: # special-case basic HTTP auth @@ -472,13 +489,12 @@ class Request(object): if self.verify is not True: cert_loc = self.verify - # Look for configuration. - if not cert_loc: + if not cert_loc and self.config.get('trust_env'): cert_loc = os.environ.get('REQUESTS_CA_BUNDLE') # Curl compatiblity. - if not cert_loc: + if not cert_loc and self.config.get('trust_env'): cert_loc = os.environ.get('CURL_CA_BUNDLE') # Use the awesome certifi list. @@ -509,6 +525,10 @@ class Request(object): # Attach Cookie header to request. self.headers['Cookie'] = cookie_header + # Pre-request hook. + r = dispatch_hook('pre_request', self.hooks, self) + self.__dict__.update(r.__dict__) + try: # The inner try .. except re-raises certain exceptions as # internal exception types; the outer suppresses exceptions @@ -523,7 +543,7 @@ class Request(object): redirect=False, assert_same_host=False, preload_content=False, - decode_content=True, + decode_content=False, retries=self.config.get('max_retries', 0), timeout=self.timeout, ) @@ -613,7 +633,6 @@ class Response(object): #: Dictionary of configurations for this request. self.config = {} - def __repr__(self): return '' % (self.status_code) @@ -633,7 +652,6 @@ class Response(object): return False return True - def iter_content(self, chunk_size=10 * 1024, decode_unicode=False): """Iterates over the response data. This avoids reading the content at once into memory for large responses. The chunk size is the number @@ -660,74 +678,64 @@ class Response(object): pending_bytes = resp.chunk_left while pending_bytes: chunk = fp.read(min(chunk_size, pending_bytes)) - pending_bytes-=len(chunk) + pending_bytes -= len(chunk) yield chunk - fp.read(2) # throw away crlf + fp.read(2) # throw away crlf while 1: #XXX correct line size? (httplib has 64kb, seems insane) pending_bytes = fp.readline(40).strip() + if not len(pending_bytes): + # No content, like a HEAD request. Break out. + break pending_bytes = int(pending_bytes, 16) if pending_bytes == 0: break while pending_bytes: chunk = fp.read(min(chunk_size, pending_bytes)) - pending_bytes-=len(chunk) + pending_bytes -= len(chunk) yield chunk - fp.read(2) # throw away crlf + fp.read(2) # throw away crlf self._content_consumed = True fp.close() - if getattr(getattr(self.raw, '_original_response', None), 'chunked', False): gen = generate_chunked() else: gen = generate() - if 'gzip' in self.headers.get('content-encoding', ''): - gen = stream_decompress(gen, mode='gzip') - elif 'deflate' in self.headers.get('content-encoding', ''): - gen = stream_decompress(gen, mode='deflate') + gen = stream_untransfer(gen, self) if decode_unicode: gen = stream_decode_response_unicode(gen, self) return gen - def iter_lines(self, chunk_size=10 * 1024, decode_unicode=None): """Iterates over the response data, one line at a time. This avoids reading the content at once into memory for large responses. """ - #TODO: why rstrip by default pending = None - for chunk in self.iter_content(chunk_size, decode_unicode=decode_unicode): + for chunk in self.iter_content( + chunk_size=chunk_size, + decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk - lines = chunk.splitlines(True) + lines = chunk.splitlines() - for line in lines[:-1]: - yield line.rstrip() - - # Save the last part of the chunk for next iteration, to keep full line together - # lines may be empty for the last chunk of a chunked response - - if lines: - pending = lines[-1] - #if pending is a complete line, give it baack - if pending[-1] == '\n': - yield pending.rstrip() - pending = None + if lines[-1][-1] == chunk[-1]: + pending = lines.pop() else: pending = None - # Yield the last line - if pending is not None: - yield pending.rstrip() + for line in lines: + yield line + if pending is not None: + yield pending @property def content(self): @@ -740,13 +748,26 @@ class Response(object): raise RuntimeError( 'The content for this response was already consumed') - self._content = self.raw.read() + if self.status_code is 0: + self._content = None + else: + self._content = bytes().join(self.iter_content()) or bytes() + except AttributeError: self._content = None self._content_consumed = True return self._content + def _detected_encoding(self): + try: + detected = chardet.detect(self.content) or {} + return detected.get('encoding') + + # Trust that chardet isn't available or something went terribly wrong. + except Exception: + pass + @property def text(self): @@ -762,43 +783,34 @@ class Response(object): # Fallback to auto-detected encoding if chardet is available. if self.encoding is None: - try: - detected = chardet.detect(self.content) or {} - encoding = detected.get('encoding') - - # Trust that chardet isn't available or something went terribly wrong. - except Exception: - pass + encoding = self._detected_encoding() # Decode unicode from given encoding. try: - content = str(self.content, encoding) + content = str(self.content, encoding, errors='replace') except (UnicodeError, TypeError): pass - # Try to fall back: - if not content: - try: - content = str(content, encoding, errors='replace') - except (UnicodeError, TypeError): - pass - return content - - def raise_for_status(self): + def raise_for_status(self, allow_redirects=True): """Raises stored :class:`HTTPError` or :class:`URLError`, if one occurred.""" if self.error: raise self.error - if (self.status_code >= 300) and (self.status_code < 400): - raise HTTPError('%s Redirection' % self.status_code) + if (self.status_code >= 300) and (self.status_code < 400) and not allow_redirects: + http_error = HTTPError('%s Redirection' % self.status_code) + http_error.response = self + raise http_error elif (self.status_code >= 400) and (self.status_code < 500): - raise HTTPError('%s Client Error' % self.status_code) - - elif (self.status_code >= 500) and (self.status_code < 600): - raise HTTPError('%s Server Error' % self.status_code) + http_error = HTTPError('%s Client Error' % self.status_code) + http_error.response = self + raise http_error + elif (self.status_code >= 500) and (self.status_code < 600): + http_error = HTTPError('%s Server Error' % self.status_code) + http_error.response = self + raise http_error diff --git a/requests/packages/oreos/monkeys.py b/requests/packages/oreos/monkeys.py index 2269e30..72ce68d 100644 --- a/requests/packages/oreos/monkeys.py +++ b/requests/packages/oreos/monkeys.py @@ -249,10 +249,13 @@ class CookieError(Exception): # quoted with a preceeding '\' slash. # # These are taken from RFC2068 and RFC2109. +# _RFC2965Forbidden is the list of forbidden chars we accept anyway # _LegalChars is the list of chars which don't require "'s # _Translator hash-table for fast quoting # -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~[]_" +_RFC2965Forbidden = "[]:{}=" +_LegalChars = ( string.ascii_letters + string.digits + + "!#$%&'*+-.^_`|~_" + _RFC2965Forbidden ) _Translator = { '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', diff --git a/requests/packages/oreos/structures.py b/requests/packages/oreos/structures.py index 063d5f9..8329277 100644 --- a/requests/packages/oreos/structures.py +++ b/requests/packages/oreos/structures.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -oreos.sructures -~~~~~~~~~~~~~~~ +oreos.structures +~~~~~~~~~~~~~~~~ The plastic blue packaging. @@ -362,7 +362,7 @@ class MultiDict(TypeConversionDict): """ try: return dict.pop(self, key)[0] - except KeyError, e: + except KeyError as e: if default is not _missing: return default raise KeyError(str(e)) @@ -372,7 +372,7 @@ class MultiDict(TypeConversionDict): try: item = dict.popitem(self) return (item[0], item[1][0]) - except KeyError, e: + except KeyError as e: raise KeyError(str(e)) def poplist(self, key): @@ -389,7 +389,7 @@ class MultiDict(TypeConversionDict): """Pop a ``(key, list)`` tuple from the dict.""" try: return dict.popitem(self) - except KeyError, e: + except KeyError as e: raise KeyError(str(e)) def __copy__(self): diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index 5f70c56..2e9c663 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.1' +__version__ = '1.2.2' from .connectionpool import ( diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 52b1802..39e652e 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -7,6 +7,7 @@ import logging import socket +from base64 import b64encode from socket import error as SocketError, timeout as SocketTimeout try: @@ -16,33 +17,45 @@ except ImportError: # Doesn't exist on OSX and other platforms poll = False try: # Python 3 - from http.client import HTTPConnection, HTTPSConnection, HTTPException + from http.client import HTTPConnection, HTTPException from http.client import HTTP_PORT, HTTPS_PORT except ImportError: - from httplib import HTTPConnection, HTTPSConnection, HTTPException + from httplib import HTTPConnection, HTTPException from httplib import HTTP_PORT, HTTPS_PORT try: # Python 3 - from queue import Queue, Empty, Full + from queue import LifoQueue, Empty, Full except ImportError: - from Queue import Queue, Empty, Full + from Queue import LifoQueue, Empty, Full + try: # Compiled with SSL? + HTTPSConnection = object + BaseSSLError = None + ssl = None + + try: # Python 3 + from http.client import HTTPSConnection + except ImportError: + from httplib import HTTPSConnection + import ssl BaseSSLError = ssl.SSLError + except ImportError: - ssl = None - BaseSSLError = None + pass from .packages.ssl_match_hostname import match_hostname, CertificateError from .request import RequestMethods from .response import HTTPResponse -from .exceptions import (SSLError, +from .exceptions import ( + EmptyPoolError, + HostChangedError, + LocationParseError, MaxRetryError, + SSLError, TimeoutError, - HostChangedError, - EmptyPoolError, ) from .packages.ssl_match_hostname import match_hostname, CertificateError @@ -103,6 +116,7 @@ class ConnectionPool(object): """ scheme = None + QueueCls = LifoQueue def __init__(self, host, port=None): self.host = host @@ -156,11 +170,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None): - self.host = host - self.port = port + super(HTTPConnectionPool, self).__init__(host, port) + self.strict = strict self.timeout = timeout - self.pool = Queue(maxsize) + self.pool = self.QueueCls(maxsize) self.block = block self.headers = headers or {} @@ -468,7 +482,11 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) - if not ssl: + if not ssl: # Platform-specific: Python compiled without +ssl + if not HTTPSConnection or HTTPSConnection is object: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + return HTTPSConnection(host=self.host, port=self.port) connection = VerifiedHTTPSConnection(host=self.host, port=self.port) @@ -526,7 +544,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, if basic_auth: headers['authorization'] = 'Basic ' + \ - basic_auth.encode('base64').strip() + b64encode(six.b(basic_auth)).decode('utf-8') return headers @@ -542,10 +560,12 @@ def get_host(url): >>> get_host('google.com:80') ('http', 'google.com', 80) """ + # This code is actually similar to urlparse.urlsplit, but much # simplified for our needs. port = None scheme = 'http' + if '://' in url: scheme, url = url.split('://', 1) if '/' in url: @@ -554,7 +574,12 @@ def get_host(url): _auth, url = url.split('@', 1) if ':' in url: url, port = url.split(':', 1) + + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s") + port = int(port) + return scheme, url, port @@ -592,7 +617,7 @@ def is_connection_dropped(conn): :param conn: ``HTTPConnection`` object. """ - if not poll: + if not poll: # Platform-specific return select([conn.sock], [], [], 0.0)[0] # This version is better on platforms that support it. diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 0bffeb4..15c9699 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -4,6 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php + ## Base Exceptions class HTTPError(Exception): @@ -27,18 +28,20 @@ class SSLError(HTTPError): class MaxRetryError(PoolError): "Raised when the maximum number of retries is exceeded." + def __init__(self, pool, url): - PoolError.__init__(self, pool, - "Max retries exceeded with url: %s" % url) + message = "Max retries exceeded with url: %s" % url + PoolError.__init__(self, pool, message) self.url = url class HostChangedError(PoolError): "Raised when an existing pool gets a request for a foreign host." + def __init__(self, pool, url, retries=3): - PoolError.__init__(self, pool, - "Tried to open a foreign host with url: %s" % url) + message = "Tried to open a foreign host with url: %s" % url + PoolError.__init__(self, pool, message) self.url = url self.retries = retries @@ -52,3 +55,13 @@ class TimeoutError(PoolError): class EmptyPoolError(PoolError): "Raised when a pool runs out of connections and no more are allowed." pass + + +class LocationParseError(ValueError, HTTPError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + super(LocationParseError, self).__init__(self, message) + + self.location = location diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index f194b2e..d42f35b 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -117,9 +117,19 @@ class ProxyManager(RequestMethods): def __init__(self, proxy_pool): self.proxy_pool = proxy_pool + def _set_proxy_headers(self, headers=None): + headers = headers or {} + + # Same headers are curl passes for --proxy1.0 + headers['Accept'] = '*/*' + headers['Proxy-Connection'] = 'Keep-Alive' + + return headers + def urlopen(self, method, url, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." kw['assert_same_host'] = False + kw['headers'] = self._set_proxy_headers(kw.get('headers')) return self.proxy_pool.urlopen(method, url, **kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index e023970..4dd431e 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -11,12 +11,7 @@ import zlib from io import BytesIO from .exceptions import HTTPError - - -try: - basestring = basestring -except NameError: # Python 3 - basestring = (str, bytes) +from .packages.six import string_types as basestring log = logging.getLogger(__name__) diff --git a/requests/sessions.py b/requests/sessions.py index d9683b0..87320d6 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -40,7 +40,7 @@ def merge_kwargs(local_kwarg, default_kwarg): kwargs.update(local_kwarg) # Remove keys that are set to None. - for (k,v) in list(local_kwarg.items()): + for (k, v) in list(local_kwarg.items()): if v is None: del kwargs[k] @@ -52,7 +52,7 @@ class Session(object): __attrs__ = [ 'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks', - 'params', 'config'] + 'params', 'config', 'verify'] def __init__(self, @@ -64,6 +64,7 @@ class Session(object): hooks=None, params=None, config=None, + prefetch=False, verify=True): self.headers = headers or {} @@ -74,15 +75,13 @@ class Session(object): self.hooks = hooks or {} self.params = params or {} self.config = config or {} + self.prefetch = prefetch self.verify = verify for (k, v) in list(defaults.items()): self.config.setdefault(k, v) - self.poolmanager = PoolManager( - num_pools=self.config.get('pool_connections'), - maxsize=self.config.get('pool_maxsize') - ) + self.init_poolmanager() # Set up a CookieJar to be used by default self.cookies = {} @@ -91,6 +90,12 @@ class Session(object): if cookies is not None: self.cookies.update(cookies) + def init_poolmanager(self): + self.poolmanager = PoolManager( + num_pools=self.config.get('pool_connections'), + maxsize=self.config.get('pool_maxsize') + ) + def __repr__(self): return '' % (id(self)) @@ -145,9 +150,7 @@ class Session(object): headers = {} if headers is None else headers params = {} if params is None else params hooks = {} if hooks is None else hooks - - if verify is None: - verify = self.verify + prefetch = self.prefetch or prefetch # use session's hooks as defaults for key, cb in list(self.hooks.items()): @@ -235,7 +238,7 @@ class Session(object): :param **kwargs: Optional arguments that ``request`` takes. """ - kwargs.setdefault('allow_redirects', True) + kwargs.setdefault('allow_redirects', False) return self.request('head', url, **kwargs) @@ -281,6 +284,15 @@ class Session(object): return self.request('delete', url, **kwargs) + def __getstate__(self): + return dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) + + def __setstate__(self, state): + for attr, value in state.items(): + setattr(self, attr, value) + + self.init_poolmanager() + def session(**kwargs): """Returns a :class:`Session` for context-management.""" diff --git a/requests/utils.py b/requests/utils.py index 0e0f69e..6952a99 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -14,10 +14,47 @@ import codecs import os import random import re +import traceback import zlib +from netrc import netrc, NetrcParseError from .compat import parse_http_list as _parse_list_header -from .compat import quote, unquote, cookielib, SimpleCookie, is_py2 +from .compat import quote, cookielib, SimpleCookie, is_py2, urlparse +from .compat import basestring, bytes, str + + +NETRC_FILES = ('.netrc', '_netrc') + + +def get_netrc_auth(url): + """Returns the Requests tuple auth for a given url from netrc.""" + + locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES) + netrc_path = None + + for loc in locations: + if os.path.exists(loc) and not netrc_path: + netrc_path = loc + + # Abort early if there isn't one. + if netrc_path is None: + return netrc_path + + ri = urlparse(url) + + # Strip port numbers from netloc + host = ri.netloc.split(':')[0] + + try: + _netrc = netrc(netrc_path).authenticators(host) + if _netrc: + # Return with login / password + login_i = (0 if _netrc[0] else 1) + return (_netrc[login_i], _netrc[2]) + except (NetrcParseError, IOError): + # If there was a parsing error or a permissions issue reading the file, + # we'll just skip netrc auth + pass def dict_from_string(s): @@ -25,20 +62,26 @@ def dict_from_string(s): cookies = dict() - c = SimpleCookie() - c.load(s) + try: + c = SimpleCookie() + c.load(s) - for k,v in list(c.items()): - cookies.update({k: v.value}) + for k, v in list(c.items()): + cookies.update({k: v.value}) + # This stuff is not to be trusted. + except Exception: + pass return cookies + def guess_filename(obj): """Tries to guess the filename of the given object.""" name = getattr(obj, 'name', None) if name and name[0] != '<' and name[-1] != '>': return name + # From mitsuhiko/werkzeug (used with permission). def parse_list_header(value): """Parse lists as described by RFC 2068 Section 2. @@ -145,8 +188,14 @@ def header_expand(headers): if isinstance(headers, dict): headers = list(headers.items()) - + elif isinstance(headers, basestring): + return headers elif isinstance(headers, str): + # As discussed in https://github.com/kennethreitz/requests/issues/400 + # latin-1 is the most conservative encoding used on the web. Anyone + # who needs more can encode to a byte-string before calling + return headers.encode("latin-1") + elif headers is None: return headers for i, (value, params) in enumerate(headers): @@ -164,10 +213,9 @@ def header_expand(headers): collector.append('; '.join(_params)) - if not len(headers) == i+1: + if not len(headers) == i + 1: collector.append(', ') - # Remove trailing separators. if collector[-1] in (', ', '; '): del collector[-1] @@ -175,7 +223,6 @@ def header_expand(headers): return ''.join(collector) - def randombytes(n): """Return n random bytes.""" if is_py2: @@ -286,23 +333,6 @@ def get_encoding_from_headers(headers): return 'ISO-8859-1' -def unicode_from_html(content): - """Attempts to decode an HTML string into unicode. - If unsuccessful, the original content is returned. - """ - - encodings = get_encodings_from_content(content) - - for encoding in encodings: - - try: - return str(content, encoding) - except (UnicodeError, TypeError): - pass - - return content - - def stream_decode_response_unicode(iterator, r): """Stream decodes a iterator.""" @@ -354,15 +384,6 @@ def get_unicode_from_response(r): return r.content -def decode_gzip(content): - """Return gzip-decoded string. - - :param content: bytestring to gzip-decode. - """ - - return zlib.decompress(content, 16 + zlib.MAX_WBITS) - - def stream_decompress(iterator, mode='gzip'): """ Stream decodes an iterator over compressed data @@ -390,18 +411,53 @@ def stream_decompress(iterator, mode='gzip'): yield chunk else: # Make sure everything has been returned from the decompression object - buf = dec.decompress('') + buf = dec.decompress(bytes()) rv = buf + dec.flush() if rv: yield rv -def requote_path(path): - """Re-quote the given URL path component. +def stream_untransfer(gen, resp): + if 'gzip' in resp.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='gzip') + elif 'deflate' in resp.headers.get('content-encoding', ''): + gen = stream_decompress(gen, mode='deflate') + + return gen - This function passes the given path through an unquote/quote cycle to + +# The unreserved URI characters (RFC 3986) +UNRESERVED_SET = frozenset( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + + "0123456789-._~") + + +def unquote_unreserved(uri): + """Un-escape any percent-escape sequences in a URI that are unreserved + characters. + This leaves all reserved, illegal and non-ASCII bytes encoded. + """ + parts = uri.split('%') + for i in range(1, len(parts)): + h = parts[i][0:2] + if len(h) == 2: + c = chr(int(h, 16)) + if c in UNRESERVED_SET: + parts[i] = c + parts[i][2:] + else: + parts[i] = '%' + parts[i] + else: + parts[i] = '%' + parts[i] + return ''.join(parts) + + +def requote_uri(uri): + """Re-quote the given URI. + + This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ - parts = path.split(b"/") - parts = (quote(unquote(part), safe=b"") for part in parts) - return b"/".join(parts) + # Unquote only the unreserved characters + # Then quote only illegal characters (do not quote reserved, unreserved, + # or '%') + return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") -- cgit v1.2.3