diff options
Diffstat (limited to 'requests')
24 files changed, 1409 insertions, 321 deletions
| diff --git a/requests/__init__.py b/requests/__init__.py index 1af8d8e..837f0df 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -42,15 +42,15 @@ is at <http://python-requests.org>.  """  __title__ = 'requests' -__version__ = '1.2.3' -__build__ = 0x010203 +__version__ = '2.0.0' +__build__ = 0x020000  __author__ = 'Kenneth Reitz'  __license__ = 'Apache 2.0'  __copyright__ = 'Copyright 2013 Kenneth Reitz'  # Attempt to enable urllib3's SNI support, if possible  try: -    from requests.packages.urllib3.contrib import pyopenssl +    from .packages.urllib3.contrib import pyopenssl      pyopenssl.inject_into_urllib3()  except ImportError:      pass diff --git a/requests/adapters.py b/requests/adapters.py index 98b7317..d557b74 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -11,11 +11,12 @@ and maintain connections.  import socket  from .models import Response -from .packages.urllib3.poolmanager import PoolManager, ProxyManager +from .packages.urllib3.poolmanager import PoolManager, proxy_from_url  from .packages.urllib3.response import HTTPResponse +from .packages.urllib3.util import Timeout as TimeoutSauce  from .compat import urlparse, basestring, urldefrag, unquote  from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, -                    prepend_scheme_if_needed, get_auth_from_url) +                    except_on_missing_scheme, get_auth_from_url)  from .structures import CaseInsensitiveDict  from .packages.urllib3.exceptions import MaxRetryError  from .packages.urllib3.exceptions import TimeoutError @@ -71,6 +72,7 @@ class HTTPAdapter(BaseAdapter):                   pool_block=DEFAULT_POOLBLOCK):          self.max_retries = max_retries          self.config = {} +        self.proxy_manager = {}          super(HTTPAdapter, self).__init__() @@ -118,7 +120,7 @@ class HTTPAdapter(BaseAdapter):          :param verify: Whether we should actually verify the certificate.          :param cert: The SSL certificate to verify.          """ -        if url.startswith('https') and verify: +        if url.lower().startswith('https') and verify:              cert_loc = None @@ -184,19 +186,26 @@ class HTTPAdapter(BaseAdapter):      def get_connection(self, url, proxies=None):          """Returns a urllib3 connection for the given URL. This should not be          called from user code, and is only exposed for use when subclassing the -        :class:`HTTPAdapter <reqeusts.adapters.HTTPAdapter>`. +        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.          :param url: The URL to connect to.          :param proxies: (optional) A Requests-style dictionary of proxies used on this request.          """          proxies = proxies or {} -        proxy = proxies.get(urlparse(url).scheme) +        proxy = proxies.get(urlparse(url.lower()).scheme)          if proxy: -            proxy = prepend_scheme_if_needed(proxy, urlparse(url).scheme) -            conn = ProxyManager(self.poolmanager.connection_from_url(proxy)) +            except_on_missing_scheme(proxy) +            proxy_headers = self.proxy_headers(proxy) + +            if not proxy in self.proxy_manager: +                self.proxy_manager[proxy] = proxy_from_url( +                                                proxy, +                                                proxy_headers=proxy_headers) + +            conn = self.proxy_manager[proxy].connection_from_url(url)          else: -            conn = self.poolmanager.connection_from_url(url) +            conn = self.poolmanager.connection_from_url(url.lower())          return conn @@ -214,7 +223,7 @@ class HTTPAdapter(BaseAdapter):          If the message is being sent through a proxy, the full URL has to be          used. Otherwise, we should only use the path portion of the URL. -        This shoudl not be called from user code, and is only exposed for use +        This should not be called from user code, and is only exposed for use          when subclassing the          :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. @@ -232,8 +241,9 @@ class HTTPAdapter(BaseAdapter):          return url      def add_headers(self, request, **kwargs): -        """Add any headers needed by the connection. Currently this adds a -        Proxy-Authorization header. +        """Add any headers needed by the connection. As of v2.0 this does +        nothing by default, but is left for overriding by users that subclass +        the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.          This should not be called from user code, and is only exposed for use          when subclassing the @@ -242,12 +252,22 @@ class HTTPAdapter(BaseAdapter):          :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.          :param kwargs: The keyword arguments from the call to send().          """ -        proxies = kwargs.get('proxies', {}) +        pass -        if proxies is None: -            proxies = {} +    def proxy_headers(self, proxy): +        """Returns a dictionary of the headers to add to any request sent +        through a proxy. This works with urllib3 magic to ensure that they are +        correctly sent to the proxy, rather than in a tunnelled request if +        CONNECT is being used. -        proxy = proxies.get(urlparse(request.url).scheme) +        This should not be called from user code, and is only exposed for use +        when subclassing the +        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. + +        :param proxies: The url of the proxy being used for this request. +        :param kwargs: Optional additional keyword arguments. +        """ +        headers = {}          username, password = get_auth_from_url(proxy)          if username and password: @@ -255,8 +275,10 @@ class HTTPAdapter(BaseAdapter):              # to decode them.              username = unquote(username)              password = unquote(password) -            request.headers['Proxy-Authorization'] = _basic_auth_str(username, -                                                                     password) +            headers['Proxy-Authorization'] = _basic_auth_str(username, +                                                             password) + +        return headers      def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):          """Sends PreparedRequest object. Returns Response object. @@ -273,10 +295,15 @@ class HTTPAdapter(BaseAdapter):          self.cert_verify(conn, request.url, verify, cert)          url = self.request_url(request, proxies) -        self.add_headers(request, proxies=proxies) +        self.add_headers(request)          chunked = not (request.body is None or 'Content-Length' in request.headers) +        if stream: +            timeout = TimeoutSauce(connect=timeout) +        else: +            timeout = TimeoutSauce(connect=timeout, read=timeout) +          try:              if not chunked:                  resp = conn.urlopen( diff --git a/requests/auth.py b/requests/auth.py index fab05cf..30529e2 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -18,7 +18,6 @@ from base64 import b64encode  from .compat import urlparse, str  from .utils import parse_dict_header -  log = logging.getLogger(__name__)  CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' @@ -106,7 +105,9 @@ class HTTPDigestAuth(AuthBase):          A1 = '%s:%s:%s' % (self.username, realm, self.password)          A2 = '%s:%s' % (method, path) -        if qop == 'auth': +        if qop is None: +            respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2))) +        elif qop == 'auth' or 'auth' in qop.split(','):              if nonce == self.last_nonce:                  self.nonce_count += 1              else: @@ -121,8 +122,6 @@ class HTTPDigestAuth(AuthBase):              cnonce = (hashlib.sha1(s).hexdigest()[:16])              noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2))              respdig = KD(hash_utf8(A1), noncebit) -        elif qop is None: -            respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2)))          else:              # XXX handle auth-int.              return None @@ -159,10 +158,14 @@ class HTTPDigestAuth(AuthBase):              # to allow our new request to reuse the same one.              r.content              r.raw.release_conn() +            prep = r.request.copy() +            prep.prepare_cookies(r.cookies) -            r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url) -            _r = r.connection.send(r.request, **kwargs) +            prep.headers['Authorization'] = self.build_digest_header( +                prep.method, prep.url) +            _r = r.connection.send(prep, **kwargs)              _r.history.append(r) +            _r.request = prep              return _r diff --git a/requests/compat.py b/requests/compat.py index bcf94b0..0d61a57 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -83,13 +83,14 @@ except ImportError:  # ---------  if is_py2: -    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode +    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass      from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag      from urllib2 import parse_http_list      import cookielib      from Cookie import Morsel      from StringIO import StringIO      from .packages.urllib3.packages.ordered_dict import OrderedDict +    from httplib import IncompleteRead      builtin_str = str      bytes = str @@ -100,11 +101,12 @@ if is_py2:  elif is_py3:      from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag -    from urllib.request import parse_http_list +    from urllib.request import parse_http_list, getproxies, proxy_bypass      from http import cookiejar as cookielib      from http.cookies import Morsel      from io import StringIO      from collections import OrderedDict +    from http.client import IncompleteRead      builtin_str = str      str = str diff --git a/requests/cookies.py b/requests/cookies.py index d759d0a..f3ac64f 100644 --- a/requests/cookies.py +++ b/requests/cookies.py @@ -6,6 +6,7 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests.  requests.utils imports from here, so be careful with imports.  """ +import time  import collections  from .compat import cookielib, urlparse, Morsel @@ -73,6 +74,10 @@ class MockRequest(object):      def origin_req_host(self):          return self.get_origin_req_host() +    @property +    def host(self): +        return self.get_host() +  class MockResponse(object):      """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`. @@ -102,6 +107,9 @@ def extract_cookies_to_jar(jar, request, response):      :param request: our own requests.Request object      :param response: urllib3.HTTPResponse object      """ +    if not (hasattr(response, '_original_response') and +            response._original_response): +        return      # the _original_response field is the wrapped httplib.HTTPResponse object,      req = MockRequest(request)      # pull out the HTTPMessage with the headers and put it in the mock: @@ -258,6 +266,11 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):          """Deletes a cookie given a name. Wraps cookielib.CookieJar's remove_cookie_by_name()."""          remove_cookie_by_name(self, name) +    def set_cookie(self, cookie, *args, **kwargs): +        if cookie.value.startswith('"') and cookie.value.endswith('"'): +            cookie.value = cookie.value.replace('\\"', '') +        return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs) +      def update(self, other):          """Updates this jar with cookies from another CookieJar or dict-like"""          if isinstance(other, cookielib.CookieJar): @@ -354,19 +367,23 @@ def create_cookie(name, value, **kwargs):  def morsel_to_cookie(morsel):      """Convert a Morsel object into a Cookie containing the one k/v pair.""" +    expires = None +    if morsel["max-age"]: +        expires = time.time() + morsel["max-age"] +    elif morsel['expires']: +        expires = morsel['expires'] +        if type(expires) == type(""): +            time_template = "%a, %d-%b-%Y %H:%M:%S GMT" +            expires = time.mktime(time.strptime(expires, time_template))      c = create_cookie(          name=morsel.key,          value=morsel.value,          version=morsel['version'] or 0,          port=None, -        port_specified=False,          domain=morsel['domain'], -        domain_specified=bool(morsel['domain']), -        domain_initial_dot=morsel['domain'].startswith('.'),          path=morsel['path'], -        path_specified=bool(morsel['path']),          secure=bool(morsel['secure']), -        expires=morsel['max-age'] or morsel['expires'], +        expires=expires,          discard=False,          comment=morsel['comment'],          comment_url=bool(morsel['comment']), diff --git a/requests/exceptions.py b/requests/exceptions.py index c0588f6..22207e3 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -9,7 +9,7 @@ This module contains the set of Requests' exceptions.  """ -class RequestException(RuntimeError): +class RequestException(IOError):      """There was an ambiguous exception that occurred while handling your      request.""" @@ -53,3 +53,7 @@ class InvalidSchema(RequestException, ValueError):  class InvalidURL(RequestException, ValueError):      """ The URL provided was somehow invalid. """ + + +class ChunkedEncodingError(RequestException): +    """The server declared chunked encoding but sent an invalid chunk.""" diff --git a/requests/models.py b/requests/models.py index 6cf2aaa..8fd9735 100644 --- a/requests/models.py +++ b/requests/models.py @@ -11,7 +11,7 @@ import collections  import logging  import datetime -from io import BytesIO +from io import BytesIO, UnsupportedOperation  from .hooks import default_hooks  from .structures import CaseInsensitiveDict @@ -19,14 +19,16 @@ from .auth import HTTPBasicAuth  from .cookies import cookiejar_from_dict, get_cookie_header  from .packages.urllib3.filepost import encode_multipart_formdata  from .packages.urllib3.util import parse_url -from .exceptions import HTTPError, RequestException, MissingSchema, InvalidURL +from .exceptions import ( +    HTTPError, RequestException, MissingSchema, InvalidURL, +    ChunkedEncodingError)  from .utils import (      guess_filename, get_auth_from_url, requote_uri,      stream_decode_response_unicode, to_key_val_list, parse_header_links, -    iter_slices, guess_json_utf, super_len) +    iter_slices, guess_json_utf, super_len, to_native_string)  from .compat import ( -    cookielib, urlparse, urlunparse, urlsplit, urlencode, str, bytes, StringIO, -    is_py2, chardet, json, builtin_str, basestring) +    cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, +    is_py2, chardet, json, builtin_str, basestring, IncompleteRead)  CONTENT_CHUNK_SIZE = 10 * 1024  ITER_CHUNK_SIZE = 512 @@ -92,8 +94,10 @@ class RequestEncodingMixin(object):          if parameters are supplied as a dict.          """ -        if (not files) or isinstance(data, str): -            return None +        if (not files): +            raise ValueError("Files must be provided.") +        elif isinstance(data, basestring): +            raise ValueError("Data must not be a string.")          new_fields = []          fields = to_key_val_list(data or {}) @@ -104,6 +108,10 @@ class RequestEncodingMixin(object):                  val = [val]              for v in val:                  if v is not None: +                    # Don't call str() on bytestrings: in Py3 it all goes wrong. +                    if not isinstance(v, bytes): +                        v = str(v) +                      new_fields.append(                          (field.decode('utf-8') if isinstance(field, bytes) else field,                           v.encode('utf-8') if isinstance(v, str) else v)) @@ -139,6 +147,9 @@ class RequestHooksMixin(object):      def register_hook(self, event, hook):          """Properly register a hook.""" +        if event not in self.hooks: +            raise ValueError('Unsupported event specified, with event name "%s"' % (event)) +          if isinstance(hook, collections.Callable):              self.hooks[event].append(hook)          elif hasattr(hook, '__iter__'): @@ -184,8 +195,8 @@ class Request(RequestHooksMixin):          url=None,          headers=None,          files=None, -        data=dict(), -        params=dict(), +        data=None, +        params=None,          auth=None,          cookies=None,          hooks=None): @@ -209,7 +220,6 @@ class Request(RequestHooksMixin):          self.params = params          self.auth = auth          self.cookies = cookies -        self.hooks = hooks      def __repr__(self):          return '<Request [%s]>' % (self.method) @@ -217,19 +227,17 @@ class Request(RequestHooksMixin):      def prepare(self):          """Constructs a :class:`PreparedRequest <PreparedRequest>` for transmission and returns it."""          p = PreparedRequest() - -        p.prepare_method(self.method) -        p.prepare_url(self.url, self.params) -        p.prepare_headers(self.headers) -        p.prepare_cookies(self.cookies) -        p.prepare_body(self.data, self.files) -        p.prepare_auth(self.auth, self.url) -        # Note that prepare_auth must be last to enable authentication schemes -        # such as OAuth to work on a fully prepared request. - -        # This MUST go after prepare_auth. Authenticators could add a hook -        p.prepare_hooks(self.hooks) - +        p.prepare( +            method=self.method, +            url=self.url, +            headers=self.headers, +            files=self.files, +            data=self.data, +            params=self.params, +            auth=self.auth, +            cookies=self.cookies, +            hooks=self.hooks, +        )          return p @@ -264,9 +272,34 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):          #: dictionary of callback hooks, for internal usage.          self.hooks = default_hooks() +    def prepare(self, method=None, url=None, headers=None, files=None, +                data=None, params=None, auth=None, cookies=None, hooks=None): +        """Prepares the the entire request with the given parameters.""" + +        self.prepare_method(method) +        self.prepare_url(url, params) +        self.prepare_headers(headers) +        self.prepare_cookies(cookies) +        self.prepare_body(data, files) +        self.prepare_auth(auth, url) +        # Note that prepare_auth must be last to enable authentication schemes +        # such as OAuth to work on a fully prepared request. + +        # This MUST go after prepare_auth. Authenticators could add a hook +        self.prepare_hooks(hooks) +      def __repr__(self):          return '<PreparedRequest [%s]>' % (self.method) +    def copy(self): +        p = PreparedRequest() +        p.method = self.method +        p.url = self.url +        p.headers = self.headers +        p.body = self.body +        p.hooks = self.hooks +        return p +      def prepare_method(self, method):          """Prepares the given HTTP method."""          self.method = method @@ -337,8 +370,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):          """Prepares the given HTTP headers."""          if headers: -            headers = dict((name.encode('ascii'), value) for name, value in headers.items()) -            self.headers = CaseInsensitiveDict(headers) +            self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items())          else:              self.headers = CaseInsensitiveDict() @@ -352,7 +384,6 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):          body = None          content_type = None          length = None -        is_stream = False          is_stream = all([              hasattr(data, '__iter__'), @@ -363,8 +394,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):          try:              length = super_len(data) -        except (TypeError, AttributeError): -            length = False +        except (TypeError, AttributeError, UnsupportedOperation): +            length = None          if is_stream:              body = data @@ -372,13 +403,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):              if files:                  raise NotImplementedError('Streamed bodies and files are mutually exclusive.') -            if length: +            if length is not None:                  self.headers['Content-Length'] = str(length)              else:                  self.headers['Transfer-Encoding'] = 'chunked' -        # Check if file, fo, generator, iterator. -        # If not, run through normal process. -          else:              # Multi-part file uploads.              if files: @@ -537,11 +565,22 @@ class Response(object):              return iter_slices(self._content, chunk_size)          def generate(): -            while 1: -                chunk = self.raw.read(chunk_size, decode_content=True) -                if not chunk: -                    break -                yield chunk +            try: +                # Special case for urllib3. +                try: +                    for chunk in self.raw.stream(chunk_size, +                                                 decode_content=True): +                        yield chunk +                except IncompleteRead as e: +                    raise ChunkedEncodingError(e) +            except AttributeError: +                # Standard file-like object. +                while 1: +                    chunk = self.raw.read(chunk_size) +                    if not chunk: +                        break +                    yield chunk +              self._content_consumed = True          gen = generate() @@ -683,4 +722,9 @@ class Response(object):              raise HTTPError(http_error_msg, response=self)      def close(self): +        """Closes the underlying file descriptor and releases the connection +        back to the pool. + +        *Note: Should not normally need to be called explicitly.* +        """          return self.raw.release_conn() diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index bff80b8..73071f7 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -23,7 +23,7 @@ from . import exceptions  from .filepost import encode_multipart_formdata  from .poolmanager import PoolManager, ProxyManager, proxy_from_url  from .response import HTTPResponse -from .util import make_headers, get_host +from .util import make_headers, get_host, Timeout  # Set default logging handler to avoid "No handler found" warnings. diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index b35a736..282b8d5 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -5,7 +5,7 @@  # the MIT License: http://www.opensource.org/licenses/mit-license.php  from collections import MutableMapping -from threading import Lock +from threading import RLock  try: # Python 2.7+      from collections import OrderedDict @@ -40,18 +40,18 @@ class RecentlyUsedContainer(MutableMapping):          self.dispose_func = dispose_func          self._container = self.ContainerCls() -        self._lock = Lock() +        self.lock = RLock()      def __getitem__(self, key):          # Re-insert the item, moving it to the end of the eviction line. -        with self._lock: +        with self.lock:              item = self._container.pop(key)              self._container[key] = item              return item      def __setitem__(self, key, value):          evicted_value = _Null -        with self._lock: +        with self.lock:              # Possibly evict the existing value of 'key'              evicted_value = self._container.get(key, _Null)              self._container[key] = value @@ -65,21 +65,21 @@ class RecentlyUsedContainer(MutableMapping):              self.dispose_func(evicted_value)      def __delitem__(self, key): -        with self._lock: +        with self.lock:              value = self._container.pop(key)          if self.dispose_func:              self.dispose_func(value)      def __len__(self): -        with self._lock: +        with self.lock:              return len(self._container)      def __iter__(self):          raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')      def clear(self): -        with self._lock: +        with self.lock:              # Copy pointers to all values, then wipe the mapping              # under Python 2, this copies the list of values twice :-|              values = list(self._container.values()) @@ -90,5 +90,5 @@ class RecentlyUsedContainer(MutableMapping):                  self.dispose_func(value)      def keys(self): -        with self._lock: +        with self.lock:              return self._container.keys() diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index f3e9260..691d4e2 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -4,12 +4,11 @@  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging -import socket  import errno +import logging  from socket import error as SocketError, timeout as SocketTimeout -from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint +import socket  try: # Python 3      from http.client import HTTPConnection, HTTPException @@ -22,11 +21,15 @@ try: # Python 3      from queue import LifoQueue, Empty, Full  except ImportError:      from Queue import LifoQueue, Empty, Full +    import Queue as _  # Platform-specific: Windows  try: # Compiled with SSL?      HTTPSConnection = object -    BaseSSLError = None + +    class BaseSSLError(BaseException): +        pass +      ssl = None      try: # Python 3 @@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL.      pass -from .request import RequestMethods -from .response import HTTPResponse -from .util import get_host, is_connection_dropped, ssl_wrap_socket  from .exceptions import (      ClosedPoolError, +    ConnectTimeoutError,      EmptyPoolError,      HostChangedError,      MaxRetryError,      SSLError, -    TimeoutError, +    ReadTimeoutError, +    ProxyError,  ) - -from .packages.ssl_match_hostname import match_hostname, CertificateError +from .packages.ssl_match_hostname import CertificateError, match_hostname  from .packages import six - +from .request import RequestMethods +from .response import HTTPResponse +from .util import ( +    assert_fingerprint, +    get_host, +    is_connection_dropped, +    resolve_cert_reqs, +    resolve_ssl_version, +    ssl_wrap_socket, +    Timeout, +)  xrange = six.moves.xrange @@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection):      def connect(self):          # Add certificate verification -        sock = socket.create_connection((self.host, self.port), self.timeout) +        try: +            sock = socket.create_connection( +                address=(self.host, self.port), +                timeout=self.timeout) +        except SocketTimeout: +                raise ConnectTimeoutError( +                    self, "Connection to %s timed out. (connect timeout=%s)" % +                    (self.host, self.timeout))          resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)          resolved_ssl_version = resolve_ssl_version(self.ssl_version) +        if self._tunnel_host: +            self.sock = sock +            # Calls self._set_hostport(), so self.host is +            # self._tunnel_host below. +            self._tunnel() +          # Wrap socket using verification with the root certs in          # trusted_root_certs          self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, @@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):              if self.assert_fingerprint:                  assert_fingerprint(self.sock.getpeercert(binary_form=True),                                     self.assert_fingerprint) -            else: +            elif self.assert_hostname is not False:                  match_hostname(self.sock.getpeercert(),                                 self.assert_hostname or self.host) +  ## Pool objects  class ConnectionPool(object): @@ -126,6 +151,9 @@ class ConnectionPool(object):      QueueCls = LifoQueue      def __init__(self, host, port=None): +        # httplib doesn't like it when we include brackets in ipv6 addresses +        host = host.strip('[]') +          self.host = host          self.port = port @@ -133,6 +161,8 @@ class ConnectionPool(object):          return '%s(host=%r, port=%r)' % (type(self).__name__,                                           self.host, self.port) +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])  class HTTPConnectionPool(ConnectionPool, RequestMethods):      """ @@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          as a valid HTTP/1.0 or 1.1 status line, passed into          :class:`httplib.HTTPConnection`. +        .. note:: +           Only works in Python 2. This parameter is ignored in Python 3. +      :param timeout: -        Socket timeout for each individual connection, can be a float. None -        disables timeout. +        Socket timeout in seconds for each individual connection. This can +        be a float or integer, which sets the timeout for the HTTP request, +        or an instance of :class:`urllib3.util.Timeout` which gives you more +        fine-grained control over request timeouts. After the constructor has +        been parsed, this is always a `urllib3.util.Timeout` object.      :param maxsize:          Number of connections to save that can be reused. More than 1 is useful @@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):      :param headers:          Headers to include with all requests, unless other headers are given          explicitly. + +    :param _proxy: +        Parsed proxy URL, should not be used directly, instead, see +        :class:`urllib3.connectionpool.ProxyManager`" + +    :param _proxy_headers: +        A dictionary with proxy headers, should not be used directly, +        instead, see :class:`urllib3.connectionpool.ProxyManager`"      """      scheme = 'http' -    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, -                 block=False, headers=None): +    def __init__(self, host, port=None, strict=False, +                 timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, +                 headers=None, _proxy=None, _proxy_headers=None):          ConnectionPool.__init__(self, host, port)          RequestMethods.__init__(self, headers)          self.strict = strict + +        # This is for backwards compatibility and can be removed once a timeout +        # can only be set to a Timeout object +        if not isinstance(timeout, Timeout): +            timeout = Timeout.from_float(timeout) +          self.timeout = timeout +          self.pool = self.QueueCls(maxsize)          self.block = block +        self.proxy = _proxy +        self.proxy_headers = _proxy_headers or {} +          # Fill the queue up so that doing get() on it will block properly          for _ in xrange(maxsize):              self.pool.put(None) @@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          self.num_connections += 1          log.info("Starting new HTTP connection (%d): %s" %                   (self.num_connections, self.host)) -        return HTTPConnection(host=self.host, -                              port=self.port, -                              strict=self.strict) +        extra_params = {} +        if not six.PY3:  # Python 2 +            extra_params['strict'] = self.strict + +        return HTTPConnection(host=self.host, port=self.port, +                              timeout=self.timeout.connect_timeout, +                              **extra_params) +      def _get_conn(self, timeout=None):          """ @@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):                          % self.host)          # Connection never got put back into the pool, close it. -        conn.close() +        if conn: +            conn.close() + +    def _get_timeout(self, timeout): +        """ Helper that always returns a :class:`urllib3.util.Timeout` """ +        if timeout is _Default: +            return self.timeout.clone() + +        if isinstance(timeout, Timeout): +            return timeout.clone() +        else: +            # User passed us an int/float. This is for backwards compatibility, +            # can be removed later +            return Timeout.from_float(timeout)      def _make_request(self, conn, method, url, timeout=_Default,                        **httplib_request_kw):          """          Perform a request on a given httplib connection object taken from our          pool. + +        :param conn: +            a connection from one of our connection pools + +        :param timeout: +            Socket timeout in seconds for the request. This can be a +            float or integer, which will set the same timeout value for +            the socket connect and the socket read, or an instance of +            :class:`urllib3.util.Timeout`, which gives you more fine-grained +            control over your timeouts.          """          self.num_requests += 1 -        if timeout is _Default: -            timeout = self.timeout +        timeout_obj = self._get_timeout(timeout) + +        try: +            timeout_obj.start_connect() +            conn.timeout = timeout_obj.connect_timeout +            # conn.request() calls httplib.*.request, not the method in +            # request.py. It also calls makefile (recv) on the socket +            conn.request(method, url, **httplib_request_kw) +        except SocketTimeout: +            raise ConnectTimeoutError( +                self, "Connection to %s timed out. (connect timeout=%s)" % +                (self.host, timeout_obj.connect_timeout)) -        conn.timeout = timeout # This only does anything in Py26+ -        conn.request(method, url, **httplib_request_kw) +        # Reset the timeout for the recv() on the socket +        read_timeout = timeout_obj.read_timeout +        log.debug("Setting read timeout to %s" % read_timeout) +        # App Engine doesn't have a sock attr +        if hasattr(conn, 'sock') and \ +            read_timeout is not None and \ +            read_timeout is not Timeout.DEFAULT_TIMEOUT: +            # In Python 3 socket.py will catch EAGAIN and return None when you +            # try and read into the file pointer created by http.client, which +            # instead raises a BadStatusLine exception. Instead of catching +            # the exception and assuming all BadStatusLine exceptions are read +            # timeouts, check for a zero timeout before making the request. +            if read_timeout == 0: +                raise ReadTimeoutError( +                    self, url, +                    "Read timed out. (read timeout=%s)" % read_timeout) +            conn.sock.settimeout(read_timeout) -        # Set timeout -        sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr. -        if sock: -            sock.settimeout(timeout) +        # Receive the response from the server +        try: +            try: # Python 2.7+, use buffering of HTTP responses +                httplib_response = conn.getresponse(buffering=True) +            except TypeError: # Python 2.6 and older +                httplib_response = conn.getresponse() +        except SocketTimeout: +            raise ReadTimeoutError( +                self, url, "Read timed out. (read timeout=%s)" % read_timeout) + +        except SocketError as e: # Platform-specific: Python 2 +            # See the above comment about EAGAIN in Python 3. In Python 2 we +            # have to specifically catch it and throw the timeout error +            if e.errno in _blocking_errnos: +                raise ReadTimeoutError( +                    self, url, +                    "Read timed out. (read timeout=%s)" % read_timeout) +            raise -        try: # Python 2.7+, use buffering of HTTP responses -            httplib_response = conn.getresponse(buffering=True) -        except TypeError: # Python 2.6 and older -            httplib_response = conn.getresponse()          # AppEngine doesn't have a version attr.          http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          :param redirect:              If True, automatically handle redirects (status codes 301, 302, -            303, 307). Each redirect counts as a retry. +            303, 307, 308). Each redirect counts as a retry.          :param assert_same_host:              If ``True``, will make sure that the host of the pool requests is @@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              use the pool on an HTTP proxy and request foreign hosts.          :param timeout: -            If specified, overrides the default timeout for this one request. +            If specified, overrides the default timeout for this one +            request. It may be a float (in seconds) or an instance of +            :class:`urllib3.util.Timeout`.          :param pool_timeout:              If set and the pool is set to block=True, then this method will @@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          if retries < 0:              raise MaxRetryError(self, url) -        if timeout is _Default: -            timeout = self.timeout -          if release_conn is None:              release_conn = response_kw.get('preload_content', True)          # Check host          if assert_same_host and not self.is_same_host(url): -            host = "%s://%s" % (self.scheme, self.host) -            if self.port: -                host = "%s:%d" % (host, self.port) -              raise HostChangedError(self, url, retries - 1)          conn = None @@ -444,20 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              #     ``response.release_conn()`` is called (implicitly by              #     ``response.read()``) -        except Empty as e: +        except Empty:              # Timed out by queue -            raise TimeoutError(self, url, -                               "Request timed out. (pool_timeout=%s)" % -                               pool_timeout) +            raise ReadTimeoutError( +                self, url, "Read timed out, no pool connections are available.") -        except SocketTimeout as e: +        except SocketTimeout:              # Timed out by socket -            raise TimeoutError(self, url, -                               "Request timed out. (timeout=%s)" % -                               timeout) +            raise ReadTimeoutError(self, url, "Read timed out.")          except BaseSSLError as e:              # SSL certificate error +            if 'timed out' in str(e) or \ +               'did not complete (read)' in str(e): # Platform-specific: Python 2.6 +                raise ReadTimeoutError(self, url, "Read timed out.")              raise SSLError(e)          except CertificateError as e: @@ -465,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              raise SSLError(e)          except (HTTPException, SocketError) as e: +            if isinstance(e, SocketError) and self.proxy is not None: +                raise ProxyError('Cannot connect to proxy. ' +                                 'Socket error: %s.' % e) +              # Connection broken, discard. It will be replaced next _get_conn().              conn = None              # This is necessary so we can access e below @@ -513,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):      :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,      ``assert_hostname`` and ``host`` in this order to verify connections. +    If ``assert_hostname`` is False, no verification is done.      The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and      ``ssl_version`` are only used if :mod:`ssl` is available and are fed into @@ -525,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool):      def __init__(self, host, port=None,                   strict=False, timeout=None, maxsize=1,                   block=False, headers=None, +                 _proxy=None, _proxy_headers=None,                   key_file=None, cert_file=None, cert_reqs=None,                   ca_certs=None, ssl_version=None,                   assert_hostname=None, assert_fingerprint=None): -        HTTPConnectionPool.__init__(self, host, port, -                                    strict, timeout, maxsize, -                                    block, headers) +        HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, +                                    block, headers, _proxy, _proxy_headers)          self.key_file = key_file          self.cert_file = cert_file          self.cert_reqs = cert_reqs @@ -540,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool):          self.assert_hostname = assert_hostname          self.assert_fingerprint = assert_fingerprint +    def _prepare_conn(self, connection): +        """ +        Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` +        and establish the tunnel if proxy is used. +        """ + +        if isinstance(connection, VerifiedHTTPSConnection): +            connection.set_cert(key_file=self.key_file, +                                cert_file=self.cert_file, +                                cert_reqs=self.cert_reqs, +                                ca_certs=self.ca_certs, +                                assert_hostname=self.assert_hostname, +                                assert_fingerprint=self.assert_fingerprint) +            connection.ssl_version = self.ssl_version + +        if self.proxy is not None: +            # Python 2.7+ +            try: +                set_tunnel = connection.set_tunnel +            except AttributeError:  # Platform-specific: Python 2.6 +                set_tunnel = connection._set_tunnel +            set_tunnel(self.host, self.port, self.proxy_headers) +            # Establish tunnel connection early, because otherwise httplib +            # would improperly set Host: header to proxy's IP:port. +            connection.connect() + +        return connection +      def _new_conn(self):          """          Return a fresh :class:`httplib.HTTPSConnection`. @@ -548,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool):          log.info("Starting new HTTPS connection (%d): %s"                   % (self.num_connections, self.host)) +        actual_host = self.host +        actual_port = self.port +        if self.proxy is not None: +            actual_host = self.proxy.host +            actual_port = self.proxy.port +          if not ssl:  # Platform-specific: Python compiled without +ssl              if not HTTPSConnection or HTTPSConnection is object:                  raise SSLError("Can't connect to HTTPS URL because the SSL "                                 "module is not available.") +            connection_class = HTTPSConnection +        else: +            connection_class = VerifiedHTTPSConnection -            return HTTPSConnection(host=self.host, -                                   port=self.port, -                                   strict=self.strict) +        extra_params = {} +        if not six.PY3:  # Python 2 +            extra_params['strict'] = self.strict +        connection = connection_class(host=actual_host, port=actual_port, +                                      timeout=self.timeout.connect_timeout, +                                      **extra_params) -        connection = VerifiedHTTPSConnection(host=self.host, -                                             port=self.port, -                                             strict=self.strict) -        connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, -                            cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, -                            assert_hostname=self.assert_hostname, -                            assert_fingerprint=self.assert_fingerprint) - -        connection.ssl_version = self.ssl_version - -        return connection +        return self._prepare_conn(connection)  def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py index 277ee0b..b8cd933 100644 --- a/requests/packages/urllib3/contrib/ntlmpool.py +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -33,7 +33,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):      def __init__(self, user, pw, authurl, *args, **kwargs):          """          authurl is a random URL on the server that is protected by NTLM. -        user is the Windows user, probably in the DOMAIN\username format. +        user is the Windows user, probably in the DOMAIN\\username format.          pw is the password for the user.          """          super(NTLMConnectionPool, self).__init__(*args, **kwargs) diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 5c4c6d8..d43bcd6 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -20,13 +20,13 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI  when the required modules are installed.  ''' -from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, -                                                   SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT  from ndg.httpsclient.subj_alt_name import SubjectAltName  import OpenSSL.SSL  from pyasn1.codec.der import decoder as der_decoder  from socket import _fileobject  import ssl +from cStringIO import StringIO  from .. import connectionpool  from .. import util @@ -99,6 +99,172 @@ def get_subj_alt_name(peer_cert):      return dns_name +class fileobject(_fileobject): + +    def read(self, size=-1): +        # Use max, disallow tiny reads in a loop as they are very inefficient. +        # We never leave read() with any leftover data from a new recv() call +        # in our internal buffer. +        rbufsize = max(self._rbufsize, self.default_bufsize) +        # Our use of StringIO rather than lists of string objects returned by +        # recv() minimizes memory usage and fragmentation that occurs when +        # rbufsize is large compared to the typical return value of recv(). +        buf = self._rbuf +        buf.seek(0, 2)  # seek end +        if size < 0: +            # Read until EOF +            self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf. +            while True: +                try: +                    data = self._sock.recv(rbufsize) +                except OpenSSL.SSL.WantReadError: +                    continue +                if not data: +                    break +                buf.write(data) +            return buf.getvalue() +        else: +            # Read until size bytes or EOF seen, whichever comes first +            buf_len = buf.tell() +            if buf_len >= size: +                # Already have size bytes in our buffer?  Extract and return. +                buf.seek(0) +                rv = buf.read(size) +                self._rbuf = StringIO() +                self._rbuf.write(buf.read()) +                return rv + +            self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf. +            while True: +                left = size - buf_len +                # recv() will malloc the amount of memory given as its +                # parameter even though it often returns much less data +                # than that.  The returned data string is short lived +                # as we copy it into a StringIO and free it.  This avoids +                # fragmentation issues on many platforms. +                try: +                    data = self._sock.recv(left) +                except OpenSSL.SSL.WantReadError: +                    continue +                if not data: +                    break +                n = len(data) +                if n == size and not buf_len: +                    # Shortcut.  Avoid buffer data copies when: +                    # - We have no data in our buffer. +                    # AND +                    # - Our call to recv returned exactly the +                    #   number of bytes we were asked to read. +                    return data +                if n == left: +                    buf.write(data) +                    del data  # explicit free +                    break +                assert n <= left, "recv(%d) returned %d bytes" % (left, n) +                buf.write(data) +                buf_len += n +                del data  # explicit free +                #assert buf_len == buf.tell() +            return buf.getvalue() + +    def readline(self, size=-1): +        buf = self._rbuf +        buf.seek(0, 2)  # seek end +        if buf.tell() > 0: +            # check if we already have it in our buffer +            buf.seek(0) +            bline = buf.readline(size) +            if bline.endswith('\n') or len(bline) == size: +                self._rbuf = StringIO() +                self._rbuf.write(buf.read()) +                return bline +            del bline +        if size < 0: +            # Read until \n or EOF, whichever comes first +            if self._rbufsize <= 1: +                # Speed up unbuffered case +                buf.seek(0) +                buffers = [buf.read()] +                self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf. +                data = None +                recv = self._sock.recv +                while True: +                    try: +                        while data != "\n": +                            data = recv(1) +                            if not data: +                                break +                            buffers.append(data) +                    except OpenSSL.SSL.WantReadError: +                        continue +                    break +                return "".join(buffers) + +            buf.seek(0, 2)  # seek end +            self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf. +            while True: +                try: +                    data = self._sock.recv(self._rbufsize) +                except OpenSSL.SSL.WantReadError: +                    continue +                if not data: +                    break +                nl = data.find('\n') +                if nl >= 0: +                    nl += 1 +                    buf.write(data[:nl]) +                    self._rbuf.write(data[nl:]) +                    del data +                    break +                buf.write(data) +            return buf.getvalue() +        else: +            # Read until size bytes or \n or EOF seen, whichever comes first +            buf.seek(0, 2)  # seek end +            buf_len = buf.tell() +            if buf_len >= size: +                buf.seek(0) +                rv = buf.read(size) +                self._rbuf = StringIO() +                self._rbuf.write(buf.read()) +                return rv +            self._rbuf = StringIO()  # reset _rbuf.  we consume it via buf. +            while True: +                try: +                    data = self._sock.recv(self._rbufsize) +                except OpenSSL.SSL.WantReadError: +                        continue +                if not data: +                    break +                left = size - buf_len +                # did we just receive a newline? +                nl = data.find('\n', 0, left) +                if nl >= 0: +                    nl += 1 +                    # save the excess data to _rbuf +                    self._rbuf.write(data[nl:]) +                    if buf_len: +                        buf.write(data[:nl]) +                        break +                    else: +                        # Shortcut.  Avoid data copy through buf when returning +                        # a substring of our first recv(). +                        return data[:nl] +                n = len(data) +                if n == size and not buf_len: +                    # Shortcut.  Avoid data copy through buf when +                    # returning exactly all of our first recv(). +                    return data +                if n >= left: +                    buf.write(data[:left]) +                    self._rbuf.write(data[left:]) +                    break +                buf.write(data) +                buf_len += n +                #assert buf_len == buf.tell() +            return buf.getvalue() + +  class WrappedSocket(object):      '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' @@ -106,8 +272,11 @@ class WrappedSocket(object):          self.connection = connection          self.socket = socket +    def fileno(self): +        return self.socket.fileno() +      def makefile(self, mode, bufsize=-1): -        return _fileobject(self.connection, mode, bufsize) +        return fileobject(self.connection, mode, bufsize)      def settimeout(self, timeout):          return self.socket.settimeout(timeout) @@ -115,10 +284,14 @@ class WrappedSocket(object):      def sendall(self, data):          return self.connection.sendall(data) +    def close(self): +        return self.connection.shutdown() +      def getpeercert(self, binary_form=False):          x509 = self.connection.get_peer_certificate() +          if not x509: -            raise ssl.SSLError('') +            return x509          if binary_form:              return OpenSSL.crypto.dump_certificate( @@ -159,9 +332,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,      cnx = OpenSSL.SSL.Connection(ctx, sock)      cnx.set_tlsext_host_name(server_hostname)      cnx.set_connect_state() -    try: -        cnx.do_handshake() -    except OpenSSL.SSL.Error as e: -        raise ssl.SSLError('bad handshake', e) +    while True: +        try: +            cnx.do_handshake() +        except OpenSSL.SSL.WantReadError: +            continue +        except OpenSSL.SSL.Error as e: +            raise ssl.SSLError('bad handshake', e) +        break      return WrappedSocket(cnx, sock) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 2e2a259..98ef9ab 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -39,6 +39,11 @@ class SSLError(HTTPError):      pass +class ProxyError(HTTPError): +    "Raised when the connection to a proxy fails." +    pass + +  class DecodeError(HTTPError):      "Raised when automatic decoding based on Content-Type fails."      pass @@ -70,8 +75,29 @@ class HostChangedError(RequestError):          self.retries = retries -class TimeoutError(RequestError): -    "Raised when a socket timeout occurs." +class TimeoutStateError(HTTPError): +    """ Raised when passing an invalid state to a timeout """ +    pass + + +class TimeoutError(HTTPError): +    """ Raised when a socket timeout error occurs. + +    Catching this error will catch both :exc:`ReadTimeoutErrors +    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`. +    """ +    pass + + +class ReadTimeoutError(TimeoutError, RequestError): +    "Raised when a socket timeout occurs while receiving data from a server" +    pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): +    "Raised when a socket timeout occurs while connecting to a server"      pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py new file mode 100644 index 0000000..ed01765 --- /dev/null +++ b/requests/packages/urllib3/fields.py @@ -0,0 +1,177 @@ +# urllib3/fields.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): +    """ +    Guess the "Content-Type" of a file. + +    :param filename: +        The filename to guess the "Content-Type" of using :mod:`mimetimes`. +    :param default: +        If no "Content-Type" can be guessed, default to `default`. +    """ +    if filename: +        return mimetypes.guess_type(filename)[0] or default +    return default + + +def format_header_param(name, value): +    """ +    Helper function to format and quote a single header parameter. + +    Particularly useful for header parameters which might contain +    non-ASCII values, like file names. This follows RFC 2231, as +    suggested by RFC 2388 Section 4.4. + +    :param name: +        The name of the parameter, a string expected to be ASCII only. +    :param value: +        The value of the parameter, provided as a unicode string. +    """ +    if not any(ch in value for ch in '"\\\r\n'): +        result = '%s="%s"' % (name, value) +        try: +            result.encode('ascii') +        except UnicodeEncodeError: +            pass +        else: +            return result +    if not six.PY3:  # Python 2: +        value = value.encode('utf-8') +    value = email.utils.encode_rfc2231(value, 'utf-8') +    value = '%s*=%s' % (name, value) +    return value + + +class RequestField(object): +    """ +    A data container for request body parameters. + +    :param name: +        The name of this request field. +    :param data: +        The data/value body. +    :param filename: +        An optional filename of the request field. +    :param headers: +        An optional dict-like object of headers to initially use for the field. +    """ +    def __init__(self, name, data, filename=None, headers=None): +        self._name = name +        self._filename = filename +        self.data = data +        self.headers = {} +        if headers: +            self.headers = dict(headers) + +    @classmethod +    def from_tuples(cls, fieldname, value): +        """ +        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + +        Supports constructing :class:`~urllib3.fields.RequestField` from parameter +        of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) +        tuple where the MIME type is optional. For example: :: + +            'foo': 'bar', +            'fakefile': ('foofile.txt', 'contents of foofile'), +            'realfile': ('barfile.txt', open('realfile').read()), +            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), +            'nonamefile': 'contents of nonamefile field', + +        Field names and filenames must be unicode. +        """ +        if isinstance(value, tuple): +            if len(value) == 3: +                filename, data, content_type = value +            else: +                filename, data = value +                content_type = guess_content_type(filename) +        else: +            filename = None +            content_type = None +            data = value + +        request_param = cls(fieldname, data, filename=filename) +        request_param.make_multipart(content_type=content_type) + +        return request_param + +    def _render_part(self, name, value): +        """ +        Overridable helper function to format a single header parameter. + +        :param name: +            The name of the parameter, a string expected to be ASCII only. +        :param value: +            The value of the parameter, provided as a unicode string. +        """ +        return format_header_param(name, value) + +    def _render_parts(self, header_parts): +        """ +        Helper function to format and quote a single header. + +        Useful for single headers that are composed of multiple items. E.g., +        'Content-Disposition' fields. + +        :param header_parts: +            A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as +            `k1="v1"; k2="v2"; ...`. +        """ +        parts = [] +        iterable = header_parts +        if isinstance(header_parts, dict): +            iterable = header_parts.items() + +        for name, value in iterable: +            if value: +                parts.append(self._render_part(name, value)) + +        return '; '.join(parts) + +    def render_headers(self): +        """ +        Renders the headers for this request field. +        """ +        lines = [] + +        sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] +        for sort_key in sort_keys: +            if self.headers.get(sort_key, False): +                lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + +        for header_name, header_value in self.headers.items(): +            if header_name not in sort_keys: +                if header_value: +                    lines.append('%s: %s' % (header_name, header_value)) + +        lines.append('\r\n') +        return '\r\n'.join(lines) + +    def make_multipart(self, content_disposition=None, content_type=None, content_location=None): +        """ +        Makes this request field into a multipart request field. + +        This method overrides "Content-Disposition", "Content-Type" and +        "Content-Location" headers to the request parameter. + +        :param content_type: +            The 'Content-Type' of the request body. +        :param content_location: +            The 'Content-Location' of the request body. + +        """ +        self.headers['Content-Disposition'] = content_disposition or 'form-data' +        self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) +        self.headers['Content-Type'] = content_type +        self.headers['Content-Location'] = content_location diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 470309a..4575582 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -1,5 +1,5 @@  # urllib3/filepost.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)  #  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -12,6 +12,7 @@ from io import BytesIO  from .packages import six  from .packages.six import b +from .fields import RequestField  writer = codecs.lookup('utf-8')[3] @@ -23,15 +24,38 @@ def choose_boundary():      return uuid4().hex -def get_content_type(filename): -    return mimetypes.guess_type(filename)[0] or 'application/octet-stream' +def iter_field_objects(fields): +    """ +    Iterate over fields. + +    Supports list of (k, v) tuples and dicts, and lists of +    :class:`~urllib3.fields.RequestField`. + +    """ +    if isinstance(fields, dict): +        i = six.iteritems(fields) +    else: +        i = iter(fields) + +    for field in i: +      if isinstance(field, RequestField): +        yield field +      else: +        yield RequestField.from_tuples(*field)  def iter_fields(fields):      """      Iterate over fields. +    .. deprecated :: + +      The addition of `~urllib3.fields.RequestField` makes this function +      obsolete. Instead, use :func:`iter_field_objects`, which returns +      `~urllib3.fields.RequestField` objects, instead. +      Supports list of (k, v) tuples and dicts. +      """      if isinstance(fields, dict):          return ((k, v) for k, v in six.iteritems(fields)) @@ -44,15 +68,7 @@ def encode_multipart_formdata(fields, boundary=None):      Encode a dictionary of ``fields`` using the multipart/form-data MIME format.      :param fields: -        Dictionary of fields or list of (key, value) or (key, value, MIME type) -        field tuples.  The key is treated as the field name, and the value as -        the body of the form-data bytes. If the value is a tuple of two -        elements, then the first element is treated as the filename of the -        form-data section and a suitable MIME type is guessed based on the -        filename. If the value is a tuple of three elements, then the third -        element is treated as an explicit MIME type of the form-data section. - -        Field names and filenames must be unicode. +        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).      :param boundary:          If not specified, then a random boundary will be generated using @@ -62,24 +78,11 @@ def encode_multipart_formdata(fields, boundary=None):      if boundary is None:          boundary = choose_boundary() -    for fieldname, value in iter_fields(fields): +    for field in iter_field_objects(fields):          body.write(b('--%s\r\n' % (boundary))) -        if isinstance(value, tuple): -            if len(value) == 3: -                filename, data, content_type = value -            else: -                filename, data = value -                content_type = get_content_type(filename) -            writer(body).write('Content-Disposition: form-data; name="%s"; ' -                               'filename="%s"\r\n' % (fieldname, filename)) -            body.write(b('Content-Type: %s\r\n\r\n' % -                       (content_type,))) -        else: -            data = value -            writer(body).write('Content-Disposition: form-data; name="%s"\r\n' -                               % (fieldname)) -            body.write(b'\r\n') +        writer(body).write(field.render_headers()) +        data = field.data          if isinstance(data, int):              data = str(data)  # Backwards compatibility diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 9560b04..2d61ac2 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,23 +7,60 @@ __version__ = '3.2.2'  class CertificateError(ValueError):      pass -def _dnsname_to_pat(dn): +def _dnsname_match(dn, hostname, max_wildcards=1): +    """Matching according to RFC 6125, section 6.4.3 + +    http://tools.ietf.org/html/rfc6125#section-6.4.3 +    """      pats = [] -    for frag in dn.split(r'.'): -        if frag == '*': -            # When '*' is a fragment by itself, it matches a non-empty dotless -            # fragment. -            pats.append('[^.]+') -        else: -            # Otherwise, '*' matches any dotless fragment. -            frag = re.escape(frag) -            pats.append(frag.replace(r'\*', '[^.]*')) -    return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) +    if not dn: +        return False + +    parts = dn.split(r'.') +    leftmost = parts[0] + +    wildcards = leftmost.count('*') +    if wildcards > max_wildcards: +        # Issue #17980: avoid denials of service by refusing more +        # than one wildcard per fragment.  A survery of established +        # policy among SSL implementations showed it to be a +        # reasonable choice. +        raise CertificateError( +            "too many wildcards in certificate DNS name: " + repr(dn)) + +    # speed up common case w/o wildcards +    if not wildcards: +        return dn.lower() == hostname.lower() + +    # RFC 6125, section 6.4.3, subitem 1. +    # The client SHOULD NOT attempt to match a presented identifier in which +    # the wildcard character comprises a label other than the left-most label. +    if leftmost == '*': +        # When '*' is a fragment by itself, it matches a non-empty dotless +        # fragment. +        pats.append('[^.]+') +    elif leftmost.startswith('xn--') or hostname.startswith('xn--'): +        # RFC 6125, section 6.4.3, subitem 3. +        # The client SHOULD NOT attempt to match a presented identifier +        # where the wildcard character is embedded within an A-label or +        # U-label of an internationalized domain name. +        pats.append(re.escape(leftmost)) +    else: +        # Otherwise, '*' matches any dotless string, e.g. www* +        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + +    # add the remaining fragments, ignore any wildcards +    for frag in parts[1:]: +        pats.append(re.escape(frag)) + +    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) +    return pat.match(hostname) +  def match_hostname(cert, hostname):      """Verify that *cert* (in decoded format as returned by -    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 rules -    are mostly followed, but IP addresses are not accepted for *hostname*. +    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125 +    rules are followed, but IP addresses are not accepted for *hostname*.      CertificateError is raised on failure. On success, the function      returns nothing. @@ -34,7 +71,7 @@ def match_hostname(cert, hostname):      san = cert.get('subjectAltName', ())      for key, value in san:          if key == 'DNS': -            if _dnsname_to_pat(value).match(hostname): +            if _dnsname_match(value, hostname):                  return              dnsnames.append(value)      if not dnsnames: @@ -45,7 +82,7 @@ def match_hostname(cert, hostname):                  # XXX according to RFC 2818, the most specific Common Name                  # must be used.                  if key == 'commonName': -                    if _dnsname_to_pat(value).match(hostname): +                    if _dnsname_match(value, hostname):                          return                      dnsnames.append(value)      if len(dnsnames) > 1: diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index ce0c248..e7f8667 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -6,9 +6,14 @@  import logging +try:  # Python 3 +    from urllib.parse import urljoin +except ImportError: +    from urlparse import urljoin +  from ._collections import RecentlyUsedContainer  from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import connection_from_url, port_by_scheme +from .connectionpool import port_by_scheme  from .request import RequestMethods  from .util import parse_url @@ -55,6 +60,8 @@ class PoolManager(RequestMethods):      """ +    proxy = None +      def __init__(self, num_pools=10, headers=None, **connection_pool_kw):          RequestMethods.__init__(self, headers)          self.connection_pool_kw = connection_pool_kw @@ -94,20 +101,23 @@ class PoolManager(RequestMethods):          If ``port`` isn't given, it will be derived from the ``scheme`` using          ``urllib3.connectionpool.port_by_scheme``.          """ +          scheme = scheme or 'http' +          port = port or port_by_scheme.get(scheme, 80)          pool_key = (scheme, host, port) -        # If the scheme, host, or port doesn't match existing open connections, -        # open a new ConnectionPool. -        pool = self.pools.get(pool_key) -        if pool: -            return pool +        with self.pools.lock: +            # If the scheme, host, or port doesn't match existing open +            # connections, open a new ConnectionPool. +            pool = self.pools.get(pool_key) +            if pool: +                return pool -        # Make a fresh ConnectionPool of the desired type -        pool = self._new_pool(scheme, host, port) -        self.pools[pool_key] = pool +            # Make a fresh ConnectionPool of the desired type +            pool = self._new_pool(scheme, host, port) +            self.pools[pool_key] = pool          return pool      def connection_from_url(self, url): @@ -139,12 +149,19 @@ class PoolManager(RequestMethods):          if 'headers' not in kw:              kw['headers'] = self.headers -        response = conn.urlopen(method, u.request_uri, **kw) +        if self.proxy is not None and u.scheme == "http": +            response = conn.urlopen(method, url, **kw) +        else: +            response = conn.urlopen(method, u.request_uri, **kw)          redirect_location = redirect and response.get_redirect_location()          if not redirect_location:              return response +        # Support relative URLs for redirecting. +        redirect_location = urljoin(url, redirect_location) + +        # RFC 2616, Section 10.3.4          if response.status == 303:              method = 'GET' @@ -154,15 +171,59 @@ class PoolManager(RequestMethods):          return self.urlopen(method, redirect_location, **kw) -class ProxyManager(RequestMethods): +class ProxyManager(PoolManager):      """ -    Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method -    will make requests to any url through the defined proxy. The ProxyManager -    class will automatically set the 'Host' header if it is not provided. +    Behaves just like :class:`PoolManager`, but sends all requests through +    the defined proxy, using the CONNECT method for HTTPS URLs. + +    :param poxy_url: +        The URL of the proxy to be used. + +    :param proxy_headers: +        A dictionary contaning headers that will be sent to the proxy. In case +        of HTTP they are being sent with each request, while in the +        HTTPS/CONNECT case they are sent only once. Could be used for proxy +        authentication. + +    Example: +        >>> proxy = urllib3.ProxyManager('http://localhost:3128/') +        >>> r1 = proxy.request('GET', 'http://google.com/') +        >>> r2 = proxy.request('GET', 'http://httpbin.org/') +        >>> len(proxy.pools) +        1 +        >>> r3 = proxy.request('GET', 'https://httpbin.org/') +        >>> r4 = proxy.request('GET', 'https://twitter.com/') +        >>> len(proxy.pools) +        3 +      """ -    def __init__(self, proxy_pool): -        self.proxy_pool = proxy_pool +    def __init__(self, proxy_url, num_pools=10, headers=None, +                 proxy_headers=None, **connection_pool_kw): + +        if isinstance(proxy_url, HTTPConnectionPool): +            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, +                                        proxy_url.port) +        proxy = parse_url(proxy_url) +        if not proxy.port: +            port = port_by_scheme.get(proxy.scheme, 80) +            proxy = proxy._replace(port=port) +        self.proxy = proxy +        self.proxy_headers = proxy_headers or {} +        assert self.proxy.scheme in ("http", "https"), \ +            'Not supported proxy scheme %s' % self.proxy.scheme +        connection_pool_kw['_proxy'] = self.proxy +        connection_pool_kw['_proxy_headers'] = self.proxy_headers +        super(ProxyManager, self).__init__( +            num_pools, headers, **connection_pool_kw) + +    def connection_from_host(self, host, port=None, scheme='http'): +        if scheme == "https": +            return super(ProxyManager, self).connection_from_host( +                host, port, scheme) + +        return super(ProxyManager, self).connection_from_host( +            self.proxy.host, self.proxy.port, self.proxy.scheme)      def _set_proxy_headers(self, url, headers=None):          """ @@ -171,22 +232,28 @@ class ProxyManager(RequestMethods):          """          headers_ = {'Accept': '*/*'} -        host = parse_url(url).host -        if host: -            headers_['Host'] = host +        netloc = parse_url(url).netloc +        if netloc: +            headers_['Host'] = netloc          if headers:              headers_.update(headers) -          return headers_ -    def urlopen(self, method, url, **kw): +    def urlopen(self, method, url, redirect=True, **kw):          "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." -        kw['assert_same_host'] = False -        kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) -        return self.proxy_pool.urlopen(method, url, **kw) +        u = parse_url(url) + +        if u.scheme == "http": +            # It's too late to set proxy headers on per-request basis for +            # tunnelled HTTPS connections, should use +            # constructor's proxy_headers instead. +            kw['headers'] = self._set_proxy_headers(url, kw.get('headers', +                                                                self.headers)) +            kw['headers'].update(self.proxy_headers) + +        return super(ProxyManager, self).urlopen(method, url, redirect, **kw) -def proxy_from_url(url, **pool_kw): -    proxy_pool = connection_from_url(url, **pool_kw) -    return ProxyManager(proxy_pool) +def proxy_from_url(url, **kw): +    return ProxyManager(proxy_url=url, **kw) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index bf0256e..66a9a0e 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -30,7 +30,7 @@ class RequestMethods(object):      in the URL (such as GET, HEAD, DELETE).      :meth:`.request_encode_body` is for sending requests whose fields are -    encoded in the *body* of the request using multipart or www-orm-urlencoded +    encoded in the *body* of the request using multipart or www-form-urlencoded      (such as for POST, PUT, PATCH).      :meth:`.request` is for making any kind of request, it will look up the diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 2fa4078..4efff5a 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -1,5 +1,5 @@  # urllib3/response.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)  #  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,9 +7,11 @@  import logging  import zlib +import io  from .exceptions import DecodeError  from .packages.six import string_types as basestring, binary_type +from .util import is_fp_closed  log = logging.getLogger(__name__) @@ -48,7 +50,7 @@ def _get_decoder(mode):      return DeflateDecoder() -class HTTPResponse(object): +class HTTPResponse(io.IOBase):      """      HTTP Response container. @@ -72,6 +74,7 @@ class HTTPResponse(object):      """      CONTENT_DECODERS = ['gzip', 'deflate'] +    REDIRECT_STATUSES = [301, 302, 303, 307, 308]      def __init__(self, body='', headers=None, status=0, version=0, reason=None,                   strict=0, preload_content=True, decode_content=True, @@ -105,7 +108,7 @@ class HTTPResponse(object):              code and valid location. ``None`` if redirect status and no              location. ``False`` if not a redirect status code.          """ -        if self.status in [301, 302, 303, 307]: +        if self.status in self.REDIRECT_STATUSES:              return self.headers.get('location')          return False @@ -183,11 +186,13 @@ class HTTPResponse(object):              try:                  if decode_content and self._decoder:                      data = self._decoder.decompress(data) -            except (IOError, zlib.error): -                raise DecodeError("Received response with content-encoding: %s, but " -                                  "failed to decode it." % content_encoding) +            except (IOError, zlib.error) as e: +                raise DecodeError( +                    "Received response with content-encoding: %s, but " +                    "failed to decode it." % content_encoding, +                    e) -            if flush_decoder and self._decoder: +            if flush_decoder and decode_content and self._decoder:                  buf = self._decoder.decompress(binary_type())                  data += buf + self._decoder.flush() @@ -200,6 +205,29 @@ class HTTPResponse(object):              if self._original_response and self._original_response.isclosed():                  self.release_conn() +    def stream(self, amt=2**16, decode_content=None): +        """ +        A generator wrapper for the read() method. A call will block until +        ``amt`` bytes have been read from the connection or until the +        connection is closed. + +        :param amt: +            How much of the content to read. The generator will return up to +            much data per iteration, but may return less. This is particularly +            likely when using compressed data. However, the empty string will +            never be returned. + +        :param decode_content: +            If True, will attempt to decode the body based on the +            'content-encoding' header. +        """ +        while not is_fp_closed(self._fp): +            data = self.read(amt=amt, decode_content=decode_content) + +            if data: +                yield data + +      @classmethod      def from_httplib(ResponseCls, r, **response_kw):          """ @@ -239,3 +267,35 @@ class HTTPResponse(object):      def getheader(self, name, default=None):          return self.headers.get(name, default) + +    # Overrides from io.IOBase +    def close(self): +        if not self.closed: +            self._fp.close() + +    @property +    def closed(self): +        if self._fp is None: +            return True +        elif hasattr(self._fp, 'closed'): +            return self._fp.closed +        elif hasattr(self._fp, 'isclosed'):  # Python 2 +            return self._fp.isclosed() +        else: +            return True + +    def fileno(self): +        if self._fp is None: +            raise IOError("HTTPResponse has no file to get a fileno from") +        elif hasattr(self._fp, "fileno"): +            return self._fp.fileno() +        else: +            raise IOError("The file-like object  this HTTPResponse is wrapped " +                          "around has no file descriptor") + +    def flush(self): +        if self._fp is not None and hasattr(self._fp, 'flush'): +            return self._fp.flush() + +    def readable(self): +        return True diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 544f9ed..266c9ed 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -6,10 +6,11 @@  from base64 import b64encode +from binascii import hexlify, unhexlify  from collections import namedtuple -from socket import error as SocketError  from hashlib import md5, sha1 -from binascii import hexlify, unhexlify +from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT +import time  try:      from select import poll, POLLIN @@ -31,9 +32,234 @@ try:  # Test for SSL features  except ImportError:      pass -  from .packages import six -from .exceptions import LocationParseError, SSLError +from .exceptions import LocationParseError, SSLError, TimeoutStateError + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +def current_time(): +    """ +    Retrieve the current time, this function is mocked out in unit testing. +    """ +    return time.time() + + +class Timeout(object): +    """ +    Utility object for storing timeout values. + +    Example usage: + +    .. code-block:: python + +        timeout = urllib3.util.Timeout(connect=2.0, read=7.0) +        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) +        pool.request(...) # Etc, etc + +    :param connect: +        The maximum amount of time to wait for a connection attempt to a server +        to succeed. Omitting the parameter will default the connect timeout to +        the system default, probably `the global default timeout in socket.py +        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. +        None will set an infinite timeout for connection attempts. + +    :type connect: integer, float, or None + +    :param read: +        The maximum amount of time to wait between consecutive +        read operations for a response from the server. Omitting +        the parameter will default the read timeout to the system +        default, probably `the global default timeout in socket.py +        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. +        None will set an infinite timeout. + +    :type read: integer, float, or None + +    :param total: +        The maximum amount of time to wait for an HTTP request to connect and +        return. This combines the connect and read timeouts into one. In the +        event that both a connect timeout and a total are specified, or a read +        timeout and a total are specified, the shorter timeout will be applied. + +        Defaults to None. + + +    :type total: integer, float, or None + +    .. note:: + +        Many factors can affect the total amount of time for urllib3 to return +        an HTTP response. Specifically, Python's DNS resolver does not obey the +        timeout specified on the socket. Other factors that can affect total +        request time include high CPU load, high swap, the program running at a +        low priority level, or other behaviors. The observed running time for +        urllib3 to return a response may be greater than the value passed to +        `total`. + +        In addition, the read and total timeouts only measure the time between +        read operations on the socket connecting the client and the server, not +        the total amount of time for the request to return a complete response. +        As an example, you may want a request to return within 7 seconds or +        fail, so you set the ``total`` timeout to 7 seconds. If the server +        sends one byte to you every 5 seconds, the request will **not** trigger +        time out. This case is admittedly rare. +    """ + +    #: A sentinel object representing the default timeout value +    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + +    def __init__(self, connect=_Default, read=_Default, total=None): +        self._connect = self._validate_timeout(connect, 'connect') +        self._read = self._validate_timeout(read, 'read') +        self.total = self._validate_timeout(total, 'total') +        self._start_connect = None + +    def __str__(self): +        return '%s(connect=%r, read=%r, total=%r)' % ( +            type(self).__name__, self._connect, self._read, self.total) + + +    @classmethod +    def _validate_timeout(cls, value, name): +        """ Check that a timeout attribute is valid + +        :param value: The timeout value to validate +        :param name: The name of the timeout attribute to validate. This is used +            for clear error messages +        :return: the value +        :raises ValueError: if the type is not an integer or a float, or if it +            is a numeric value less than zero +        """ +        if value is _Default: +            return cls.DEFAULT_TIMEOUT + +        if value is None or value is cls.DEFAULT_TIMEOUT: +            return value + +        try: +            float(value) +        except (TypeError, ValueError): +            raise ValueError("Timeout value %s was %s, but it must be an " +                             "int or float." % (name, value)) + +        try: +            if value < 0: +                raise ValueError("Attempted to set %s timeout to %s, but the " +                                 "timeout cannot be set to a value less " +                                 "than 0." % (name, value)) +        except TypeError: # Python 3 +            raise ValueError("Timeout value %s was %s, but it must be an " +                             "int or float." % (name, value)) + +        return value + +    @classmethod +    def from_float(cls, timeout): +        """ Create a new Timeout from a legacy timeout value. + +        The timeout value used by httplib.py sets the same timeout on the +        connect(), and recv() socket requests. This creates a :class:`Timeout` +        object that sets the individual timeouts to the ``timeout`` value passed +        to this function. + +        :param timeout: The legacy timeout value +        :type timeout: integer, float, sentinel default object, or None +        :return: a Timeout object +        :rtype: :class:`Timeout` +        """ +        return Timeout(read=timeout, connect=timeout) + +    def clone(self): +        """ Create a copy of the timeout object + +        Timeout properties are stored per-pool but each request needs a fresh +        Timeout object to ensure each one has its own start/stop configured. + +        :return: a copy of the timeout object +        :rtype: :class:`Timeout` +        """ +        # We can't use copy.deepcopy because that will also create a new object +        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to +        # detect the user default. +        return Timeout(connect=self._connect, read=self._read, +                       total=self.total) + +    def start_connect(self): +        """ Start the timeout clock, used during a connect() attempt + +        :raises urllib3.exceptions.TimeoutStateError: if you attempt +            to start a timer that has been started already. +        """ +        if self._start_connect is not None: +            raise TimeoutStateError("Timeout timer has already been started.") +        self._start_connect = current_time() +        return self._start_connect + +    def get_connect_duration(self): +        """ Gets the time elapsed since the call to :meth:`start_connect`. + +        :return: the elapsed time +        :rtype: float +        :raises urllib3.exceptions.TimeoutStateError: if you attempt +            to get duration for a timer that hasn't been started. +        """ +        if self._start_connect is None: +            raise TimeoutStateError("Can't get connect duration for timer " +                                    "that has not started.") +        return current_time() - self._start_connect + +    @property +    def connect_timeout(self): +        """ Get the value to use when setting a connection timeout. + +        This will be a positive float or integer, the value None +        (never timeout), or the default system timeout. + +        :return: the connect timeout +        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None +        """ +        if self.total is None: +            return self._connect + +        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: +            return self.total + +        return min(self._connect, self.total) + +    @property +    def read_timeout(self): +        """ Get the value for the read timeout. + +        This assumes some time has elapsed in the connection timeout and +        computes the read timeout appropriately. + +        If self.total is set, the read timeout is dependent on the amount of +        time taken by the connect timeout. If the connection time has not been +        established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be +        raised. + +        :return: the value to use for the read timeout +        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None +        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` +            has not yet been called on this object. +        """ +        if (self.total is not None and +            self.total is not self.DEFAULT_TIMEOUT and +            self._read is not None and +            self._read is not self.DEFAULT_TIMEOUT): +            # in case the connect timeout has not yet been established. +            if self._start_connect is None: +                return self._read +            return max(0, min(self.total - self.get_connect_duration(), +                              self._read)) +        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: +            return max(0, self.total - self.get_connect_duration()) +        else: +            return self._read  class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',          return uri +    @property +    def netloc(self): +        """Network location including host and port""" +        if self.port: +            return '%s:%d' % (self.host, self.port) +        return self.host +  def split_first(s, delims):      """ @@ -114,7 +347,7 @@ def parse_url(url):      # While this code has overlap with stdlib's urlparse, it is much      # simplified for our needs and less annoying. -    # Additionally, this imeplementations does silly things to be optimal +    # Additionally, this implementations does silly things to be optimal      # on CPython.      scheme = None @@ -143,7 +376,8 @@ def parse_url(url):      # IPv6      if url and url[0] == '[': -        host, url = url[1:].split(']', 1) +        host, url = url.split(']', 1) +        host += ']'      # Port      if ':' in url: @@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint):                         .format(hexlify(fingerprint_bytes),                                 hexlify(cert_digest))) +def is_fp_closed(obj): +    """ +    Checks whether a given file-like object is closed. + +    :param obj: +        The file-like object to check. +    """ +    if hasattr(obj, 'fp'): +        # Object is a container for another file-like object that gets released +        # on exhaustion (e.g. HTTPResponse) +        return obj.fp is None + +    return obj.closed +  if SSLContext is not None:  # Python 3.2+      def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, diff --git a/requests/sessions.py b/requests/sessions.py index f4aeeee..aa956d3 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -71,15 +71,10 @@ class SessionRedirectMixin(object):          """Receives a Response. Returns a generator of Responses."""          i = 0 -        prepared_request = PreparedRequest() -        prepared_request.body = req.body -        prepared_request.headers = req.headers.copy() -        prepared_request.hooks = req.hooks -        prepared_request.method = req.method -        prepared_request.url = req.url          # ((resp.status_code is codes.see_other))          while (('location' in resp.headers and resp.status_code in REDIRECT_STATI)): +            prepared_request = req.copy()              resp.content  # Consume socket so it can be released @@ -90,13 +85,18 @@ class SessionRedirectMixin(object):              resp.close()              url = resp.headers['location'] -            method = prepared_request.method +            method = req.method              # Handle redirection without scheme (see: RFC 1808 Section 4)              if url.startswith('//'):                  parsed_rurl = urlparse(resp.url)                  url = '%s:%s' % (parsed_rurl.scheme, url) +            # The scheme should be lower case... +            if '://' in url: +                scheme, uri = url.split('://', 1) +                url = '%s://%s' % (scheme.lower(), uri) +              # Facilitate non-RFC2616-compliant 'location' headers              # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')              # Compliant with RFC3986, we percent encode the url. @@ -109,12 +109,12 @@ class SessionRedirectMixin(object):              # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4              if (resp.status_code == codes.see_other and -                    prepared_request.method != 'HEAD'): +                    method != 'HEAD'):                  method = 'GET'              # Do what the browsers do, despite standards...              if (resp.status_code in (codes.moved, codes.found) and -                    prepared_request.method not in ('GET', 'HEAD')): +                    method not in ('GET', 'HEAD')):                  method = 'GET'              prepared_request.method = method @@ -153,7 +153,7 @@ class SessionRedirectMixin(object):  class Session(SessionRedirectMixin):      """A Requests session. -    Provides cookie persistience, connection-pooling, and configuration. +    Provides cookie persistence, connection-pooling, and configuration.      Basic Usage:: @@ -208,7 +208,10 @@ class Session(SessionRedirectMixin):          #: Should we trust the environment?          self.trust_env = True -        # Set up a CookieJar to be used by default +        #: A CookieJar containing all currently outstanding cookies set on this +        #: session. By default it is a +        #: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but +        #: may be any other ``cookielib.CookieJar`` compatible object.          self.cookies = cookiejar_from_dict({})          # Default connection adapters. @@ -222,6 +225,46 @@ class Session(SessionRedirectMixin):      def __exit__(self, *args):          self.close() +    def prepare_request(self, request): +        """Constructs a :class:`PreparedRequest <PreparedRequest>` for +        transmission and returns it. The :class:`PreparedRequest` has settings +        merged from the :class:`Request <Request>` instance and those of the +        :class:`Session`. + +        :param request: :class:`Request` instance to prepare with this +        session's settings. +        """ +        cookies = request.cookies or {} + +        # Bootstrap CookieJar. +        if not isinstance(cookies, cookielib.CookieJar): +            cookies = cookiejar_from_dict(cookies) + +        # Merge with session cookies +        merged_cookies = RequestsCookieJar() +        merged_cookies.update(self.cookies) +        merged_cookies.update(cookies) + + +        # Set environment's basic authentication if not explicitly set. +        auth = request.auth +        if self.trust_env and not auth and not self.auth: +            auth = get_netrc_auth(request.url) + +        p = PreparedRequest() +        p.prepare( +            method=request.method.upper(), +            url=request.url, +            files=request.files, +            data=request.data, +            headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), +            params=merge_setting(request.params, self.params), +            auth=merge_setting(auth, self.auth), +            cookies=merged_cookies, +            hooks=merge_setting(request.hooks, self.hooks), +        ) +        return p +      def request(self, method, url,          params=None,          data=None, @@ -265,20 +308,22 @@ class Session(SessionRedirectMixin):          :param cert: (optional) if String, path to ssl client cert file (.pem).              If Tuple, ('cert', 'key') pair.          """ +        # Create the Request. +        req = Request( +            method = method.upper(), +            url = url, +            headers = headers, +            files = files, +            data = data or {}, +            params = params or {}, +            auth = auth, +            cookies = cookies, +            hooks = hooks, +        ) +        prep = self.prepare_request(req) -        cookies = cookies or {}          proxies = proxies or {} -        # Bootstrap CookieJar. -        if not isinstance(cookies, cookielib.CookieJar): -            cookies = cookiejar_from_dict(cookies) - -        # Merge with session cookies -        merged_cookies = RequestsCookieJar() -        merged_cookies.update(self.cookies) -        merged_cookies.update(cookies) -        cookies = merged_cookies -          # Gather clues from the surrounding environment.          if self.trust_env:              # Set environment's proxies. @@ -286,10 +331,6 @@ class Session(SessionRedirectMixin):              for (k, v) in env_proxies.items():                  proxies.setdefault(k, v) -            # Set environment's basic authentication. -            if not auth: -                auth = get_netrc_auth(url) -              # Look for configuration.              if not verify and verify is not False:                  verify = os.environ.get('REQUESTS_CA_BUNDLE') @@ -299,30 +340,11 @@ class Session(SessionRedirectMixin):                  verify = os.environ.get('CURL_CA_BUNDLE')          # Merge all the kwargs. -        params = merge_setting(params, self.params) -        headers = merge_setting(headers, self.headers, dict_class=CaseInsensitiveDict) -        auth = merge_setting(auth, self.auth)          proxies = merge_setting(proxies, self.proxies) -        hooks = merge_setting(hooks, self.hooks)          stream = merge_setting(stream, self.stream)          verify = merge_setting(verify, self.verify)          cert = merge_setting(cert, self.cert) -        # Create the Request. -        req = Request() -        req.method = method.upper() -        req.url = url -        req.headers = headers -        req.files = files -        req.data = data -        req.params = params -        req.auth = auth -        req.cookies = cookies -        req.hooks = hooks - -        # Prepare the Request. -        prep = req.prepare() -          # Send the request.          send_kwargs = {              'stream': stream, @@ -416,7 +438,7 @@ class Session(SessionRedirectMixin):          # It's possible that users might accidentally send a Request object.          # Guard against that specific failure case. -        if getattr(request, 'prepare', None): +        if not isinstance(request, PreparedRequest):              raise ValueError('You can only send PreparedRequests.')          # Set up variables needed for resolve_redirects and dispatching of @@ -443,6 +465,10 @@ class Session(SessionRedirectMixin):          r = dispatch_hook('response', hooks, r, **kwargs)          # Persist cookies +        if r.history: +            # If the hooks create history then we want those cookies too +            for resp in r.history: +                extract_cookies_to_jar(self.cookies, resp.request, resp.raw)          extract_cookies_to_jar(self.cookies, request, r.raw)          # Redirect resolving generator. @@ -467,7 +493,7 @@ class Session(SessionRedirectMixin):          """Returns the appropriate connnection adapter for the given URL."""          for (prefix, adapter) in self.adapters.items(): -            if url.startswith(prefix): +            if url.lower().startswith(prefix):                  return adapter          # Nothing matches :-/ @@ -475,7 +501,7 @@ class Session(SessionRedirectMixin):      def close(self):          """Closes all adapters and as such the session""" -        for _, v in self.adapters.items(): +        for v in self.adapters.values():              v.close()      def mount(self, prefix, adapter): diff --git a/requests/status_codes.py b/requests/status_codes.py index de38486..ed7a866 100644 --- a/requests/status_codes.py +++ b/requests/status_codes.py @@ -18,7 +18,8 @@ _codes = {      205: ('reset_content', 'reset'),      206: ('partial_content', 'partial'),      207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), -    208: ('im_used',), +    208: ('already_reported',), +    226: ('im_used',),      # Redirection.      300: ('multiple_choices',), diff --git a/requests/structures.py b/requests/structures.py index 8d02ea6..a175913 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -103,7 +103,7 @@ class CaseInsensitiveDict(collections.MutableMapping):      # Copy is required      def copy(self): -         return CaseInsensitiveDict(self._store.values()) +        return CaseInsensitiveDict(self._store.values())      def __repr__(self):          return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/requests/utils.py b/requests/utils.py index b21bf8f..3ec6131 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -21,9 +21,11 @@ from netrc import netrc, NetrcParseError  from . import __version__  from . import certs  from .compat import parse_http_list as _parse_list_header -from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse +from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, +                     is_py2, is_py3, builtin_str, getproxies, proxy_bypass)  from .cookies import RequestsCookieJar, cookiejar_from_dict  from .structures import CaseInsensitiveDict +from .exceptions import MissingSchema, InvalidURL  _hush_pyflakes = (RequestsCookieJar,) @@ -264,8 +266,12 @@ def get_encodings_from_content(content):      """      charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) +    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I) +    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') -    return charset_re.findall(content) +    return (charset_re.findall(content) + +            pragma_re.findall(content) + +            xml_re.findall(content))  def get_encoding_from_headers(headers): @@ -301,7 +307,7 @@ def stream_decode_response_unicode(iterator, r):          rv = decoder.decode(chunk)          if rv:              yield rv -    rv = decoder.decode('', final=True) +    rv = decoder.decode(b'', final=True)      if rv:          yield rv @@ -361,7 +367,11 @@ def unquote_unreserved(uri):      for i in range(1, len(parts)):          h = parts[i][0:2]          if len(h) == 2 and h.isalnum(): -            c = chr(int(h, 16)) +            try: +                c = chr(int(h, 16)) +            except ValueError: +                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h) +              if c in UNRESERVED_SET:                  parts[i] = c + parts[i][2:]              else: @@ -386,25 +396,17 @@ def requote_uri(uri):  def get_environ_proxies(url):      """Return a dict of environment proxies.""" -    proxy_keys = [ -        'all', -        'http', -        'https', -        'ftp', -        'socks' -    ] -      get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())      # First check whether no_proxy is defined. If it is, check that the URL      # we're getting isn't in the no_proxy list.      no_proxy = get_proxy('no_proxy') +    netloc = urlparse(url).netloc      if no_proxy:          # We need to check whether we match here. We need to see if we match          # the end of the netloc, both with and without the port. -        no_proxy = no_proxy.split(',') -        netloc = urlparse(url).netloc +        no_proxy = no_proxy.replace(' ', '').split(',')          for host in no_proxy:              if netloc.endswith(host) or netloc.split(':')[0].endswith(host): @@ -412,10 +414,15 @@ def get_environ_proxies(url):                  # to apply the proxies on this URL.                  return {} +    # If the system proxy settings indicate that this URL should be bypassed, +    # don't proxy. +    if proxy_bypass(netloc): +        return {} +      # If we get here, we either didn't have no_proxy set or we're not going -    # anywhere that no_proxy applies to. -    proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys] -    return dict([(key, val) for (key, val) in proxies if val]) +    # anywhere that no_proxy applies to, and the system settings don't require +    # bypassing the proxy for the current URL. +    return getproxies()  def default_user_agent(): @@ -526,18 +533,13 @@ def guess_json_utf(data):      return None -def prepend_scheme_if_needed(url, new_scheme): -    '''Given a URL that may or may not have a scheme, prepend the given scheme. -    Does not replace a present scheme with the one provided as an argument.''' -    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) - -    # urlparse is a finicky beast, and sometimes decides that there isn't a -    # netloc present. Assume that it's being over-cautious, and switch netloc -    # and path if urlparse decided there was no netloc. -    if not netloc: -        netloc, path = path, netloc +def except_on_missing_scheme(url): +    """Given a URL, raise a MissingSchema exception if the scheme is missing. +    """ +    scheme, netloc, path, params, query, fragment = urlparse(url) -    return urlunparse((scheme, netloc, path, params, query, fragment)) +    if not scheme: +        raise MissingSchema('Proxy URLs must have explicit schemes.')  def get_auth_from_url(url): @@ -548,3 +550,22 @@ def get_auth_from_url(url):          return (parsed.username, parsed.password)      else:          return ('', '') + + +def to_native_string(string, encoding='ascii'): +    """ +    Given a string object, regardless of type, returns a representation of that +    string in the native string type, encoding and decoding where necessary. +    This assumes ASCII unless told otherwise. +    """ +    out = None + +    if isinstance(string, builtin_str): +        out = string +    else: +        if is_py2: +            out = string.encode(encoding) +        else: +            out = string.decode(encoding) + +    return out |