aboutsummaryrefslogtreecommitdiff
path: root/urllib3/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/util.py')
-rw-r--r--urllib3/util.py194
1 files changed, 155 insertions, 39 deletions
diff --git a/urllib3/util.py b/urllib3/util.py
index 2684a2f..8ec990b 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -6,6 +6,8 @@
from base64 import b64encode
+from collections import namedtuple
+from socket import error as SocketError
try:
from select import poll, POLLIN
@@ -20,6 +22,152 @@ from .packages import six
from .exceptions import LocationParseError
+class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
+ """
+ Datastructure for representing an HTTP URL. Used as a return value for
+ :func:`parse_url`.
+ """
+ slots = ()
+
+ def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
+ return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
+
+ @property
+ def hostname(self):
+ """For backwards-compatibility with urlparse. We're nice like that."""
+ return self.host
+
+ @property
+ def request_uri(self):
+ """Absolute path including the query string."""
+ uri = self.path or '/'
+
+ if self.query is not None:
+ uri += '?' + self.query
+
+ return uri
+
+
+def split_first(s, delims):
+ """
+ Given a string and an iterable of delimiters, split on the first found
+ delimiter. Return two split parts and the matched delimiter.
+
+ If not found, then the first part is the full input string.
+
+ Example: ::
+
+ >>> split_first('foo/bar?baz', '?/=')
+ ('foo', 'bar?baz', '/')
+ >>> split_first('foo/bar?baz', '123')
+ ('foo/bar?baz', '', None)
+
+ Scales linearly with number of delims. Not ideal for large number of delims.
+ """
+ min_idx = None
+ min_delim = None
+ for d in delims:
+ idx = s.find(d)
+ if idx < 0:
+ continue
+
+ if min_idx is None or idx < min_idx:
+ min_idx = idx
+ min_delim = d
+
+ if min_idx is None or min_idx < 0:
+ return s, '', None
+
+ return s[:min_idx], s[min_idx+1:], min_delim
+
+
+def parse_url(url):
+ """
+ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+ performed to parse incomplete urls. Fields not provided will be None.
+
+ Partly backwards-compatible with :mod:`urlparse`.
+
+ Example: ::
+
+ >>> parse_url('http://google.com/mail/')
+ Url(scheme='http', host='google.com', port=None, path='/', ...)
+ >>> prase_url('google.com:80')
+ Url(scheme=None, host='google.com', port=80, path=None, ...)
+ >>> prase_url('/foo?bar')
+ Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+ """
+
+ # While this code has overlap with stdlib's urlparse, it is much
+ # simplified for our needs and less annoying.
+ # Additionally, this imeplementations does silly things to be optimal
+ # on CPython.
+
+ scheme = None
+ auth = None
+ host = None
+ port = None
+ path = None
+ fragment = None
+ query = None
+
+ # Scheme
+ if '://' in url:
+ scheme, url = url.split('://', 1)
+
+ # Find the earliest Authority Terminator
+ # (http://tools.ietf.org/html/rfc3986#section-3.2)
+ url, path_, delim = split_first(url, ['/', '?', '#'])
+
+ if delim:
+ # Reassemble the path
+ path = delim + path_
+
+ # Auth
+ if '@' in url:
+ auth, url = url.split('@', 1)
+
+ # IPv6
+ if url and url[0] == '[':
+ host, url = url[1:].split(']', 1)
+
+ # Port
+ if ':' in url:
+ _host, port = url.split(':', 1)
+
+ if not host:
+ host = _host
+
+ if not port.isdigit():
+ raise LocationParseError("Failed to parse: %s" % url)
+
+ port = int(port)
+
+ elif not host and url:
+ host = url
+
+ if not path:
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+ # Fragment
+ if '#' in path:
+ path, fragment = path.split('#', 1)
+
+ # Query
+ if '?' in path:
+ path, query = path.split('?', 1)
+
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+ """
+ Deprecated. Use :func:`.parse_url` instead.
+ """
+ p = parse_url(url)
+ return p.scheme or 'http', p.hostname, p.port
+
+
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None):
"""
@@ -72,60 +220,28 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
return headers
-def get_host(url):
- """
- Given a url, return its scheme, host and port (None if it's not there).
-
- For example: ::
-
- >>> get_host('http://google.com/mail/')
- ('http', 'google.com', None)
- >>> get_host('google.com:80')
- ('http', 'google.com', 80)
- """
-
- # This code is actually similar to urlparse.urlsplit, but much
- # simplified for our needs.
- port = None
- scheme = 'http'
-
- if '://' in url:
- scheme, url = url.split('://', 1)
- if '/' in url:
- url, _path = url.split('/', 1)
- if '@' in url:
- _auth, url = url.split('@', 1)
- if ':' in url:
- url, port = url.split(':', 1)
-
- if not port.isdigit():
- raise LocationParseError("Failed to parse: %s" % url)
-
- port = int(port)
-
- return scheme, url, port
-
-
-
def is_connection_dropped(conn):
"""
Returns True if the connection is dropped and should be closed.
:param conn:
- ``HTTPConnection`` object.
+ :class:`httplib.HTTPConnection` object.
Note: For platforms like AppEngine, this will always return ``False`` to
let the platform handle connection recycling transparently for us.
"""
sock = getattr(conn, 'sock', False)
- if not sock: #Platform-specific: AppEngine
+ if not sock: # Platform-specific: AppEngine
return False
if not poll: # Platform-specific
- if not select: #Platform-specific: AppEngine
+ if not select: # Platform-specific: AppEngine
return False
- return select([sock], [], [], 0.0)[0]
+ try:
+ return select([sock], [], [], 0.0)[0]
+ except SocketError:
+ return True
# This version is better on platforms that support it.
p = poll()