From 4fad8c1375c73a3d4483fe78e4534c7dabc453f5 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:41:15 -0700 Subject: Imported Upstream version 0.4.1 --- requests/packages/__init__.py | 3 + requests/packages/poster/__init__.py | 34 +++ requests/packages/poster/encode.py | 414 ++++++++++++++++++++++++++++++ requests/packages/poster/streaminghttp.py | 199 ++++++++++++++ 4 files changed, 650 insertions(+) create mode 100644 requests/packages/__init__.py create mode 100644 requests/packages/poster/__init__.py create mode 100644 requests/packages/poster/encode.py create mode 100644 requests/packages/poster/streaminghttp.py (limited to 'requests/packages') diff --git a/requests/packages/__init__.py b/requests/packages/__init__.py new file mode 100644 index 0000000..ab2669e --- /dev/null +++ b/requests/packages/__init__.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import + +from . import poster diff --git a/requests/packages/poster/__init__.py b/requests/packages/poster/__init__.py new file mode 100644 index 0000000..6e216fc --- /dev/null +++ b/requests/packages/poster/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2010 Chris AtLee +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +"""poster module + +Support for streaming HTTP uploads, and multipart/form-data encoding + +```poster.version``` is a 3-tuple of integers representing the version number. +New releases of poster will always have a version number that compares greater +than an older version of poster. +New in version 0.6.""" + +from __future__ import absolute_import + +from . import streaminghttp +from . import encode + +version = (0, 8, 0) # Thanks JP! diff --git a/requests/packages/poster/encode.py b/requests/packages/poster/encode.py new file mode 100644 index 0000000..cf2298d --- /dev/null +++ b/requests/packages/poster/encode.py @@ -0,0 +1,414 @@ +"""multipart/form-data encoding module + +This module provides functions that faciliate encoding name/value pairs +as multipart/form-data suitable for a HTTP POST or PUT request. + +multipart/form-data is the standard way to upload files over HTTP""" + +__all__ = ['gen_boundary', 'encode_and_quote', 'MultipartParam', + 'encode_string', 'encode_file_header', 'get_body_size', 'get_headers', + 'multipart_encode'] + +try: + import uuid + def gen_boundary(): + """Returns a random string to use as the boundary for a message""" + return uuid.uuid4().hex +except ImportError: + import random, sha + def gen_boundary(): + """Returns a random string to use as the boundary for a message""" + bits = random.getrandbits(160) + return sha.new(str(bits)).hexdigest() + +import urllib, re, os, mimetypes +try: + from email.header import Header +except ImportError: + # Python 2.4 + from email.Header import Header + +def encode_and_quote(data): + """If ``data`` is unicode, return urllib.quote_plus(data.encode("utf-8")) + otherwise return urllib.quote_plus(data)""" + if data is None: + return None + + if isinstance(data, unicode): + data = data.encode("utf-8") + return urllib.quote_plus(data) + +def _strify(s): + """If s is a unicode string, encode it to UTF-8 and return the results, + otherwise return str(s), or None if s is None""" + if s is None: + return None + if isinstance(s, unicode): + return s.encode("utf-8") + return str(s) + +class MultipartParam(object): + """Represents a single parameter in a multipart/form-data request + + ``name`` is the name of this parameter. + + If ``value`` is set, it must be a string or unicode object to use as the + data for this parameter. + + If ``filename`` is set, it is what to say that this parameter's filename + is. Note that this does not have to be the actual filename any local file. + + If ``filetype`` is set, it is used as the Content-Type for this parameter. + If unset it defaults to "text/plain; charset=utf8" + + If ``filesize`` is set, it specifies the length of the file ``fileobj`` + + If ``fileobj`` is set, it must be a file-like object that supports + .read(). + + Both ``value`` and ``fileobj`` must not be set, doing so will + raise a ValueError assertion. + + If ``fileobj`` is set, and ``filesize`` is not specified, then + the file's size will be determined first by stat'ing ``fileobj``'s + file descriptor, and if that fails, by seeking to the end of the file, + recording the current position as the size, and then by seeking back to the + beginning of the file. + + ``cb`` is a callable which will be called from iter_encode with (self, + current, total), representing the current parameter, current amount + transferred, and the total size. + """ + def __init__(self, name, value=None, filename=None, filetype=None, + filesize=None, fileobj=None, cb=None): + self.name = Header(name).encode() + self.value = _strify(value) + if filename is None: + self.filename = None + else: + if isinstance(filename, unicode): + # Encode with XML entities + self.filename = filename.encode("ascii", "xmlcharrefreplace") + else: + self.filename = str(filename) + self.filename = self.filename.encode("string_escape").\ + replace('"', '\\"') + self.filetype = _strify(filetype) + + self.filesize = filesize + self.fileobj = fileobj + self.cb = cb + + if self.value is not None and self.fileobj is not None: + raise ValueError("Only one of value or fileobj may be specified") + + if fileobj is not None and filesize is None: + # Try and determine the file size + try: + self.filesize = os.fstat(fileobj.fileno()).st_size + except (OSError, AttributeError): + try: + fileobj.seek(0, 2) + self.filesize = fileobj.tell() + fileobj.seek(0) + except: + raise ValueError("Could not determine filesize") + + def __cmp__(self, other): + attrs = ['name', 'value', 'filename', 'filetype', 'filesize', 'fileobj'] + myattrs = [getattr(self, a) for a in attrs] + oattrs = [getattr(other, a) for a in attrs] + return cmp(myattrs, oattrs) + + def reset(self): + if self.fileobj is not None: + self.fileobj.seek(0) + elif self.value is None: + raise ValueError("Don't know how to reset this parameter") + + @classmethod + def from_file(cls, paramname, filename): + """Returns a new MultipartParam object constructed from the local + file at ``filename``. + + ``filesize`` is determined by os.path.getsize(``filename``) + + ``filetype`` is determined by mimetypes.guess_type(``filename``)[0] + + ``filename`` is set to os.path.basename(``filename``) + """ + + return cls(paramname, filename=os.path.basename(filename), + filetype=mimetypes.guess_type(filename)[0], + filesize=os.path.getsize(filename), + fileobj=open(filename, "rb")) + + @classmethod + def from_params(cls, params): + """Returns a list of MultipartParam objects from a sequence of + name, value pairs, MultipartParam instances, + or from a mapping of names to values + + The values may be strings or file objects, or MultipartParam objects. + MultipartParam object names must match the given names in the + name,value pairs or mapping, if applicable.""" + if hasattr(params, 'items'): + params = params.items() + + retval = [] + for item in params: + if isinstance(item, cls): + retval.append(item) + continue + name, value = item + if isinstance(value, cls): + assert value.name == name + retval.append(value) + continue + if hasattr(value, 'read'): + # Looks like a file object + filename = getattr(value, 'name', None) + if filename is not None: + filetype = mimetypes.guess_type(filename)[0] + else: + filetype = None + + retval.append(cls(name=name, filename=filename, + filetype=filetype, fileobj=value)) + else: + retval.append(cls(name, value)) + return retval + + def encode_hdr(self, boundary): + """Returns the header of the encoding of this parameter""" + boundary = encode_and_quote(boundary) + + headers = ["--%s" % boundary] + + if self.filename: + disposition = 'form-data; name="%s"; filename="%s"' % (self.name, + self.filename) + else: + disposition = 'form-data; name="%s"' % self.name + + headers.append("Content-Disposition: %s" % disposition) + + if self.filetype: + filetype = self.filetype + else: + filetype = "text/plain; charset=utf-8" + + headers.append("Content-Type: %s" % filetype) + + headers.append("") + headers.append("") + + return "\r\n".join(headers) + + def encode(self, boundary): + """Returns the string encoding of this parameter""" + if self.value is None: + value = self.fileobj.read() + else: + value = self.value + + if re.search("^--%s$" % re.escape(boundary), value, re.M): + raise ValueError("boundary found in encoded string") + + return "%s%s\r\n" % (self.encode_hdr(boundary), value) + + def iter_encode(self, boundary, blocksize=4096): + """Yields the encoding of this parameter + If self.fileobj is set, then blocks of ``blocksize`` bytes are read and + yielded.""" + total = self.get_size(boundary) + current = 0 + if self.value is not None: + block = self.encode(boundary) + current += len(block) + yield block + if self.cb: + self.cb(self, current, total) + else: + block = self.encode_hdr(boundary) + current += len(block) + yield block + if self.cb: + self.cb(self, current, total) + last_block = "" + encoded_boundary = "--%s" % encode_and_quote(boundary) + boundary_exp = re.compile("^%s$" % re.escape(encoded_boundary), + re.M) + while True: + block = self.fileobj.read(blocksize) + if not block: + current += 2 + yield "\r\n" + if self.cb: + self.cb(self, current, total) + break + last_block += block + if boundary_exp.search(last_block): + raise ValueError("boundary found in file data") + last_block = last_block[-len(encoded_boundary)-2:] + current += len(block) + yield block + if self.cb: + self.cb(self, current, total) + + def get_size(self, boundary): + """Returns the size in bytes that this param will be when encoded + with the given boundary.""" + if self.filesize is not None: + valuesize = self.filesize + else: + valuesize = len(self.value) + + return len(self.encode_hdr(boundary)) + 2 + valuesize + +def encode_string(boundary, name, value): + """Returns ``name`` and ``value`` encoded as a multipart/form-data + variable. ``boundary`` is the boundary string used throughout + a single request to separate variables.""" + + return MultipartParam(name, value).encode(boundary) + +def encode_file_header(boundary, paramname, filesize, filename=None, + filetype=None): + """Returns the leading data for a multipart/form-data field that contains + file data. + + ``boundary`` is the boundary string used throughout a single request to + separate variables. + + ``paramname`` is the name of the variable in this request. + + ``filesize`` is the size of the file data. + + ``filename`` if specified is the filename to give to this field. This + field is only useful to the server for determining the original filename. + + ``filetype`` if specified is the MIME type of this file. + + The actual file data should be sent after this header has been sent. + """ + + return MultipartParam(paramname, filesize=filesize, filename=filename, + filetype=filetype).encode_hdr(boundary) + +def get_body_size(params, boundary): + """Returns the number of bytes that the multipart/form-data encoding + of ``params`` will be.""" + size = sum(p.get_size(boundary) for p in MultipartParam.from_params(params)) + return size + len(boundary) + 6 + +def get_headers(params, boundary): + """Returns a dictionary with Content-Type and Content-Length headers + for the multipart/form-data encoding of ``params``.""" + headers = {} + boundary = urllib.quote_plus(boundary) + headers['Content-Type'] = "multipart/form-data; boundary=%s" % boundary + headers['Content-Length'] = str(get_body_size(params, boundary)) + return headers + +class multipart_yielder: + def __init__(self, params, boundary, cb): + self.params = params + self.boundary = boundary + self.cb = cb + + self.i = 0 + self.p = None + self.param_iter = None + self.current = 0 + self.total = get_body_size(params, boundary) + + def __iter__(self): + return self + + def next(self): + """generator function to yield multipart/form-data representation + of parameters""" + if self.param_iter is not None: + try: + block = self.param_iter.next() + self.current += len(block) + if self.cb: + self.cb(self.p, self.current, self.total) + return block + except StopIteration: + self.p = None + self.param_iter = None + + if self.i is None: + raise StopIteration + elif self.i >= len(self.params): + self.param_iter = None + self.p = None + self.i = None + block = "--%s--\r\n" % self.boundary + self.current += len(block) + if self.cb: + self.cb(self.p, self.current, self.total) + return block + + self.p = self.params[self.i] + self.param_iter = self.p.iter_encode(self.boundary) + self.i += 1 + return self.next() + + def reset(self): + self.i = 0 + self.current = 0 + for param in self.params: + param.reset() + +def multipart_encode(params, boundary=None, cb=None): + """Encode ``params`` as multipart/form-data. + + ``params`` should be a sequence of (name, value) pairs or MultipartParam + objects, or a mapping of names to values. + Values are either strings parameter values, or file-like objects to use as + the parameter value. The file-like objects must support .read() and either + .fileno() or both .seek() and .tell(). + + If ``boundary`` is set, then it as used as the MIME boundary. Otherwise + a randomly generated boundary will be used. In either case, if the + boundary string appears in the parameter values a ValueError will be + raised. + + If ``cb`` is set, it should be a callback which will get called as blocks + of data are encoded. It will be called with (param, current, total), + indicating the current parameter being encoded, the current amount encoded, + and the total amount to encode. + + Returns a tuple of `datagen`, `headers`, where `datagen` is a + generator that will yield blocks of data that make up the encoded + parameters, and `headers` is a dictionary with the assoicated + Content-Type and Content-Length headers. + + Examples: + + >>> datagen, headers = multipart_encode( [("key", "value1"), ("key", "value2")] ) + >>> s = "".join(datagen) + >>> assert "value2" in s and "value1" in s + + >>> p = MultipartParam("key", "value2") + >>> datagen, headers = multipart_encode( [("key", "value1"), p] ) + >>> s = "".join(datagen) + >>> assert "value2" in s and "value1" in s + + >>> datagen, headers = multipart_encode( {"key": "value1"} ) + >>> s = "".join(datagen) + >>> assert "value2" not in s and "value1" in s + + """ + if boundary is None: + boundary = gen_boundary() + else: + boundary = urllib.quote_plus(boundary) + + headers = get_headers(params, boundary) + params = MultipartParam.from_params(params) + + return multipart_yielder(params, boundary, cb), headers diff --git a/requests/packages/poster/streaminghttp.py b/requests/packages/poster/streaminghttp.py new file mode 100644 index 0000000..1b591d4 --- /dev/null +++ b/requests/packages/poster/streaminghttp.py @@ -0,0 +1,199 @@ +"""Streaming HTTP uploads module. + +This module extends the standard httplib and urllib2 objects so that +iterable objects can be used in the body of HTTP requests. + +In most cases all one should have to do is call :func:`register_openers()` +to register the new streaming http handlers which will take priority over +the default handlers, and then you can use iterable objects in the body +of HTTP requests. + +**N.B.** You must specify a Content-Length header if using an iterable object +since there is no way to determine in advance the total size that will be +yielded, and there is no way to reset an interator. + +Example usage: + +>>> from StringIO import StringIO +>>> import urllib2, poster.streaminghttp + +>>> opener = poster.streaminghttp.register_openers() + +>>> s = "Test file data" +>>> f = StringIO(s) + +>>> req = urllib2.Request("http://localhost:5000", f, +... {'Content-Length': str(len(s))}) +""" + +import httplib, urllib2, socket +from httplib import NotConnected + +__all__ = ['StreamingHTTPConnection', 'StreamingHTTPRedirectHandler', + 'StreamingHTTPHandler', 'register_openers'] + +if hasattr(httplib, 'HTTPS'): + __all__.extend(['StreamingHTTPSHandler', 'StreamingHTTPSConnection']) + +class _StreamingHTTPMixin: + """Mixin class for HTTP and HTTPS connections that implements a streaming + send method.""" + def send(self, value): + """Send ``value`` to the server. + + ``value`` can be a string object, a file-like object that supports + a .read() method, or an iterable object that supports a .next() + method. + """ + # Based on python 2.6's httplib.HTTPConnection.send() + if self.sock is None: + if self.auto_open: + self.connect() + else: + raise NotConnected() + + # send the data to the server. if we get a broken pipe, then close + # the socket. we want to reconnect when somebody tries to send again. + # + # NOTE: we DO propagate the error, though, because we cannot simply + # ignore the error... the caller will know if they can retry. + if self.debuglevel > 0: + print "send:", repr(value) + try: + blocksize = 8192 + if hasattr(value, 'read') : + if hasattr(value, 'seek'): + value.seek(0) + if self.debuglevel > 0: + print "sendIng a read()able" + data = value.read(blocksize) + while data: + self.sock.sendall(data) + data = value.read(blocksize) + elif hasattr(value, 'next'): + if hasattr(value, 'reset'): + value.reset() + if self.debuglevel > 0: + print "sendIng an iterable" + for data in value: + self.sock.sendall(data) + else: + self.sock.sendall(value) + except socket.error, v: + if v[0] == 32: # Broken pipe + self.close() + raise + +class StreamingHTTPConnection(_StreamingHTTPMixin, httplib.HTTPConnection): + """Subclass of `httplib.HTTPConnection` that overrides the `send()` method + to support iterable body objects""" + +class StreamingHTTPRedirectHandler(urllib2.HTTPRedirectHandler): + """Subclass of `urllib2.HTTPRedirectHandler` that overrides the + `redirect_request` method to properly handle redirected POST requests + + This class is required because python 2.5's HTTPRedirectHandler does + not remove the Content-Type or Content-Length headers when requesting + the new resource, but the body of the original request is not preserved. + """ + + handler_order = urllib2.HTTPRedirectHandler.handler_order - 1 + + # From python2.6 urllib2's HTTPRedirectHandler + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. + """ + m = req.get_method() + if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (301, 302, 303) and m == "POST"): + # Strictly (according to RFC 2616), 301 or 302 in response + # to a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib2, in this case). In practice, + # essentially all clients do redirect in this case, so we + # do the same. + # be conciliant with URIs containing a space + newurl = newurl.replace(' ', '%20') + newheaders = dict((k, v) for k, v in req.headers.items() + if k.lower() not in ( + "content-length", "content-type") + ) + return urllib2.Request(newurl, + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + else: + raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) + +class StreamingHTTPHandler(urllib2.HTTPHandler): + """Subclass of `urllib2.HTTPHandler` that uses + StreamingHTTPConnection as its http connection class.""" + + handler_order = urllib2.HTTPHandler.handler_order - 1 + + def http_open(self, req): + """Open a StreamingHTTPConnection for the given request""" + return self.do_open(StreamingHTTPConnection, req) + + def http_request(self, req): + """Handle a HTTP request. Make sure that Content-Length is specified + if we're using an interable value""" + # Make sure that if we're using an iterable object as the request + # body, that we've also specified Content-Length + if req.has_data(): + data = req.get_data() + if hasattr(data, 'read') or hasattr(data, 'next'): + if not req.has_header('Content-length'): + raise ValueError( + "No Content-Length specified for iterable body") + return urllib2.HTTPHandler.do_request_(self, req) + +if hasattr(httplib, 'HTTPS'): + class StreamingHTTPSConnection(_StreamingHTTPMixin, + httplib.HTTPSConnection): + """Subclass of `httplib.HTTSConnection` that overrides the `send()` + method to support iterable body objects""" + + class StreamingHTTPSHandler(urllib2.HTTPSHandler): + """Subclass of `urllib2.HTTPSHandler` that uses + StreamingHTTPSConnection as its http connection class.""" + + handler_order = urllib2.HTTPSHandler.handler_order - 1 + + def https_open(self, req): + return self.do_open(StreamingHTTPSConnection, req) + + def https_request(self, req): + # Make sure that if we're using an iterable object as the request + # body, that we've also specified Content-Length + if req.has_data(): + data = req.get_data() + if hasattr(data, 'read') or hasattr(data, 'next'): + if not req.has_header('Content-length'): + raise ValueError( + "No Content-Length specified for iterable body") + return urllib2.HTTPSHandler.do_request_(self, req) + + +def get_handlers(): + handlers = [StreamingHTTPHandler, StreamingHTTPRedirectHandler] + if hasattr(httplib, "HTTPS"): + handlers.append(StreamingHTTPSHandler) + return handlers + +def register_openers(): + """Register the streaming http handlers in the global urllib2 default + opener object. + + Returns the created OpenerDirector object.""" + opener = urllib2.build_opener(*get_handlers()) + + urllib2.install_opener(opener) + + return opener -- cgit v1.2.3