From bf12eaaa5428798962777e05fd98be024e0ce27c Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:41:18 -0700 Subject: Imported Upstream version 0.6.4 --- requests/api.py | 85 ++++++++--------------- requests/async.py | 93 ++++++++++++++++++------- requests/config.py | 1 + requests/core.py | 10 +-- requests/models.py | 160 +++++++++++++++++++++++++++++-------------- requests/patches.py | 5 -- requests/utils.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 400 insertions(+), 146 deletions(-) delete mode 100644 requests/patches.py (limited to 'requests') diff --git a/requests/api.py b/requests/api.py index 0cea63d..1b847b7 100644 --- a/requests/api.py +++ b/requests/api.py @@ -15,18 +15,17 @@ import config from .models import Request, Response, AuthObject from .status_codes import codes from .hooks import dispatch_hook -from .utils import cookiejar_from_dict +from .utils import cookiejar_from_dict, header_expand -from urlparse import urlparse __all__ = ('request', 'get', 'head', 'post', 'patch', 'put', 'delete') def request(method, url, params=None, data=None, headers=None, cookies=None, files=None, auth=None, - timeout=None, allow_redirects=False, proxies=None, hooks=None): + timeout=None, allow_redirects=False, proxies=None, hooks=None, return_response=True): - """Constructs and sends a :class:`Request `. - Returns :class:`Response ` object. + """Constructs and sends a :class:`Request `. + Returns :class:`Response ` object. :param method: method for the new :class:`Request` object. :param url: URL for the new :class:`Request` object. @@ -39,13 +38,21 @@ def request(method, url, :param timeout: (optional) Float describing the timeout of the request. :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. 
+ :param return_response: (optional) If False, an un-sent Request object will be returned. """ + method = str(method).upper() + if cookies is None: cookies = {} cookies = cookiejar_from_dict(cookies) + # Expand header values + if headers: + for k, v in headers.items() or {}: + headers[k] = header_expand(v) + args = dict( method = method, url = url, @@ -55,6 +62,7 @@ def request(method, url, cookiejar = cookies, files = files, auth = auth, + hooks = hooks, timeout = timeout or config.settings.timeout, allow_redirects = allow_redirects, proxies = proxies or config.settings.proxies, @@ -68,6 +76,10 @@ def request(method, url, # Pre-request hook. r = dispatch_hook('pre_request', hooks, r) + # Don't send if asked nicely. + if not return_response: + return r + # Send the HTTP Request. r.send() @@ -85,50 +97,34 @@ def get(url, **kwargs): """Sends a GET request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. - :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ + + kwargs.setdefault('allow_redirects', True) return request('GET', url, **kwargs) def head(url, **kwargs): - """Sends a HEAD request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to sent with the :class:`Request`. 
- :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ + kwargs.setdefault('allow_redirects', True) return request('HEAD', url, **kwargs) def post(url, data='', **kwargs): - """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to sent with the :class:`Request`. - :param files: (optional) Dictionary of 'filename': file-like-objects for multipart encoding upload. - :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param allow_redirects: (optional) Boolean. Set to True if redirect following is allowed. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ - return request('POST', url, data=data, **kwargs) + return request('post', url, data=data, **kwargs) def put(url, data='', **kwargs): @@ -136,17 +132,10 @@ def put(url, data='', **kwargs): :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to sent with the :class:`Request`. 
- :param files: (optional) Dictionary of 'filename': file-like-objects for multipart encoding upload. - :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param allow_redirects: (optional) Boolean. Set to True if redirect following is allowed. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ - return request('PUT', url, data=data, **kwargs) + return request('put', url, data=data, **kwargs) def patch(url, data='', **kwargs): @@ -154,31 +143,17 @@ def patch(url, data='', **kwargs): :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to sent with the :class:`Request`. - :param files: (optional) Dictionary of 'filename': file-like-objects for multipart encoding upload. - :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param allow_redirects: (optional) Boolean. Set to True if redirect following is allowed. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ - return request('PATCH', url, **kwargs) + return request('patch', url, **kwargs) def delete(url, **kwargs): - """Sends a DELETE request. Returns :class:`Response` object. 
:param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary of parameters, or bytes, to be sent in the query string for the :class:`Request`. - :param headers: (optional) Dictionary of HTTP Headers to sent with the :class:`Request`. - :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param auth: (optional) AuthObject to enable Basic HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. - :param allow_redirects: (optional) Boolean. Set to True if redirect following is allowed. - :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param **kwargs: Optional arguments that ``request`` takes. """ - return request('DELETE', url, **kwargs) + return request('delete', url, **kwargs) diff --git a/requests/async.py b/requests/async.py index ab04084..db25f6a 100644 --- a/requests/async.py +++ b/requests/async.py @@ -1,41 +1,84 @@ # -*- coding: utf-8 -*- """ - requests.async - ~~~~~~~~~~~~~~ +requests.async +~~~~~~~~~~~~~~ - This module implements the main Requests system, after monkey-patching - the urllib2 module with eventlet or gevent.. - - :copyright: (c) 2011 by Kenneth Reitz. - :license: ISC, see LICENSE for more details. +This module contains an asynchronous replica of ``requests.api``, powered +by gevent. All API methods return a ``Request`` instance (as opposed to +``Response``). A list of requests can be sent with ``map()``. """ +try: + import gevent + from gevent import monkey as curious_george +except ImportError: + raise RuntimeError('Gevent is required for requests.async.') + +# Monkey-patch. +curious_george.patch_all(thread=False) -from __future__ import absolute_import +from . 
import api +from .hooks import dispatch_hook -import urllib -import urllib2 -from urllib2 import HTTPError +__all__ = ( + 'map', + 'get', 'head', 'post', 'put', 'patch', 'delete', 'request' +) -try: - import eventlet - eventlet.monkey_patch() -except ImportError: - pass +def _patched(f): + """Patches a given API function to not send.""" + + def wrapped(*args, **kwargs): + return f(*args, return_response=False, **kwargs) + + return wrapped + + +def _send(r, pools=None): + """Sends a given Request object.""" + + if pools: + r._pools = pools + + r.send() + + # Post-request hook. + r = dispatch_hook('post_request', r.hooks, r) + + # Response manipulation hook. + r.response = dispatch_hook('response', r.hooks, r.response) + + return r.response + + +# Patched requests.api functions. +get = _patched(api.get) +head = _patched(api.head) +post = _patched(api.post) +put = _patched(api.put) +patch = _patched(api.patch) +delete = _patched(api.delete) +request = _patched(api.request) + + +def map(requests, prefetch=True): + """Concurrently converts a list of Requests to Responses. + + :param requests: a collection of Request objects. + :param prefetch: If False, the content will not be downloaded immediately. + """ + + jobs = [gevent.spawn(_send, r) for r in requests] + gevent.joinall(jobs) + + if prefetch: + [r.response.content for r in requests] -if not 'eventlet' in locals(): - try: - from gevent import monkey - monkey.patch_all() - except ImportError: - pass + return [r.response for r in requests] -if not 'eventlet' in locals(): - raise ImportError('No Async adaptations of urllib2 found!') -from .core import * diff --git a/requests/config.py b/requests/config.py index 39be2ed..794109c 100644 --- a/requests/config.py +++ b/requests/config.py @@ -62,6 +62,7 @@ settings.proxies = None settings.verbose = None settings.timeout = None settings.max_redirects = 30 +settings.decode_unicode = True #: Use socket.setdefaulttimeout() as fallback? 
settings.timeout_fallback = True diff --git a/requests/core.py b/requests/core.py index 8ba34a2..de05cf9 100644 --- a/requests/core.py +++ b/requests/core.py @@ -12,16 +12,18 @@ This module implements the main Requests system. """ __title__ = 'requests' -__version__ = '0.6.1' -__build__ = 0x000601 +__version__ = '0.6.4' +__build__ = 0x000604 __author__ = 'Kenneth Reitz' __license__ = 'ISC' __copyright__ = 'Copyright 2011 Kenneth Reitz' -from models import HTTPError +from models import HTTPError, Request, Response from api import * from exceptions import * from sessions import session from status_codes import codes -from config import settings \ No newline at end of file +from config import settings + +import utils diff --git a/requests/models.py b/requests/models.py index 2d7fc8f..9a8f5f9 100644 --- a/requests/models.py +++ b/requests/models.py @@ -9,8 +9,10 @@ requests.models import urllib import urllib2 import socket +import codecs import zlib + from urllib2 import HTTPError from urlparse import urlparse, urlunparse, urljoin from datetime import datetime @@ -20,9 +22,9 @@ from .monkeys import Request as _Request, HTTPBasicAuthHandler, HTTPForcedBasicA from .structures import CaseInsensitiveDict from .packages.poster.encode import multipart_encode from .packages.poster.streaminghttp import register_openers, get_handlers -from .utils import dict_from_cookiejar -from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects +from .utils import dict_from_cookiejar, get_unicode_from_response, stream_decode_response_unicode, decode_gzip, stream_decode_gzip from .status_codes import codes +from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) @@ -30,14 +32,14 @@ REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) class Request(object): - """The 
:class:`Request ` object. It carries out all functionality of + """The :class:`Request ` object. It carries out all functionality of Requests. Recommended interface is with the Requests functions. """ def __init__(self, url=None, headers=dict(), files=None, method=None, data=dict(), params=dict(), auth=None, cookiejar=None, timeout=None, redirect=False, - allow_redirects=False, proxies=None): + allow_redirects=False, proxies=None, hooks=None): #: Float describ the timeout of the request. # (Use socket.setdefaulttimeout() as fallback) @@ -46,24 +48,24 @@ class Request(object): #: Request URL. self.url = url - #: Dictonary of HTTP Headers to attach to the :class:`Request `. + #: Dictonary of HTTP Headers to attach to the :class:`Request `. self.headers = headers #: Dictionary of files to multipart upload (``{filename: content}``). self.files = files - #: HTTP Method to use. Available: GET, HEAD, PUT, POST, DELETE. + #: HTTP Method to use. self.method = method #: Dictionary or byte of request body data to attach to the - #: :class:`Request `. + #: :class:`Request `. self.data = None #: Dictionary or byte of querystring data to attach to the - #: :class:`Request `. + #: :class:`Request `. self.params = None - #: True if :class:`Request ` is part of a redirect chain (disables history + #: True if :class:`Request ` is part of a redirect chain (disables history #: and HTTPError storage). self.redirect = redirect @@ -76,7 +78,7 @@ class Request(object): self.data, self._enc_data = self._encode_params(data) self.params, self._enc_params = self._encode_params(params) - #: :class:`Response ` instance, containing + #: :class:`Response ` instance, containing #: content and metadata of HTTP Response, once :attr:`sent `. self.response = Response() @@ -85,15 +87,17 @@ class Request(object): if not auth: auth = auth_manager.get_auth(self.url) - #: :class:`AuthObject` to attach to :class:`Request `. + #: :class:`AuthObject` to attach to :class:`Request `. 
self.auth = auth - #: CookieJar to attach to :class:`Request `. + #: CookieJar to attach to :class:`Request `. self.cookiejar = cookiejar #: True if Request has been sent. self.sent = False + #: Event-handling hooks. + self.hooks = hooks # Header manipulation and defaults. @@ -132,9 +136,16 @@ class Request(object): _handlers.append(urllib2.HTTPCookieProcessor(self.cookiejar)) if self.auth: - if not isinstance(self.auth.handler, (urllib2.AbstractBasicAuthHandler, urllib2.AbstractDigestAuthHandler)): + if not isinstance(self.auth.handler, + (urllib2.AbstractBasicAuthHandler, + urllib2.AbstractDigestAuthHandler)): + # TODO: REMOVE THIS COMPLETELY - auth_manager.add_password(self.auth.realm, self.url, self.auth.username, self.auth.password) + auth_manager.add_password( + self.auth.realm, self.url, + self.auth.username, + self.auth.password) + self.auth.handler = self.auth.handler(auth_manager) auth_manager.add_auth(self.url, self.auth) @@ -166,7 +177,10 @@ class Request(object): def _build_response(self, resp, is_error=False): - """Build internal :class:`Response ` object from given response.""" + """Build internal :class:`Response ` object + from given response. + """ + def build(resp): @@ -175,12 +189,9 @@ class Request(object): try: response.headers = CaseInsensitiveDict(getattr(resp.info(), 'dict', None)) - response.read = resp.read - response._resp = resp - response._close = resp.close + response.raw = resp if self.cookiejar: - response.cookies = dict_from_cookiejar(self.cookiejar) @@ -203,12 +214,10 @@ class Request(object): while ( ('location' in r.headers) and - ((self.method in ('GET', 'HEAD')) or - (r.status_code is codes.see_other) or - (self.allow_redirects)) + ((r.status_code is codes.see_other) or (self.allow_redirects)) ): - r.close() + r.raw.close() if not len(history) < settings.max_redirects: raise TooManyRedirects() @@ -257,8 +266,8 @@ class Request(object): Otherwise, assumes the data is already encoded appropriately, and returns it twice. 
- """ + if hasattr(data, 'items'): result = [] for k, vs in data.items(): @@ -302,7 +311,6 @@ class Request(object): """ self._checks() - success = False # Logging if settings.verbose: @@ -363,10 +371,11 @@ class Request(object): if hasattr(why, 'reason'): if isinstance(why.reason, socket.timeout): why = Timeout(why) + elif isinstance(why.reason, socket.error): + why = Timeout(why) self._build_response(why, is_error=True) - else: self._build_response(resp) self.response.ok = True @@ -377,37 +386,46 @@ class Request(object): return self.sent - class Response(object): - """The core :class:`Response ` object. All - :class:`Request ` objects contain a - :class:`response ` attribute, which is an instance + """The core :class:`Response ` object. All + :class:`Request ` objects contain a + :class:`response ` attribute, which is an instance of this class. """ def __init__(self): - #: Raw content of the response, in bytes. - #: If ``content-encoding`` of response was set to ``gzip``, the - #: response data will be automatically deflated. + self._content = None + self._content_consumed = False + #: Integer Code of responded HTTP Status. self.status_code = None + #: Case-insensitive Dictionary of Response Headers. #: For example, ``headers['content-encoding']`` will return the #: value of a ``'Content-Encoding'`` response header. self.headers = CaseInsensitiveDict() + + #: File-like object representation of response (for advanced usage). + self.raw = None + #: Final URL location of Response. self.url = None + #: True if no :attr:`error` occured. self.ok = False + #: Resulting :class:`HTTPError` of request, if one occured. self.error = None - #: A list of :class:`Response ` objects from + + #: A list of :class:`Response ` objects from #: the history of the Request. Any redirect responses will end #: up here. self.history = [] - #: The Request that created the Response. + + #: The :class:`Request ` that created the Response. 
self.request = None + #: A dictionary of Cookies the server sent back. self.cookies = None @@ -418,23 +436,65 @@ class Response(object): def __nonzero__(self): """Returns true if :attr:`status_code` is 'OK'.""" + return not self.error + def iter_content(self, chunk_size=10 * 1024, decode_unicode=None): + """Iterates over the response data. This avoids reading the content + at once into memory for large responses. The chunk size is the number + of bytes it should read into memory. This is not necessarily the + length of each item returned as decoding can take place. + """ + if self._content_consumed: + raise RuntimeError('The content for this response was ' + 'already consumed') + + def generate(): + while 1: + chunk = self.raw.read(chunk_size) + if not chunk: + break + yield chunk + self._content_consumed = True + gen = generate() + if 'gzip' in self.headers.get('content-encoding', ''): + gen = stream_decode_gzip(gen) + if decode_unicode is None: + decode_unicode = settings.decode_unicode + if decode_unicode: + gen = stream_decode_response_unicode(gen, self) + return gen + + @property + def content(self): + """Content of the response, in bytes or unicode + (if available). + """ - def __getattr__(self, name): - """Read and returns the full stream when accessing to :attr: `content`""" - if name == 'content': - if self._content is not None: - return self._content - self._content = self.read() - if self.headers.get('content-encoding', '') == 'gzip': - try: - self._content = zlib.decompress(self._content, 16+zlib.MAX_WBITS) - except zlib.error: - pass + if self._content is not None: return self._content - else: - raise AttributeError + + if self._content_consumed: + raise RuntimeError('The content for this response was ' + 'already consumed') + + # Read the contents. + self._content = self.raw.read() + + # Decode GZip'd content. 
+ if 'gzip' in self.headers.get('content-encoding', ''): + try: + self._content = decode_gzip(self._content) + except zlib.error: + pass + + # Decode unicode content. + if settings.decode_unicode: + self._content = get_unicode_from_response(self) + + self._content_consumed = True + return self._content + def raise_for_status(self): """Raises stored :class:`HTTPError` or :class:`URLError`, if one occured.""" @@ -442,10 +502,6 @@ class Response(object): raise self.error - def close(self): - if self._resp.fp is not None and hasattr(self._resp.fp, '_sock'): - self._resp.fp._sock.recv = None - self._close() class AuthManager(object): """Requests Authentication Manager.""" diff --git a/requests/patches.py b/requests/patches.py deleted file mode 100644 index 43a3b4c..0000000 --- a/requests/patches.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -requests.monkeys -""" diff --git a/requests/utils.py b/requests/utils.py index 8ac78b4..2e16163 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -9,15 +9,69 @@ that are also useful for external consumption. """ +import cgi +import codecs import cookielib +import re +import zlib -def dict_from_cookiejar(cookiejar): - """Returns a key/value dictionary from a CookieJar.""" +def header_expand(headers): + """Returns an HTTP Header value string from a dictionary. 
+ + Example expansion:: + + {'text/x-dvi': {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}, 'text/x-c': {}} + # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c + + (('text/x-dvi', {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}), ('text/x-c', {})) + # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c + """ + + collector = [] + + if isinstance(headers, dict): + headers = headers.items() + + elif isinstance(headers, basestring): + return headers + + for i, (value, params) in enumerate(headers): + + _params = [] + + for (p_k, p_v) in params.items(): + + _params.append('%s=%s' % (p_k, p_v)) + + collector.append(value) + collector.append('; ') + + if len(params): + + collector.append('; '.join(_params)) + + if not len(headers) == i+1: + collector.append(', ') + + + # Remove trailing seperators. + if collector[-1] in (', ', '; '): + del collector[-1] + + return ''.join(collector) + + + +def dict_from_cookiejar(cj): + """Returns a key/value dictionary from a CookieJar. + + :param cj: CookieJar object to extract cookies from. + """ cookie_dict = {} - for _, cookies in cookiejar._cookies.items(): + for _, cookies in cj._cookies.items(): for _, cookies in cookies.items(): for cookie in cookies.values(): # print cookie @@ -27,7 +81,10 @@ def dict_from_cookiejar(cookiejar): def cookiejar_from_dict(cookie_dict): - """Returns a CookieJar from a key/value dictionary.""" + """Returns a CookieJar from a key/value dictionary. + + :param cookie_dict: Dict of key/values to insert into CookieJar. + """ # return cookiejar if one was passed in if isinstance(cookie_dict, cookielib.CookieJar): @@ -42,7 +99,11 @@ def cookiejar_from_dict(cookie_dict): def add_dict_to_cookiejar(cj, cookie_dict): - """Returns a CookieJar from a key/value dictionary.""" + """Returns a CookieJar from a key/value dictionary. + + :param cj: CookieJar to insert cookies into. + :param cookie_dict: Dict of key/values to insert into CookieJar. 
+ """ + for k, v in cookie_dict.items(): @@ -70,3 +131,124 @@ def add_dict_to_cookiejar(cj, cookie_dict): cj.set_cookie(cookie) return cj + + +def get_encodings_from_content(content): + """Returns encodings from given content string. + + :param content: bytestring to extract encodings from. + """ + + charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I) + + return charset_re.findall(content) + + +def get_encoding_from_headers(headers): + """Returns encodings from given HTTP Header Dict. + + :param headers: dictionary to extract encoding from. + """ + + content_type = headers.get('content-type') + + if not content_type: + return None + + content_type, params = cgi.parse_header(content_type) + + if 'charset' in params: + return params['charset'].strip("'\"") + + +def unicode_from_html(content): + """Attempts to decode an HTML string into unicode. + If unsuccessful, the original content is returned. + """ + + encodings = get_encodings_from_content(content) + + for encoding in encodings: + + try: + return unicode(content, encoding) + except (UnicodeError, TypeError): + pass + + return content + + +def stream_decode_response_unicode(iterator, r): + """Stream decodes an iterator.""" + encoding = get_encoding_from_headers(r.headers) + if encoding is None: + for item in iterator: + yield item + return + + decoder = codecs.getincrementaldecoder(encoding)(errors='replace') + for chunk in iterator: + rv = decoder.decode(chunk) + if rv: + yield rv + rv = decoder.decode('', final=True) + if rv: + yield rv + + +def get_unicode_from_response(r): + """Returns the requested content back in unicode. + + :param r: Response object to get unicode content from. + + Tried: + + 1. charset from content-type + + 2. every encoding from ``<meta ... charset=XXX>`` + + 3. 
fall back and replace all unicode characters + + """ + + tried_encodings = [] + + # Try charset from content-type + encoding = get_encoding_from_headers(r.headers) + + if encoding: + try: + return unicode(r.content, encoding) + except UnicodeError: + tried_encodings.append(encoding) + + # Fall back: + try: + return unicode(r.content, encoding, errors='replace') + except TypeError: + return r.content + + +def decode_gzip(content): + """Return gzip-decoded string. + + :param content: bytestring to gzip-decode. + """ + + return zlib.decompress(content, 16 + zlib.MAX_WBITS) + + +def stream_decode_gzip(iterator): + """Stream decodes a gzip-encoded iterator""" + try: + dec = zlib.decompressobj(16 + zlib.MAX_WBITS) + for chunk in iterator: + rv = dec.decompress(chunk) + if rv: + yield rv + buf = dec.decompress('') + rv = buf + dec.flush() + if rv: + yield rv + except zlib.error: + pass -- cgit v1.2.3