aboutsummaryrefslogtreecommitdiff
path: root/requests/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'requests/utils.py')
-rw-r--r--requests/utils.py192
1 files changed, 187 insertions, 5 deletions
diff --git a/requests/utils.py b/requests/utils.py
index 8ac78b4..2e16163 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -9,15 +9,69 @@ that are also useful for external consumption.
"""
+import cgi
+import codecs
import cookielib
+import re
+import zlib
-def dict_from_cookiejar(cookiejar):
- """Returns a key/value dictionary from a CookieJar."""
def header_expand(headers):
    """Returns an HTTP Header value string from a dictionary.

    Example expansion::

        {'text/x-dvi': {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}, 'text/x-c': {}}
        # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c

        (('text/x-dvi', {'q': '.8', 'mxb': '100000', 'mxt': '5.0'}), ('text/x-c', {}))
        # Accept: text/x-dvi; q=.8; mxb=100000; mxt=5.0, text/x-c

    :param headers: dict mapping each header value to a dict of its
        parameters, an iterable of ``(value, params)`` pairs, or an
        already-expanded string (returned unchanged).
    :returns: the expanded header value; an empty string when ``headers``
        contains no entries.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(headers, string_types):
        return headers

    if isinstance(headers, dict):
        headers = headers.items()

    entries = []

    for value, params in headers:
        # An entry is its value followed by any ``key=value`` parameters,
        # all separated by '; '. Joining (instead of appending separators
        # by hand) never leaves a dangling '; ' on parameterless entries.
        parts = [value]
        parts.extend('%s=%s' % (p_k, p_v) for (p_k, p_v) in params.items())
        entries.append('; '.join(parts))

    return ', '.join(entries)
+
+
+
+def dict_from_cookiejar(cj):
+ """Returns a key/value dictionary from a CookieJar.
+
+ :param cj: CookieJar object to extract cookies from.
+ """
cookie_dict = {}
- for _, cookies in cookiejar._cookies.items():
+ for _, cookies in cj._cookies.items():
for _, cookies in cookies.items():
for cookie in cookies.values():
# print cookie
@@ -27,7 +81,10 @@ def dict_from_cookiejar(cookiejar):
def cookiejar_from_dict(cookie_dict):
- """Returns a CookieJar from a key/value dictionary."""
+ """Returns a CookieJar from a key/value dictionary.
+
+ :param cookie_dict: Dict of key/values to insert into CookieJar.
+ """
# return cookiejar if one was passed in
if isinstance(cookie_dict, cookielib.CookieJar):
@@ -42,7 +99,11 @@ def cookiejar_from_dict(cookie_dict):
def add_dict_to_cookiejar(cj, cookie_dict):
- """Returns a CookieJar from a key/value dictionary."""
+ """Returns a CookieJar from a key/value dictionary.
+
+ :param cj: CookieJar to insert cookies into.
+ :param cookie_dict: Dict of key/values to insert into CookieJar.
+ """
for k, v in cookie_dict.items():
@@ -70,3 +131,124 @@ def add_dict_to_cookiejar(cj, cookie_dict):
cj.set_cookie(cookie)
return cj
+
+
def get_encodings_from_content(content):
    """Collects the charsets declared in ``<meta>`` tags of the content.

    :param content: bytestring to scan for charset declarations.
    :returns: list of the matched charset names, in order of appearance
        (empty when nothing matches).
    """

    meta_charset_pattern = r'<meta.*?charset=["\']*(.+?)["\'>]'

    return re.findall(meta_charset_pattern, content, flags=re.I)
+
+
def get_encoding_from_headers(headers):
    """Returns the charset announced by the ``content-type`` header.

    :param headers: dictionary to extract encoding from.
    :returns: the charset with surrounding quotes stripped, or ``None``
        when the header is missing/empty or carries no charset parameter.
    """

    raw_content_type = headers.get('content-type')

    if raw_content_type:
        # parse_header yields (main value, parameter dict); only the
        # parameters are of interest here.
        options = cgi.parse_header(raw_content_type)[1]

        if 'charset' in options:
            return options['charset'].strip("'\"")

    return None
+
+
def unicode_from_html(content):
    """Attempts to decode an HTML string into unicode.

    Each charset found by ``get_encodings_from_content`` is tried in
    order; the first one that decodes the content wins.
    If unsuccessful, the original content is returned.

    :param content: HTML bytestring to decode.
    """

    for encoding in get_encodings_from_content(content):

        try:
            return unicode(content, encoding)
        except (UnicodeError, LookupError, TypeError):
            # LookupError: the page declared a charset there is no codec
            # for. Treat it like any other failed attempt and move on
            # rather than letting it escape.
            continue

    return content
+
+
def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator of chunks using the response's charset.

    :param iterator: iterable yielding the raw chunks.
    :param r: object whose ``headers`` are passed to
        ``get_encoding_from_headers`` to determine the charset.

    When the headers give no charset the chunks are passed through
    untouched. Otherwise each chunk is fed to an incremental decoder
    (undecodable input is replaced) and only non-empty results are yielded.
    """
    charset = get_encoding_from_headers(r.headers)

    if charset is not None:
        incremental = codecs.getincrementaldecoder(charset)(errors='replace')

        for raw in iterator:
            text = incremental.decode(raw)
            if text:
                yield text

        # Flush whatever the decoder is still holding on to.
        tail = incremental.decode('', final=True)
        if tail:
            yield tail
    else:
        for raw in iterator:
            yield raw
+
+
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type, decoding strictly

    2. the same charset again, replacing undecodable characters

    3. fall back to the raw, undecoded content when the headers give no
       charset or the charset names an unknown codec

    """

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return unicode(r.content, encoding)
        except (UnicodeError, LookupError):
            # Strict decoding failed (or the codec is unknown); the
            # lenient attempt below decides what to return.
            pass

    # Fall back:
    try:
        return unicode(r.content, encoding, errors='replace')
    except (TypeError, LookupError):
        # TypeError: no encoding was found (``encoding`` is None).
        # LookupError: the declared charset has no codec.
        return r.content
+
+
def decode_gzip(content):
    """Decompresses a complete gzip payload in one call.

    :param content: bytestring to gzip-decode.
    :returns: the decompressed bytestring.
    """

    # Adding 16 to the window size tells zlib to expect a gzip
    # header and trailer.
    gzip_wbits = zlib.MAX_WBITS + 16

    return zlib.decompress(content, gzip_wbits)
+
+
def stream_decode_gzip(iterator):
    """Stream decodes a gzip-encoded iterator.

    :param iterator: iterable yielding chunks of a gzip bytestring.

    Yields each non-empty piece of decompressed data as soon as it is
    available. Decoding is best-effort: a ``zlib.error`` ends the stream
    silently, so corrupt input yields only what was decoded before the
    error.
    """
    try:
        # Adding 16 to the window size tells zlib to expect a gzip
        # header and trailer.
        dec = zlib.decompressobj(16 + zlib.MAX_WBITS)

        for chunk in iterator:
            rv = dec.decompress(chunk)
            if rv:
                yield rv

        # flush() alone drains any remaining output; feeding the
        # decompressor an extra empty string first adds nothing.
        rv = dec.flush()
        if rv:
            yield rv
    except zlib.error:
        pass