diff options
Diffstat (limited to 'requests/packages')
| -rw-r--r-- | requests/packages/chardet/__init__.py (renamed from requests/packages/charade/__init__.py) | 64 | ||||
| -rw-r--r-- | requests/packages/chardet/big5freq.py (renamed from requests/packages/charade/big5freq.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/big5prober.py (renamed from requests/packages/charade/big5prober.py) | 84 | ||||
| -rwxr-xr-x | requests/packages/chardet/chardetect.py | 46 | ||||
| -rw-r--r-- | requests/packages/chardet/chardistribution.py (renamed from requests/packages/charade/chardistribution.py) | 462 | ||||
| -rw-r--r-- | requests/packages/chardet/charsetgroupprober.py (renamed from requests/packages/charade/charsetgroupprober.py) | 212 | ||||
| -rw-r--r-- | requests/packages/chardet/charsetprober.py (renamed from requests/packages/charade/charsetprober.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/codingstatemachine.py (renamed from requests/packages/charade/codingstatemachine.py) | 122 | ||||
| -rw-r--r-- | requests/packages/chardet/compat.py (renamed from requests/packages/charade/compat.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/constants.py (renamed from requests/packages/charade/constants.py) | 78 | ||||
| -rw-r--r-- | requests/packages/chardet/cp949prober.py (renamed from requests/packages/charade/cp949prober.py) | 88 | ||||
| -rw-r--r-- | requests/packages/chardet/escprober.py (renamed from requests/packages/charade/escprober.py) | 172 | ||||
| -rw-r--r-- | requests/packages/chardet/escsm.py (renamed from requests/packages/charade/escsm.py) | 484 | ||||
| -rw-r--r-- | requests/packages/chardet/eucjpprober.py (renamed from requests/packages/charade/eucjpprober.py) | 180 | ||||
| -rw-r--r-- | requests/packages/chardet/euckrfreq.py (renamed from requests/packages/charade/euckrfreq.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/euckrprober.py (renamed from requests/packages/charade/euckrprober.py) | 84 | ||||
| -rw-r--r-- | requests/packages/chardet/euctwfreq.py (renamed from requests/packages/charade/euctwfreq.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/euctwprober.py (renamed from requests/packages/charade/euctwprober.py) | 82 | ||||
| -rw-r--r-- | requests/packages/chardet/gb2312freq.py (renamed from requests/packages/charade/gb2312freq.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/gb2312prober.py (renamed from requests/packages/charade/gb2312prober.py) | 82 | ||||
| -rw-r--r-- | requests/packages/chardet/hebrewprober.py (renamed from requests/packages/charade/hebrewprober.py) | 566 | ||||
| -rw-r--r-- | requests/packages/chardet/jisfreq.py (renamed from requests/packages/charade/jisfreq.py) | 0 | ||||
| -rw-r--r-- | requests/packages/chardet/jpcntx.py (renamed from requests/packages/charade/jpcntx.py) | 438 | ||||
| -rw-r--r-- | requests/packages/chardet/langbulgarianmodel.py (renamed from requests/packages/charade/langbulgarianmodel.py) | 458 | ||||
| -rw-r--r-- | requests/packages/chardet/langcyrillicmodel.py (renamed from requests/packages/charade/langcyrillicmodel.py) | 658 | ||||
| -rw-r--r-- | requests/packages/chardet/langgreekmodel.py (renamed from requests/packages/charade/langgreekmodel.py) | 450 | ||||
| -rw-r--r-- | requests/packages/chardet/langhebrewmodel.py (renamed from requests/packages/charade/langhebrewmodel.py) | 402 | ||||
| -rw-r--r-- | requests/packages/chardet/langhungarianmodel.py (renamed from requests/packages/charade/langhungarianmodel.py) | 450 | ||||
| -rw-r--r-- | requests/packages/chardet/langthaimodel.py (renamed from requests/packages/charade/langthaimodel.py) | 400 | ||||
| -rw-r--r-- | requests/packages/chardet/latin1prober.py (renamed from requests/packages/charade/latin1prober.py) | 278 | ||||
| -rw-r--r-- | requests/packages/chardet/mbcharsetprober.py (renamed from requests/packages/charade/mbcharsetprober.py) | 172 | ||||
| -rw-r--r-- | requests/packages/chardet/mbcsgroupprober.py (renamed from requests/packages/charade/mbcsgroupprober.py) | 108 | ||||
| -rw-r--r-- | requests/packages/chardet/mbcssm.py (renamed from requests/packages/charade/mbcssm.py) | 1150 | ||||
| -rw-r--r-- | requests/packages/chardet/sbcharsetprober.py (renamed from requests/packages/charade/sbcharsetprober.py) | 240 | ||||
| -rw-r--r-- | requests/packages/chardet/sbcsgroupprober.py (renamed from requests/packages/charade/sbcsgroupprober.py) | 138 | ||||
| -rw-r--r-- | requests/packages/chardet/sjisprober.py (renamed from requests/packages/charade/sjisprober.py) | 182 | ||||
| -rw-r--r-- | requests/packages/chardet/universaldetector.py (renamed from requests/packages/charade/universaldetector.py) | 342 | ||||
| -rw-r--r-- | requests/packages/chardet/utf8prober.py (renamed from requests/packages/charade/utf8prober.py) | 152 | ||||
| -rw-r--r-- | requests/packages/urllib3/_collections.py | 11 | ||||
| -rw-r--r-- | requests/packages/urllib3/connection.py | 178 | ||||
| -rw-r--r-- | requests/packages/urllib3/connectionpool.py | 244 | ||||
| -rw-r--r-- | requests/packages/urllib3/contrib/pyopenssl.py | 58 | ||||
| -rw-r--r-- | requests/packages/urllib3/filepost.py | 11 | ||||
| -rw-r--r-- | requests/packages/urllib3/packages/ssl_match_hostname/__init__.py | 109 | ||||
| -rw-r--r-- | requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py | 105 | ||||
| -rw-r--r-- | requests/packages/urllib3/poolmanager.py | 11 | ||||
| -rw-r--r-- | requests/packages/urllib3/request.py | 1 | ||||
| -rw-r--r-- | requests/packages/urllib3/response.py | 11 | ||||
| -rw-r--r-- | requests/packages/urllib3/util.py | 54 | 
49 files changed, 4943 insertions, 4674 deletions
| diff --git a/requests/packages/charade/__init__.py b/requests/packages/chardet/__init__.py index 1aadf3e..e4f0799 100644 --- a/requests/packages/charade/__init__.py +++ b/requests/packages/chardet/__init__.py @@ -1,32 +1,32 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -__version__ = "1.0.3"
 -from sys import version_info
 -
 -
 -def detect(aBuf):
 -    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
 -            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
 -        raise ValueError('Expected a bytes object, not a unicode object')
 -
 -    from . import universaldetector
 -    u = universaldetector.UniversalDetector()
 -    u.reset()
 -    u.feed(aBuf)
 -    u.close()
 -    return u.result
 +######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +__version__ = "2.2.1" +from sys import version_info + + +def detect(aBuf): +    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or +            (version_info >= (3, 0) and not isinstance(aBuf, bytes))): +        raise ValueError('Expected a bytes object, not a unicode object') + +    from . import universaldetector +    u = universaldetector.UniversalDetector() +    u.reset() +    u.feed(aBuf) +    u.close() +    return u.result diff --git a/requests/packages/charade/big5freq.py b/requests/packages/chardet/big5freq.py index 65bffc0..65bffc0 100644 --- a/requests/packages/charade/big5freq.py +++ b/requests/packages/chardet/big5freq.py diff --git a/requests/packages/charade/big5prober.py b/requests/packages/chardet/big5prober.py index 7382f7c..becce81 100644 --- a/requests/packages/charade/big5prober.py +++ b/requests/packages/chardet/big5prober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import Big5DistributionAnalysis
 -from .mbcssm import Big5SMModel
 -
 -
 -class Big5Prober(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(Big5SMModel)
 -        self._mDistributionAnalyzer = Big5DistributionAnalysis()
 -        self.reset()
 -
 -    def get_charset_name(self):
 -        return "Big5"
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import Big5DistributionAnalysis +from .mbcssm import Big5SMModel + + +class Big5Prober(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(Big5SMModel) +        self._mDistributionAnalyzer = Big5DistributionAnalysis() +        self.reset() + +    def get_charset_name(self): +        return "Big5" diff --git a/requests/packages/chardet/chardetect.py b/requests/packages/chardet/chardetect.py new file mode 100755 index 0000000..ecd0163 --- /dev/null +++ b/requests/packages/chardet/chardetect.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +""" +Script which takes one or 
more file paths and reports on their detected +encodings + +Example:: + +    % chardetect somefile someotherfile +    somefile: windows-1252 with confidence 0.5 +    someotherfile: ascii with confidence 1.0 + +If no paths are provided, it takes its input from stdin. + +""" +from io import open +from sys import argv, stdin + +from chardet.universaldetector import UniversalDetector + + +def description_of(file, name='stdin'): +    """Return a string describing the probable encoding of a file.""" +    u = UniversalDetector() +    for line in file: +        u.feed(line) +    u.close() +    result = u.result +    if result['encoding']: +        return '%s: %s with confidence %s' % (name, +                                              result['encoding'], +                                              result['confidence']) +    else: +        return '%s: no result' % name + + +def main(): +    if len(argv) <= 1: +        print(description_of(stdin)) +    else: +        for path in argv[1:]: +            with open(path, 'rb') as f: +                print(description_of(f, path)) + + +if __name__ == '__main__': +    main() diff --git a/requests/packages/charade/chardistribution.py b/requests/packages/chardet/chardistribution.py index dfd3355..4e64a00 100644 --- a/requests/packages/charade/chardistribution.py +++ b/requests/packages/chardet/chardistribution.py @@ -1,231 +1,231 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
 -                        EUCTW_TYPICAL_DISTRIBUTION_RATIO)
 -from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
 -                        EUCKR_TYPICAL_DISTRIBUTION_RATIO)
 -from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
 -                         GB2312_TYPICAL_DISTRIBUTION_RATIO)
 -from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
 -                       BIG5_TYPICAL_DISTRIBUTION_RATIO)
 -from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
 -                      JIS_TYPICAL_DISTRIBUTION_RATIO)
 -from .compat import wrap_ord
 -
 -ENOUGH_DATA_THRESHOLD = 1024
 -SURE_YES = 0.99
 -SURE_NO = 0.01
 -MINIMUM_DATA_THRESHOLD = 3
 -
 -
 -class CharDistributionAnalysis:
 -    def __init__(self):
 -        # Mapping table to get frequency order from char order (get from
 -        # GetOrder())
 -        self._mCharToFreqOrder = None
 -        self._mTableSize = None  # Size of above table
 -        # This is a constant value which varies from language to language,
 -        # used in calculating confidence.  See
 -        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
 -        # for further detail.
 -        self._mTypicalDistributionRatio = None
 -        self.reset()
 -
 -    def reset(self):
 -        """reset analyser, clear any state"""
 -        # If this flag is set to True, detection is done and conclusion has
 -        # been made
 -        self._mDone = False
 -        self._mTotalChars = 0  # Total characters encountered
 -        # The number of characters whose frequency order is less than 512
 -        self._mFreqChars = 0
 -
 -    def feed(self, aBuf, aCharLen):
 -        """feed a character with known length"""
 -        if aCharLen == 2:
 -            # we only care about 2-bytes character in our distribution analysis
 -            order = self.get_order(aBuf)
 -        else:
 -            order = -1
 -        if order >= 0:
 -            self._mTotalChars += 1
 -            # order is valid
 -            if order < self._mTableSize:
 -                if 512 > self._mCharToFreqOrder[order]:
 -                    self._mFreqChars += 1
 -
 -    def get_confidence(self):
 -        """return confidence based on existing data"""
 -        # if we didn't receive any character in our consideration range,
 -        # return negative answer
 -        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
 -            return SURE_NO
 -
 -        if self._mTotalChars != self._mFreqChars:
 -            r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
 -                 * self._mTypicalDistributionRatio))
 -            if r < SURE_YES:
 -                return r
 -
 -        # normalize confidence (we don't want to be 100% sure)
 -        return SURE_YES
 -
 -    def got_enough_data(self):
 -        # It is not necessary to receive all data to draw conclusion.
 -        # For charset detection, certain amount of data is enough
 -        return self._mTotalChars > ENOUGH_DATA_THRESHOLD
 -
 -    def get_order(self, aBuf):
 -        # We do not handle characters based on the original encoding string,
 -        # but convert this encoding string to a number, here called order.
 -        # This allows multiple encodings of a language to share one frequency
 -        # table.
 -        return -1
 -
 -
 -class EUCTWDistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = EUCTWCharToFreqOrder
 -        self._mTableSize = EUCTW_TABLE_SIZE
 -        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for euc-TW encoding, we are interested
 -        #   first  byte range: 0xc4 -- 0xfe
 -        #   second byte range: 0xa1 -- 0xfe
 -        # no validation needed here. State machine has done that
 -        first_char = wrap_ord(aBuf[0])
 -        if first_char >= 0xC4:
 -            return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
 -        else:
 -            return -1
 -
 -
 -class EUCKRDistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = EUCKRCharToFreqOrder
 -        self._mTableSize = EUCKR_TABLE_SIZE
 -        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for euc-KR encoding, we are interested
 -        #   first  byte range: 0xb0 -- 0xfe
 -        #   second byte range: 0xa1 -- 0xfe
 -        # no validation needed here. State machine has done that
 -        first_char = wrap_ord(aBuf[0])
 -        if first_char >= 0xB0:
 -            return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
 -        else:
 -            return -1
 -
 -
 -class GB2312DistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = GB2312CharToFreqOrder
 -        self._mTableSize = GB2312_TABLE_SIZE
 -        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for GB2312 encoding, we are interested
 -        #  first  byte range: 0xb0 -- 0xfe
 -        #  second byte range: 0xa1 -- 0xfe
 -        # no validation needed here. State machine has done that
 -        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
 -        if (first_char >= 0xB0) and (second_char >= 0xA1):
 -            return 94 * (first_char - 0xB0) + second_char - 0xA1
 -        else:
 -            return -1
 -
 -
 -class Big5DistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = Big5CharToFreqOrder
 -        self._mTableSize = BIG5_TABLE_SIZE
 -        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for big5 encoding, we are interested
 -        #   first  byte range: 0xa4 -- 0xfe
 -        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
 -        # no validation needed here. State machine has done that
 -        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
 -        if first_char >= 0xA4:
 -            if second_char >= 0xA1:
 -                return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
 -            else:
 -                return 157 * (first_char - 0xA4) + second_char - 0x40
 -        else:
 -            return -1
 -
 -
 -class SJISDistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = JISCharToFreqOrder
 -        self._mTableSize = JIS_TABLE_SIZE
 -        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for sjis encoding, we are interested
 -        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
 -        #   second byte range: 0x40 -- 0x7e,  0x81 -- oxfe
 -        # no validation needed here. State machine has done that
 -        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
 -        if (first_char >= 0x81) and (first_char <= 0x9F):
 -            order = 188 * (first_char - 0x81)
 -        elif (first_char >= 0xE0) and (first_char <= 0xEF):
 -            order = 188 * (first_char - 0xE0 + 31)
 -        else:
 -            return -1
 -        order = order + second_char - 0x40
 -        if second_char > 0x7F:
 -            order = -1
 -        return order
 -
 -
 -class EUCJPDistributionAnalysis(CharDistributionAnalysis):
 -    def __init__(self):
 -        CharDistributionAnalysis.__init__(self)
 -        self._mCharToFreqOrder = JISCharToFreqOrder
 -        self._mTableSize = JIS_TABLE_SIZE
 -        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
 -
 -    def get_order(self, aBuf):
 -        # for euc-JP encoding, we are interested
 -        #   first  byte range: 0xa0 -- 0xfe
 -        #   second byte range: 0xa1 -- 0xfe
 -        # no validation needed here. State machine has done that
 -        char = wrap_ord(aBuf[0])
 -        if char >= 0xA0:
 -            return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
 -        else:
 -            return -1
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, +                        EUCTW_TYPICAL_DISTRIBUTION_RATIO) +from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, +                        EUCKR_TYPICAL_DISTRIBUTION_RATIO) +from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, +                         GB2312_TYPICAL_DISTRIBUTION_RATIO) +from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, +                       BIG5_TYPICAL_DISTRIBUTION_RATIO) +from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, +                      JIS_TYPICAL_DISTRIBUTION_RATIO) +from .compat import wrap_ord + +ENOUGH_DATA_THRESHOLD = 1024 +SURE_YES = 0.99 +SURE_NO = 0.01 +MINIMUM_DATA_THRESHOLD = 3 + + +class CharDistributionAnalysis: +    def 
__init__(self): +        # Mapping table to get frequency order from char order (get from +        # GetOrder()) +        self._mCharToFreqOrder = None +        self._mTableSize = None  # Size of above table +        # This is a constant value which varies from language to language, +        # used in calculating confidence.  See +        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html +        # for further detail. +        self._mTypicalDistributionRatio = None +        self.reset() + +    def reset(self): +        """reset analyser, clear any state""" +        # If this flag is set to True, detection is done and conclusion has +        # been made +        self._mDone = False +        self._mTotalChars = 0  # Total characters encountered +        # The number of characters whose frequency order is less than 512 +        self._mFreqChars = 0 + +    def feed(self, aBuf, aCharLen): +        """feed a character with known length""" +        if aCharLen == 2: +            # we only care about 2-bytes character in our distribution analysis +            order = self.get_order(aBuf) +        else: +            order = -1 +        if order >= 0: +            self._mTotalChars += 1 +            # order is valid +            if order < self._mTableSize: +                if 512 > self._mCharToFreqOrder[order]: +                    self._mFreqChars += 1 + +    def get_confidence(self): +        """return confidence based on existing data""" +        # if we didn't receive any character in our consideration range, +        # return negative answer +        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: +            return SURE_NO + +        if self._mTotalChars != self._mFreqChars: +            r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) +                 * self._mTypicalDistributionRatio)) +            if r < SURE_YES: +                return r + +        # normalize confidence (we don't want to be 100% 
sure) +        return SURE_YES + +    def got_enough_data(self): +        # It is not necessary to receive all data to draw conclusion. +        # For charset detection, certain amount of data is enough +        return self._mTotalChars > ENOUGH_DATA_THRESHOLD + +    def get_order(self, aBuf): +        # We do not handle characters based on the original encoding string, +        # but convert this encoding string to a number, here called order. +        # This allows multiple encodings of a language to share one frequency +        # table. +        return -1 + + +class EUCTWDistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = EUCTWCharToFreqOrder +        self._mTableSize = EUCTW_TABLE_SIZE +        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for euc-TW encoding, we are interested +        #   first  byte range: 0xc4 -- 0xfe +        #   second byte range: 0xa1 -- 0xfe +        # no validation needed here. State machine has done that +        first_char = wrap_ord(aBuf[0]) +        if first_char >= 0xC4: +            return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 +        else: +            return -1 + + +class EUCKRDistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = EUCKRCharToFreqOrder +        self._mTableSize = EUCKR_TABLE_SIZE +        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for euc-KR encoding, we are interested +        #   first  byte range: 0xb0 -- 0xfe +        #   second byte range: 0xa1 -- 0xfe +        # no validation needed here. 
State machine has done that +        first_char = wrap_ord(aBuf[0]) +        if first_char >= 0xB0: +            return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 +        else: +            return -1 + + +class GB2312DistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = GB2312CharToFreqOrder +        self._mTableSize = GB2312_TABLE_SIZE +        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for GB2312 encoding, we are interested +        #  first  byte range: 0xb0 -- 0xfe +        #  second byte range: 0xa1 -- 0xfe +        # no validation needed here. State machine has done that +        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) +        if (first_char >= 0xB0) and (second_char >= 0xA1): +            return 94 * (first_char - 0xB0) + second_char - 0xA1 +        else: +            return -1 + + +class Big5DistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = Big5CharToFreqOrder +        self._mTableSize = BIG5_TABLE_SIZE +        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for big5 encoding, we are interested +        #   first  byte range: 0xa4 -- 0xfe +        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe +        # no validation needed here. 
State machine has done that +        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) +        if first_char >= 0xA4: +            if second_char >= 0xA1: +                return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 +            else: +                return 157 * (first_char - 0xA4) + second_char - 0x40 +        else: +            return -1 + + +class SJISDistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = JISCharToFreqOrder +        self._mTableSize = JIS_TABLE_SIZE +        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for sjis encoding, we are interested +        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe +        #   second byte range: 0x40 -- 0x7e,  0x81 -- oxfe +        # no validation needed here. State machine has done that +        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) +        if (first_char >= 0x81) and (first_char <= 0x9F): +            order = 188 * (first_char - 0x81) +        elif (first_char >= 0xE0) and (first_char <= 0xEF): +            order = 188 * (first_char - 0xE0 + 31) +        else: +            return -1 +        order = order + second_char - 0x40 +        if second_char > 0x7F: +            order = -1 +        return order + + +class EUCJPDistributionAnalysis(CharDistributionAnalysis): +    def __init__(self): +        CharDistributionAnalysis.__init__(self) +        self._mCharToFreqOrder = JISCharToFreqOrder +        self._mTableSize = JIS_TABLE_SIZE +        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + +    def get_order(self, aBuf): +        # for euc-JP encoding, we are interested +        #   first  byte range: 0xa0 -- 0xfe +        #   second byte range: 0xa1 -- 0xfe +        # no validation needed here. 
State machine has done that +        char = wrap_ord(aBuf[0]) +        if char >= 0xA0: +            return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 +        else: +            return -1 diff --git a/requests/packages/charade/charsetgroupprober.py b/requests/packages/chardet/charsetgroupprober.py index 2959654..85e7a1c 100644 --- a/requests/packages/charade/charsetgroupprober.py +++ b/requests/packages/chardet/charsetgroupprober.py @@ -1,106 +1,106 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -# 
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -# 
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -# 
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -# 
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from . import constants
 -import sys
 -from .charsetprober import CharSetProber
 -
 -
 -class CharSetGroupProber(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self._mActiveNum = 0
 -        self._mProbers = []
 -        self._mBestGuessProber = None
 -
 -    def reset(self):
 -        CharSetProber.reset(self)
 -        self._mActiveNum = 0
 -        for prober in self._mProbers:
 -            if prober:
 -                prober.reset()
 -                prober.active = True
 -                self._mActiveNum += 1
 -        self._mBestGuessProber = None
 -
 -    def get_charset_name(self):
 -        if not self._mBestGuessProber:
 -            self.get_confidence()
 -            if not self._mBestGuessProber:
 -                return None
 -#                self._mBestGuessProber = self._mProbers[0]
 -        return self._mBestGuessProber.get_charset_name()
 -
 -    def feed(self, aBuf):
 -        for prober in self._mProbers:
 -            if not prober:
 -                continue
 -            if not prober.active:
 -                continue
 -            st = prober.feed(aBuf)
 -            if not st:
 -                continue
 -            if st == constants.eFoundIt:
 -                self._mBestGuessProber = prober
 -                return self.get_state()
 -            elif st == constants.eNotMe:
 -                prober.active = False
 -                self._mActiveNum -= 1
 -                if self._mActiveNum <= 0:
 -                    self._mState = constants.eNotMe
 -                    return self.get_state()
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        st = self.get_state()
 -        if st == constants.eFoundIt:
 -            return 0.99
 -        elif st == constants.eNotMe:
 -            return 0.01
 -        bestConf = 0.0
 -        self._mBestGuessProber = None
 -        for prober in self._mProbers:
 -            if not prober:
 -                continue
 -            if not prober.active:
 -                if constants._debug:
 -                    sys.stderr.write(prober.get_charset_name()
 -                                     + ' not active\n')
 -                continue
 -            cf = prober.get_confidence()
 -            if constants._debug:
 -                sys.stderr.write('%s confidence = %s\n' %
 -                                 (prober.get_charset_name(), cf))
 -            if bestConf < cf:
 -                bestConf = cf
 -                self._mBestGuessProber = prober
 -        if not self._mBestGuessProber:
 -            return 0.0
 -        return bestConf
 -#        else:
 -#            self._mBestGuessProber = self._mProbers[0]
 -#            return self._mBestGuessProber.get_confidence()
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +#  +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +#  +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +#  +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +#  +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from . 
import constants +import sys +from .charsetprober import CharSetProber + + +class CharSetGroupProber(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self._mActiveNum = 0 +        self._mProbers = [] +        self._mBestGuessProber = None + +    def reset(self): +        CharSetProber.reset(self) +        self._mActiveNum = 0 +        for prober in self._mProbers: +            if prober: +                prober.reset() +                prober.active = True +                self._mActiveNum += 1 +        self._mBestGuessProber = None + +    def get_charset_name(self): +        if not self._mBestGuessProber: +            self.get_confidence() +            if not self._mBestGuessProber: +                return None +#                self._mBestGuessProber = self._mProbers[0] +        return self._mBestGuessProber.get_charset_name() + +    def feed(self, aBuf): +        for prober in self._mProbers: +            if not prober: +                continue +            if not prober.active: +                continue +            st = prober.feed(aBuf) +            if not st: +                continue +            if st == constants.eFoundIt: +                self._mBestGuessProber = prober +                return self.get_state() +            elif st == constants.eNotMe: +                prober.active = False +                self._mActiveNum -= 1 +                if self._mActiveNum <= 0: +                    self._mState = constants.eNotMe +                    return self.get_state() +        return self.get_state() + +    def get_confidence(self): +        st = self.get_state() +        if st == constants.eFoundIt: +            return 0.99 +        elif st == constants.eNotMe: +            return 0.01 +        bestConf = 0.0 +        self._mBestGuessProber = None +        for prober in self._mProbers: +            if not prober: +                continue +            if not prober.active: +                if constants._debug: +     
               sys.stderr.write(prober.get_charset_name() +                                     + ' not active\n') +                continue +            cf = prober.get_confidence() +            if constants._debug: +                sys.stderr.write('%s confidence = %s\n' % +                                 (prober.get_charset_name(), cf)) +            if bestConf < cf: +                bestConf = cf +                self._mBestGuessProber = prober +        if not self._mBestGuessProber: +            return 0.0 +        return bestConf +#        else: +#            self._mBestGuessProber = self._mProbers[0] +#            return self._mBestGuessProber.get_confidence() diff --git a/requests/packages/charade/charsetprober.py b/requests/packages/chardet/charsetprober.py index 9758171..9758171 100644 --- a/requests/packages/charade/charsetprober.py +++ b/requests/packages/chardet/charsetprober.py diff --git a/requests/packages/charade/codingstatemachine.py b/requests/packages/chardet/codingstatemachine.py index 1bda9ff..8dd8c91 100644 --- a/requests/packages/charade/codingstatemachine.py +++ b/requests/packages/chardet/codingstatemachine.py @@ -1,61 +1,61 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .constants import eStart
 -from .compat import wrap_ord
 -
 -
 -class CodingStateMachine:
 -    def __init__(self, sm):
 -        self._mModel = sm
 -        self._mCurrentBytePos = 0
 -        self._mCurrentCharLen = 0
 -        self.reset()
 -
 -    def reset(self):
 -        self._mCurrentState = eStart
 -
 -    def next_state(self, c):
 -        # for each byte we get its class
 -        # if it is first byte, we also get byte length
 -        # PY3K: aBuf is a byte stream, so c is an int, not a byte
 -        byteCls = self._mModel['classTable'][wrap_ord(c)]
 -        if self._mCurrentState == eStart:
 -            self._mCurrentBytePos = 0
 -            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
 -        # from byte's class and stateTable, we get its next state
 -        curr_state = (self._mCurrentState * self._mModel['classFactor']
 -                      + byteCls)
 -        self._mCurrentState = self._mModel['stateTable'][curr_state]
 -        self._mCurrentBytePos += 1
 -        return self._mCurrentState
 -
 -    def get_current_charlen(self):
 -        return self._mCurrentCharLen
 -
 -    def get_coding_state_machine(self):
 -        return self._mModel['name']
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart +from .compat import wrap_ord + + +class CodingStateMachine: +    def __init__(self, sm): +        self._mModel = sm +        self._mCurrentBytePos = 0 +        self._mCurrentCharLen = 0 +        self.reset() + +    def reset(self): +        self._mCurrentState = eStart + +    def next_state(self, c): +        # for each byte we get its class +        # if it is first byte, we also get byte length +        # PY3K: aBuf is a byte stream, so c is an int, not a byte +        byteCls = self._mModel['classTable'][wrap_ord(c)] +        if self._mCurrentState == eStart: +            self._mCurrentBytePos = 0 +            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] +        # from byte's class and stateTable, we get its next state +        curr_state = 
(self._mCurrentState * self._mModel['classFactor'] +                      + byteCls) +        self._mCurrentState = self._mModel['stateTable'][curr_state] +        self._mCurrentBytePos += 1 +        return self._mCurrentState + +    def get_current_charlen(self): +        return self._mCurrentCharLen + +    def get_coding_state_machine(self): +        return self._mModel['name'] diff --git a/requests/packages/charade/compat.py b/requests/packages/chardet/compat.py index d9e30ad..d9e30ad 100644 --- a/requests/packages/charade/compat.py +++ b/requests/packages/chardet/compat.py diff --git a/requests/packages/charade/constants.py b/requests/packages/chardet/constants.py index a3d27de..e4d148b 100644 --- a/requests/packages/charade/constants.py +++ b/requests/packages/chardet/constants.py @@ -1,39 +1,39 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -# 
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -# 
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -_debug = 0
 -
 -eDetecting = 0
 -eFoundIt = 1
 -eNotMe = 2
 -
 -eStart = 0
 -eError = 1
 -eItsMe = 2
 -
 -SHORTCUT_THRESHOLD = 0.95
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +#  +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +#  +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +_debug = 0 + +eDetecting = 0 +eFoundIt = 1 +eNotMe = 2 + +eStart = 0 +eError = 1 +eItsMe = 2 + +SHORTCUT_THRESHOLD = 0.95 diff --git a/requests/packages/charade/cp949prober.py b/requests/packages/chardet/cp949prober.py index 543501f..ff4272f 100644 --- a/requests/packages/charade/cp949prober.py +++ b/requests/packages/chardet/cp949prober.py @@ -1,44 +1,44 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import EUCKRDistributionAnalysis
 -from .mbcssm import CP949SMModel
 -
 -
 -class CP949Prober(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(CP949SMModel)
 -        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
 -        #       not different.
 -        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
 -        self.reset()
 -
 -    def get_charset_name(self):
 -        return "CP949"
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import CP949SMModel + + +class CP949Prober(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(CP949SMModel) +        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be +        #       not different. 
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis() +        self.reset() + +    def get_charset_name(self): +        return "CP949" diff --git a/requests/packages/charade/escprober.py b/requests/packages/chardet/escprober.py index 0063935..80a844f 100644 --- a/requests/packages/charade/escprober.py +++ b/requests/packages/chardet/escprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from . import constants
 -from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
 -                    ISO2022KRSMModel)
 -from .charsetprober import CharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .compat import wrap_ord
 -
 -
 -class EscCharSetProber(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self._mCodingSM = [
 -            CodingStateMachine(HZSMModel),
 -            CodingStateMachine(ISO2022CNSMModel),
 -            CodingStateMachine(ISO2022JPSMModel),
 -            CodingStateMachine(ISO2022KRSMModel)
 -        ]
 -        self.reset()
 -
 -    def reset(self):
 -        CharSetProber.reset(self)
 -        for codingSM in self._mCodingSM:
 -            if not codingSM:
 -                continue
 -            codingSM.active = True
 -            codingSM.reset()
 -        self._mActiveSM = len(self._mCodingSM)
 -        self._mDetectedCharset = None
 -
 -    def get_charset_name(self):
 -        return self._mDetectedCharset
 -
 -    def get_confidence(self):
 -        if self._mDetectedCharset:
 -            return 0.99
 -        else:
 -            return 0.00
 -
 -    def feed(self, aBuf):
 -        for c in aBuf:
 -            # PY3K: aBuf is a byte array, so c is an int, not a byte
 -            for codingSM in self._mCodingSM:
 -                if not codingSM:
 -                    continue
 -                if not codingSM.active:
 -                    continue
 -                codingState = codingSM.next_state(wrap_ord(c))
 -                if codingState == constants.eError:
 -                    codingSM.active = False
 -                    self._mActiveSM -= 1
 -                    if self._mActiveSM <= 0:
 -                        self._mState = constants.eNotMe
 -                        return self.get_state()
 -                elif codingState == constants.eItsMe:
 -                    self._mState = constants.eFoundIt
 -                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8
 -                    return self.get_state()
 -
 -        return self.get_state()
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from . 
import constants +from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, +                    ISO2022KRSMModel) +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .compat import wrap_ord + + +class EscCharSetProber(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self._mCodingSM = [ +            CodingStateMachine(HZSMModel), +            CodingStateMachine(ISO2022CNSMModel), +            CodingStateMachine(ISO2022JPSMModel), +            CodingStateMachine(ISO2022KRSMModel) +        ] +        self.reset() + +    def reset(self): +        CharSetProber.reset(self) +        for codingSM in self._mCodingSM: +            if not codingSM: +                continue +            codingSM.active = True +            codingSM.reset() +        self._mActiveSM = len(self._mCodingSM) +        self._mDetectedCharset = None + +    def get_charset_name(self): +        return self._mDetectedCharset + +    def get_confidence(self): +        if self._mDetectedCharset: +            return 0.99 +        else: +            return 0.00 + +    def feed(self, aBuf): +        for c in aBuf: +            # PY3K: aBuf is a byte array, so c is an int, not a byte +            for codingSM in self._mCodingSM: +                if not codingSM: +                    continue +                if not codingSM.active: +                    continue +                codingState = codingSM.next_state(wrap_ord(c)) +                if codingState == constants.eError: +                    codingSM.active = False +                    self._mActiveSM -= 1 +                    if self._mActiveSM <= 0: +                        self._mState = constants.eNotMe +                        return self.get_state() +                elif codingState == constants.eItsMe: +                    self._mState = constants.eFoundIt +                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8 +      
              return self.get_state() + +        return self.get_state() diff --git a/requests/packages/charade/escsm.py b/requests/packages/chardet/escsm.py index 1cf3aa6..bd302b4 100644 --- a/requests/packages/charade/escsm.py +++ b/requests/packages/chardet/escsm.py @@ -1,242 +1,242 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .constants import eStart, eError, eItsMe
 -
 -HZ_cls = (
 -1,0,0,0,0,0,0,0,  # 00 - 07
 -0,0,0,0,0,0,0,0,  # 08 - 0f
 -0,0,0,0,0,0,0,0,  # 10 - 17
 -0,0,0,1,0,0,0,0,  # 18 - 1f
 -0,0,0,0,0,0,0,0,  # 20 - 27
 -0,0,0,0,0,0,0,0,  # 28 - 2f
 -0,0,0,0,0,0,0,0,  # 30 - 37
 -0,0,0,0,0,0,0,0,  # 38 - 3f
 -0,0,0,0,0,0,0,0,  # 40 - 47
 -0,0,0,0,0,0,0,0,  # 48 - 4f
 -0,0,0,0,0,0,0,0,  # 50 - 57
 -0,0,0,0,0,0,0,0,  # 58 - 5f
 -0,0,0,0,0,0,0,0,  # 60 - 67
 -0,0,0,0,0,0,0,0,  # 68 - 6f
 -0,0,0,0,0,0,0,0,  # 70 - 77
 -0,0,0,4,0,5,2,0,  # 78 - 7f
 -1,1,1,1,1,1,1,1,  # 80 - 87
 -1,1,1,1,1,1,1,1,  # 88 - 8f
 -1,1,1,1,1,1,1,1,  # 90 - 97
 -1,1,1,1,1,1,1,1,  # 98 - 9f
 -1,1,1,1,1,1,1,1,  # a0 - a7
 -1,1,1,1,1,1,1,1,  # a8 - af
 -1,1,1,1,1,1,1,1,  # b0 - b7
 -1,1,1,1,1,1,1,1,  # b8 - bf
 -1,1,1,1,1,1,1,1,  # c0 - c7
 -1,1,1,1,1,1,1,1,  # c8 - cf
 -1,1,1,1,1,1,1,1,  # d0 - d7
 -1,1,1,1,1,1,1,1,  # d8 - df
 -1,1,1,1,1,1,1,1,  # e0 - e7
 -1,1,1,1,1,1,1,1,  # e8 - ef
 -1,1,1,1,1,1,1,1,  # f0 - f7
 -1,1,1,1,1,1,1,1,  # f8 - ff
 -)
 -
 -HZ_st = (
 -eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
 -eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
 -     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
 -     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
 -     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
 -)
 -
 -HZCharLenTable = (0, 0, 0, 0, 0, 0)
 -
 -HZSMModel = {'classTable': HZ_cls,
 -             'classFactor': 6,
 -             'stateTable': HZ_st,
 -             'charLenTable': HZCharLenTable,
 -             'name': "HZ-GB-2312"}
 -
 -ISO2022CN_cls = (
 -2,0,0,0,0,0,0,0,  # 00 - 07
 -0,0,0,0,0,0,0,0,  # 08 - 0f
 -0,0,0,0,0,0,0,0,  # 10 - 17
 -0,0,0,1,0,0,0,0,  # 18 - 1f
 -0,0,0,0,0,0,0,0,  # 20 - 27
 -0,3,0,0,0,0,0,0,  # 28 - 2f
 -0,0,0,0,0,0,0,0,  # 30 - 37
 -0,0,0,0,0,0,0,0,  # 38 - 3f
 -0,0,0,4,0,0,0,0,  # 40 - 47
 -0,0,0,0,0,0,0,0,  # 48 - 4f
 -0,0,0,0,0,0,0,0,  # 50 - 57
 -0,0,0,0,0,0,0,0,  # 58 - 5f
 -0,0,0,0,0,0,0,0,  # 60 - 67
 -0,0,0,0,0,0,0,0,  # 68 - 6f
 -0,0,0,0,0,0,0,0,  # 70 - 77
 -0,0,0,0,0,0,0,0,  # 78 - 7f
 -2,2,2,2,2,2,2,2,  # 80 - 87
 -2,2,2,2,2,2,2,2,  # 88 - 8f
 -2,2,2,2,2,2,2,2,  # 90 - 97
 -2,2,2,2,2,2,2,2,  # 98 - 9f
 -2,2,2,2,2,2,2,2,  # a0 - a7
 -2,2,2,2,2,2,2,2,  # a8 - af
 -2,2,2,2,2,2,2,2,  # b0 - b7
 -2,2,2,2,2,2,2,2,  # b8 - bf
 -2,2,2,2,2,2,2,2,  # c0 - c7
 -2,2,2,2,2,2,2,2,  # c8 - cf
 -2,2,2,2,2,2,2,2,  # d0 - d7
 -2,2,2,2,2,2,2,2,  # d8 - df
 -2,2,2,2,2,2,2,2,  # e0 - e7
 -2,2,2,2,2,2,2,2,  # e8 - ef
 -2,2,2,2,2,2,2,2,  # f0 - f7
 -2,2,2,2,2,2,2,2,  # f8 - ff
 -)
 -
 -ISO2022CN_st = (
 -eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
 -eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
 -eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
 -eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
 -     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
 -eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
 -)
 -
 -ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
 -
 -ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
 -                    'classFactor': 9,
 -                    'stateTable': ISO2022CN_st,
 -                    'charLenTable': ISO2022CNCharLenTable,
 -                    'name': "ISO-2022-CN"}
 -
 -ISO2022JP_cls = (
 -2,0,0,0,0,0,0,0,  # 00 - 07
 -0,0,0,0,0,0,2,2,  # 08 - 0f
 -0,0,0,0,0,0,0,0,  # 10 - 17
 -0,0,0,1,0,0,0,0,  # 18 - 1f
 -0,0,0,0,7,0,0,0,  # 20 - 27
 -3,0,0,0,0,0,0,0,  # 28 - 2f
 -0,0,0,0,0,0,0,0,  # 30 - 37
 -0,0,0,0,0,0,0,0,  # 38 - 3f
 -6,0,4,0,8,0,0,0,  # 40 - 47
 -0,9,5,0,0,0,0,0,  # 48 - 4f
 -0,0,0,0,0,0,0,0,  # 50 - 57
 -0,0,0,0,0,0,0,0,  # 58 - 5f
 -0,0,0,0,0,0,0,0,  # 60 - 67
 -0,0,0,0,0,0,0,0,  # 68 - 6f
 -0,0,0,0,0,0,0,0,  # 70 - 77
 -0,0,0,0,0,0,0,0,  # 78 - 7f
 -2,2,2,2,2,2,2,2,  # 80 - 87
 -2,2,2,2,2,2,2,2,  # 88 - 8f
 -2,2,2,2,2,2,2,2,  # 90 - 97
 -2,2,2,2,2,2,2,2,  # 98 - 9f
 -2,2,2,2,2,2,2,2,  # a0 - a7
 -2,2,2,2,2,2,2,2,  # a8 - af
 -2,2,2,2,2,2,2,2,  # b0 - b7
 -2,2,2,2,2,2,2,2,  # b8 - bf
 -2,2,2,2,2,2,2,2,  # c0 - c7
 -2,2,2,2,2,2,2,2,  # c8 - cf
 -2,2,2,2,2,2,2,2,  # d0 - d7
 -2,2,2,2,2,2,2,2,  # d8 - df
 -2,2,2,2,2,2,2,2,  # e0 - e7
 -2,2,2,2,2,2,2,2,  # e8 - ef
 -2,2,2,2,2,2,2,2,  # f0 - f7
 -2,2,2,2,2,2,2,2,  # f8 - ff
 -)
 -
 -ISO2022JP_st = (
 -eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
 -eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
 -eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
 -eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
 -eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
 -eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
 -eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
 -)
 -
 -ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
 -
 -ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
 -                    'classFactor': 10,
 -                    'stateTable': ISO2022JP_st,
 -                    'charLenTable': ISO2022JPCharLenTable,
 -                    'name': "ISO-2022-JP"}
 -
 -ISO2022KR_cls = (
 -2,0,0,0,0,0,0,0,  # 00 - 07
 -0,0,0,0,0,0,0,0,  # 08 - 0f
 -0,0,0,0,0,0,0,0,  # 10 - 17
 -0,0,0,1,0,0,0,0,  # 18 - 1f
 -0,0,0,0,3,0,0,0,  # 20 - 27
 -0,4,0,0,0,0,0,0,  # 28 - 2f
 -0,0,0,0,0,0,0,0,  # 30 - 37
 -0,0,0,0,0,0,0,0,  # 38 - 3f
 -0,0,0,5,0,0,0,0,  # 40 - 47
 -0,0,0,0,0,0,0,0,  # 48 - 4f
 -0,0,0,0,0,0,0,0,  # 50 - 57
 -0,0,0,0,0,0,0,0,  # 58 - 5f
 -0,0,0,0,0,0,0,0,  # 60 - 67
 -0,0,0,0,0,0,0,0,  # 68 - 6f
 -0,0,0,0,0,0,0,0,  # 70 - 77
 -0,0,0,0,0,0,0,0,  # 78 - 7f
 -2,2,2,2,2,2,2,2,  # 80 - 87
 -2,2,2,2,2,2,2,2,  # 88 - 8f
 -2,2,2,2,2,2,2,2,  # 90 - 97
 -2,2,2,2,2,2,2,2,  # 98 - 9f
 -2,2,2,2,2,2,2,2,  # a0 - a7
 -2,2,2,2,2,2,2,2,  # a8 - af
 -2,2,2,2,2,2,2,2,  # b0 - b7
 -2,2,2,2,2,2,2,2,  # b8 - bf
 -2,2,2,2,2,2,2,2,  # c0 - c7
 -2,2,2,2,2,2,2,2,  # c8 - cf
 -2,2,2,2,2,2,2,2,  # d0 - d7
 -2,2,2,2,2,2,2,2,  # d8 - df
 -2,2,2,2,2,2,2,2,  # e0 - e7
 -2,2,2,2,2,2,2,2,  # e8 - ef
 -2,2,2,2,2,2,2,2,  # f0 - f7
 -2,2,2,2,2,2,2,2,  # f8 - ff
 -)
 -
 -ISO2022KR_st = (
 -eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
 -eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
 -eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
 -eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
 -)
 -
 -ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
 -
 -ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
 -                    'classFactor': 6,
 -                    'stateTable': ISO2022KR_st,
 -                    'charLenTable': ISO2022KRCharLenTable,
 -                    'name': "ISO-2022-KR"}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +HZ_cls = ( +1,0,0,0,0,0,0,0,  # 00 - 07 +0,0,0,0,0,0,0,0,  # 08 - 0f +0,0,0,0,0,0,0,0,  # 10 - 17 +0,0,0,1,0,0,0,0,  # 18 - 1f +0,0,0,0,0,0,0,0,  # 20 - 27 +0,0,0,0,0,0,0,0,  # 28 - 2f +0,0,0,0,0,0,0,0,  # 30 - 37 +0,0,0,0,0,0,0,0,  # 38 - 3f +0,0,0,0,0,0,0,0,  # 40 - 47 +0,0,0,0,0,0,0,0,  # 48 - 4f +0,0,0,0,0,0,0,0,  # 50 - 57 +0,0,0,0,0,0,0,0,  # 58 - 5f +0,0,0,0,0,0,0,0,  # 60 - 67 +0,0,0,0,0,0,0,0,  # 68 - 6f +0,0,0,0,0,0,0,0,  # 70 - 77 +0,0,0,4,0,5,2,0,  # 78 - 7f +1,1,1,1,1,1,1,1,  # 80 - 87 +1,1,1,1,1,1,1,1,  # 88 - 8f +1,1,1,1,1,1,1,1,  # 90 - 97 +1,1,1,1,1,1,1,1,  # 98 - 9f +1,1,1,1,1,1,1,1,  # a0 - a7 +1,1,1,1,1,1,1,1,  # a8 - af +1,1,1,1,1,1,1,1,  # b0 - b7 +1,1,1,1,1,1,1,1,  # b8 - bf +1,1,1,1,1,1,1,1,  # c0 - c7 +1,1,1,1,1,1,1,1,  # 
c8 - cf +1,1,1,1,1,1,1,1,  # d0 - d7 +1,1,1,1,1,1,1,1,  # d8 - df +1,1,1,1,1,1,1,1,  # e0 - e7 +1,1,1,1,1,1,1,1,  # e8 - ef +1,1,1,1,1,1,1,1,  # f0 - f7 +1,1,1,1,1,1,1,1,  # f8 - ff +) + +HZ_st = ( +eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17 +     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f +     4,eError,     4,     4,     4,eError,     4,eError,# 20-27 +     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, +             'classFactor': 6, +             'stateTable': HZ_st, +             'charLenTable': HZCharLenTable, +             'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( +2,0,0,0,0,0,0,0,  # 00 - 07 +0,0,0,0,0,0,0,0,  # 08 - 0f +0,0,0,0,0,0,0,0,  # 10 - 17 +0,0,0,1,0,0,0,0,  # 18 - 1f +0,0,0,0,0,0,0,0,  # 20 - 27 +0,3,0,0,0,0,0,0,  # 28 - 2f +0,0,0,0,0,0,0,0,  # 30 - 37 +0,0,0,0,0,0,0,0,  # 38 - 3f +0,0,0,4,0,0,0,0,  # 40 - 47 +0,0,0,0,0,0,0,0,  # 48 - 4f +0,0,0,0,0,0,0,0,  # 50 - 57 +0,0,0,0,0,0,0,0,  # 58 - 5f +0,0,0,0,0,0,0,0,  # 60 - 67 +0,0,0,0,0,0,0,0,  # 68 - 6f +0,0,0,0,0,0,0,0,  # 70 - 77 +0,0,0,0,0,0,0,0,  # 78 - 7f +2,2,2,2,2,2,2,2,  # 80 - 87 +2,2,2,2,2,2,2,2,  # 88 - 8f +2,2,2,2,2,2,2,2,  # 90 - 97 +2,2,2,2,2,2,2,2,  # 98 - 9f +2,2,2,2,2,2,2,2,  # a0 - a7 +2,2,2,2,2,2,2,2,  # a8 - af +2,2,2,2,2,2,2,2,  # b0 - b7 +2,2,2,2,2,2,2,2,  # b8 - bf +2,2,2,2,2,2,2,2,  # c0 - c7 +2,2,2,2,2,2,2,2,  # c8 - cf +2,2,2,2,2,2,2,2,  # d0 - d7 +2,2,2,2,2,2,2,2,  # d8 - df +2,2,2,2,2,2,2,2,  # e0 - e7 +2,2,2,2,2,2,2,2,  # e8 - ef +2,2,2,2,2,2,2,2,  # f0 - f7 +2,2,2,2,2,2,2,2,  # f8 - ff +) + +ISO2022CN_st = ( +eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 
+eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 +     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, +                    'classFactor': 9, +                    'stateTable': ISO2022CN_st, +                    'charLenTable': ISO2022CNCharLenTable, +                    'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( +2,0,0,0,0,0,0,0,  # 00 - 07 +0,0,0,0,0,0,2,2,  # 08 - 0f +0,0,0,0,0,0,0,0,  # 10 - 17 +0,0,0,1,0,0,0,0,  # 18 - 1f +0,0,0,0,7,0,0,0,  # 20 - 27 +3,0,0,0,0,0,0,0,  # 28 - 2f +0,0,0,0,0,0,0,0,  # 30 - 37 +0,0,0,0,0,0,0,0,  # 38 - 3f +6,0,4,0,8,0,0,0,  # 40 - 47 +0,9,5,0,0,0,0,0,  # 48 - 4f +0,0,0,0,0,0,0,0,  # 50 - 57 +0,0,0,0,0,0,0,0,  # 58 - 5f +0,0,0,0,0,0,0,0,  # 60 - 67 +0,0,0,0,0,0,0,0,  # 68 - 6f +0,0,0,0,0,0,0,0,  # 70 - 77 +0,0,0,0,0,0,0,0,  # 78 - 7f +2,2,2,2,2,2,2,2,  # 80 - 87 +2,2,2,2,2,2,2,2,  # 88 - 8f +2,2,2,2,2,2,2,2,  # 90 - 97 +2,2,2,2,2,2,2,2,  # 98 - 9f +2,2,2,2,2,2,2,2,  # a0 - a7 +2,2,2,2,2,2,2,2,  # a8 - af +2,2,2,2,2,2,2,2,  # b0 - b7 +2,2,2,2,2,2,2,2,  # b8 - bf +2,2,2,2,2,2,2,2,  # c0 - c7 +2,2,2,2,2,2,2,2,  # c8 - cf +2,2,2,2,2,2,2,2,  # d0 - d7 +2,2,2,2,2,2,2,2,  # d8 - df +2,2,2,2,2,2,2,2,  # e0 - e7 +2,2,2,2,2,2,2,2,  # e8 - ef +2,2,2,2,2,2,2,2,  # f0 - f7 +2,2,2,2,2,2,2,2,  # f8 - ff +) + +ISO2022JP_st = ( +eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError,     5,eError,eError,eError,     4,eError,eError,# 20-27 +eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f 
+eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, +                    'classFactor': 10, +                    'stateTable': ISO2022JP_st, +                    'charLenTable': ISO2022JPCharLenTable, +                    'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( +2,0,0,0,0,0,0,0,  # 00 - 07 +0,0,0,0,0,0,0,0,  # 08 - 0f +0,0,0,0,0,0,0,0,  # 10 - 17 +0,0,0,1,0,0,0,0,  # 18 - 1f +0,0,0,0,3,0,0,0,  # 20 - 27 +0,4,0,0,0,0,0,0,  # 28 - 2f +0,0,0,0,0,0,0,0,  # 30 - 37 +0,0,0,0,0,0,0,0,  # 38 - 3f +0,0,0,5,0,0,0,0,  # 40 - 47 +0,0,0,0,0,0,0,0,  # 48 - 4f +0,0,0,0,0,0,0,0,  # 50 - 57 +0,0,0,0,0,0,0,0,  # 58 - 5f +0,0,0,0,0,0,0,0,  # 60 - 67 +0,0,0,0,0,0,0,0,  # 68 - 6f +0,0,0,0,0,0,0,0,  # 70 - 77 +0,0,0,0,0,0,0,0,  # 78 - 7f +2,2,2,2,2,2,2,2,  # 80 - 87 +2,2,2,2,2,2,2,2,  # 88 - 8f +2,2,2,2,2,2,2,2,  # 90 - 97 +2,2,2,2,2,2,2,2,  # 98 - 9f +2,2,2,2,2,2,2,2,  # a0 - a7 +2,2,2,2,2,2,2,2,  # a8 - af +2,2,2,2,2,2,2,2,  # b0 - b7 +2,2,2,2,2,2,2,2,  # b8 - bf +2,2,2,2,2,2,2,2,  # c0 - c7 +2,2,2,2,2,2,2,2,  # c8 - cf +2,2,2,2,2,2,2,2,  # d0 - d7 +2,2,2,2,2,2,2,2,  # d8 - df +2,2,2,2,2,2,2,2,  # e0 - e7 +2,2,2,2,2,2,2,2,  # e8 - ef +2,2,2,2,2,2,2,2,  # f0 - f7 +2,2,2,2,2,2,2,2,  # f8 - ff +) + +ISO2022KR_st = ( +eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17 +eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, +                    'classFactor': 6, +                    'stateTable': ISO2022KR_st, +            
        'charLenTable': ISO2022KRCharLenTable, +                    'name': "ISO-2022-KR"} + +# flake8: noqa diff --git a/requests/packages/charade/eucjpprober.py b/requests/packages/chardet/eucjpprober.py index d70cfbb..8e64fdc 100644 --- a/requests/packages/charade/eucjpprober.py +++ b/requests/packages/chardet/eucjpprober.py @@ -1,90 +1,90 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -import sys
 -from . import constants
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import EUCJPDistributionAnalysis
 -from .jpcntx import EUCJPContextAnalysis
 -from .mbcssm import EUCJPSMModel
 -
 -
 -class EUCJPProber(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
 -        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
 -        self._mContextAnalyzer = EUCJPContextAnalysis()
 -        self.reset()
 -
 -    def reset(self):
 -        MultiByteCharSetProber.reset(self)
 -        self._mContextAnalyzer.reset()
 -
 -    def get_charset_name(self):
 -        return "EUC-JP"
 -
 -    def feed(self, aBuf):
 -        aLen = len(aBuf)
 -        for i in range(0, aLen):
 -            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
 -            codingState = self._mCodingSM.next_state(aBuf[i])
 -            if codingState == constants.eError:
 -                if constants._debug:
 -                    sys.stderr.write(self.get_charset_name()
 -                                     + ' prober hit error at byte ' + str(i)
 -                                     + '\n')
 -                self._mState = constants.eNotMe
 -                break
 -            elif codingState == constants.eItsMe:
 -                self._mState = constants.eFoundIt
 -                break
 -            elif codingState == constants.eStart:
 -                charLen = self._mCodingSM.get_current_charlen()
 -                if i == 0:
 -                    self._mLastChar[1] = aBuf[0]
 -                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
 -                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
 -                else:
 -                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
 -                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
 -                                                     charLen)
 -
 -        self._mLastChar[0] = aBuf[aLen - 1]
 -
 -        if self.get_state() == constants.eDetecting:
 -            if (self._mContextAnalyzer.got_enough_data() and
 -               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
 -                self._mState = constants.eFoundIt
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        contxtCf = self._mContextAnalyzer.get_confidence()
 -        distribCf = self._mDistributionAnalyzer.get_confidence()
 -        return max(contxtCf, distribCf)
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +import sys +from . 
import constants +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCJPDistributionAnalysis +from .jpcntx import EUCJPContextAnalysis +from .mbcssm import EUCJPSMModel + + +class EUCJPProber(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(EUCJPSMModel) +        self._mDistributionAnalyzer = EUCJPDistributionAnalysis() +        self._mContextAnalyzer = EUCJPContextAnalysis() +        self.reset() + +    def reset(self): +        MultiByteCharSetProber.reset(self) +        self._mContextAnalyzer.reset() + +    def get_charset_name(self): +        return "EUC-JP" + +    def feed(self, aBuf): +        aLen = len(aBuf) +        for i in range(0, aLen): +            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte +            codingState = self._mCodingSM.next_state(aBuf[i]) +            if codingState == constants.eError: +                if constants._debug: +                    sys.stderr.write(self.get_charset_name() +                                     + ' prober hit error at byte ' + str(i) +                                     + '\n') +                self._mState = constants.eNotMe +                break +            elif codingState == constants.eItsMe: +                self._mState = constants.eFoundIt +                break +            elif codingState == constants.eStart: +                charLen = self._mCodingSM.get_current_charlen() +                if i == 0: +                    self._mLastChar[1] = aBuf[0] +                    self._mContextAnalyzer.feed(self._mLastChar, charLen) +                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen) +                else: +                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) +                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], +                              
                       charLen) + +        self._mLastChar[0] = aBuf[aLen - 1] + +        if self.get_state() == constants.eDetecting: +            if (self._mContextAnalyzer.got_enough_data() and +               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): +                self._mState = constants.eFoundIt + +        return self.get_state() + +    def get_confidence(self): +        contxtCf = self._mContextAnalyzer.get_confidence() +        distribCf = self._mDistributionAnalyzer.get_confidence() +        return max(contxtCf, distribCf) diff --git a/requests/packages/charade/euckrfreq.py b/requests/packages/chardet/euckrfreq.py index a179e4c..a179e4c 100644 --- a/requests/packages/charade/euckrfreq.py +++ b/requests/packages/chardet/euckrfreq.py diff --git a/requests/packages/charade/euckrprober.py b/requests/packages/chardet/euckrprober.py index def3e42..5982a46 100644 --- a/requests/packages/charade/euckrprober.py +++ b/requests/packages/chardet/euckrprober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import EUCKRDistributionAnalysis
 -from .mbcssm import EUCKRSMModel
 -
 -
 -class EUCKRProber(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
 -        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
 -        self.reset()
 -
 -    def get_charset_name(self):
 -        return "EUC-KR"
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import EUCKRSMModel + + +class EUCKRProber(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(EUCKRSMModel) +        self._mDistributionAnalyzer = EUCKRDistributionAnalysis() +        self.reset() + +    def get_charset_name(self): +        return "EUC-KR" diff --git a/requests/packages/charade/euctwfreq.py b/requests/packages/chardet/euctwfreq.py index 576e750..576e750 100644 --- a/requests/packages/charade/euctwfreq.py +++ b/requests/packages/chardet/euctwfreq.py diff --git a/requests/packages/charade/euctwprober.py 
b/requests/packages/chardet/euctwprober.py index e601adf..fe652fe 100644 --- a/requests/packages/charade/euctwprober.py +++ b/requests/packages/chardet/euctwprober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -# 
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -# 
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import EUCTWDistributionAnalysis
 -from .mbcssm import EUCTWSMModel
 -
 -class EUCTWProber(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
 -        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
 -        self.reset()
 -
 -    def get_charset_name(self):
 -        return "EUC-TW"
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +#  +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +#  +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCTWDistributionAnalysis +from .mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(EUCTWSMModel) +        self._mDistributionAnalyzer = EUCTWDistributionAnalysis() +        self.reset() + +    def get_charset_name(self): +        return "EUC-TW" diff --git a/requests/packages/charade/gb2312freq.py b/requests/packages/chardet/gb2312freq.py index 1238f51..1238f51 100644 --- a/requests/packages/charade/gb2312freq.py +++ b/requests/packages/chardet/gb2312freq.py diff --git a/requests/packages/charade/gb2312prober.py 
b/requests/packages/chardet/gb2312prober.py index 643fe25..0325a2d 100644 --- a/requests/packages/charade/gb2312prober.py +++ b/requests/packages/chardet/gb2312prober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -# 
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -# 
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import GB2312DistributionAnalysis
 -from .mbcssm import GB2312SMModel
 -
 -class GB2312Prober(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(GB2312SMModel)
 -        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
 -        self.reset()
 -
 -    def get_charset_name(self):
 -        return "GB2312"
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +#  +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +#  +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import GB2312DistributionAnalysis +from .mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(GB2312SMModel) +        self._mDistributionAnalyzer = GB2312DistributionAnalysis() +        self.reset() + +    def get_charset_name(self): +        return "GB2312" diff --git a/requests/packages/charade/hebrewprober.py b/requests/packages/chardet/hebrewprober.py index 90d171f..ba225c5 100644 --- a/requests/packages/charade/hebrewprober.py +++ b/requests/packages/chardet/hebrewprober.py @@ -1,283 +1,283 @@ -######################## BEGIN 
LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -#          Shy Shalom
 -# Portions created by the Initial Developer are Copyright (C) 2005
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .charsetprober import CharSetProber
 -from .constants import eNotMe, eDetecting
 -from .compat import wrap_ord
 -
 -# This prober doesn't actually recognize a language or a charset.
 -# It is a helper prober for the use of the Hebrew model probers
 -
 -### General ideas of the Hebrew charset recognition ###
 -#
 -# Four main charsets exist in Hebrew:
 -# "ISO-8859-8" - Visual Hebrew
 -# "windows-1255" - Logical Hebrew
 -# "ISO-8859-8-I" - Logical Hebrew
 -# "x-mac-hebrew" - ?? Logical Hebrew ??
 -#
 -# Both "ISO" charsets use a completely identical set of code points, whereas
 -# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
 -# these code points. windows-1255 defines additional characters in the range
 -# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
 -# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
 -# x-mac-hebrew defines similar additional code points but with a different
 -# mapping.
 -#
 -# As far as an average Hebrew text with no diacritics is concerned, all four
 -# charsets are identical with respect to code points. Meaning that for the
 -# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
 -# (including final letters).
 -#
 -# The dominant difference between these charsets is their directionality.
 -# "Visual" directionality means that the text is ordered as if the renderer is
 -# not aware of a BIDI rendering algorithm. The renderer sees the text and
 -# draws it from left to right. The text itself when ordered naturally is read
 -# backwards. A buffer of Visual Hebrew generally looks like so:
 -# "[last word of first line spelled backwards] [whole line ordered backwards
 -# and spelled backwards] [first word of first line spelled backwards]
 -# [end of line] [last word of second line] ... etc' "
 -# adding punctuation marks, numbers and English text to visual text is
 -# naturally also "visual" and from left to right.
 -#
 -# "Logical" directionality means the text is ordered "naturally" according to
 -# the order it is read. It is the responsibility of the renderer to display
 -# the text from right to left. A BIDI algorithm is used to place general
 -# punctuation marks, numbers and English text in the text.
 -#
 -# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
 -# what little evidence I could find, it seems that its general directionality
 -# is Logical.
 -#
 -# To sum up all of the above, the Hebrew probing mechanism knows about two
 -# charsets:
 -# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
 -#    backwards while line order is natural. For charset recognition purposes
 -#    the line order is unimportant (In fact, for this implementation, even
 -#    word order is unimportant).
 -# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
 -#
 -# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
 -#    specifically identified.
 -# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
 -#    that contain special punctuation marks or diacritics is displayed with
 -#    some unconverted characters showing as question marks. This problem might
 -#    be corrected using another model prober for x-mac-hebrew. Due to the fact
 -#    that x-mac-hebrew texts are so rare, writing another model prober isn't
 -#    worth the effort and performance hit.
 -#
 -#### The Prober ####
 -#
 -# The prober is divided between two SBCharSetProbers and a HebrewProber,
 -# all of which are managed, created, fed data, inquired and deleted by the
 -# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
 -# fact some kind of Hebrew, Logical or Visual. The final decision about which
 -# one is it is made by the HebrewProber by combining final-letter scores
 -# with the scores of the two SBCharSetProbers to produce a final answer.
 -#
 -# The SBCSGroupProber is responsible for stripping the original text of HTML
 -# tags, English characters, numbers, low-ASCII punctuation characters, spaces
 -# and new lines. It reduces any sequence of such characters to a single space.
 -# The buffer fed to each prober in the SBCS group prober is pure text in
 -# high-ASCII.
 -# The two SBCharSetProbers (model probers) share the same language model:
 -# Win1255Model.
 -# The first SBCharSetProber uses the model normally as any other
 -# SBCharSetProber does, to recognize windows-1255, upon which this model was
 -# built. The second SBCharSetProber is told to make the pair-of-letter
 -# lookup in the language model backwards. This in practice exactly simulates
 -# a visual Hebrew model using the windows-1255 logical Hebrew model.
 -#
 -# The HebrewProber is not using any language model. All it does is look for
 -# final-letter evidence suggesting the text is either logical Hebrew or visual
 -# Hebrew. Disjointed from the model probers, the results of the HebrewProber
 -# alone are meaningless. HebrewProber always returns 0.00 as confidence
 -# since it never identifies a charset by itself. Instead, the pointer to the
 -# HebrewProber is passed to the model probers as a helper "Name Prober".
 -# When the Group prober receives a positive identification from any prober,
 -# it asks for the name of the charset identified. If the prober queried is a
 -# Hebrew model prober, the model prober forwards the call to the
 -# HebrewProber to make the final decision. In the HebrewProber, the
 -# decision is made according to the final-letters scores maintained and Both
 -# model probers scores. The answer is returned in the form of the name of the
 -# charset identified, either "windows-1255" or "ISO-8859-8".
 -
 -# windows-1255 / ISO-8859-8 code points of interest
 -FINAL_KAF = 0xea
 -NORMAL_KAF = 0xeb
 -FINAL_MEM = 0xed
 -NORMAL_MEM = 0xee
 -FINAL_NUN = 0xef
 -NORMAL_NUN = 0xf0
 -FINAL_PE = 0xf3
 -NORMAL_PE = 0xf4
 -FINAL_TSADI = 0xf5
 -NORMAL_TSADI = 0xf6
 -
 -# Minimum Visual vs Logical final letter score difference.
 -# If the difference is below this, don't rely solely on the final letter score
 -# distance.
 -MIN_FINAL_CHAR_DISTANCE = 5
 -
 -# Minimum Visual vs Logical model score difference.
 -# If the difference is below this, don't rely at all on the model score
 -# distance.
 -MIN_MODEL_DISTANCE = 0.01
 -
 -VISUAL_HEBREW_NAME = "ISO-8859-8"
 -LOGICAL_HEBREW_NAME = "windows-1255"
 -
 -
 -class HebrewProber(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self._mLogicalProber = None
 -        self._mVisualProber = None
 -        self.reset()
 -
 -    def reset(self):
 -        self._mFinalCharLogicalScore = 0
 -        self._mFinalCharVisualScore = 0
 -        # The two last characters seen in the previous buffer,
 -        # mPrev and mBeforePrev are initialized to space in order to simulate
 -        # a word delimiter at the beginning of the data
 -        self._mPrev = ' '
 -        self._mBeforePrev = ' '
 -        # These probers are owned by the group prober.
 -
 -    def set_model_probers(self, logicalProber, visualProber):
 -        self._mLogicalProber = logicalProber
 -        self._mVisualProber = visualProber
 -
 -    def is_final(self, c):
 -        return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
 -                               FINAL_TSADI]
 -
 -    def is_non_final(self, c):
 -        # The normal Tsadi is not a good Non-Final letter due to words like
 -        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
 -        # apostrophe is converted to a space in FilterWithoutEnglishLetters
 -        # causing the Non-Final tsadi to appear at an end of a word even
 -        # though this is not the case in the original text.
 -        # The letters Pe and Kaf rarely display a related behavior of not being
 -        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
 -        # for example legally end with a Non-Final Pe or Kaf. However, the
 -        # benefit of these letters as Non-Final letters outweighs the damage
 -        # since these words are quite rare.
 -        return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
 -
 -    def feed(self, aBuf):
 -        # Final letter analysis for logical-visual decision.
 -        # Look for evidence that the received buffer is either logical Hebrew
 -        # or visual Hebrew.
 -        # The following cases are checked:
 -        # 1) A word longer than 1 letter, ending with a final letter. This is
 -        #    an indication that the text is laid out "naturally" since the
 -        #    final letter really appears at the end. +1 for logical score.
 -        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
 -        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
 -        #    should not end with the Non-Final form of that letter. Exceptions
 -        #    to this rule are mentioned above in isNonFinal(). This is an
 -        #    indication that the text is laid out backwards. +1 for visual
 -        #    score
 -        # 3) A word longer than 1 letter, starting with a final letter. Final
 -        #    letters should not appear at the beginning of a word. This is an
 -        #    indication that the text is laid out backwards. +1 for visual
 -        #    score.
 -        #
 -        # The visual score and logical score are accumulated throughout the
 -        # text and are finally checked against each other in GetCharSetName().
 -        # No checking for final letters in the middle of words is done since
 -        # that case is not an indication for either Logical or Visual text.
 -        #
 -        # We automatically filter out all 7-bit characters (replace them with
 -        # spaces) so the word boundary detection works properly. [MAP]
 -
 -        if self.get_state() == eNotMe:
 -            # Both model probers say it's not them. No reason to continue.
 -            return eNotMe
 -
 -        aBuf = self.filter_high_bit_only(aBuf)
 -
 -        for cur in aBuf:
 -            if cur == ' ':
 -                # We stand on a space - a word just ended
 -                if self._mBeforePrev != ' ':
 -                    # next-to-last char was not a space so self._mPrev is not a
 -                    # 1 letter word
 -                    if self.is_final(self._mPrev):
 -                        # case (1) [-2:not space][-1:final letter][cur:space]
 -                        self._mFinalCharLogicalScore += 1
 -                    elif self.is_non_final(self._mPrev):
 -                        # case (2) [-2:not space][-1:Non-Final letter][
 -                        #  cur:space]
 -                        self._mFinalCharVisualScore += 1
 -            else:
 -                # Not standing on a space
 -                if ((self._mBeforePrev == ' ') and
 -                        (self.is_final(self._mPrev)) and (cur != ' ')):
 -                    # case (3) [-2:space][-1:final letter][cur:not space]
 -                    self._mFinalCharVisualScore += 1
 -            self._mBeforePrev = self._mPrev
 -            self._mPrev = cur
 -
 -        # Forever detecting, till the end or until both model probers return
 -        # eNotMe (handled above)
 -        return eDetecting
 -
 -    def get_charset_name(self):
 -        # Make the decision: is it Logical or Visual?
 -        # If the final letter score distance is dominant enough, rely on it.
 -        finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
 -        if finalsub >= MIN_FINAL_CHAR_DISTANCE:
 -            return LOGICAL_HEBREW_NAME
 -        if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
 -            return VISUAL_HEBREW_NAME
 -
 -        # It's not dominant enough, try to rely on the model scores instead.
 -        modelsub = (self._mLogicalProber.get_confidence()
 -                    - self._mVisualProber.get_confidence())
 -        if modelsub > MIN_MODEL_DISTANCE:
 -            return LOGICAL_HEBREW_NAME
 -        if modelsub < -MIN_MODEL_DISTANCE:
 -            return VISUAL_HEBREW_NAME
 -
 -        # Still no good, back to final letter distance, maybe it'll save the
 -        # day.
 -        if finalsub < 0.0:
 -            return VISUAL_HEBREW_NAME
 -
 -        # (finalsub > 0 - Logical) or (don't know what to do) default to
 -        # Logical.
 -        return LOGICAL_HEBREW_NAME
 -
 -    def get_state(self):
 -        # Remain active as long as any of the model probers are active.
 -        if (self._mLogicalProber.get_state() == eNotMe) and \
 -           (self._mVisualProber.get_state() == eNotMe):
 -            return eNotMe
 -        return eDetecting
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +#          Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe, eDetecting +from .compat import wrap_ord + +# This prober doesn't actually recognize a language or a charset. +# It is a helper prober for the use of the Hebrew model probers + +### General ideas of the Hebrew charset recognition ### +# +# Four main charsets exist in Hebrew: +# "ISO-8859-8" - Visual Hebrew +# "windows-1255" - Logical Hebrew +# "ISO-8859-8-I" - Logical Hebrew +# "x-mac-hebrew" - ?? Logical Hebrew ?? +# +# Both "ISO" charsets use a completely identical set of code points, whereas +# "windows-1255" and "x-mac-hebrew" are two different proper supersets of +# these code points. 
windows-1255 defines additional characters in the range +# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific +# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. +# x-mac-hebrew defines similar additional code points but with a different +# mapping. +# +# As far as an average Hebrew text with no diacritics is concerned, all four +# charsets are identical with respect to code points. Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). +# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +#    backwards while line order is natural. 
For charset recognition purposes +#    the line order is unimportant (In fact, for this implementation, even +#    word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +#    specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +#    that contain special punctuation marks or diacritics is displayed with +#    some unconverted characters showing as question marks. This problem might +#    be corrected using another model prober for x-mac-hebrew. Due to the fact +#    that x-mac-hebrew texts are so rare, writing another model prober isn't +#    worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. 
This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +# windows-1255 / ISO-8859-8 code points of interest +FINAL_KAF = 0xea +NORMAL_KAF = 0xeb +FINAL_MEM = 0xed +NORMAL_MEM = 0xee +FINAL_NUN = 0xef +NORMAL_NUN = 0xf0 +FINAL_PE = 0xf3 +NORMAL_PE = 0xf4 +FINAL_TSADI = 0xf5 +NORMAL_TSADI = 0xf6 + +# Minimum Visual vs Logical final letter score difference. +# If the difference is below this, don't rely solely on the final letter score +# distance. +MIN_FINAL_CHAR_DISTANCE = 5 + +# Minimum Visual vs Logical model score difference. +# If the difference is below this, don't rely at all on the model score +# distance. 
+MIN_MODEL_DISTANCE = 0.01 + +VISUAL_HEBREW_NAME = "ISO-8859-8" +LOGICAL_HEBREW_NAME = "windows-1255" + + +class HebrewProber(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self._mLogicalProber = None +        self._mVisualProber = None +        self.reset() + +    def reset(self): +        self._mFinalCharLogicalScore = 0 +        self._mFinalCharVisualScore = 0 +        # The two last characters seen in the previous buffer, +        # mPrev and mBeforePrev are initialized to space in order to simulate +        # a word delimiter at the beginning of the data +        self._mPrev = ' ' +        self._mBeforePrev = ' ' +        # These probers are owned by the group prober. + +    def set_model_probers(self, logicalProber, visualProber): +        self._mLogicalProber = logicalProber +        self._mVisualProber = visualProber + +    def is_final(self, c): +        return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, +                               FINAL_TSADI] + +    def is_non_final(self, c): +        # The normal Tsadi is not a good Non-Final letter due to words like +        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This +        # apostrophe is converted to a space in FilterWithoutEnglishLetters +        # causing the Non-Final tsadi to appear at an end of a word even +        # though this is not the case in the original text. +        # The letters Pe and Kaf rarely display a related behavior of not being +        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' +        # for example legally end with a Non-Final Pe or Kaf. However, the +        # benefit of these letters as Non-Final letters outweighs the damage +        # since these words are quite rare. +        return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + +    def feed(self, aBuf): +        # Final letter analysis for logical-visual decision. 
+        # Look for evidence that the received buffer is either logical Hebrew +        # or visual Hebrew. +        # The following cases are checked: +        # 1) A word longer than 1 letter, ending with a final letter. This is +        #    an indication that the text is laid out "naturally" since the +        #    final letter really appears at the end. +1 for logical score. +        # 2) A word longer than 1 letter, ending with a Non-Final letter. In +        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, +        #    should not end with the Non-Final form of that letter. Exceptions +        #    to this rule are mentioned above in isNonFinal(). This is an +        #    indication that the text is laid out backwards. +1 for visual +        #    score +        # 3) A word longer than 1 letter, starting with a final letter. Final +        #    letters should not appear at the beginning of a word. This is an +        #    indication that the text is laid out backwards. +1 for visual +        #    score. +        # +        # The visual score and logical score are accumulated throughout the +        # text and are finally checked against each other in GetCharSetName(). +        # No checking for final letters in the middle of words is done since +        # that case is not an indication for either Logical or Visual text. +        # +        # We automatically filter out all 7-bit characters (replace them with +        # spaces) so the word boundary detection works properly. [MAP] + +        if self.get_state() == eNotMe: +            # Both model probers say it's not them. No reason to continue. 
+            return eNotMe + +        aBuf = self.filter_high_bit_only(aBuf) + +        for cur in aBuf: +            if cur == ' ': +                # We stand on a space - a word just ended +                if self._mBeforePrev != ' ': +                    # next-to-last char was not a space so self._mPrev is not a +                    # 1 letter word +                    if self.is_final(self._mPrev): +                        # case (1) [-2:not space][-1:final letter][cur:space] +                        self._mFinalCharLogicalScore += 1 +                    elif self.is_non_final(self._mPrev): +                        # case (2) [-2:not space][-1:Non-Final letter][ +                        #  cur:space] +                        self._mFinalCharVisualScore += 1 +            else: +                # Not standing on a space +                if ((self._mBeforePrev == ' ') and +                        (self.is_final(self._mPrev)) and (cur != ' ')): +                    # case (3) [-2:space][-1:final letter][cur:not space] +                    self._mFinalCharVisualScore += 1 +            self._mBeforePrev = self._mPrev +            self._mPrev = cur + +        # Forever detecting, till the end or until both model probers return +        # eNotMe (handled above) +        return eDetecting + +    def get_charset_name(self): +        # Make the decision: is it Logical or Visual? +        # If the final letter score distance is dominant enough, rely on it. +        finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore +        if finalsub >= MIN_FINAL_CHAR_DISTANCE: +            return LOGICAL_HEBREW_NAME +        if finalsub <= -MIN_FINAL_CHAR_DISTANCE: +            return VISUAL_HEBREW_NAME + +        # It's not dominant enough, try to rely on the model scores instead. 
+        modelsub = (self._mLogicalProber.get_confidence() +                    - self._mVisualProber.get_confidence()) +        if modelsub > MIN_MODEL_DISTANCE: +            return LOGICAL_HEBREW_NAME +        if modelsub < -MIN_MODEL_DISTANCE: +            return VISUAL_HEBREW_NAME + +        # Still no good, back to final letter distance, maybe it'll save the +        # day. +        if finalsub < 0.0: +            return VISUAL_HEBREW_NAME + +        # (finalsub > 0 - Logical) or (don't know what to do) default to +        # Logical. +        return LOGICAL_HEBREW_NAME + +    def get_state(self): +        # Remain active as long as any of the model probers are active. +        if (self._mLogicalProber.get_state() == eNotMe) and \ +           (self._mVisualProber.get_state() == eNotMe): +            return eNotMe +        return eDetecting diff --git a/requests/packages/charade/jisfreq.py b/requests/packages/chardet/jisfreq.py index 064345b..064345b 100644 --- a/requests/packages/charade/jisfreq.py +++ b/requests/packages/chardet/jisfreq.py diff --git a/requests/packages/charade/jpcntx.py b/requests/packages/chardet/jpcntx.py index b4e6af4..f7f69ba 100644 --- a/requests/packages/charade/jpcntx.py +++ b/requests/packages/chardet/jpcntx.py @@ -1,219 +1,219 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .compat import wrap_ord
 -
 -NUM_OF_CATEGORY = 6
 -DONT_KNOW = -1
 -ENOUGH_REL_THRESHOLD = 100
 -MAX_REL_THRESHOLD = 1000
 -MINIMUM_DATA_THRESHOLD = 4
 -
 -# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
 -jp2CharContext = (
 -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
 -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
 -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
 -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
 -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
 -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
 -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
 -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
 -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
 -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
 -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
 -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
 -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
 -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
 -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
 -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
 -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
 -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
 -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
 -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
 -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
 -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
 -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
 -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
 -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
 -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
 -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
 -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
 -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
 -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
 -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
 -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
 -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
 -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
 -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
 -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
 -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
 -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
 -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
 -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
 -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
 -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
 -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
 -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
 -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
 -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
 -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
 -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
 -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
 -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
 -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
 -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
 -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
 -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
 -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
 -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
 -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
 -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
 -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
 -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
 -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
 -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
 -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
 -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
 -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
 -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
 -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
 -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
 -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
 -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
 -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
 -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
 -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
 -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
 -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
 -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
 -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
 -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
 -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
 -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
 -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
 -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
 -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
 -)
 -
 -class JapaneseContextAnalysis:
 -    def __init__(self):
 -        self.reset()
 -
 -    def reset(self):
 -        self._mTotalRel = 0  # total sequence received
 -        # category counters, each interger counts sequence in its category
 -        self._mRelSample = [0] * NUM_OF_CATEGORY
 -        # if last byte in current buffer is not the last byte of a character,
 -        # we need to know how many bytes to skip in next buffer
 -        self._mNeedToSkipCharNum = 0
 -        self._mLastCharOrder = -1  # The order of previous char
 -        # If this flag is set to True, detection is done and conclusion has
 -        # been made
 -        self._mDone = False
 -
 -    def feed(self, aBuf, aLen):
 -        if self._mDone:
 -            return
 -
 -        # The buffer we got is byte oriented, and a character may span in more than one
 -        # buffers. In case the last one or two byte in last buffer is not
 -        # complete, we record how many byte needed to complete that character
 -        # and skip these bytes here.  We can choose to record those bytes as
 -        # well and analyse the character once it is complete, but since a
 -        # character will not make much difference, by simply skipping
 -        # this character will simply our logic and improve performance.
 -        i = self._mNeedToSkipCharNum
 -        while i < aLen:
 -            order, charLen = self.get_order(aBuf[i:i + 2])
 -            i += charLen
 -            if i > aLen:
 -                self._mNeedToSkipCharNum = i - aLen
 -                self._mLastCharOrder = -1
 -            else:
 -                if (order != -1) and (self._mLastCharOrder != -1):
 -                    self._mTotalRel += 1
 -                    if self._mTotalRel > MAX_REL_THRESHOLD:
 -                        self._mDone = True
 -                        break
 -                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
 -                self._mLastCharOrder = order
 -
 -    def got_enough_data(self):
 -        return self._mTotalRel > ENOUGH_REL_THRESHOLD
 -
 -    def get_confidence(self):
 -        # This is just one way to calculate confidence. It works well for me.
 -        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
 -            return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
 -        else:
 -            return DONT_KNOW
 -
 -    def get_order(self, aBuf):
 -        return -1, 1
 -
 -class SJISContextAnalysis(JapaneseContextAnalysis):
 -    def get_order(self, aBuf):
 -        if not aBuf:
 -            return -1, 1
 -        # find out current char's byte length
 -        first_char = wrap_ord(aBuf[0])
 -        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
 -            charLen = 2
 -        else:
 -            charLen = 1
 -
 -        # return its order if it is hiragana
 -        if len(aBuf) > 1:
 -            second_char = wrap_ord(aBuf[1])
 -            if (first_char == 202) and (0x9F <= second_char <= 0xF1):
 -                return second_char - 0x9F, charLen
 -
 -        return -1, charLen
 -
 -class EUCJPContextAnalysis(JapaneseContextAnalysis):
 -    def get_order(self, aBuf):
 -        if not aBuf:
 -            return -1, 1
 -        # find out current char's byte length
 -        first_char = wrap_ord(aBuf[0])
 -        if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
 -            charLen = 2
 -        elif first_char == 0x8F:
 -            charLen = 3
 -        else:
 -            charLen = 1
 -
 -        # return its order if it is hiragana
 -        if len(aBuf) > 1:
 -            second_char = wrap_ord(aBuf[1])
 -            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
 -                return second_char - 0xA1, charLen
 -
 -        return -1, charLen
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .compat import wrap_ord + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), 
+(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), 
+(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), 
+(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), +(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), 
+(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), 
+(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), 
+(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), 
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), +(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), +(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), 
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis: +    def __init__(self): +        self.reset() + +    def reset(self): +        self._mTotalRel = 0  # total sequence received +        # category counters, each interger counts sequence in its category +        self._mRelSample = [0] * NUM_OF_CATEGORY +        # if last byte in current buffer is not the last byte of a character, +        # we need to know how many bytes to skip in next buffer +        self._mNeedToSkipCharNum = 0 +        self._mLastCharOrder = -1  # The order of previous char +        # If this flag is set to True, detection is done and conclusion has +        # been made +        self._mDone = False + +    def feed(self, aBuf, aLen): +        if self._mDone: +            return + +        # The buffer we got is byte oriented, and a character may span in more than one +        # buffers. In case the last one or two byte in last buffer is not +        # complete, we record how many byte needed to complete that character +        # and skip these bytes here.  We can choose to record those bytes as +        # well and analyse the character once it is complete, but since a +        # character will not make much difference, by simply skipping +        # this character will simply our logic and improve performance. 
+        i = self._mNeedToSkipCharNum +        while i < aLen: +            order, charLen = self.get_order(aBuf[i:i + 2]) +            i += charLen +            if i > aLen: +                self._mNeedToSkipCharNum = i - aLen +                self._mLastCharOrder = -1 +            else: +                if (order != -1) and (self._mLastCharOrder != -1): +                    self._mTotalRel += 1 +                    if self._mTotalRel > MAX_REL_THRESHOLD: +                        self._mDone = True +                        break +                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 +                self._mLastCharOrder = order + +    def got_enough_data(self): +        return self._mTotalRel > ENOUGH_REL_THRESHOLD + +    def get_confidence(self): +        # This is just one way to calculate confidence. It works well for me. +        if self._mTotalRel > MINIMUM_DATA_THRESHOLD: +            return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel +        else: +            return DONT_KNOW + +    def get_order(self, aBuf): +        return -1, 1 + +class SJISContextAnalysis(JapaneseContextAnalysis): +    def get_order(self, aBuf): +        if not aBuf: +            return -1, 1 +        # find out current char's byte length +        first_char = wrap_ord(aBuf[0]) +        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): +            charLen = 2 +        else: +            charLen = 1 + +        # return its order if it is hiragana +        if len(aBuf) > 1: +            second_char = wrap_ord(aBuf[1]) +            if (first_char == 202) and (0x9F <= second_char <= 0xF1): +                return second_char - 0x9F, charLen + +        return -1, charLen + +class EUCJPContextAnalysis(JapaneseContextAnalysis): +    def get_order(self, aBuf): +        if not aBuf: +            return -1, 1 +        # find out current char's byte length +        first_char = wrap_ord(aBuf[0]) +        if (first_char == 0x8E) or 
(0xA1 <= first_char <= 0xFE): +            charLen = 2 +        elif first_char == 0x8F: +            charLen = 3 +        else: +            charLen = 1 + +        # return its order if it is hiragana +        if len(aBuf) > 1: +            second_char = wrap_ord(aBuf[1]) +            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): +                return second_char - 0xA1, charLen + +        return -1, charLen + +# flake8: noqa diff --git a/requests/packages/charade/langbulgarianmodel.py b/requests/packages/chardet/langbulgarianmodel.py index ea5a60b..e5788fc 100644 --- a/requests/packages/charade/langbulgarianmodel.py +++ b/requests/packages/chardet/langbulgarianmodel.py @@ -1,229 +1,229 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# 255: Control characters that usually does not exist in any text
 -# 254: Carriage/Return
 -# 253: symbol (punctuation) that does not belong to word
 -# 252: 0 - 9
 -
 -# Character Mapping Table:
 -# this table is modified base on win1251BulgarianCharToOrderMap, so
 -# only number <64 is sure valid
 -
 -Latin5_BulgarianCharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40
 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50
 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60
 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70
 -194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,  # 80
 -210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,  # 90
 - 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238,  # a0
 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # b0
 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56,  # c0
 -  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # d0
 -  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16,  # e0
 - 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  # f0
 -)
 -
 -win1251BulgarianCharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40
 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50
 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60
 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70
 -206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220,  # 80
 -221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229,  # 90
 - 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240,  # a0
 - 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250,  # b0
 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # c0
 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56,  # d0
 -  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # e0
 -  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16,  # f0
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 96.9392%
 -# first 1024 sequences:3.0618%
 -# rest  sequences:     0.2992%
 -# negative sequences:  0.0020%
 -BulgarianLangModel = (
 -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
 -3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
 -0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
 -0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
 -0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
 -1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
 -0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
 -2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
 -3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
 -3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
 -1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
 -3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
 -1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
 -2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
 -2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
 -3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
 -1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
 -2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
 -2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
 -3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
 -1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
 -2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
 -2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
 -2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
 -1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
 -2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
 -1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
 -3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
 -1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
 -3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
 -1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
 -2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
 -1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
 -2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
 -1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
 -2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
 -1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
 -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
 -1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
 -1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
 -2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
 -1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
 -2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
 -1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
 -0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
 -1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
 -2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
 -1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
 -1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
 -0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
 -0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
 -0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
 -1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
 -0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
 -0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
 -1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
 -1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -)
 -
 -Latin5BulgarianModel = {
 -  'charToOrderMap': Latin5_BulgarianCharToOrderMap,
 -  'precedenceMatrix': BulgarianLangModel,
 -  'mTypicalPositiveRatio': 0.969392,
 -  'keepEnglishLetter': False,
 -  'charsetName': "ISO-8859-5"
 -}
 -
 -Win1251BulgarianModel = {
 -  'charToOrderMap': win1251BulgarianCharToOrderMap,
 -  'precedenceMatrix': BulgarianLangModel,
 -  'mTypicalPositiveRatio': 0.969392,
 -  'keepEnglishLetter': False,
 -  'charsetName': "windows-1251"
 -}
 -
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +# this table is modified base on win1251BulgarianCharToOrderMap, so +# only number <64 is sure valid + +Latin5_BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50 +253, 65, 69, 70, 66, 63, 
68,112,103, 92,194,104, 95, 86, 87, 71,  # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70 +194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,  # 80 +210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,  # 90 + 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238,  # a0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # b0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56,  # c0 +  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # d0 +  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16,  # e0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  # f0 +) + +win1251BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70 +206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220,  # 80 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229,  # 90 + 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240,  # a0 + 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250,  # b0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # c0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56,  # d0 +  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # e0 +  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16,  # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 96.9392% +# first 1024 sequences:3.0618% 
+# rest  sequences:     0.2992% +# negative sequences:  0.0020% +BulgarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, +3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, +0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, +0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, 
+3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, 
+3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, 
+2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { +  'charToOrderMap': Latin5_BulgarianCharToOrderMap, +  'precedenceMatrix': BulgarianLangModel, +  'mTypicalPositiveRatio': 0.969392, +  'keepEnglishLetter': False, +  'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { +  'charToOrderMap': win1251BulgarianCharToOrderMap, +  'precedenceMatrix': BulgarianLangModel, +  'mTypicalPositiveRatio': 0.969392, +  'keepEnglishLetter': False, +  'charsetName': "windows-1251" +} + + +# flake8: noqa diff --git a/requests/packages/charade/langcyrillicmodel.py b/requests/packages/chardet/langcyrillicmodel.py index 15e338f..a86f54b 100644 --- a/requests/packages/charade/langcyrillicmodel.py +++ b/requests/packages/chardet/langcyrillicmodel.py @@ -1,329 +1,329 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# KOI8-R language model
 -# Character Mapping Table:
 -KOI8R_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,  # 80
 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,  # 90
 -223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237,  # a0
 -238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,  # b0
 - 27,  3, 21, 28, 13,  2, 39, 19, 26,  4, 23, 11,  8, 12,  5,  1,  # c0
 - 15, 16,  9,  7,  6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54,  # d0
 - 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34,  # e0
 - 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  # f0
 -)
 -
 -win1251_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
 -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
 -239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
 - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
 -  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
 -  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
 -)
 -
 -latin5_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
 -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
 - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
 -  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
 -  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
 -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
 -)
 -
 -macCyrillic_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
 - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
 -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
 -239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
 -  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
 -  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
 -)
 -
 -IBM855_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 -191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
 -206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
 -  3, 37, 21, 44, 28, 58, 13, 41,  2, 48, 39, 53, 19, 46,218,219,
 -220,221,222,223,224, 26, 55,  4, 42,225,226,227,228, 23, 60,229,
 -230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
 -  8, 49, 12, 38,  5, 31,  1, 34, 15,244,245,246,247, 35, 16,248,
 - 43,  9, 45,  7, 32,  6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
 -250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
 -)
 -
 -IBM866_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
 - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
 -  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
 -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
 -  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
 -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 97.6601%
 -# first 1024 sequences: 2.3389%
 -# rest  sequences:      0.1237%
 -# negative sequences:   0.0009%
 -RussianLangModel = (
 -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
 -3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
 -0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
 -0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
 -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
 -1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
 -1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
 -2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
 -1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
 -3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
 -1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
 -2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
 -1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
 -1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
 -1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
 -2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
 -1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
 -3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
 -1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
 -2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
 -1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
 -2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
 -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
 -1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
 -1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
 -1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
 -3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
 -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
 -3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
 -1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
 -1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
 -0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
 -2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
 -1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
 -1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
 -0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
 -1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
 -2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
 -2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
 -1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
 -1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
 -2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
 -1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
 -0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
 -2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
 -1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
 -1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
 -0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
 -0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
 -0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
 -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
 -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
 -1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
 -0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
 -0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
 -)
 -
 -Koi8rModel = {
 -  'charToOrderMap': KOI8R_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "KOI8-R"
 -}
 -
 -Win1251CyrillicModel = {
 -  'charToOrderMap': win1251_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "windows-1251"
 -}
 -
 -Latin5CyrillicModel = {
 -  'charToOrderMap': latin5_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "ISO-8859-5"
 -}
 -
 -MacCyrillicModel = {
 -  'charToOrderMap': macCyrillic_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "MacCyrillic"
 -};
 -
 -Ibm866Model = {
 -  'charToOrderMap': IBM866_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "IBM866"
 -}
 -
 -Ibm855Model = {
 -  'charToOrderMap': IBM855_CharToOrderMap,
 -  'precedenceMatrix': RussianLangModel,
 -  'mTypicalPositiveRatio': 0.976601,
 -  'keepEnglishLetter': False,
 -  'charsetName': "IBM855"
 -}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,  # 80 
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,  # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237,  # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,  # b0 + 27,  3, 21, 28, 13,  2, 39, 19, 26,  4, 23, 11,  8, 12,  5,  1,  # c0 + 15, 16,  9,  7,  6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54,  # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34,  # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  # f0 +) + +win1251_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15, +  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15, +  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, +  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15, +  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( 
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, +  3, 37, 21, 44, 28, 58, 13, 41,  2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55,  4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, +  8, 49, 12, 38,  5, 31,  1, 34, 15,244,245,246,247, 35, 16,248, + 43,  9, 45,  7, 32,  6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, 
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest  sequences:      0.1237% +# negative sequences:   0.0009% +RussianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, 
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, 
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, 
+1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, 
+0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { +  'charToOrderMap': KOI8R_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  'keepEnglishLetter': False, +  'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { +  'charToOrderMap': win1251_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  'keepEnglishLetter': False, +  'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { +  'charToOrderMap': latin5_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  'keepEnglishLetter': False, +  'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { +  'charToOrderMap': macCyrillic_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  'keepEnglishLetter': False, +  'charsetName': "MacCyrillic" +}; + +Ibm866Model = { +  'charToOrderMap': IBM866_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  
'keepEnglishLetter': False, +  'charsetName': "IBM866" +} + +Ibm855Model = { +  'charToOrderMap': IBM855_CharToOrderMap, +  'precedenceMatrix': RussianLangModel, +  'mTypicalPositiveRatio': 0.976601, +  'keepEnglishLetter': False, +  'charsetName': "IBM855" +} + +# flake8: noqa diff --git a/requests/packages/charade/langgreekmodel.py b/requests/packages/chardet/langgreekmodel.py index 93241ce..ddb5837 100644 --- a/requests/packages/charade/langgreekmodel.py +++ b/requests/packages/chardet/langgreekmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# 255: Control characters that usually does not exist in any text
 -# 254: Carriage/Return
 -# 253: symbol (punctuation) that does not belong to word
 -# 252: 0 - 9
 -
 -# Character Mapping Table:
 -Latin7_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40
 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50
 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60
 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90
 -253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0
 -253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123,  # b0
 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0
 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0
 -124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0
 -  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
 -)
 -
 -win1253_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40
 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50
 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60
 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90
 -253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0
 -253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123,  # b0
 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0
 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0
 -124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0
 -  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 98.2851%
 -# first 1024 sequences:1.7001%
 -# rest  sequences:     0.0359%
 -# negative sequences:  0.0148%
 -GreekLangModel = (
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
 -3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
 -0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
 -2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
 -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
 -2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
 -2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
 -0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
 -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
 -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
 -3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
 -3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
 -2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
 -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
 -0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
 -0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
 -0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
 -0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
 -0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
 -0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
 -0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
 -0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
 -0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
 -0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
 -0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
 -0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
 -0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
 -0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
 -0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
 -0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
 -0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
 -0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
 -0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
 -0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
 -0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
 -0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
 -0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
 -0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
 -0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
 -0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
 -0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
 -0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
 -0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
 -0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
 -0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
 -0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
 -0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -)
 -
 -Latin7GreekModel = {
 -  'charToOrderMap': Latin7_CharToOrderMap,
 -  'precedenceMatrix': GreekLangModel,
 -  'mTypicalPositiveRatio': 0.982851,
 -  'keepEnglishLetter': False,
 -  'charsetName': "ISO-8859-7"
 -}
 -
 -Win1253GreekModel = {
 -  'charToOrderMap': win1253_CharToOrderMap,
 -  'precedenceMatrix': GreekLangModel,
 -  'mTypicalPositiveRatio': 0.982851,
 -  'keepEnglishLetter': False,
 -  'charsetName': "windows-1253"
 -}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  
# 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123,  # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0 +124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0 +  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0 +) + +win1253_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123,  # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0 +124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0 +  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest  sequences:     0.0359% +# negative sequences:  0.0148% +GreekLangModel = ( 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { +  'charToOrderMap': Latin7_CharToOrderMap, +  'precedenceMatrix': GreekLangModel, +  'mTypicalPositiveRatio': 0.982851, +  'keepEnglishLetter': False, +  'charsetName': "ISO-8859-7" +} + +Win1253GreekModel = { +  'charToOrderMap': win1253_CharToOrderMap, +  'precedenceMatrix': GreekLangModel, +  'mTypicalPositiveRatio': 0.982851, +  'keepEnglishLetter': False, +  'charsetName': "windows-1253" +} + +# flake8: noqa diff --git a/requests/packages/charade/langhebrewmodel.py b/requests/packages/chardet/langhebrewmodel.py index d871324..75f2bc7 100644 --- a/requests/packages/charade/langhebrewmodel.py +++ b/requests/packages/chardet/langhebrewmodel.py @@ -1,201 +1,201 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -#          Simon Montagu
 -# Portions created by the Initial Developer are Copyright (C) 2005
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#   Shoshannah Forbes - original C code (?)
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# 255: Control characters that usually does not exist in any text
 -# 254: Carriage/Return
 -# 253: symbol (punctuation) that does not belong to word
 -# 252: 0 - 9
 -
 -# Windows-1255 language model
 -# Character Mapping Table:
 -win1255_CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85,  # 40
 - 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253,  # 50
 -253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49,  # 60
 - 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253,  # 70
 -124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
 -215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
 - 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
 -106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
 - 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
 -238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
 -  9,  8, 20, 16,  3,  2, 24, 14, 22,  1, 25, 15,  4, 11,  6, 23,
 - 12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253,
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 98.4004%
 -# first 1024 sequences: 1.5981%
 -# rest  sequences:      0.087%
 -# negative sequences:   0.0015%
 -HebrewLangModel = (
 -0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
 -3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
 -1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
 -1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
 -1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
 -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
 -1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
 -0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
 -0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
 -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
 -3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
 -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
 -0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
 -0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
 -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
 -0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
 -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
 -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
 -0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
 -0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
 -0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
 -0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
 -3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
 -0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
 -0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
 -0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
 -1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
 -0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
 -3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
 -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
 -0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
 -0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
 -0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
 -0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
 -2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
 -0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
 -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
 -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
 -0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
 -1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
 -0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
 -2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
 -1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
 -2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
 -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
 -2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
 -0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
 -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
 -0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
 -)
 -
 -Win1255HebrewModel = {
 -  'charToOrderMap': win1255_CharToOrderMap,
 -  'precedenceMatrix': HebrewLangModel,
 -  'mTypicalPositiveRatio': 0.984004,
 -  'keepEnglishLetter': False,
 -  'charsetName': "windows-1255"
 -}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +#          Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +#   Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85,  # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253,  # 50 +253, 50, 74, 60, 61, 42, 76, 70, 
64, 53,105, 93, 56, 65, 54, 49,  # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253,  # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, +  9,  8, 20, 16,  3,  2, 24, 14, 22,  1, 25, 15,  4, 11,  6, 23, + 12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest  sequences:      0.087% +# negative sequences:   0.0015% +HebrewLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, 
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, 
+1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, 
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { +  'charToOrderMap': win1255_CharToOrderMap, +  'precedenceMatrix': HebrewLangModel, +  'mTypicalPositiveRatio': 0.984004, +  'keepEnglishLetter': False, +  'charsetName': "windows-1255" +} + +# flake8: noqa diff --git a/requests/packages/charade/langhungarianmodel.py b/requests/packages/chardet/langhungarianmodel.py index 6f59c61..49d2f0f 100644 --- a/requests/packages/charade/langhungarianmodel.py +++ b/requests/packages/chardet/langhungarianmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# 255: Control characters that usually does not exist in any text
 -# 254: Carriage/Return
 -# 253: symbol (punctuation) that does not belong to word
 -# 252: 0 - 9
 -
 -# Character Mapping Table:
 -Latin2_HungarianCharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
 - 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
 -253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
 - 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
 -159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
 -175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
 -191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
 - 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
 -221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
 -232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
 - 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
 -245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
 -)
 -
 -win1250HungarianCharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
 - 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
 -253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
 - 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
 -161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
 -177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
 -191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
 - 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
 -221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
 -232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
 - 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
 -245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 94.7368%
 -# first 1024 sequences:5.2623%
 -# rest  sequences:     0.8894%
 -# negative sequences:  0.0009%
 -HungarianLangModel = (
 -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 -3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
 -3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
 -3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
 -0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
 -0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
 -3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
 -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
 -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
 -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
 -3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
 -1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
 -1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
 -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
 -3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
 -2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
 -2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
 -2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
 -2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
 -2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
 -3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
 -2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
 -2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
 -2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
 -1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
 -1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
 -3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
 -1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
 -1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
 -2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
 -2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
 -2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
 -3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
 -2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
 -1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
 -1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
 -2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
 -2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
 -1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
 -1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
 -2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
 -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
 -1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
 -2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
 -2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
 -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
 -1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
 -1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
 -1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
 -0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
 -2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
 -2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
 -1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
 -2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
 -1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
 -1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
 -2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
 -2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
 -2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
 -1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
 -2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
 -0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
 -0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
 -0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
 -2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
 -)
 -
 -Latin2HungarianModel = {
 -  'charToOrderMap': Latin2_HungarianCharToOrderMap,
 -  'precedenceMatrix': HungarianLangModel,
 -  'mTypicalPositiveRatio': 0.947368,
 -  'keepEnglishLetter': True,
 -  'charsetName': "ISO-8859-2"
 -}
 -
 -Win1250HungarianModel = {
 -  'charToOrderMap': win1250HungarianCharToOrderMap,
 -  'precedenceMatrix': HungarianLangModel,
 -  'mTypicalPositiveRatio': 0.947368,
 -  'keepEnglishLetter': True,
 -  'charsetName': "windows-1250"
 -}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8, + 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, 
+159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8, + 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest  sequences:     0.8894% +# negative sequences:  0.0009% +HungarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, 
+3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, 
+1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, 
+0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { +  'charToOrderMap': Latin2_HungarianCharToOrderMap, +  'precedenceMatrix': HungarianLangModel, +  'mTypicalPositiveRatio': 0.947368, +  'keepEnglishLetter': True, +  'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { +  'charToOrderMap': win1250HungarianCharToOrderMap, +  'precedenceMatrix': HungarianLangModel, +  'mTypicalPositiveRatio': 0.947368, +  'keepEnglishLetter': True, +  'charsetName': "windows-1250" +} + +# flake8: noqa diff --git a/requests/packages/charade/langthaimodel.py b/requests/packages/chardet/langthaimodel.py index df343a7..0508b1b 100644 --- a/requests/packages/charade/langthaimodel.py +++ b/requests/packages/chardet/langthaimodel.py @@ -1,200 +1,200 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Communicator client code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -# 255: Control characters that usually does not exist in any text
 -# 254: Carriage/Return
 -# 253: symbol (punctuation) that does not belong to word
 -# 252: 0 - 9
 -
 -# The following result for thai was collected from a limited sample (1M).
 -
 -# Character Mapping Table:
 -TIS620CharToOrderMap = (
 -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
 -253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111,  # 40
 -188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253,  # 50
 -253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82,  # 60
 - 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253,  # 70
 -209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
 -223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
 -236,  5, 30,237, 24,238, 75,  8, 26, 52, 34, 51,119, 47, 58, 57,
 - 49, 53, 55, 43, 20, 19, 44, 14, 48,  3, 17, 25, 39, 62, 31, 54,
 - 45,  9, 16,  2, 61, 15,239, 12, 42, 46, 18, 21, 76,  4, 66, 63,
 - 22, 10,  1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
 - 11, 28, 41, 29, 33,245, 50, 37,  6,  7, 67, 77, 38, 93,246,247,
 - 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
 -)
 -
 -# Model Table:
 -# total sequences: 100%
 -# first 512 sequences: 92.6386%
 -# first 1024 sequences:7.3177%
 -# rest  sequences:     1.0230%
 -# negative sequences:  0.0436%
 -ThaiLangModel = (
 -0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
 -0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
 -3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
 -0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
 -3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
 -3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
 -3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
 -3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
 -3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
 -3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
 -3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
 -2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
 -3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
 -0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
 -0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
 -3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
 -1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
 -3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
 -3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
 -1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
 -0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
 -2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
 -0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
 -3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
 -2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
 -3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
 -0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
 -3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
 -3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
 -2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
 -3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
 -2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
 -3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
 -3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
 -3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
 -3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
 -3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
 -1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
 -0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 -3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
 -0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
 -3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
 -3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
 -1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
 -3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
 -3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
 -0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
 -0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
 -1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
 -1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
 -3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
 -0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
 -0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
 -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
 -3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
 -3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
 -0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
 -0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
 -0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
 -0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
 -0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
 -0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
 -0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
 -3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
 -0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
 -0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
 -3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
 -2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
 -0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
 -3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
 -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
 -1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
 -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
 -1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
 -1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
 -1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 -)
 -
 -TIS620ThaiModel = {
 -  'charToOrderMap': TIS620CharToOrderMap,
 -  'precedenceMatrix': ThaiLangModel,
 -  'mTypicalPositiveRatio': 0.926386,
 -  'keepEnglishLetter': False,
 -  'charsetName': "TIS-620"
 -}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). 
+ +# Character Mapping Table: +TIS620CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111,  # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253,  # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82,  # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253,  # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236,  5, 30,237, 24,238, 75,  8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48,  3, 17, 25, 39, 62, 31, 54, + 45,  9, 16,  2, 61, 15,239, 12, 42, 46, 18, 21, 76,  4, 66, 63, + 22, 10,  1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37,  6,  7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest  sequences:     1.0230% +# negative sequences:  0.0436% +ThaiLangModel = ( +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, 
+3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, 
+3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, 
+3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { +  'charToOrderMap': TIS620CharToOrderMap, +  
'precedenceMatrix': ThaiLangModel, +  'mTypicalPositiveRatio': 0.926386, +  'keepEnglishLetter': False, +  'charsetName': "TIS-620" +} + +# flake8: noqa diff --git a/requests/packages/charade/latin1prober.py b/requests/packages/chardet/latin1prober.py index bebe1bc..ad695f5 100644 --- a/requests/packages/charade/latin1prober.py +++ b/requests/packages/chardet/latin1prober.py @@ -1,139 +1,139 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .charsetprober import CharSetProber
 -from .constants import eNotMe
 -from .compat import wrap_ord
 -
 -FREQ_CAT_NUM = 4
 -
 -UDF = 0  # undefined
 -OTH = 1  # other
 -ASC = 2  # ascii capital letter
 -ASS = 3  # ascii small letter
 -ACV = 4  # accent capital vowel
 -ACO = 5  # accent capital other
 -ASV = 6  # accent small vowel
 -ASO = 7  # accent small other
 -CLASS_NUM = 8  # total classes
 -
 -Latin1_CharToClass = (
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
 -    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
 -    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
 -    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
 -    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
 -    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
 -    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
 -    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
 -    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
 -    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
 -    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
 -    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
 -    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
 -    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
 -    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
 -    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
 -    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
 -    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
 -    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
 -    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
 -    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
 -    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
 -)
 -
 -# 0 : illegal
 -# 1 : very unlikely
 -# 2 : normal
 -# 3 : very likely
 -Latin1ClassModel = (
 -    # UDF OTH ASC ASS ACV ACO ASV ASO
 -    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
 -    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
 -    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
 -    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
 -    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
 -    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
 -    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
 -    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
 -)
 -
 -
 -class Latin1Prober(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self.reset()
 -
 -    def reset(self):
 -        self._mLastCharClass = OTH
 -        self._mFreqCounter = [0] * FREQ_CAT_NUM
 -        CharSetProber.reset(self)
 -
 -    def get_charset_name(self):
 -        return "windows-1252"
 -
 -    def feed(self, aBuf):
 -        aBuf = self.filter_with_english_letters(aBuf)
 -        for c in aBuf:
 -            charClass = Latin1_CharToClass[wrap_ord(c)]
 -            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
 -                                    + charClass]
 -            if freq == 0:
 -                self._mState = eNotMe
 -                break
 -            self._mFreqCounter[freq] += 1
 -            self._mLastCharClass = charClass
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        if self.get_state() == eNotMe:
 -            return 0.01
 -
 -        total = sum(self._mFreqCounter)
 -        if total < 0.01:
 -            confidence = 0.0
 -        else:
 -            confidence = ((self._mFreqCounter[3] / total)
 -                          - (self._mFreqCounter[1] * 20.0 / total))
 -        if confidence < 0.0:
 -            confidence = 0.0
 -        # lower the confidence of latin1 so that other more accurate
 -        # detector can take priority.
 -        confidence = confidence * 0.5
 -        return confidence
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe +from .compat import wrap_ord + +FREQ_CAT_NUM = 4 + +UDF = 0  # undefined +OTH = 1  # other +ASC = 2  # ascii capital letter +ASS = 3  # ascii small letter +ACV = 4  # accent capital vowel +ACO = 5  # accent capital other +ASV = 6  # accent small vowel +ASO = 7  # accent small other +CLASS_NUM = 8  # total classes + +Latin1_CharToClass = ( +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 
28 - 2F +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F +    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47 +    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F +    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57 +    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F +    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67 +    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F +    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77 +    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F +    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87 +    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F +    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97 +    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7 +    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF +    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7 +    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF +    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7 +    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF +    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7 +    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF +    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7 +    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +Latin1ClassModel = ( +    # UDF OTH ASC ASS ACV ACO ASV ASO +    0,  0,  0,  0,  0,  0,  0,  0,  # UDF +    0,  3,  3,  3,  3,  3,  3,  3,  # OTH +    0,  3,  3,  3,  3,  3,  3,  3,  # ASC +    0,  3,  3,  3,  1,  1,  3,  3,  # ASS +    0,  3,  3,  3,  1,  2,  1,  2,  # ACV +    0,  3,  3,  3,  3,  3,  3,  3,  # ACO +    0,  3,  1,  3,  1,  1,  1,  3,  # ASV +    0,  3,  1,  3,  1,  1,  3,  3,  # ASO +) + + +class 
Latin1Prober(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self.reset() + +    def reset(self): +        self._mLastCharClass = OTH +        self._mFreqCounter = [0] * FREQ_CAT_NUM +        CharSetProber.reset(self) + +    def get_charset_name(self): +        return "windows-1252" + +    def feed(self, aBuf): +        aBuf = self.filter_with_english_letters(aBuf) +        for c in aBuf: +            charClass = Latin1_CharToClass[wrap_ord(c)] +            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) +                                    + charClass] +            if freq == 0: +                self._mState = eNotMe +                break +            self._mFreqCounter[freq] += 1 +            self._mLastCharClass = charClass + +        return self.get_state() + +    def get_confidence(self): +        if self.get_state() == eNotMe: +            return 0.01 + +        total = sum(self._mFreqCounter) +        if total < 0.01: +            confidence = 0.0 +        else: +            confidence = ((self._mFreqCounter[3] / total) +                          - (self._mFreqCounter[1] * 20.0 / total)) +        if confidence < 0.0: +            confidence = 0.0 +        # lower the confidence of latin1 so that other more accurate +        # detector can take priority. +        confidence = confidence * 0.5 +        return confidence diff --git a/requests/packages/charade/mbcharsetprober.py b/requests/packages/chardet/mbcharsetprober.py index 1eee253..bb42f2f 100644 --- a/requests/packages/charade/mbcharsetprober.py +++ b/requests/packages/chardet/mbcharsetprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#   Proofpoint, Inc.
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -import sys
 -from . import constants
 -from .charsetprober import CharSetProber
 -
 -
 -class MultiByteCharSetProber(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self._mDistributionAnalyzer = None
 -        self._mCodingSM = None
 -        self._mLastChar = [0, 0]
 -
 -    def reset(self):
 -        CharSetProber.reset(self)
 -        if self._mCodingSM:
 -            self._mCodingSM.reset()
 -        if self._mDistributionAnalyzer:
 -            self._mDistributionAnalyzer.reset()
 -        self._mLastChar = [0, 0]
 -
 -    def get_charset_name(self):
 -        pass
 -
 -    def feed(self, aBuf):
 -        aLen = len(aBuf)
 -        for i in range(0, aLen):
 -            codingState = self._mCodingSM.next_state(aBuf[i])
 -            if codingState == constants.eError:
 -                if constants._debug:
 -                    sys.stderr.write(self.get_charset_name()
 -                                     + ' prober hit error at byte ' + str(i)
 -                                     + '\n')
 -                self._mState = constants.eNotMe
 -                break
 -            elif codingState == constants.eItsMe:
 -                self._mState = constants.eFoundIt
 -                break
 -            elif codingState == constants.eStart:
 -                charLen = self._mCodingSM.get_current_charlen()
 -                if i == 0:
 -                    self._mLastChar[1] = aBuf[0]
 -                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
 -                else:
 -                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
 -                                                     charLen)
 -
 -        self._mLastChar[0] = aBuf[aLen - 1]
 -
 -        if self.get_state() == constants.eDetecting:
 -            if (self._mDistributionAnalyzer.got_enough_data() and
 -                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
 -                self._mState = constants.eFoundIt
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        return self._mDistributionAnalyzer.get_confidence()
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +#   Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +import sys +from . 
import constants +from .charsetprober import CharSetProber + + +class MultiByteCharSetProber(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self._mDistributionAnalyzer = None +        self._mCodingSM = None +        self._mLastChar = [0, 0] + +    def reset(self): +        CharSetProber.reset(self) +        if self._mCodingSM: +            self._mCodingSM.reset() +        if self._mDistributionAnalyzer: +            self._mDistributionAnalyzer.reset() +        self._mLastChar = [0, 0] + +    def get_charset_name(self): +        pass + +    def feed(self, aBuf): +        aLen = len(aBuf) +        for i in range(0, aLen): +            codingState = self._mCodingSM.next_state(aBuf[i]) +            if codingState == constants.eError: +                if constants._debug: +                    sys.stderr.write(self.get_charset_name() +                                     + ' prober hit error at byte ' + str(i) +                                     + '\n') +                self._mState = constants.eNotMe +                break +            elif codingState == constants.eItsMe: +                self._mState = constants.eFoundIt +                break +            elif codingState == constants.eStart: +                charLen = self._mCodingSM.get_current_charlen() +                if i == 0: +                    self._mLastChar[1] = aBuf[0] +                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen) +                else: +                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], +                                                     charLen) + +        self._mLastChar[0] = aBuf[aLen - 1] + +        if self.get_state() == constants.eDetecting: +            if (self._mDistributionAnalyzer.got_enough_data() and +                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): +                self._mState = constants.eFoundIt + +        return self.get_state() + +    def 
get_confidence(self): +        return self._mDistributionAnalyzer.get_confidence() diff --git a/requests/packages/charade/mbcsgroupprober.py b/requests/packages/chardet/mbcsgroupprober.py index 2f6f5e8..03c9dcf 100644 --- a/requests/packages/charade/mbcsgroupprober.py +++ b/requests/packages/chardet/mbcsgroupprober.py @@ -1,54 +1,54 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#   Proofpoint, Inc.
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .charsetgroupprober import CharSetGroupProber
 -from .utf8prober import UTF8Prober
 -from .sjisprober import SJISProber
 -from .eucjpprober import EUCJPProber
 -from .gb2312prober import GB2312Prober
 -from .euckrprober import EUCKRProber
 -from .cp949prober import CP949Prober
 -from .big5prober import Big5Prober
 -from .euctwprober import EUCTWProber
 -
 -
 -class MBCSGroupProber(CharSetGroupProber):
 -    def __init__(self):
 -        CharSetGroupProber.__init__(self)
 -        self._mProbers = [
 -            UTF8Prober(),
 -            SJISProber(),
 -            EUCJPProber(),
 -            GB2312Prober(),
 -            EUCKRProber(),
 -            CP949Prober(),
 -            Big5Prober(),
 -            EUCTWProber()
 -        ]
 -        self.reset()
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +#   Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .utf8prober import UTF8Prober +from .sjisprober import SJISProber +from .eucjpprober import EUCJPProber +from .gb2312prober import GB2312Prober +from .euckrprober import EUCKRProber +from .cp949prober import CP949Prober +from .big5prober import Big5Prober +from .euctwprober import EUCTWProber + + +class MBCSGroupProber(CharSetGroupProber): +    def __init__(self): +        CharSetGroupProber.__init__(self) +        self._mProbers = [ +            UTF8Prober(), +            SJISProber(), +            EUCJPProber(), +            GB2312Prober(), +            EUCKRProber(), +            CP949Prober(), +            Big5Prober(), +            EUCTWProber() +        ] 
+        self.reset() diff --git a/requests/packages/charade/mbcssm.py b/requests/packages/chardet/mbcssm.py index 55c02f0..3f93cfb 100644 --- a/requests/packages/charade/mbcssm.py +++ b/requests/packages/chardet/mbcssm.py @@ -1,575 +1,575 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .constants import eStart, eError, eItsMe
 -
 -# BIG5
 -
 -BIG5_cls = (
 -    1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value
 -    1,1,1,1,1,1,0,0,  # 08 - 0f
 -    1,1,1,1,1,1,1,1,  # 10 - 17
 -    1,1,1,0,1,1,1,1,  # 18 - 1f
 -    1,1,1,1,1,1,1,1,  # 20 - 27
 -    1,1,1,1,1,1,1,1,  # 28 - 2f
 -    1,1,1,1,1,1,1,1,  # 30 - 37
 -    1,1,1,1,1,1,1,1,  # 38 - 3f
 -    2,2,2,2,2,2,2,2,  # 40 - 47
 -    2,2,2,2,2,2,2,2,  # 48 - 4f
 -    2,2,2,2,2,2,2,2,  # 50 - 57
 -    2,2,2,2,2,2,2,2,  # 58 - 5f
 -    2,2,2,2,2,2,2,2,  # 60 - 67
 -    2,2,2,2,2,2,2,2,  # 68 - 6f
 -    2,2,2,2,2,2,2,2,  # 70 - 77
 -    2,2,2,2,2,2,2,1,  # 78 - 7f
 -    4,4,4,4,4,4,4,4,  # 80 - 87
 -    4,4,4,4,4,4,4,4,  # 88 - 8f
 -    4,4,4,4,4,4,4,4,  # 90 - 97
 -    4,4,4,4,4,4,4,4,  # 98 - 9f
 -    4,3,3,3,3,3,3,3,  # a0 - a7
 -    3,3,3,3,3,3,3,3,  # a8 - af
 -    3,3,3,3,3,3,3,3,  # b0 - b7
 -    3,3,3,3,3,3,3,3,  # b8 - bf
 -    3,3,3,3,3,3,3,3,  # c0 - c7
 -    3,3,3,3,3,3,3,3,  # c8 - cf
 -    3,3,3,3,3,3,3,3,  # d0 - d7
 -    3,3,3,3,3,3,3,3,  # d8 - df
 -    3,3,3,3,3,3,3,3,  # e0 - e7
 -    3,3,3,3,3,3,3,3,  # e8 - ef
 -    3,3,3,3,3,3,3,3,  # f0 - f7
 -    3,3,3,3,3,3,3,0  # f8 - ff
 -)
 -
 -BIG5_st = (
 -    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
 -    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
 -    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
 -)
 -
 -Big5CharLenTable = (0, 1, 1, 2, 0)
 -
 -Big5SMModel = {'classTable': BIG5_cls,
 -               'classFactor': 5,
 -               'stateTable': BIG5_st,
 -               'charLenTable': Big5CharLenTable,
 -               'name': 'Big5'}
 -
 -# CP949
 -
 -CP949_cls  = (
 -    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f
 -    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f
 -    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f
 -    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f
 -    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f
 -    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f
 -    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f
 -    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f
 -    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f
 -    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f
 -    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af
 -    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf
 -    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf
 -    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df
 -    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef
 -    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff
 -)
 -
 -CP949_st = (
 -#cls=    0      1      2      3      4      5      6      7      8      9  # previous state =
 -    eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart
 -    eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
 -    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
 -    eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
 -    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
 -    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
 -    eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
 -)
 -
 -CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
 -
 -CP949SMModel = {'classTable': CP949_cls,
 -                'classFactor': 10,
 -                'stateTable': CP949_st,
 -                'charLenTable': CP949CharLenTable,
 -                'name': 'CP949'}
 -
 -# EUC-JP
 -
 -EUCJP_cls = (
 -    4,4,4,4,4,4,4,4,  # 00 - 07
 -    4,4,4,4,4,4,5,5,  # 08 - 0f
 -    4,4,4,4,4,4,4,4,  # 10 - 17
 -    4,4,4,5,4,4,4,4,  # 18 - 1f
 -    4,4,4,4,4,4,4,4,  # 20 - 27
 -    4,4,4,4,4,4,4,4,  # 28 - 2f
 -    4,4,4,4,4,4,4,4,  # 30 - 37
 -    4,4,4,4,4,4,4,4,  # 38 - 3f
 -    4,4,4,4,4,4,4,4,  # 40 - 47
 -    4,4,4,4,4,4,4,4,  # 48 - 4f
 -    4,4,4,4,4,4,4,4,  # 50 - 57
 -    4,4,4,4,4,4,4,4,  # 58 - 5f
 -    4,4,4,4,4,4,4,4,  # 60 - 67
 -    4,4,4,4,4,4,4,4,  # 68 - 6f
 -    4,4,4,4,4,4,4,4,  # 70 - 77
 -    4,4,4,4,4,4,4,4,  # 78 - 7f
 -    5,5,5,5,5,5,5,5,  # 80 - 87
 -    5,5,5,5,5,5,1,3,  # 88 - 8f
 -    5,5,5,5,5,5,5,5,  # 90 - 97
 -    5,5,5,5,5,5,5,5,  # 98 - 9f
 -    5,2,2,2,2,2,2,2,  # a0 - a7
 -    2,2,2,2,2,2,2,2,  # a8 - af
 -    2,2,2,2,2,2,2,2,  # b0 - b7
 -    2,2,2,2,2,2,2,2,  # b8 - bf
 -    2,2,2,2,2,2,2,2,  # c0 - c7
 -    2,2,2,2,2,2,2,2,  # c8 - cf
 -    2,2,2,2,2,2,2,2,  # d0 - d7
 -    2,2,2,2,2,2,2,2,  # d8 - df
 -    0,0,0,0,0,0,0,0,  # e0 - e7
 -    0,0,0,0,0,0,0,0,  # e8 - ef
 -    0,0,0,0,0,0,0,0,  # f0 - f7
 -    0,0,0,0,0,0,0,5  # f8 - ff
 -)
 -
 -EUCJP_st = (
 -          3,     4,     3,     5,eStart,eError,eError,eError,#00-07
 -     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
 -     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
 -     eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f
 -          3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
 -)
 -
 -EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
 -
 -EUCJPSMModel = {'classTable': EUCJP_cls,
 -                'classFactor': 6,
 -                'stateTable': EUCJP_st,
 -                'charLenTable': EUCJPCharLenTable,
 -                'name': 'EUC-JP'}
 -
 -# EUC-KR
 -
 -EUCKR_cls  = (
 -    1,1,1,1,1,1,1,1,  # 00 - 07
 -    1,1,1,1,1,1,0,0,  # 08 - 0f
 -    1,1,1,1,1,1,1,1,  # 10 - 17
 -    1,1,1,0,1,1,1,1,  # 18 - 1f
 -    1,1,1,1,1,1,1,1,  # 20 - 27
 -    1,1,1,1,1,1,1,1,  # 28 - 2f
 -    1,1,1,1,1,1,1,1,  # 30 - 37
 -    1,1,1,1,1,1,1,1,  # 38 - 3f
 -    1,1,1,1,1,1,1,1,  # 40 - 47
 -    1,1,1,1,1,1,1,1,  # 48 - 4f
 -    1,1,1,1,1,1,1,1,  # 50 - 57
 -    1,1,1,1,1,1,1,1,  # 58 - 5f
 -    1,1,1,1,1,1,1,1,  # 60 - 67
 -    1,1,1,1,1,1,1,1,  # 68 - 6f
 -    1,1,1,1,1,1,1,1,  # 70 - 77
 -    1,1,1,1,1,1,1,1,  # 78 - 7f
 -    0,0,0,0,0,0,0,0,  # 80 - 87
 -    0,0,0,0,0,0,0,0,  # 88 - 8f
 -    0,0,0,0,0,0,0,0,  # 90 - 97
 -    0,0,0,0,0,0,0,0,  # 98 - 9f
 -    0,2,2,2,2,2,2,2,  # a0 - a7
 -    2,2,2,2,2,3,3,3,  # a8 - af
 -    2,2,2,2,2,2,2,2,  # b0 - b7
 -    2,2,2,2,2,2,2,2,  # b8 - bf
 -    2,2,2,2,2,2,2,2,  # c0 - c7
 -    2,3,2,2,2,2,2,2,  # c8 - cf
 -    2,2,2,2,2,2,2,2,  # d0 - d7
 -    2,2,2,2,2,2,2,2,  # d8 - df
 -    2,2,2,2,2,2,2,2,  # e0 - e7
 -    2,2,2,2,2,2,2,2,  # e8 - ef
 -    2,2,2,2,2,2,2,2,  # f0 - f7
 -    2,2,2,2,2,2,2,0   # f8 - ff
 -)
 -
 -EUCKR_st = (
 -    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07
 -    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
 -)
 -
 -EUCKRCharLenTable = (0, 1, 2, 0)
 -
 -EUCKRSMModel = {'classTable': EUCKR_cls,
 -                'classFactor': 4,
 -                'stateTable': EUCKR_st,
 -                'charLenTable': EUCKRCharLenTable,
 -                'name': 'EUC-KR'}
 -
 -# EUC-TW
 -
 -EUCTW_cls = (
 -    2,2,2,2,2,2,2,2,  # 00 - 07
 -    2,2,2,2,2,2,0,0,  # 08 - 0f
 -    2,2,2,2,2,2,2,2,  # 10 - 17
 -    2,2,2,0,2,2,2,2,  # 18 - 1f
 -    2,2,2,2,2,2,2,2,  # 20 - 27
 -    2,2,2,2,2,2,2,2,  # 28 - 2f
 -    2,2,2,2,2,2,2,2,  # 30 - 37
 -    2,2,2,2,2,2,2,2,  # 38 - 3f
 -    2,2,2,2,2,2,2,2,  # 40 - 47
 -    2,2,2,2,2,2,2,2,  # 48 - 4f
 -    2,2,2,2,2,2,2,2,  # 50 - 57
 -    2,2,2,2,2,2,2,2,  # 58 - 5f
 -    2,2,2,2,2,2,2,2,  # 60 - 67
 -    2,2,2,2,2,2,2,2,  # 68 - 6f
 -    2,2,2,2,2,2,2,2,  # 70 - 77
 -    2,2,2,2,2,2,2,2,  # 78 - 7f
 -    0,0,0,0,0,0,0,0,  # 80 - 87
 -    0,0,0,0,0,0,6,0,  # 88 - 8f
 -    0,0,0,0,0,0,0,0,  # 90 - 97
 -    0,0,0,0,0,0,0,0,  # 98 - 9f
 -    0,3,4,4,4,4,4,4,  # a0 - a7
 -    5,5,1,1,1,1,1,1,  # a8 - af
 -    1,1,1,1,1,1,1,1,  # b0 - b7
 -    1,1,1,1,1,1,1,1,  # b8 - bf
 -    1,1,3,1,3,3,3,3,  # c0 - c7
 -    3,3,3,3,3,3,3,3,  # c8 - cf
 -    3,3,3,3,3,3,3,3,  # d0 - d7
 -    3,3,3,3,3,3,3,3,  # d8 - df
 -    3,3,3,3,3,3,3,3,  # e0 - e7
 -    3,3,3,3,3,3,3,3,  # e8 - ef
 -    3,3,3,3,3,3,3,3,  # f0 - f7
 -    3,3,3,3,3,3,3,0   # f8 - ff
 -)
 -
 -EUCTW_st = (
 -    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07
 -    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
 -    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
 -    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
 -         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
 -    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
 -)
 -
 -EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
 -
 -EUCTWSMModel = {'classTable': EUCTW_cls,
 -                'classFactor': 7,
 -                'stateTable': EUCTW_st,
 -                'charLenTable': EUCTWCharLenTable,
 -                'name': 'x-euc-tw'}
 -
 -# GB2312
 -
 -GB2312_cls = (
 -    1,1,1,1,1,1,1,1,  # 00 - 07
 -    1,1,1,1,1,1,0,0,  # 08 - 0f
 -    1,1,1,1,1,1,1,1,  # 10 - 17
 -    1,1,1,0,1,1,1,1,  # 18 - 1f
 -    1,1,1,1,1,1,1,1,  # 20 - 27
 -    1,1,1,1,1,1,1,1,  # 28 - 2f
 -    3,3,3,3,3,3,3,3,  # 30 - 37
 -    3,3,1,1,1,1,1,1,  # 38 - 3f
 -    2,2,2,2,2,2,2,2,  # 40 - 47
 -    2,2,2,2,2,2,2,2,  # 48 - 4f
 -    2,2,2,2,2,2,2,2,  # 50 - 57
 -    2,2,2,2,2,2,2,2,  # 58 - 5f
 -    2,2,2,2,2,2,2,2,  # 60 - 67
 -    2,2,2,2,2,2,2,2,  # 68 - 6f
 -    2,2,2,2,2,2,2,2,  # 70 - 77
 -    2,2,2,2,2,2,2,4,  # 78 - 7f
 -    5,6,6,6,6,6,6,6,  # 80 - 87
 -    6,6,6,6,6,6,6,6,  # 88 - 8f
 -    6,6,6,6,6,6,6,6,  # 90 - 97
 -    6,6,6,6,6,6,6,6,  # 98 - 9f
 -    6,6,6,6,6,6,6,6,  # a0 - a7
 -    6,6,6,6,6,6,6,6,  # a8 - af
 -    6,6,6,6,6,6,6,6,  # b0 - b7
 -    6,6,6,6,6,6,6,6,  # b8 - bf
 -    6,6,6,6,6,6,6,6,  # c0 - c7
 -    6,6,6,6,6,6,6,6,  # c8 - cf
 -    6,6,6,6,6,6,6,6,  # d0 - d7
 -    6,6,6,6,6,6,6,6,  # d8 - df
 -    6,6,6,6,6,6,6,6,  # e0 - e7
 -    6,6,6,6,6,6,6,6,  # e8 - ef
 -    6,6,6,6,6,6,6,6,  # f0 - f7
 -    6,6,6,6,6,6,6,0   # f8 - ff
 -)
 -
 -GB2312_st = (
 -    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07
 -    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
 -    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
 -         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
 -    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27
 -    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
 -)
 -
 -# To be accurate, the length of class 6 can be either 2 or 4.
 -# But it is not necessary to discriminate between the two since
 -# it is used for frequency analysis only, and we are validing
 -# each code range there as well. So it is safe to set it to be
 -# 2 here.
 -GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
 -
 -GB2312SMModel = {'classTable': GB2312_cls,
 -                  'classFactor': 7,
 -                  'stateTable': GB2312_st,
 -                  'charLenTable': GB2312CharLenTable,
 -                  'name': 'GB2312'}
 -
 -# Shift_JIS
 -
 -SJIS_cls = (
 -    1,1,1,1,1,1,1,1,  # 00 - 07
 -    1,1,1,1,1,1,0,0,  # 08 - 0f
 -    1,1,1,1,1,1,1,1,  # 10 - 17
 -    1,1,1,0,1,1,1,1,  # 18 - 1f
 -    1,1,1,1,1,1,1,1,  # 20 - 27
 -    1,1,1,1,1,1,1,1,  # 28 - 2f
 -    1,1,1,1,1,1,1,1,  # 30 - 37
 -    1,1,1,1,1,1,1,1,  # 38 - 3f
 -    2,2,2,2,2,2,2,2,  # 40 - 47
 -    2,2,2,2,2,2,2,2,  # 48 - 4f
 -    2,2,2,2,2,2,2,2,  # 50 - 57
 -    2,2,2,2,2,2,2,2,  # 58 - 5f
 -    2,2,2,2,2,2,2,2,  # 60 - 67
 -    2,2,2,2,2,2,2,2,  # 68 - 6f
 -    2,2,2,2,2,2,2,2,  # 70 - 77
 -    2,2,2,2,2,2,2,1,  # 78 - 7f
 -    3,3,3,3,3,3,3,3,  # 80 - 87
 -    3,3,3,3,3,3,3,3,  # 88 - 8f
 -    3,3,3,3,3,3,3,3,  # 90 - 97
 -    3,3,3,3,3,3,3,3,  # 98 - 9f
 -    #0xa0 is illegal in sjis encoding, but some pages does
 -    #contain such byte. We need to be more error forgiven.
 -    2,2,2,2,2,2,2,2,  # a0 - a7
 -    2,2,2,2,2,2,2,2,  # a8 - af
 -    2,2,2,2,2,2,2,2,  # b0 - b7
 -    2,2,2,2,2,2,2,2,  # b8 - bf
 -    2,2,2,2,2,2,2,2,  # c0 - c7
 -    2,2,2,2,2,2,2,2,  # c8 - cf
 -    2,2,2,2,2,2,2,2,  # d0 - d7
 -    2,2,2,2,2,2,2,2,  # d8 - df
 -    3,3,3,3,3,3,3,3,  # e0 - e7
 -    3,3,3,3,3,4,4,4,  # e8 - ef
 -    4,4,4,4,4,4,4,4,  # f0 - f7
 -    4,4,4,4,4,0,0,0   # f8 - ff
 -)
 -
 -
 -SJIS_st = (
 -    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
 -    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
 -    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
 -)
 -
 -SJISCharLenTable = (0, 1, 1, 2, 0, 0)
 -
 -SJISSMModel = {'classTable': SJIS_cls,
 -               'classFactor': 6,
 -               'stateTable': SJIS_st,
 -               'charLenTable': SJISCharLenTable,
 -               'name': 'Shift_JIS'}
 -
 -# UCS2-BE
 -
 -UCS2BE_cls = (
 -    0,0,0,0,0,0,0,0,  # 00 - 07
 -    0,0,1,0,0,2,0,0,  # 08 - 0f
 -    0,0,0,0,0,0,0,0,  # 10 - 17
 -    0,0,0,3,0,0,0,0,  # 18 - 1f
 -    0,0,0,0,0,0,0,0,  # 20 - 27
 -    0,3,3,3,3,3,0,0,  # 28 - 2f
 -    0,0,0,0,0,0,0,0,  # 30 - 37
 -    0,0,0,0,0,0,0,0,  # 38 - 3f
 -    0,0,0,0,0,0,0,0,  # 40 - 47
 -    0,0,0,0,0,0,0,0,  # 48 - 4f
 -    0,0,0,0,0,0,0,0,  # 50 - 57
 -    0,0,0,0,0,0,0,0,  # 58 - 5f
 -    0,0,0,0,0,0,0,0,  # 60 - 67
 -    0,0,0,0,0,0,0,0,  # 68 - 6f
 -    0,0,0,0,0,0,0,0,  # 70 - 77
 -    0,0,0,0,0,0,0,0,  # 78 - 7f
 -    0,0,0,0,0,0,0,0,  # 80 - 87
 -    0,0,0,0,0,0,0,0,  # 88 - 8f
 -    0,0,0,0,0,0,0,0,  # 90 - 97
 -    0,0,0,0,0,0,0,0,  # 98 - 9f
 -    0,0,0,0,0,0,0,0,  # a0 - a7
 -    0,0,0,0,0,0,0,0,  # a8 - af
 -    0,0,0,0,0,0,0,0,  # b0 - b7
 -    0,0,0,0,0,0,0,0,  # b8 - bf
 -    0,0,0,0,0,0,0,0,  # c0 - c7
 -    0,0,0,0,0,0,0,0,  # c8 - cf
 -    0,0,0,0,0,0,0,0,  # d0 - d7
 -    0,0,0,0,0,0,0,0,  # d8 - df
 -    0,0,0,0,0,0,0,0,  # e0 - e7
 -    0,0,0,0,0,0,0,0,  # e8 - ef
 -    0,0,0,0,0,0,0,0,  # f0 - f7
 -    0,0,0,0,0,0,4,5   # f8 - ff
 -)
 -
 -UCS2BE_st  = (
 -          5,     7,     7,eError,     4,     3,eError,eError,#00-07
 -     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
 -     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17
 -          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f
 -          6,     6,     6,     6,     5,     7,     7,eError,#20-27
 -          5,     8,     6,     6,eError,     6,     6,     6,#28-2f
 -          6,     6,     6,     6,eError,eError,eStart,eStart #30-37
 -)
 -
 -UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
 -
 -UCS2BESMModel = {'classTable': UCS2BE_cls,
 -                 'classFactor': 6,
 -                 'stateTable': UCS2BE_st,
 -                 'charLenTable': UCS2BECharLenTable,
 -                 'name': 'UTF-16BE'}
 -
 -# UCS2-LE
 -
 -UCS2LE_cls = (
 -    0,0,0,0,0,0,0,0,  # 00 - 07
 -    0,0,1,0,0,2,0,0,  # 08 - 0f
 -    0,0,0,0,0,0,0,0,  # 10 - 17
 -    0,0,0,3,0,0,0,0,  # 18 - 1f
 -    0,0,0,0,0,0,0,0,  # 20 - 27
 -    0,3,3,3,3,3,0,0,  # 28 - 2f
 -    0,0,0,0,0,0,0,0,  # 30 - 37
 -    0,0,0,0,0,0,0,0,  # 38 - 3f
 -    0,0,0,0,0,0,0,0,  # 40 - 47
 -    0,0,0,0,0,0,0,0,  # 48 - 4f
 -    0,0,0,0,0,0,0,0,  # 50 - 57
 -    0,0,0,0,0,0,0,0,  # 58 - 5f
 -    0,0,0,0,0,0,0,0,  # 60 - 67
 -    0,0,0,0,0,0,0,0,  # 68 - 6f
 -    0,0,0,0,0,0,0,0,  # 70 - 77
 -    0,0,0,0,0,0,0,0,  # 78 - 7f
 -    0,0,0,0,0,0,0,0,  # 80 - 87
 -    0,0,0,0,0,0,0,0,  # 88 - 8f
 -    0,0,0,0,0,0,0,0,  # 90 - 97
 -    0,0,0,0,0,0,0,0,  # 98 - 9f
 -    0,0,0,0,0,0,0,0,  # a0 - a7
 -    0,0,0,0,0,0,0,0,  # a8 - af
 -    0,0,0,0,0,0,0,0,  # b0 - b7
 -    0,0,0,0,0,0,0,0,  # b8 - bf
 -    0,0,0,0,0,0,0,0,  # c0 - c7
 -    0,0,0,0,0,0,0,0,  # c8 - cf
 -    0,0,0,0,0,0,0,0,  # d0 - d7
 -    0,0,0,0,0,0,0,0,  # d8 - df
 -    0,0,0,0,0,0,0,0,  # e0 - e7
 -    0,0,0,0,0,0,0,0,  # e8 - ef
 -    0,0,0,0,0,0,0,0,  # f0 - f7
 -    0,0,0,0,0,0,4,5   # f8 - ff
 -)
 -
 -UCS2LE_st = (
 -          6,     6,     7,     6,     4,     3,eError,eError,#00-07
 -     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
 -     eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17
 -          5,     5,     5,eError,     5,eError,     6,     6,#18-1f
 -          7,     6,     8,     8,     5,     5,     5,eError,#20-27
 -          5,     5,     5,eError,eError,eError,     5,     5,#28-2f
 -          5,     5,     5,eError,     5,eError,eStart,eStart #30-37
 -)
 -
 -UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
 -
 -UCS2LESMModel = {'classTable': UCS2LE_cls,
 -                 'classFactor': 6,
 -                 'stateTable': UCS2LE_st,
 -                 'charLenTable': UCS2LECharLenTable,
 -                 'name': 'UTF-16LE'}
 -
 -# UTF-8
 -
 -UTF8_cls = (
 -    1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value
 -    1,1,1,1,1,1,0,0,  # 08 - 0f
 -    1,1,1,1,1,1,1,1,  # 10 - 17
 -    1,1,1,0,1,1,1,1,  # 18 - 1f
 -    1,1,1,1,1,1,1,1,  # 20 - 27
 -    1,1,1,1,1,1,1,1,  # 28 - 2f
 -    1,1,1,1,1,1,1,1,  # 30 - 37
 -    1,1,1,1,1,1,1,1,  # 38 - 3f
 -    1,1,1,1,1,1,1,1,  # 40 - 47
 -    1,1,1,1,1,1,1,1,  # 48 - 4f
 -    1,1,1,1,1,1,1,1,  # 50 - 57
 -    1,1,1,1,1,1,1,1,  # 58 - 5f
 -    1,1,1,1,1,1,1,1,  # 60 - 67
 -    1,1,1,1,1,1,1,1,  # 68 - 6f
 -    1,1,1,1,1,1,1,1,  # 70 - 77
 -    1,1,1,1,1,1,1,1,  # 78 - 7f
 -    2,2,2,2,3,3,3,3,  # 80 - 87
 -    4,4,4,4,4,4,4,4,  # 88 - 8f
 -    4,4,4,4,4,4,4,4,  # 90 - 97
 -    4,4,4,4,4,4,4,4,  # 98 - 9f
 -    5,5,5,5,5,5,5,5,  # a0 - a7
 -    5,5,5,5,5,5,5,5,  # a8 - af
 -    5,5,5,5,5,5,5,5,  # b0 - b7
 -    5,5,5,5,5,5,5,5,  # b8 - bf
 -    0,0,6,6,6,6,6,6,  # c0 - c7
 -    6,6,6,6,6,6,6,6,  # c8 - cf
 -    6,6,6,6,6,6,6,6,  # d0 - d7
 -    6,6,6,6,6,6,6,6,  # d8 - df
 -    7,8,8,8,8,8,8,8,  # e0 - e7
 -    8,8,8,8,8,9,8,8,  # e8 - ef
 -    10,11,11,11,11,11,11,11,  # f0 - f7
 -    12,13,13,13,14,15,0,0    # f8 - ff
 -)
 -
 -UTF8_st = (
 -    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07
 -         9,     11,     8,     7,     6,     5,     4,    3,#08-0f
 -    eError,eError,eError,eError,eError,eError,eError,eError,#10-17
 -    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
 -    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
 -    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
 -    eError,eError,     5,     5,     5,     5,eError,eError,#30-37
 -    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
 -    eError,eError,eError,     5,     5,     5,eError,eError,#40-47
 -    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
 -    eError,eError,     7,     7,     7,     7,eError,eError,#50-57
 -    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
 -    eError,eError,eError,eError,     7,     7,eError,eError,#60-67
 -    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
 -    eError,eError,     9,     9,     9,     9,eError,eError,#70-77
 -    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
 -    eError,eError,eError,eError,eError,     9,eError,eError,#80-87
 -    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
 -    eError,eError,    12,    12,    12,    12,eError,eError,#90-97
 -    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
 -    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7
 -    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
 -    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7
 -    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
 -    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
 -    eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
 -)
 -
 -UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
 -
 -UTF8SMModel = {'classTable': UTF8_cls,
 -               'classFactor': 16,
 -               'stateTable': UTF8_st,
 -               'charLenTable': UTF8CharLenTable,
 -               'name': 'UTF-8'}
 -
 -# flake8: noqa
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( +    1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value +    1,1,1,1,1,1,0,0,  # 08 - 0f +    1,1,1,1,1,1,1,1,  # 10 - 17 +    1,1,1,0,1,1,1,1,  # 18 - 1f +    1,1,1,1,1,1,1,1,  # 20 - 27 +    1,1,1,1,1,1,1,1,  # 28 - 2f +    1,1,1,1,1,1,1,1,  # 30 - 37 +    1,1,1,1,1,1,1,1,  # 38 - 3f +    2,2,2,2,2,2,2,2,  # 40 - 47 +    2,2,2,2,2,2,2,2,  # 48 - 4f +    2,2,2,2,2,2,2,2,  # 50 - 57 +    2,2,2,2,2,2,2,2,  # 58 - 5f +    2,2,2,2,2,2,2,2,  # 60 - 67 +    2,2,2,2,2,2,2,2,  # 68 - 6f +    2,2,2,2,2,2,2,2,  # 70 - 77 +    2,2,2,2,2,2,2,1,  # 78 - 7f +    4,4,4,4,4,4,4,4,  # 80 - 87 +    4,4,4,4,4,4,4,4,  # 88 - 8f +    4,4,4,4,4,4,4,4,  # 90 - 97 +    4,4,4,4,4,4,4,4,  # 98 - 9f +    4,3,3,3,3,3,3,3,  # a0 - a7 +    
3,3,3,3,3,3,3,3,  # a8 - af +    3,3,3,3,3,3,3,3,  # b0 - b7 +    3,3,3,3,3,3,3,3,  # b8 - bf +    3,3,3,3,3,3,3,3,  # c0 - c7 +    3,3,3,3,3,3,3,3,  # c8 - cf +    3,3,3,3,3,3,3,3,  # d0 - d7 +    3,3,3,3,3,3,3,3,  # d8 - df +    3,3,3,3,3,3,3,3,  # e0 - e7 +    3,3,3,3,3,3,3,3,  # e8 - ef +    3,3,3,3,3,3,3,3,  # f0 - f7 +    3,3,3,3,3,3,3,0  # f8 - ff +) + +BIG5_st = ( +    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 +    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f +    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 +) + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, +               'classFactor': 5, +               'stateTable': BIG5_st, +               'charLenTable': Big5CharLenTable, +               'name': 'Big5'} + +# CP949 + +CP949_cls  = ( +    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f +    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f +    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f +    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f +    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f +    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f +    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f +    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f +    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f +    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f +    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af +    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf +    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf +    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df +    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef +    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff +) + +CP949_st = ( +#cls=    0      1      2      3      4      5      6      7      8      9  # previous state = +    eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart +    eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError +    
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe +    eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 +    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 +    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 +    eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 +) + +CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) + +CP949SMModel = {'classTable': CP949_cls, +                'classFactor': 10, +                'stateTable': CP949_st, +                'charLenTable': CP949CharLenTable, +                'name': 'CP949'} + +# EUC-JP + +EUCJP_cls = ( +    4,4,4,4,4,4,4,4,  # 00 - 07 +    4,4,4,4,4,4,5,5,  # 08 - 0f +    4,4,4,4,4,4,4,4,  # 10 - 17 +    4,4,4,5,4,4,4,4,  # 18 - 1f +    4,4,4,4,4,4,4,4,  # 20 - 27 +    4,4,4,4,4,4,4,4,  # 28 - 2f +    4,4,4,4,4,4,4,4,  # 30 - 37 +    4,4,4,4,4,4,4,4,  # 38 - 3f +    4,4,4,4,4,4,4,4,  # 40 - 47 +    4,4,4,4,4,4,4,4,  # 48 - 4f +    4,4,4,4,4,4,4,4,  # 50 - 57 +    4,4,4,4,4,4,4,4,  # 58 - 5f +    4,4,4,4,4,4,4,4,  # 60 - 67 +    4,4,4,4,4,4,4,4,  # 68 - 6f +    4,4,4,4,4,4,4,4,  # 70 - 77 +    4,4,4,4,4,4,4,4,  # 78 - 7f +    5,5,5,5,5,5,5,5,  # 80 - 87 +    5,5,5,5,5,5,1,3,  # 88 - 8f +    5,5,5,5,5,5,5,5,  # 90 - 97 +    5,5,5,5,5,5,5,5,  # 98 - 9f +    5,2,2,2,2,2,2,2,  # a0 - a7 +    2,2,2,2,2,2,2,2,  # a8 - af +    2,2,2,2,2,2,2,2,  # b0 - b7 +    2,2,2,2,2,2,2,2,  # b8 - bf +    2,2,2,2,2,2,2,2,  # c0 - c7 +    2,2,2,2,2,2,2,2,  # c8 - cf +    2,2,2,2,2,2,2,2,  # d0 - d7 +    2,2,2,2,2,2,2,2,  # d8 - df +    0,0,0,0,0,0,0,0,  # e0 - e7 +    0,0,0,0,0,0,0,0,  # e8 - ef +    0,0,0,0,0,0,0,0,  # f0 - f7 +    0,0,0,0,0,0,0,5  # f8 - ff +) + +EUCJP_st = ( +          3,     4,     3,     5,eStart,eError,eError,eError,#00-07 +     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f +     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 +     
eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f +          3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 +) + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, +                'classFactor': 6, +                'stateTable': EUCJP_st, +                'charLenTable': EUCJPCharLenTable, +                'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls  = ( +    1,1,1,1,1,1,1,1,  # 00 - 07 +    1,1,1,1,1,1,0,0,  # 08 - 0f +    1,1,1,1,1,1,1,1,  # 10 - 17 +    1,1,1,0,1,1,1,1,  # 18 - 1f +    1,1,1,1,1,1,1,1,  # 20 - 27 +    1,1,1,1,1,1,1,1,  # 28 - 2f +    1,1,1,1,1,1,1,1,  # 30 - 37 +    1,1,1,1,1,1,1,1,  # 38 - 3f +    1,1,1,1,1,1,1,1,  # 40 - 47 +    1,1,1,1,1,1,1,1,  # 48 - 4f +    1,1,1,1,1,1,1,1,  # 50 - 57 +    1,1,1,1,1,1,1,1,  # 58 - 5f +    1,1,1,1,1,1,1,1,  # 60 - 67 +    1,1,1,1,1,1,1,1,  # 68 - 6f +    1,1,1,1,1,1,1,1,  # 70 - 77 +    1,1,1,1,1,1,1,1,  # 78 - 7f +    0,0,0,0,0,0,0,0,  # 80 - 87 +    0,0,0,0,0,0,0,0,  # 88 - 8f +    0,0,0,0,0,0,0,0,  # 90 - 97 +    0,0,0,0,0,0,0,0,  # 98 - 9f +    0,2,2,2,2,2,2,2,  # a0 - a7 +    2,2,2,2,2,3,3,3,  # a8 - af +    2,2,2,2,2,2,2,2,  # b0 - b7 +    2,2,2,2,2,2,2,2,  # b8 - bf +    2,2,2,2,2,2,2,2,  # c0 - c7 +    2,3,2,2,2,2,2,2,  # c8 - cf +    2,2,2,2,2,2,2,2,  # d0 - d7 +    2,2,2,2,2,2,2,2,  # d8 - df +    2,2,2,2,2,2,2,2,  # e0 - e7 +    2,2,2,2,2,2,2,2,  # e8 - ef +    2,2,2,2,2,2,2,2,  # f0 - f7 +    2,2,2,2,2,2,2,0   # f8 - ff +) + +EUCKR_st = ( +    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07 +    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f +) + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, +                'classFactor': 4, +                'stateTable': EUCKR_st, +                'charLenTable': EUCKRCharLenTable, +                'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( +    2,2,2,2,2,2,2,2,  # 00 - 07 +    2,2,2,2,2,2,0,0,  # 08 - 0f +    2,2,2,2,2,2,2,2,  # 10 - 17 + 
   2,2,2,0,2,2,2,2,  # 18 - 1f +    2,2,2,2,2,2,2,2,  # 20 - 27 +    2,2,2,2,2,2,2,2,  # 28 - 2f +    2,2,2,2,2,2,2,2,  # 30 - 37 +    2,2,2,2,2,2,2,2,  # 38 - 3f +    2,2,2,2,2,2,2,2,  # 40 - 47 +    2,2,2,2,2,2,2,2,  # 48 - 4f +    2,2,2,2,2,2,2,2,  # 50 - 57 +    2,2,2,2,2,2,2,2,  # 58 - 5f +    2,2,2,2,2,2,2,2,  # 60 - 67 +    2,2,2,2,2,2,2,2,  # 68 - 6f +    2,2,2,2,2,2,2,2,  # 70 - 77 +    2,2,2,2,2,2,2,2,  # 78 - 7f +    0,0,0,0,0,0,0,0,  # 80 - 87 +    0,0,0,0,0,0,6,0,  # 88 - 8f +    0,0,0,0,0,0,0,0,  # 90 - 97 +    0,0,0,0,0,0,0,0,  # 98 - 9f +    0,3,4,4,4,4,4,4,  # a0 - a7 +    5,5,1,1,1,1,1,1,  # a8 - af +    1,1,1,1,1,1,1,1,  # b0 - b7 +    1,1,1,1,1,1,1,1,  # b8 - bf +    1,1,3,1,3,3,3,3,  # c0 - c7 +    3,3,3,3,3,3,3,3,  # c8 - cf +    3,3,3,3,3,3,3,3,  # d0 - d7 +    3,3,3,3,3,3,3,3,  # d8 - df +    3,3,3,3,3,3,3,3,  # e0 - e7 +    3,3,3,3,3,3,3,3,  # e8 - ef +    3,3,3,3,3,3,3,3,  # f0 - f7 +    3,3,3,3,3,3,3,0   # f8 - ff +) + +EUCTW_st = ( +    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07 +    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f +    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 +    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f +         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 +    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, +                'classFactor': 7, +                'stateTable': EUCTW_st, +                'charLenTable': EUCTWCharLenTable, +                'name': 'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( +    1,1,1,1,1,1,1,1,  # 00 - 07 +    1,1,1,1,1,1,0,0,  # 08 - 0f +    1,1,1,1,1,1,1,1,  # 10 - 17 +    1,1,1,0,1,1,1,1,  # 18 - 1f +    1,1,1,1,1,1,1,1,  # 20 - 27 +    1,1,1,1,1,1,1,1,  # 28 - 2f +    3,3,3,3,3,3,3,3,  # 30 - 37 +    3,3,1,1,1,1,1,1,  # 38 - 3f +    2,2,2,2,2,2,2,2,  # 40 - 47 +    2,2,2,2,2,2,2,2,  # 48 - 4f 
+    2,2,2,2,2,2,2,2,  # 50 - 57 +    2,2,2,2,2,2,2,2,  # 58 - 5f +    2,2,2,2,2,2,2,2,  # 60 - 67 +    2,2,2,2,2,2,2,2,  # 68 - 6f +    2,2,2,2,2,2,2,2,  # 70 - 77 +    2,2,2,2,2,2,2,4,  # 78 - 7f +    5,6,6,6,6,6,6,6,  # 80 - 87 +    6,6,6,6,6,6,6,6,  # 88 - 8f +    6,6,6,6,6,6,6,6,  # 90 - 97 +    6,6,6,6,6,6,6,6,  # 98 - 9f +    6,6,6,6,6,6,6,6,  # a0 - a7 +    6,6,6,6,6,6,6,6,  # a8 - af +    6,6,6,6,6,6,6,6,  # b0 - b7 +    6,6,6,6,6,6,6,6,  # b8 - bf +    6,6,6,6,6,6,6,6,  # c0 - c7 +    6,6,6,6,6,6,6,6,  # c8 - cf +    6,6,6,6,6,6,6,6,  # d0 - d7 +    6,6,6,6,6,6,6,6,  # d8 - df +    6,6,6,6,6,6,6,6,  # e0 - e7 +    6,6,6,6,6,6,6,6,  # e8 - ef +    6,6,6,6,6,6,6,6,  # f0 - f7 +    6,6,6,6,6,6,6,0   # f8 - ff +) + +GB2312_st = ( +    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07 +    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f +    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 +         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f +    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27 +    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +# To be accurate, the length of class 6 can be either 2 or 4. +# But it is not necessary to discriminate between the two since +# it is used for frequency analysis only, and we are validing +# each code range there as well. So it is safe to set it to be +# 2 here. 
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) + +GB2312SMModel = {'classTable': GB2312_cls, +                  'classFactor': 7, +                  'stateTable': GB2312_st, +                  'charLenTable': GB2312CharLenTable, +                  'name': 'GB2312'} + +# Shift_JIS + +SJIS_cls = ( +    1,1,1,1,1,1,1,1,  # 00 - 07 +    1,1,1,1,1,1,0,0,  # 08 - 0f +    1,1,1,1,1,1,1,1,  # 10 - 17 +    1,1,1,0,1,1,1,1,  # 18 - 1f +    1,1,1,1,1,1,1,1,  # 20 - 27 +    1,1,1,1,1,1,1,1,  # 28 - 2f +    1,1,1,1,1,1,1,1,  # 30 - 37 +    1,1,1,1,1,1,1,1,  # 38 - 3f +    2,2,2,2,2,2,2,2,  # 40 - 47 +    2,2,2,2,2,2,2,2,  # 48 - 4f +    2,2,2,2,2,2,2,2,  # 50 - 57 +    2,2,2,2,2,2,2,2,  # 58 - 5f +    2,2,2,2,2,2,2,2,  # 60 - 67 +    2,2,2,2,2,2,2,2,  # 68 - 6f +    2,2,2,2,2,2,2,2,  # 70 - 77 +    2,2,2,2,2,2,2,1,  # 78 - 7f +    3,3,3,3,3,3,3,3,  # 80 - 87 +    3,3,3,3,3,3,3,3,  # 88 - 8f +    3,3,3,3,3,3,3,3,  # 90 - 97 +    3,3,3,3,3,3,3,3,  # 98 - 9f +    #0xa0 is illegal in sjis encoding, but some pages does +    #contain such byte. We need to be more error forgiven. 
+    2,2,2,2,2,2,2,2,  # a0 - a7 +    2,2,2,2,2,2,2,2,  # a8 - af +    2,2,2,2,2,2,2,2,  # b0 - b7 +    2,2,2,2,2,2,2,2,  # b8 - bf +    2,2,2,2,2,2,2,2,  # c0 - c7 +    2,2,2,2,2,2,2,2,  # c8 - cf +    2,2,2,2,2,2,2,2,  # d0 - d7 +    2,2,2,2,2,2,2,2,  # d8 - df +    3,3,3,3,3,3,3,3,  # e0 - e7 +    3,3,3,3,3,4,4,4,  # e8 - ef +    4,4,4,4,4,4,4,4,  # f0 - f7 +    4,4,4,4,4,0,0,0   # f8 - ff +) + + +SJIS_st = ( +    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07 +    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f +    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 +) + +SJISCharLenTable = (0, 1, 1, 2, 0, 0) + +SJISSMModel = {'classTable': SJIS_cls, +               'classFactor': 6, +               'stateTable': SJIS_st, +               'charLenTable': SJISCharLenTable, +               'name': 'Shift_JIS'} + +# UCS2-BE + +UCS2BE_cls = ( +    0,0,0,0,0,0,0,0,  # 00 - 07 +    0,0,1,0,0,2,0,0,  # 08 - 0f +    0,0,0,0,0,0,0,0,  # 10 - 17 +    0,0,0,3,0,0,0,0,  # 18 - 1f +    0,0,0,0,0,0,0,0,  # 20 - 27 +    0,3,3,3,3,3,0,0,  # 28 - 2f +    0,0,0,0,0,0,0,0,  # 30 - 37 +    0,0,0,0,0,0,0,0,  # 38 - 3f +    0,0,0,0,0,0,0,0,  # 40 - 47 +    0,0,0,0,0,0,0,0,  # 48 - 4f +    0,0,0,0,0,0,0,0,  # 50 - 57 +    0,0,0,0,0,0,0,0,  # 58 - 5f +    0,0,0,0,0,0,0,0,  # 60 - 67 +    0,0,0,0,0,0,0,0,  # 68 - 6f +    0,0,0,0,0,0,0,0,  # 70 - 77 +    0,0,0,0,0,0,0,0,  # 78 - 7f +    0,0,0,0,0,0,0,0,  # 80 - 87 +    0,0,0,0,0,0,0,0,  # 88 - 8f +    0,0,0,0,0,0,0,0,  # 90 - 97 +    0,0,0,0,0,0,0,0,  # 98 - 9f +    0,0,0,0,0,0,0,0,  # a0 - a7 +    0,0,0,0,0,0,0,0,  # a8 - af +    0,0,0,0,0,0,0,0,  # b0 - b7 +    0,0,0,0,0,0,0,0,  # b8 - bf +    0,0,0,0,0,0,0,0,  # c0 - c7 +    0,0,0,0,0,0,0,0,  # c8 - cf +    0,0,0,0,0,0,0,0,  # d0 - d7 +    0,0,0,0,0,0,0,0,  # d8 - df +    0,0,0,0,0,0,0,0,  # e0 - e7 +    0,0,0,0,0,0,0,0,  # e8 - ef +    0,0,0,0,0,0,0,0,  # f0 - f7 +    0,0,0,0,0,0,4,5   # f8 - ff +) + +UCS2BE_st  = ( +          5,     7,     
7,eError,     4,     3,eError,eError,#00-07 +     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f +     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17 +          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f +          6,     6,     6,     6,     5,     7,     7,eError,#20-27 +          5,     8,     6,     6,eError,     6,     6,     6,#28-2f +          6,     6,     6,     6,eError,eError,eStart,eStart #30-37 +) + +UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) + +UCS2BESMModel = {'classTable': UCS2BE_cls, +                 'classFactor': 6, +                 'stateTable': UCS2BE_st, +                 'charLenTable': UCS2BECharLenTable, +                 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( +    0,0,0,0,0,0,0,0,  # 00 - 07 +    0,0,1,0,0,2,0,0,  # 08 - 0f +    0,0,0,0,0,0,0,0,  # 10 - 17 +    0,0,0,3,0,0,0,0,  # 18 - 1f +    0,0,0,0,0,0,0,0,  # 20 - 27 +    0,3,3,3,3,3,0,0,  # 28 - 2f +    0,0,0,0,0,0,0,0,  # 30 - 37 +    0,0,0,0,0,0,0,0,  # 38 - 3f +    0,0,0,0,0,0,0,0,  # 40 - 47 +    0,0,0,0,0,0,0,0,  # 48 - 4f +    0,0,0,0,0,0,0,0,  # 50 - 57 +    0,0,0,0,0,0,0,0,  # 58 - 5f +    0,0,0,0,0,0,0,0,  # 60 - 67 +    0,0,0,0,0,0,0,0,  # 68 - 6f +    0,0,0,0,0,0,0,0,  # 70 - 77 +    0,0,0,0,0,0,0,0,  # 78 - 7f +    0,0,0,0,0,0,0,0,  # 80 - 87 +    0,0,0,0,0,0,0,0,  # 88 - 8f +    0,0,0,0,0,0,0,0,  # 90 - 97 +    0,0,0,0,0,0,0,0,  # 98 - 9f +    0,0,0,0,0,0,0,0,  # a0 - a7 +    0,0,0,0,0,0,0,0,  # a8 - af +    0,0,0,0,0,0,0,0,  # b0 - b7 +    0,0,0,0,0,0,0,0,  # b8 - bf +    0,0,0,0,0,0,0,0,  # c0 - c7 +    0,0,0,0,0,0,0,0,  # c8 - cf +    0,0,0,0,0,0,0,0,  # d0 - d7 +    0,0,0,0,0,0,0,0,  # d8 - df +    0,0,0,0,0,0,0,0,  # e0 - e7 +    0,0,0,0,0,0,0,0,  # e8 - ef +    0,0,0,0,0,0,0,0,  # f0 - f7 +    0,0,0,0,0,0,4,5   # f8 - ff +) + +UCS2LE_st = ( +          6,     6,     7,     6,     4,     3,eError,eError,#00-07 +     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f +     eItsMe,eItsMe,     5,     
5,     5,eError,eItsMe,eError,#10-17 +          5,     5,     5,eError,     5,eError,     6,     6,#18-1f +          7,     6,     8,     8,     5,     5,     5,eError,#20-27 +          5,     5,     5,eError,eError,eError,     5,     5,#28-2f +          5,     5,     5,eError,     5,eError,eStart,eStart #30-37 +) + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, +                 'classFactor': 6, +                 'stateTable': UCS2LE_st, +                 'charLenTable': UCS2LECharLenTable, +                 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( +    1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value +    1,1,1,1,1,1,0,0,  # 08 - 0f +    1,1,1,1,1,1,1,1,  # 10 - 17 +    1,1,1,0,1,1,1,1,  # 18 - 1f +    1,1,1,1,1,1,1,1,  # 20 - 27 +    1,1,1,1,1,1,1,1,  # 28 - 2f +    1,1,1,1,1,1,1,1,  # 30 - 37 +    1,1,1,1,1,1,1,1,  # 38 - 3f +    1,1,1,1,1,1,1,1,  # 40 - 47 +    1,1,1,1,1,1,1,1,  # 48 - 4f +    1,1,1,1,1,1,1,1,  # 50 - 57 +    1,1,1,1,1,1,1,1,  # 58 - 5f +    1,1,1,1,1,1,1,1,  # 60 - 67 +    1,1,1,1,1,1,1,1,  # 68 - 6f +    1,1,1,1,1,1,1,1,  # 70 - 77 +    1,1,1,1,1,1,1,1,  # 78 - 7f +    2,2,2,2,3,3,3,3,  # 80 - 87 +    4,4,4,4,4,4,4,4,  # 88 - 8f +    4,4,4,4,4,4,4,4,  # 90 - 97 +    4,4,4,4,4,4,4,4,  # 98 - 9f +    5,5,5,5,5,5,5,5,  # a0 - a7 +    5,5,5,5,5,5,5,5,  # a8 - af +    5,5,5,5,5,5,5,5,  # b0 - b7 +    5,5,5,5,5,5,5,5,  # b8 - bf +    0,0,6,6,6,6,6,6,  # c0 - c7 +    6,6,6,6,6,6,6,6,  # c8 - cf +    6,6,6,6,6,6,6,6,  # d0 - d7 +    6,6,6,6,6,6,6,6,  # d8 - df +    7,8,8,8,8,8,8,8,  # e0 - e7 +    8,8,8,8,8,9,8,8,  # e8 - ef +    10,11,11,11,11,11,11,11,  # f0 - f7 +    12,13,13,13,14,15,0,0    # f8 - ff +) + +UTF8_st = ( +    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07 +         9,     11,     8,     7,     6,     5,     4,    3,#08-0f +    eError,eError,eError,eError,eError,eError,eError,eError,#10-17 +    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f +    
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 +    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f +    eError,eError,     5,     5,     5,     5,eError,eError,#30-37 +    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f +    eError,eError,eError,     5,     5,     5,eError,eError,#40-47 +    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f +    eError,eError,     7,     7,     7,     7,eError,eError,#50-57 +    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f +    eError,eError,eError,eError,     7,     7,eError,eError,#60-67 +    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f +    eError,eError,     9,     9,     9,     9,eError,eError,#70-77 +    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f +    eError,eError,eError,eError,eError,     9,eError,eError,#80-87 +    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f +    eError,eError,    12,    12,    12,    12,eError,eError,#90-97 +    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f +    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7 +    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af +    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7 +    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf +    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 +    eError,eError,eError,eError,eError,eError,eError,eError #c8-cf +) + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, +               'classFactor': 16, +               'stateTable': UTF8_st, +               'charLenTable': UTF8CharLenTable, +               'name': 'UTF-8'} + +# flake8: noqa diff --git a/requests/packages/charade/sbcharsetprober.py b/requests/packages/chardet/sbcharsetprober.py index da26715..37291bd 100644 --- a/requests/packages/charade/sbcharsetprober.py +++ 
b/requests/packages/chardet/sbcharsetprober.py @@ -1,120 +1,120 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -import sys
 -from . import constants
 -from .charsetprober import CharSetProber
 -from .compat import wrap_ord
 -
 -SAMPLE_SIZE = 64
 -SB_ENOUGH_REL_THRESHOLD = 1024
 -POSITIVE_SHORTCUT_THRESHOLD = 0.95
 -NEGATIVE_SHORTCUT_THRESHOLD = 0.05
 -SYMBOL_CAT_ORDER = 250
 -NUMBER_OF_SEQ_CAT = 4
 -POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
 -#NEGATIVE_CAT = 0
 -
 -
 -class SingleByteCharSetProber(CharSetProber):
 -    def __init__(self, model, reversed=False, nameProber=None):
 -        CharSetProber.__init__(self)
 -        self._mModel = model
 -        # TRUE if we need to reverse every pair in the model lookup
 -        self._mReversed = reversed
 -        # Optional auxiliary prober for name decision
 -        self._mNameProber = nameProber
 -        self.reset()
 -
 -    def reset(self):
 -        CharSetProber.reset(self)
 -        # char order of last character
 -        self._mLastOrder = 255
 -        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
 -        self._mTotalSeqs = 0
 -        self._mTotalChar = 0
 -        # characters that fall in our sampling range
 -        self._mFreqChar = 0
 -
 -    def get_charset_name(self):
 -        if self._mNameProber:
 -            return self._mNameProber.get_charset_name()
 -        else:
 -            return self._mModel['charsetName']
 -
 -    def feed(self, aBuf):
 -        if not self._mModel['keepEnglishLetter']:
 -            aBuf = self.filter_without_english_letters(aBuf)
 -        aLen = len(aBuf)
 -        if not aLen:
 -            return self.get_state()
 -        for c in aBuf:
 -            order = self._mModel['charToOrderMap'][wrap_ord(c)]
 -            if order < SYMBOL_CAT_ORDER:
 -                self._mTotalChar += 1
 -            if order < SAMPLE_SIZE:
 -                self._mFreqChar += 1
 -                if self._mLastOrder < SAMPLE_SIZE:
 -                    self._mTotalSeqs += 1
 -                    if not self._mReversed:
 -                        i = (self._mLastOrder * SAMPLE_SIZE) + order
 -                        model = self._mModel['precedenceMatrix'][i]
 -                    else:  # reverse the order of the letters in the lookup
 -                        i = (order * SAMPLE_SIZE) + self._mLastOrder
 -                        model = self._mModel['precedenceMatrix'][i]
 -                    self._mSeqCounters[model] += 1
 -            self._mLastOrder = order
 -
 -        if self.get_state() == constants.eDetecting:
 -            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
 -                cf = self.get_confidence()
 -                if cf > POSITIVE_SHORTCUT_THRESHOLD:
 -                    if constants._debug:
 -                        sys.stderr.write('%s confidence = %s, we have a'
 -                                         'winner\n' %
 -                                         (self._mModel['charsetName'], cf))
 -                    self._mState = constants.eFoundIt
 -                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
 -                    if constants._debug:
 -                        sys.stderr.write('%s confidence = %s, below negative'
 -                                         'shortcut threshhold %s\n' %
 -                                         (self._mModel['charsetName'], cf,
 -                                          NEGATIVE_SHORTCUT_THRESHOLD))
 -                    self._mState = constants.eNotMe
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        r = 0.01
 -        if self._mTotalSeqs > 0:
 -            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
 -                 / self._mModel['mTypicalPositiveRatio'])
 -            r = r * self._mFreqChar / self._mTotalChar
 -            if r >= 1.0:
 -                r = 0.99
 -        return r
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +import sys +from . 
import constants +from .charsetprober import CharSetProber +from .compat import wrap_ord + +SAMPLE_SIZE = 64 +SB_ENOUGH_REL_THRESHOLD = 1024 +POSITIVE_SHORTCUT_THRESHOLD = 0.95 +NEGATIVE_SHORTCUT_THRESHOLD = 0.05 +SYMBOL_CAT_ORDER = 250 +NUMBER_OF_SEQ_CAT = 4 +POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 +#NEGATIVE_CAT = 0 + + +class SingleByteCharSetProber(CharSetProber): +    def __init__(self, model, reversed=False, nameProber=None): +        CharSetProber.__init__(self) +        self._mModel = model +        # TRUE if we need to reverse every pair in the model lookup +        self._mReversed = reversed +        # Optional auxiliary prober for name decision +        self._mNameProber = nameProber +        self.reset() + +    def reset(self): +        CharSetProber.reset(self) +        # char order of last character +        self._mLastOrder = 255 +        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT +        self._mTotalSeqs = 0 +        self._mTotalChar = 0 +        # characters that fall in our sampling range +        self._mFreqChar = 0 + +    def get_charset_name(self): +        if self._mNameProber: +            return self._mNameProber.get_charset_name() +        else: +            return self._mModel['charsetName'] + +    def feed(self, aBuf): +        if not self._mModel['keepEnglishLetter']: +            aBuf = self.filter_without_english_letters(aBuf) +        aLen = len(aBuf) +        if not aLen: +            return self.get_state() +        for c in aBuf: +            order = self._mModel['charToOrderMap'][wrap_ord(c)] +            if order < SYMBOL_CAT_ORDER: +                self._mTotalChar += 1 +            if order < SAMPLE_SIZE: +                self._mFreqChar += 1 +                if self._mLastOrder < SAMPLE_SIZE: +                    self._mTotalSeqs += 1 +                    if not self._mReversed: +                        i = (self._mLastOrder * SAMPLE_SIZE) + order +                        model = self._mModel['precedenceMatrix'][i] +           
         else:  # reverse the order of the letters in the lookup +                        i = (order * SAMPLE_SIZE) + self._mLastOrder +                        model = self._mModel['precedenceMatrix'][i] +                    self._mSeqCounters[model] += 1 +            self._mLastOrder = order + +        if self.get_state() == constants.eDetecting: +            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: +                cf = self.get_confidence() +                if cf > POSITIVE_SHORTCUT_THRESHOLD: +                    if constants._debug: +                        sys.stderr.write('%s confidence = %s, we have a' +                                         'winner\n' % +                                         (self._mModel['charsetName'], cf)) +                    self._mState = constants.eFoundIt +                elif cf < NEGATIVE_SHORTCUT_THRESHOLD: +                    if constants._debug: +                        sys.stderr.write('%s confidence = %s, below negative' +                                         'shortcut threshhold %s\n' % +                                         (self._mModel['charsetName'], cf, +                                          NEGATIVE_SHORTCUT_THRESHOLD)) +                    self._mState = constants.eNotMe + +        return self.get_state() + +    def get_confidence(self): +        r = 0.01 +        if self._mTotalSeqs > 0: +            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs +                 / self._mModel['mTypicalPositiveRatio']) +            r = r * self._mFreqChar / self._mTotalChar +            if r >= 1.0: +                r = 0.99 +        return r diff --git a/requests/packages/charade/sbcsgroupprober.py b/requests/packages/chardet/sbcsgroupprober.py index b224814..1b6196c 100644 --- a/requests/packages/charade/sbcsgroupprober.py +++ b/requests/packages/chardet/sbcsgroupprober.py @@ -1,69 +1,69 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from .charsetgroupprober import CharSetGroupProber
 -from .sbcharsetprober import SingleByteCharSetProber
 -from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
 -                                Latin5CyrillicModel, MacCyrillicModel,
 -                                Ibm866Model, Ibm855Model)
 -from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
 -from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
 -from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
 -from .langthaimodel import TIS620ThaiModel
 -from .langhebrewmodel import Win1255HebrewModel
 -from .hebrewprober import HebrewProber
 -
 -
 -class SBCSGroupProber(CharSetGroupProber):
 -    def __init__(self):
 -        CharSetGroupProber.__init__(self)
 -        self._mProbers = [
 -            SingleByteCharSetProber(Win1251CyrillicModel),
 -            SingleByteCharSetProber(Koi8rModel),
 -            SingleByteCharSetProber(Latin5CyrillicModel),
 -            SingleByteCharSetProber(MacCyrillicModel),
 -            SingleByteCharSetProber(Ibm866Model),
 -            SingleByteCharSetProber(Ibm855Model),
 -            SingleByteCharSetProber(Latin7GreekModel),
 -            SingleByteCharSetProber(Win1253GreekModel),
 -            SingleByteCharSetProber(Latin5BulgarianModel),
 -            SingleByteCharSetProber(Win1251BulgarianModel),
 -            SingleByteCharSetProber(Latin2HungarianModel),
 -            SingleByteCharSetProber(Win1250HungarianModel),
 -            SingleByteCharSetProber(TIS620ThaiModel),
 -        ]
 -        hebrewProber = HebrewProber()
 -        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
 -                                                      False, hebrewProber)
 -        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
 -                                                     hebrewProber)
 -        hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
 -        self._mProbers.extend([hebrewProber, logicalHebrewProber,
 -                               visualHebrewProber])
 -
 -        self.reset()
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, +                                Latin5CyrillicModel, MacCyrillicModel, +                                Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber + + +class SBCSGroupProber(CharSetGroupProber): +    def __init__(self): +        
CharSetGroupProber.__init__(self) +        self._mProbers = [ +            SingleByteCharSetProber(Win1251CyrillicModel), +            SingleByteCharSetProber(Koi8rModel), +            SingleByteCharSetProber(Latin5CyrillicModel), +            SingleByteCharSetProber(MacCyrillicModel), +            SingleByteCharSetProber(Ibm866Model), +            SingleByteCharSetProber(Ibm855Model), +            SingleByteCharSetProber(Latin7GreekModel), +            SingleByteCharSetProber(Win1253GreekModel), +            SingleByteCharSetProber(Latin5BulgarianModel), +            SingleByteCharSetProber(Win1251BulgarianModel), +            SingleByteCharSetProber(Latin2HungarianModel), +            SingleByteCharSetProber(Win1250HungarianModel), +            SingleByteCharSetProber(TIS620ThaiModel), +        ] +        hebrewProber = HebrewProber() +        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, +                                                      False, hebrewProber) +        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, +                                                     hebrewProber) +        hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) +        self._mProbers.extend([hebrewProber, logicalHebrewProber, +                               visualHebrewProber]) + +        self.reset() diff --git a/requests/packages/charade/sjisprober.py b/requests/packages/chardet/sjisprober.py index 9bb0cdc..b173614 100644 --- a/requests/packages/charade/sjisprober.py +++ b/requests/packages/chardet/sjisprober.py @@ -1,91 +1,91 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -import sys
 -from .mbcharsetprober import MultiByteCharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .chardistribution import SJISDistributionAnalysis
 -from .jpcntx import SJISContextAnalysis
 -from .mbcssm import SJISSMModel
 -from . import constants
 -
 -
 -class SJISProber(MultiByteCharSetProber):
 -    def __init__(self):
 -        MultiByteCharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(SJISSMModel)
 -        self._mDistributionAnalyzer = SJISDistributionAnalysis()
 -        self._mContextAnalyzer = SJISContextAnalysis()
 -        self.reset()
 -
 -    def reset(self):
 -        MultiByteCharSetProber.reset(self)
 -        self._mContextAnalyzer.reset()
 -
 -    def get_charset_name(self):
 -        return "SHIFT_JIS"
 -
 -    def feed(self, aBuf):
 -        aLen = len(aBuf)
 -        for i in range(0, aLen):
 -            codingState = self._mCodingSM.next_state(aBuf[i])
 -            if codingState == constants.eError:
 -                if constants._debug:
 -                    sys.stderr.write(self.get_charset_name()
 -                                     + ' prober hit error at byte ' + str(i)
 -                                     + '\n')
 -                self._mState = constants.eNotMe
 -                break
 -            elif codingState == constants.eItsMe:
 -                self._mState = constants.eFoundIt
 -                break
 -            elif codingState == constants.eStart:
 -                charLen = self._mCodingSM.get_current_charlen()
 -                if i == 0:
 -                    self._mLastChar[1] = aBuf[0]
 -                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
 -                                                charLen)
 -                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
 -                else:
 -                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
 -                                                     - charLen], charLen)
 -                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
 -                                                     charLen)
 -
 -        self._mLastChar[0] = aBuf[aLen - 1]
 -
 -        if self.get_state() == constants.eDetecting:
 -            if (self._mContextAnalyzer.got_enough_data() and
 -               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
 -                self._mState = constants.eFoundIt
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        contxtCf = self._mContextAnalyzer.get_confidence()
 -        distribCf = self._mDistributionAnalyzer.get_confidence()
 -        return max(contxtCf, distribCf)
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +import sys +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import SJISDistributionAnalysis +from .jpcntx import SJISContextAnalysis +from .mbcssm import SJISSMModel +from . 
import constants + + +class SJISProber(MultiByteCharSetProber): +    def __init__(self): +        MultiByteCharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(SJISSMModel) +        self._mDistributionAnalyzer = SJISDistributionAnalysis() +        self._mContextAnalyzer = SJISContextAnalysis() +        self.reset() + +    def reset(self): +        MultiByteCharSetProber.reset(self) +        self._mContextAnalyzer.reset() + +    def get_charset_name(self): +        return "SHIFT_JIS" + +    def feed(self, aBuf): +        aLen = len(aBuf) +        for i in range(0, aLen): +            codingState = self._mCodingSM.next_state(aBuf[i]) +            if codingState == constants.eError: +                if constants._debug: +                    sys.stderr.write(self.get_charset_name() +                                     + ' prober hit error at byte ' + str(i) +                                     + '\n') +                self._mState = constants.eNotMe +                break +            elif codingState == constants.eItsMe: +                self._mState = constants.eFoundIt +                break +            elif codingState == constants.eStart: +                charLen = self._mCodingSM.get_current_charlen() +                if i == 0: +                    self._mLastChar[1] = aBuf[0] +                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], +                                                charLen) +                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen) +                else: +                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 +                                                     - charLen], charLen) +                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], +                                                     charLen) + +        self._mLastChar[0] = aBuf[aLen - 1] + +        if self.get_state() == constants.eDetecting: +            if 
(self._mContextAnalyzer.got_enough_data() and +               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): +                self._mState = constants.eFoundIt + +        return self.get_state() + +    def get_confidence(self): +        contxtCf = self._mContextAnalyzer.get_confidence() +        distribCf = self._mDistributionAnalyzer.get_confidence() +        return max(contxtCf, distribCf) diff --git a/requests/packages/charade/universaldetector.py b/requests/packages/chardet/universaldetector.py index 6175bfb..9a03ad3 100644 --- a/requests/packages/charade/universaldetector.py +++ b/requests/packages/chardet/universaldetector.py @@ -1,172 +1,170 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is Mozilla Universal charset detector code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 2001
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#   Shy Shalom - original C code
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from . import constants
 -import sys
 -import codecs
 -from .latin1prober import Latin1Prober  # windows-1252
 -from .mbcsgroupprober import MBCSGroupProber  # multi-byte character sets
 -from .sbcsgroupprober import SBCSGroupProber  # single-byte character sets
 -from .escprober import EscCharSetProber  # ISO-2122, etc.
 -import re
 -
 -MINIMUM_THRESHOLD = 0.20
 -ePureAscii = 0
 -eEscAscii = 1
 -eHighbyte = 2
 -
 -
 -class UniversalDetector:
 -    def __init__(self):
 -        self._highBitDetector = re.compile(b'[\x80-\xFF]')
 -        self._escDetector = re.compile(b'(\033|~{)')
 -        self._mEscCharSetProber = None
 -        self._mCharSetProbers = []
 -        self.reset()
 -
 -    def reset(self):
 -        self.result = {'encoding': None, 'confidence': 0.0}
 -        self.done = False
 -        self._mStart = True
 -        self._mGotData = False
 -        self._mInputState = ePureAscii
 -        self._mLastChar = b''
 -        if self._mEscCharSetProber:
 -            self._mEscCharSetProber.reset()
 -        for prober in self._mCharSetProbers:
 -            prober.reset()
 -
 -    def feed(self, aBuf):
 -        if self.done:
 -            return
 -
 -        aLen = len(aBuf)
 -        if not aLen:
 -            return
 -
 -        if not self._mGotData:
 -            # If the data starts with BOM, we know it is UTF
 -            if aBuf[:3] == codecs.BOM:
 -                # EF BB BF  UTF-8 with BOM
 -                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
 -            elif aBuf[:4] == codecs.BOM_UTF32_LE:
 -                # FF FE 00 00  UTF-32, little-endian BOM
 -                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
 -            elif aBuf[:4] == codecs.BOM_UTF32_BE:
 -                # 00 00 FE FF  UTF-32, big-endian BOM
 -                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
 -            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
 -                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
 -                self.result = {
 -                    'encoding': "X-ISO-10646-UCS-4-3412",
 -                    'confidence': 1.0
 -                }
 -            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
 -                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
 -                self.result = {
 -                    'encoding': "X-ISO-10646-UCS-4-2143",
 -                    'confidence': 1.0
 -                }
 -            elif aBuf[:2] == codecs.BOM_LE:
 -                # FF FE  UTF-16, little endian BOM
 -                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
 -            elif aBuf[:2] == codecs.BOM_BE:
 -                # FE FF  UTF-16, big endian BOM
 -                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
 -
 -        self._mGotData = True
 -        if self.result['encoding'] and (self.result['confidence'] > 0.0):
 -            self.done = True
 -            return
 -
 -        if self._mInputState == ePureAscii:
 -            if self._highBitDetector.search(aBuf):
 -                self._mInputState = eHighbyte
 -            elif ((self._mInputState == ePureAscii) and
 -                    self._escDetector.search(self._mLastChar + aBuf)):
 -                self._mInputState = eEscAscii
 -
 -        self._mLastChar = aBuf[-1:]
 -
 -        if self._mInputState == eEscAscii:
 -            if not self._mEscCharSetProber:
 -                self._mEscCharSetProber = EscCharSetProber()
 -            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
 -                self.result = {
 -                    'encoding': self._mEscCharSetProber.get_charset_name(),
 -                    'confidence': self._mEscCharSetProber.get_confidence()
 -                }
 -                self.done = True
 -        elif self._mInputState == eHighbyte:
 -            if not self._mCharSetProbers:
 -                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
 -                                         Latin1Prober()]
 -            for prober in self._mCharSetProbers:
 -                if prober.feed(aBuf) == constants.eFoundIt:
 -                    self.result = {'encoding': prober.get_charset_name(),
 -                                   'confidence': prober.get_confidence()}
 -                    self.done = True
 -                    break
 -
 -    def close(self):
 -        if self.done:
 -            return
 -        if not self._mGotData:
 -            if constants._debug:
 -                sys.stderr.write('no data received!\n')
 -            return
 -        self.done = True
 -
 -        if self._mInputState == ePureAscii:
 -            self.result = {'encoding': 'ascii', 'confidence': 1.0}
 -            return self.result
 -
 -        if self._mInputState == eHighbyte:
 -            proberConfidence = None
 -            maxProberConfidence = 0.0
 -            maxProber = None
 -            for prober in self._mCharSetProbers:
 -                if not prober:
 -                    continue
 -                proberConfidence = prober.get_confidence()
 -                if proberConfidence > maxProberConfidence:
 -                    maxProberConfidence = proberConfidence
 -                    maxProber = prober
 -            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
 -                self.result = {'encoding': maxProber.get_charset_name(),
 -                               'confidence': maxProber.get_confidence()}
 -                return self.result
 -
 -        if constants._debug:
 -            sys.stderr.write('no probers hit minimum threshhold\n')
 -            for prober in self._mCharSetProbers[0].mProbers:
 -                if not prober:
 -                    continue
 -                sys.stderr.write('%s confidence = %s\n' %
 -                                 (prober.get_charset_name(),
 -                                  prober.get_confidence()))
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +#   Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from . import constants +import sys +import codecs +from .latin1prober import Latin1Prober  # windows-1252 +from .mbcsgroupprober import MBCSGroupProber  # multi-byte character sets +from .sbcsgroupprober import SBCSGroupProber  # single-byte character sets +from .escprober import EscCharSetProber  # ISO-2122, etc. 
+import re + +MINIMUM_THRESHOLD = 0.20 +ePureAscii = 0 +eEscAscii = 1 +eHighbyte = 2 + + +class UniversalDetector: +    def __init__(self): +        self._highBitDetector = re.compile(b'[\x80-\xFF]') +        self._escDetector = re.compile(b'(\033|~{)') +        self._mEscCharSetProber = None +        self._mCharSetProbers = [] +        self.reset() + +    def reset(self): +        self.result = {'encoding': None, 'confidence': 0.0} +        self.done = False +        self._mStart = True +        self._mGotData = False +        self._mInputState = ePureAscii +        self._mLastChar = b'' +        if self._mEscCharSetProber: +            self._mEscCharSetProber.reset() +        for prober in self._mCharSetProbers: +            prober.reset() + +    def feed(self, aBuf): +        if self.done: +            return + +        aLen = len(aBuf) +        if not aLen: +            return + +        if not self._mGotData: +            # If the data starts with BOM, we know it is UTF +            if aBuf[:3] == codecs.BOM: +                # EF BB BF  UTF-8 with BOM +                self.result = {'encoding': "UTF-8", 'confidence': 1.0} +            elif aBuf[:4] == codecs.BOM_UTF32_LE: +                # FF FE 00 00  UTF-32, little-endian BOM +                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} +            elif aBuf[:4] == codecs.BOM_UTF32_BE: +                # 00 00 FE FF  UTF-32, big-endian BOM +                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} +            elif aBuf[:4] == b'\xFE\xFF\x00\x00': +                # FE FF 00 00  UCS-4, unusual octet order BOM (3412) +                self.result = { +                    'encoding': "X-ISO-10646-UCS-4-3412", +                    'confidence': 1.0 +                } +            elif aBuf[:4] == b'\x00\x00\xFF\xFE': +                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143) +                self.result = { +                    'encoding': "X-ISO-10646-UCS-4-2143", +    
                'confidence': 1.0 +                } +            elif aBuf[:2] == codecs.BOM_LE: +                # FF FE  UTF-16, little endian BOM +                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} +            elif aBuf[:2] == codecs.BOM_BE: +                # FE FF  UTF-16, big endian BOM +                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + +        self._mGotData = True +        if self.result['encoding'] and (self.result['confidence'] > 0.0): +            self.done = True +            return + +        if self._mInputState == ePureAscii: +            if self._highBitDetector.search(aBuf): +                self._mInputState = eHighbyte +            elif ((self._mInputState == ePureAscii) and +                    self._escDetector.search(self._mLastChar + aBuf)): +                self._mInputState = eEscAscii + +        self._mLastChar = aBuf[-1:] + +        if self._mInputState == eEscAscii: +            if not self._mEscCharSetProber: +                self._mEscCharSetProber = EscCharSetProber() +            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: +                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), +                               'confidence': self._mEscCharSetProber.get_confidence()} +                self.done = True +        elif self._mInputState == eHighbyte: +            if not self._mCharSetProbers: +                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), +                                         Latin1Prober()] +            for prober in self._mCharSetProbers: +                if prober.feed(aBuf) == constants.eFoundIt: +                    self.result = {'encoding': prober.get_charset_name(), +                                   'confidence': prober.get_confidence()} +                    self.done = True +                    break + +    def close(self): +        if self.done: +            return +        if not 
self._mGotData: +            if constants._debug: +                sys.stderr.write('no data received!\n') +            return +        self.done = True + +        if self._mInputState == ePureAscii: +            self.result = {'encoding': 'ascii', 'confidence': 1.0} +            return self.result + +        if self._mInputState == eHighbyte: +            proberConfidence = None +            maxProberConfidence = 0.0 +            maxProber = None +            for prober in self._mCharSetProbers: +                if not prober: +                    continue +                proberConfidence = prober.get_confidence() +                if proberConfidence > maxProberConfidence: +                    maxProberConfidence = proberConfidence +                    maxProber = prober +            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): +                self.result = {'encoding': maxProber.get_charset_name(), +                               'confidence': maxProber.get_confidence()} +                return self.result + +        if constants._debug: +            sys.stderr.write('no probers hit minimum threshhold\n') +            for prober in self._mCharSetProbers[0].mProbers: +                if not prober: +                    continue +                sys.stderr.write('%s confidence = %s\n' % +                                 (prober.get_charset_name(), +                                  prober.get_confidence())) diff --git a/requests/packages/charade/utf8prober.py b/requests/packages/chardet/utf8prober.py index 72c8d3d..1c0bb5d 100644 --- a/requests/packages/charade/utf8prober.py +++ b/requests/packages/chardet/utf8prober.py @@ -1,76 +1,76 @@ -######################## BEGIN LICENSE BLOCK ########################
 -# The Original Code is mozilla.org code.
 -#
 -# The Initial Developer of the Original Code is
 -# Netscape Communications Corporation.
 -# Portions created by the Initial Developer are Copyright (C) 1998
 -# the Initial Developer. All Rights Reserved.
 -#
 -# Contributor(s):
 -#   Mark Pilgrim - port to Python
 -#
 -# This library is free software; you can redistribute it and/or
 -# modify it under the terms of the GNU Lesser General Public
 -# License as published by the Free Software Foundation; either
 -# version 2.1 of the License, or (at your option) any later version.
 -#
 -# This library is distributed in the hope that it will be useful,
 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 -# Lesser General Public License for more details.
 -#
 -# You should have received a copy of the GNU Lesser General Public
 -# License along with this library; if not, write to the Free Software
 -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 -# 02110-1301  USA
 -######################### END LICENSE BLOCK #########################
 -
 -from . import constants
 -from .charsetprober import CharSetProber
 -from .codingstatemachine import CodingStateMachine
 -from .mbcssm import UTF8SMModel
 -
 -ONE_CHAR_PROB = 0.5
 -
 -
 -class UTF8Prober(CharSetProber):
 -    def __init__(self):
 -        CharSetProber.__init__(self)
 -        self._mCodingSM = CodingStateMachine(UTF8SMModel)
 -        self.reset()
 -
 -    def reset(self):
 -        CharSetProber.reset(self)
 -        self._mCodingSM.reset()
 -        self._mNumOfMBChar = 0
 -
 -    def get_charset_name(self):
 -        return "utf-8"
 -
 -    def feed(self, aBuf):
 -        for c in aBuf:
 -            codingState = self._mCodingSM.next_state(c)
 -            if codingState == constants.eError:
 -                self._mState = constants.eNotMe
 -                break
 -            elif codingState == constants.eItsMe:
 -                self._mState = constants.eFoundIt
 -                break
 -            elif codingState == constants.eStart:
 -                if self._mCodingSM.get_current_charlen() >= 2:
 -                    self._mNumOfMBChar += 1
 -
 -        if self.get_state() == constants.eDetecting:
 -            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
 -                self._mState = constants.eFoundIt
 -
 -        return self.get_state()
 -
 -    def get_confidence(self):
 -        unlike = 0.99
 -        if self._mNumOfMBChar < 6:
 -            for i in range(0, self._mNumOfMBChar):
 -                unlike = unlike * ONE_CHAR_PROB
 -            return 1.0 - unlike
 -        else:
 -            return unlike
 +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +#   Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301  USA +######################### END LICENSE BLOCK ######################### + +from . 
import constants +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .mbcssm import UTF8SMModel + +ONE_CHAR_PROB = 0.5 + + +class UTF8Prober(CharSetProber): +    def __init__(self): +        CharSetProber.__init__(self) +        self._mCodingSM = CodingStateMachine(UTF8SMModel) +        self.reset() + +    def reset(self): +        CharSetProber.reset(self) +        self._mCodingSM.reset() +        self._mNumOfMBChar = 0 + +    def get_charset_name(self): +        return "utf-8" + +    def feed(self, aBuf): +        for c in aBuf: +            codingState = self._mCodingSM.next_state(c) +            if codingState == constants.eError: +                self._mState = constants.eNotMe +                break +            elif codingState == constants.eItsMe: +                self._mState = constants.eFoundIt +                break +            elif codingState == constants.eStart: +                if self._mCodingSM.get_current_charlen() >= 2: +                    self._mNumOfMBChar += 1 + +        if self.get_state() == constants.eDetecting: +            if self.get_confidence() > constants.SHORTCUT_THRESHOLD: +                self._mState = constants.eFoundIt + +        return self.get_state() + +    def get_confidence(self): +        unlike = 0.99 +        if self._mNumOfMBChar < 6: +            for i in range(0, self._mNumOfMBChar): +                unlike = unlike * ONE_CHAR_PROB +            return 1.0 - unlike +        else: +            return unlike diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 282b8d5..5907b0d 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -5,7 +5,16 @@  # the MIT License: http://www.opensource.org/licenses/mit-license.php  from collections import MutableMapping -from threading import RLock +try: +    from threading import RLock +except ImportError: # Platform-specific: No 
threads available +    class RLock: +        def __enter__(self): +            pass + +        def __exit__(self, exc_type, exc_value, traceback): +            pass +  try: # Python 2.7+      from collections import OrderedDict diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py new file mode 100644 index 0000000..2124774 --- /dev/null +++ b/requests/packages/urllib3/connection.py @@ -0,0 +1,178 @@ +# urllib3/connection.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import socket +from socket import timeout as SocketTimeout + +try: # Python 3 +    from http.client import HTTPConnection as _HTTPConnection, HTTPException +except ImportError: +    from httplib import HTTPConnection as _HTTPConnection, HTTPException + +class DummyConnection(object): +    "Used to detect a failed ConnectionCls import." +    pass + +try: # Compiled with SSL? +    ssl = None +    HTTPSConnection = DummyConnection + +    class BaseSSLError(BaseException): +        pass + +    try: # Python 3 +        from http.client import HTTPSConnection as _HTTPSConnection +    except ImportError: +        from httplib import HTTPSConnection as _HTTPSConnection + +    import ssl +    BaseSSLError = ssl.SSLError + +except (ImportError, AttributeError): # Platform-specific: No SSL. +    pass + +from .exceptions import ( +    ConnectTimeoutError, +) +from .packages.ssl_match_hostname import match_hostname +from .util import ( +    assert_fingerprint, +    resolve_cert_reqs, +    resolve_ssl_version, +    ssl_wrap_socket, +) + + +port_by_scheme = { +    'http': 80, +    'https': 443, +} + + +class HTTPConnection(_HTTPConnection, object): +    default_port = port_by_scheme['http'] + +    # By default, disable Nagle's Algorithm. 
+    tcp_nodelay = 1 + +    def _new_conn(self): +        """ Establish a socket connection and set nodelay settings on it + +        :return: a new socket connection +        """ +        try: +            conn = socket.create_connection( +                (self.host, self.port), +                self.timeout, +                self.source_address, +            ) +        except AttributeError: # Python 2.6 +            conn = socket.create_connection( +                (self.host, self.port), +                self.timeout, +            ) +        conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, +                        self.tcp_nodelay) +        return conn + +    def _prepare_conn(self, conn): +        self.sock = conn +        if self._tunnel_host: +            # TODO: Fix tunnel so it doesn't depend on self.sock state. +            self._tunnel() + +    def connect(self): +        conn = self._new_conn() +        self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): +    default_port = port_by_scheme['https'] + +    def __init__(self, host, port=None, key_file=None, cert_file=None, +                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, +                 source_address=None): +        try: +            HTTPConnection.__init__(self, host, port, strict, timeout, source_address) +        except TypeError: # Python 2.6 +            HTTPConnection.__init__(self, host, port, strict, timeout) +        self.key_file = key_file +        self.cert_file = cert_file + +    def connect(self): +        conn = self._new_conn() +        self._prepare_conn(conn) +        self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + +class VerifiedHTTPSConnection(HTTPSConnection): +    """ +    Based on httplib.HTTPSConnection but wraps the socket with +    SSL certification. 
+    """ +    cert_reqs = None +    ca_certs = None +    ssl_version = None + +    def set_cert(self, key_file=None, cert_file=None, +                 cert_reqs=None, ca_certs=None, +                 assert_hostname=None, assert_fingerprint=None): + +        self.key_file = key_file +        self.cert_file = cert_file +        self.cert_reqs = cert_reqs +        self.ca_certs = ca_certs +        self.assert_hostname = assert_hostname +        self.assert_fingerprint = assert_fingerprint + +    def connect(self): +        # Add certificate verification +        try: +            sock = socket.create_connection( +                address=(self.host, self.port), +                timeout=self.timeout, +            ) +        except SocketTimeout: +            raise ConnectTimeoutError( +                self, "Connection to %s timed out. (connect timeout=%s)" % +                (self.host, self.timeout)) + +        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, +                        self.tcp_nodelay) + +        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) +        resolved_ssl_version = resolve_ssl_version(self.ssl_version) + +        # the _tunnel_host attribute was added in python 2.6.3 (via +        # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do +        # not have them. +        if getattr(self, '_tunnel_host', None): +            self.sock = sock +            # Calls self._set_hostport(), so self.host is +            # self._tunnel_host below. 
+            self._tunnel() + +        # Wrap socket using verification with the root certs in +        # trusted_root_certs +        self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, +                                    cert_reqs=resolved_cert_reqs, +                                    ca_certs=self.ca_certs, +                                    server_hostname=self.host, +                                    ssl_version=resolved_ssl_version) + +        if resolved_cert_reqs != ssl.CERT_NONE: +            if self.assert_fingerprint: +                assert_fingerprint(self.sock.getpeercert(binary_form=True), +                                   self.assert_fingerprint) +            elif self.assert_hostname is not False: +                match_hostname(self.sock.getpeercert(), +                               self.assert_hostname or self.host) + + +if ssl: +    # Make a copy for testing. +    UnverifiedHTTPSConnection = HTTPSConnection +    HTTPSConnection = VerifiedHTTPSConnection diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 691d4e2..243d700 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -11,39 +11,12 @@ from socket import error as SocketError, timeout as SocketTimeout  import socket  try: # Python 3 -    from http.client import HTTPConnection, HTTPException -    from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: -    from httplib import HTTPConnection, HTTPException -    from httplib import HTTP_PORT, HTTPS_PORT - -try: # Python 3      from queue import LifoQueue, Empty, Full  except ImportError:      from Queue import LifoQueue, Empty, Full      import Queue as _  # Platform-specific: Windows -try: # Compiled with SSL? 
-    HTTPSConnection = object - -    class BaseSSLError(BaseException): -        pass - -    ssl = None - -    try: # Python 3 -        from http.client import HTTPSConnection -    except ImportError: -        from httplib import HTTPSConnection - -    import ssl -    BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. -    pass - -  from .exceptions import (      ClosedPoolError,      ConnectTimeoutError, @@ -51,94 +24,34 @@ from .exceptions import (      HostChangedError,      MaxRetryError,      SSLError, +    TimeoutError,      ReadTimeoutError,      ProxyError,  ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError  from .packages import six +from .connection import ( +    port_by_scheme, +    DummyConnection, +    HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, +    HTTPException, BaseSSLError, +)  from .request import RequestMethods  from .response import HTTPResponse  from .util import (      assert_fingerprint,      get_host,      is_connection_dropped, -    resolve_cert_reqs, -    resolve_ssl_version, -    ssl_wrap_socket,      Timeout,  ) +  xrange = six.moves.xrange  log = logging.getLogger(__name__)  _Default = object() -port_by_scheme = { -    'http': HTTP_PORT, -    'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): -    """ -    Based on httplib.HTTPSConnection but wraps the socket with -    SSL certification. 
-    """ -    cert_reqs = None -    ca_certs = None -    ssl_version = None - -    def set_cert(self, key_file=None, cert_file=None, -                 cert_reqs=None, ca_certs=None, -                 assert_hostname=None, assert_fingerprint=None): - -        self.key_file = key_file -        self.cert_file = cert_file -        self.cert_reqs = cert_reqs -        self.ca_certs = ca_certs -        self.assert_hostname = assert_hostname -        self.assert_fingerprint = assert_fingerprint - -    def connect(self): -        # Add certificate verification -        try: -            sock = socket.create_connection( -                address=(self.host, self.port), -                timeout=self.timeout) -        except SocketTimeout: -                raise ConnectTimeoutError( -                    self, "Connection to %s timed out. (connect timeout=%s)" % -                    (self.host, self.timeout)) - -        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) -        resolved_ssl_version = resolve_ssl_version(self.ssl_version) - -        if self._tunnel_host: -            self.sock = sock -            # Calls self._set_hostport(), so self.host is -            # self._tunnel_host below. 
-            self._tunnel() - -        # Wrap socket using verification with the root certs in -        # trusted_root_certs -        self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, -                                    cert_reqs=resolved_cert_reqs, -                                    ca_certs=self.ca_certs, -                                    server_hostname=self.host, -                                    ssl_version=resolved_ssl_version) - -        if resolved_cert_reqs != ssl.CERT_NONE: -            if self.assert_fingerprint: -                assert_fingerprint(self.sock.getpeercert(binary_form=True), -                                   self.assert_fingerprint) -            elif self.assert_hostname is not False: -                match_hostname(self.sock.getpeercert(), -                               self.assert_hostname or self.host) - -  ## Pool objects  class ConnectionPool(object): @@ -218,6 +131,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):      """      scheme = 'http' +    ConnectionCls = HTTPConnection      def __init__(self, host, port=None, strict=False,                   timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +164,24 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):      def _new_conn(self):          """ -        Return a fresh :class:`httplib.HTTPConnection`. +        Return a fresh :class:`HTTPConnection`.          
"""          self.num_connections += 1          log.info("Starting new HTTP connection (%d): %s" %                   (self.num_connections, self.host)) +          extra_params = {}          if not six.PY3:  # Python 2              extra_params['strict'] = self.strict -        return HTTPConnection(host=self.host, port=self.port, -                              timeout=self.timeout.connect_timeout, -                              **extra_params) - +        conn = self.ConnectionCls(host=self.host, port=self.port, +                                  timeout=self.timeout.connect_timeout, +                                  **extra_params) +        if self.proxy is not None: +            # Enable Nagle's algorithm for proxies, to avoid packet +            # fragmentation. +            conn.tcp_nodelay = 0 +        return conn      def _get_conn(self, timeout=None):          """ @@ -341,7 +260,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):      def _make_request(self, conn, method, url, timeout=_Default,                        **httplib_request_kw):          """ -        Perform a request on a given httplib connection object taken from our +        Perform a request on a given urllib connection object taken from our          pool.          :param conn: @@ -362,7 +281,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              timeout_obj.start_connect()              conn.timeout = timeout_obj.connect_timeout              # conn.request() calls httplib.*.request, not the method in -            # request.py. It also calls makefile (recv) on the socket +            # urllib3.request. It also calls makefile (recv) on the socket.              
conn.request(method, url, **httplib_request_kw)          except SocketTimeout:              raise ConnectTimeoutError( @@ -371,11 +290,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          # Reset the timeout for the recv() on the socket          read_timeout = timeout_obj.read_timeout -        log.debug("Setting read timeout to %s" % read_timeout) +          # App Engine doesn't have a sock attr -        if hasattr(conn, 'sock') and \ -            read_timeout is not None and \ -            read_timeout is not Timeout.DEFAULT_TIMEOUT: +        if hasattr(conn, 'sock'):              # In Python 3 socket.py will catch EAGAIN and return None when you              # try and read into the file pointer created by http.client, which              # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +302,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):                  raise ReadTimeoutError(                      self, url,                      "Read timed out. (read timeout=%s)" % read_timeout) -            conn.sock.settimeout(read_timeout) +            if read_timeout is Timeout.DEFAULT_TIMEOUT: +                conn.sock.settimeout(socket.getdefaulttimeout()) +            else: # None or a value +                conn.sock.settimeout(read_timeout)          # Receive the response from the server          try: @@ -397,6 +317,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              raise ReadTimeoutError(                  self, url, "Read timed out. (read timeout=%s)" % read_timeout) +        except BaseSSLError as e: +            # Catch possible read timeouts thrown as SSL errors. If not the +            # case, rethrow the original. 
We need to do this because of: +            # http://bugs.python.org/issue10272 +            if 'timed out' in str(e) or \ +               'did not complete (read)' in str(e):  # Python 2.6 +                raise ReadTimeoutError(self, url, "Read timed out.") + +            raise +          except SocketError as e: # Platform-specific: Python 2              # See the above comment about EAGAIN in Python 3. In Python 2 we              # have to specifically catch it and throw the timeout error @@ -404,8 +334,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):                  raise ReadTimeoutError(                      self, url,                      "Read timed out. (read timeout=%s)" % read_timeout) -            raise +            raise          # AppEngine doesn't have a version attr.          http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +371,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          # TODO: Add optional support for socket.gethostbyname checking.          scheme, host, port = get_host(url) +        # Use explicit default port for comparison when none is given          if self.port and not port: -            # Use explicit default port for comparison when none is given.              port = port_by_scheme.get(scheme) +        elif not self.port and port == port_by_scheme.get(scheme): +            port = None          return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -531,6 +463,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          conn = None +        # Merge the proxy headers. Only do this in HTTP. We have to copy the +        # headers dict so we can safely change it without those changes being +        # reflected in anyone else's copy. 
+        if self.scheme == 'http': +            headers = headers.copy() +            headers.update(self.proxy_headers) +          try:              # Request a connection from the queue              conn = self._get_conn(timeout=pool_timeout) @@ -559,36 +498,36 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          except Empty:              # Timed out by queue -            raise ReadTimeoutError( -                self, url, "Read timed out, no pool connections are available.") - -        except SocketTimeout: -            # Timed out by socket -            raise ReadTimeoutError(self, url, "Read timed out.") +            raise EmptyPoolError(self, "No pool connections are available.")          except BaseSSLError as e: -            # SSL certificate error -            if 'timed out' in str(e) or \ -               'did not complete (read)' in str(e): # Platform-specific: Python 2.6 -                raise ReadTimeoutError(self, url, "Read timed out.")              raise SSLError(e)          except CertificateError as e:              # Name mismatch              raise SSLError(e) -        except (HTTPException, SocketError) as e: -            if isinstance(e, SocketError) and self.proxy is not None: -                raise ProxyError('Cannot connect to proxy. ' -                                 'Socket error: %s.' % e) +        except TimeoutError as e: +            # Connection broken, discard. +            conn = None +            # Save the error off for retry logic. +            err = e +            if retries == 0: +                raise + +        except (HTTPException, SocketError) as e:              # Connection broken, discard. It will be replaced next _get_conn().              
conn = None              # This is necessary so we can access e below              err = e              if retries == 0: -                raise MaxRetryError(self, url, e) +                if isinstance(e, SocketError) and self.proxy is not None: +                    raise ProxyError('Cannot connect to proxy. ' +                                     'Socket error: %s.' % e) +                else: +                    raise MaxRetryError(self, url, e)          finally:              if release_conn: @@ -626,7 +565,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):      When Python is compiled with the :mod:`ssl` module, then      :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, -    instead of :class:`httplib.HTTPSConnection`. +    instead of :class:`.HTTPSConnection`.      :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,      ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +578,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):      """      scheme = 'https' +    ConnectionCls = HTTPSConnection      def __init__(self, host, port=None,                   strict=False, timeout=None, maxsize=1, @@ -658,33 +598,33 @@ class HTTPSConnectionPool(HTTPConnectionPool):          self.assert_hostname = assert_hostname          self.assert_fingerprint = assert_fingerprint -    def _prepare_conn(self, connection): +    def _prepare_conn(self, conn):          """          Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`          and establish the tunnel if proxy is used.          
""" -        if isinstance(connection, VerifiedHTTPSConnection): -            connection.set_cert(key_file=self.key_file, -                                cert_file=self.cert_file, -                                cert_reqs=self.cert_reqs, -                                ca_certs=self.ca_certs, -                                assert_hostname=self.assert_hostname, -                                assert_fingerprint=self.assert_fingerprint) -            connection.ssl_version = self.ssl_version +        if isinstance(conn, VerifiedHTTPSConnection): +            conn.set_cert(key_file=self.key_file, +                          cert_file=self.cert_file, +                          cert_reqs=self.cert_reqs, +                          ca_certs=self.ca_certs, +                          assert_hostname=self.assert_hostname, +                          assert_fingerprint=self.assert_fingerprint) +            conn.ssl_version = self.ssl_version          if self.proxy is not None:              # Python 2.7+              try: -                set_tunnel = connection.set_tunnel +                set_tunnel = conn.set_tunnel              except AttributeError:  # Platform-specific: Python 2.6 -                set_tunnel = connection._set_tunnel +                set_tunnel = conn._set_tunnel              set_tunnel(self.host, self.port, self.proxy_headers)              # Establish tunnel connection early, because otherwise httplib              # would improperly set Host: header to proxy's IP:port. 
-            connection.connect() +            conn.connect() -        return connection +        return conn      def _new_conn(self):          """ @@ -694,28 +634,30 @@ class HTTPSConnectionPool(HTTPConnectionPool):          log.info("Starting new HTTPS connection (%d): %s"                   % (self.num_connections, self.host)) +        if not self.ConnectionCls or self.ConnectionCls is DummyConnection: +            # Platform-specific: Python without ssl +            raise SSLError("Can't connect to HTTPS URL because the SSL " +                           "module is not available.") +          actual_host = self.host          actual_port = self.port          if self.proxy is not None:              actual_host = self.proxy.host              actual_port = self.proxy.port -        if not ssl:  # Platform-specific: Python compiled without +ssl -            if not HTTPSConnection or HTTPSConnection is object: -                raise SSLError("Can't connect to HTTPS URL because the SSL " -                               "module is not available.") -            connection_class = HTTPSConnection -        else: -            connection_class = VerifiedHTTPSConnection -          extra_params = {}          if not six.PY3:  # Python 2              extra_params['strict'] = self.strict -        connection = connection_class(host=actual_host, port=actual_port, -                                      timeout=self.timeout.connect_timeout, -                                      **extra_params) -        return self._prepare_conn(connection) +        conn = self.ConnectionCls(host=actual_host, port=actual_port, +                                  timeout=self.timeout.connect_timeout, +                                  **extra_params) +        if self.proxy is not None: +            # Enable Nagle's algorithm for proxies, to avoid packet +            # fragmentation. 
+            conn.tcp_nodelay = 0 + +        return self._prepare_conn(conn)  def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index d43bcd6..d9bda15 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2.  This needs the following packages installed: @@ -18,17 +18,37 @@ your application begins using ``urllib3``, like this::  Now you can use :mod:`urllib3` as you normally would, and it will support SNI  when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +compression in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. +    Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 +    EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES +    !MD5 !EXP !PSK !SRP !DSS`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +  '''  from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName  import OpenSSL.SSL  from pyasn1.codec.der import decoder as der_decoder +from pyasn1.type import univ, constraint  from socket import _fileobject  import ssl +import select  from cStringIO import StringIO -from .. import connectionpool +from .. import connection  from .. 
import util  __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -49,26 +69,44 @@ _openssl_verify = {                         + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,  } +# Default SSL/TLS cipher list. +# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ +# configuring-apache-nginx-and-openssl-for-forward-secrecy +DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ +        'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ +        'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' +  orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket  def inject_into_urllib3():      'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' -    connectionpool.ssl_wrap_socket = ssl_wrap_socket +    connection.ssl_wrap_socket = ssl_wrap_socket      util.HAS_SNI = HAS_SNI  def extract_from_urllib3():      'Undo monkey-patching by :func:`inject_into_urllib3`.' -    connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket +    connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket      util.HAS_SNI = orig_util_HAS_SNI  ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): +    '''ASN.1 implementation for subjectAltNames support''' + +    # There is no limit to how many SAN certificates a certificate may have, +    #   however this needs to have some limit so we'll set an arbitrarily high +    #   limit. +    sizeSpec = univ.SequenceOf.sizeSpec + \ +        constraint.ValueSizeConstraint(1, 1024) + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.  
def get_subj_alt_name(peer_cert):      # Search through extensions      dns_name = [] @@ -329,6 +367,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,          except OpenSSL.SSL.Error as e:              raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) +    # Disable TLS compression to mitigate CRIME attack (issue #309) +    OP_NO_COMPRESSION = 0x20000 +    ctx.set_options(OP_NO_COMPRESSION) + +    # Set list of supported ciphersuites. +    ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) +      cnx = OpenSSL.SSL.Connection(ctx, sock)      cnx.set_tlsext_host_name(server_hostname)      cnx.set_connect_state() @@ -336,6 +381,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,          try:              cnx.do_handshake()          except OpenSSL.SSL.WantReadError: +            select.select([sock], [], [])              continue          except OpenSSL.SSL.Error as e:              raise ssl.SSLError('bad handshake', e) diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 4575582..e8b30bd 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields):  def iter_fields(fields):      """ -    Iterate over fields. +    .. deprecated:: 1.6 -    .. deprecated :: +    Iterate over fields. -      The addition of `~urllib3.fields.RequestField` makes this function -      obsolete. Instead, use :func:`iter_field_objects`, which returns -      `~urllib3.fields.RequestField` objects, instead. +    The addition of :class:`~urllib3.fields.RequestField` makes this function +    obsolete. Instead, use :func:`iter_field_objects`, which returns +    :class:`~urllib3.fields.RequestField` objects.      Supports list of (k, v) tuples and dicts. 
-      """      if isinstance(fields, dict):          return ((k, v) for k, v in six.iteritems(fields)) diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2..3aa5b2e 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" +try: +    # Python 3.2+ +    from ssl import CertificateError, match_hostname +except ImportError: +    try: +        # Backport of the function from a pypi module +        from backports.ssl_match_hostname import CertificateError, match_hostname +    except ImportError: +        # Our vendored copy +        from _implementation import CertificateError, match_hostname -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): -    pass - -def _dnsname_match(dn, hostname, max_wildcards=1): -    """Matching according to RFC 6125, section 6.4.3 - -    http://tools.ietf.org/html/rfc6125#section-6.4.3 -    """ -    pats = [] -    if not dn: -        return False - -    parts = dn.split(r'.') -    leftmost = parts[0] - -    wildcards = leftmost.count('*') -    if wildcards > max_wildcards: -        # Issue #17980: avoid denials of service by refusing more -        # than one wildcard per fragment.  A survery of established -        # policy among SSL implementations showed it to be a -        # reasonable choice. -        raise CertificateError( -            "too many wildcards in certificate DNS name: " + repr(dn)) - -    # speed up common case w/o wildcards -    if not wildcards: -        return dn.lower() == hostname.lower() - -    # RFC 6125, section 6.4.3, subitem 1. -    # The client SHOULD NOT attempt to match a presented identifier in which -    # the wildcard character comprises a label other than the left-most label. 
-    if leftmost == '*': -        # When '*' is a fragment by itself, it matches a non-empty dotless -        # fragment. -        pats.append('[^.]+') -    elif leftmost.startswith('xn--') or hostname.startswith('xn--'): -        # RFC 6125, section 6.4.3, subitem 3. -        # The client SHOULD NOT attempt to match a presented identifier -        # where the wildcard character is embedded within an A-label or -        # U-label of an internationalized domain name. -        pats.append(re.escape(leftmost)) -    else: -        # Otherwise, '*' matches any dotless string, e.g. www* -        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - -    # add the remaining fragments, ignore any wildcards -    for frag in parts[1:]: -        pats.append(re.escape(frag)) - -    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) -    return pat.match(hostname) - - -def match_hostname(cert, hostname): -    """Verify that *cert* (in decoded format as returned by -    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125 -    rules are followed, but IP addresses are not accepted for *hostname*. - -    CertificateError is raised on failure. On success, the function -    returns nothing. -    """ -    if not cert: -        raise ValueError("empty or no certificate") -    dnsnames = [] -    san = cert.get('subjectAltName', ()) -    for key, value in san: -        if key == 'DNS': -            if _dnsname_match(value, hostname): -                return -            dnsnames.append(value) -    if not dnsnames: -        # The subject is only checked when there is no dNSName entry -        # in subjectAltName -        for sub in cert.get('subject', ()): -            for key, value in sub: -                # XXX according to RFC 2818, the most specific Common Name -                # must be used. 
-                if key == 'commonName': -                    if _dnsname_match(value, hostname): -                        return -                    dnsnames.append(value) -    if len(dnsnames) > 1: -        raise CertificateError("hostname %r " -            "doesn't match either of %s" -            % (hostname, ', '.join(map(repr, dnsnames)))) -    elif len(dnsnames) == 1: -        raise CertificateError("hostname %r " -            "doesn't match %r" -            % (hostname, dnsnames[0])) -    else: -        raise CertificateError("no appropriate commonName or " -            "subjectAltName fields were found") +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py b/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..52f4287 --- /dev/null +++ b/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib.   http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): +    pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): +    """Matching according to RFC 6125, section 6.4.3 + +    http://tools.ietf.org/html/rfc6125#section-6.4.3 +    """ +    pats = [] +    if not dn: +        return False + +    # Ported from python3-syntax: +    # leftmost, *remainder = dn.split(r'.') +    parts = dn.split(r'.') +    leftmost = parts[0] +    remainder = parts[1:] + +    wildcards = leftmost.count('*') +    if wildcards > max_wildcards: +        # Issue #17980: avoid denials of service by refusing more +        # than one wildcard per fragment.  
A survey of established +        # policy among SSL implementations showed it to be a +        # reasonable choice. +        raise CertificateError( +            "too many wildcards in certificate DNS name: " + repr(dn)) + +    # speed up common case w/o wildcards +    if not wildcards: +        return dn.lower() == hostname.lower() + +    # RFC 6125, section 6.4.3, subitem 1. +    # The client SHOULD NOT attempt to match a presented identifier in which +    # the wildcard character comprises a label other than the left-most label. +    if leftmost == '*': +        # When '*' is a fragment by itself, it matches a non-empty dotless +        # fragment. +        pats.append('[^.]+') +    elif leftmost.startswith('xn--') or hostname.startswith('xn--'): +        # RFC 6125, section 6.4.3, subitem 3. +        # The client SHOULD NOT attempt to match a presented identifier +        # where the wildcard character is embedded within an A-label or +        # U-label of an internationalized domain name. +        pats.append(re.escape(leftmost)) +    else: +        # Otherwise, '*' matches any dotless string, e.g. www* +        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + +    # add the remaining fragments, ignore any wildcards +    for frag in remainder: +        pats.append(re.escape(frag)) + +    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) +    return pat.match(hostname) + + +def match_hostname(cert, hostname): +    """Verify that *cert* (in decoded format as returned by +    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125 +    rules are followed, but IP addresses are not accepted for *hostname*. + +    CertificateError is raised on failure. On success, the function +    returns nothing. 
+    """ +    if not cert: +        raise ValueError("empty or no certificate") +    dnsnames = [] +    san = cert.get('subjectAltName', ()) +    for key, value in san: +        if key == 'DNS': +            if _dnsname_match(value, hostname): +                return +            dnsnames.append(value) +    if not dnsnames: +        # The subject is only checked when there is no dNSName entry +        # in subjectAltName +        for sub in cert.get('subject', ()): +            for key, value in sub: +                # XXX according to RFC 2818, the most specific Common Name +                # must be used. +                if key == 'commonName': +                    if _dnsname_match(value, hostname): +                        return +                    dnsnames.append(value) +    if len(dnsnames) > 1: +        raise CertificateError("hostname %r " +            "doesn't match either of %s" +            % (hostname, ', '.join(map(repr, dnsnames)))) +    elif len(dnsnames) == 1: +        raise CertificateError("hostname %r " +            "doesn't match %r" +            % (hostname, dnsnames[0])) +    else: +        raise CertificateError("no appropriate commonName or " +            "subjectAltName fields were found") diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index e7f8667..f18ff2b 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -1,5 +1,5 @@  # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)  #  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager):      Behaves just like :class:`PoolManager`, but sends all requests through      the defined proxy, using the CONNECT method for HTTPS URLs. 
-    :param poxy_url: +    :param proxy_url:          The URL of the proxy to be used.      :param proxy_headers: @@ -245,12 +245,11 @@ class ProxyManager(PoolManager):          u = parse_url(url)          if u.scheme == "http": -            # It's too late to set proxy headers on per-request basis for -            # tunnelled HTTPS connections, should use -            # constructor's proxy_headers instead. +            # For proxied HTTPS requests, httplib sets the necessary headers +            # on the CONNECT to the proxy. For HTTP, we'll definitely +            # need to set 'Host' at the very least.              kw['headers'] = self._set_proxy_headers(url, kw.get('headers',                                                                  self.headers)) -            kw['headers'].update(self.proxy_headers)          return super(ProxyManager, self).urlopen(method, url, redirect, **kw) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index 66a9a0e..2a92cc2 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -45,7 +45,6 @@ class RequestMethods(object):      """      _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) -    _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])      def __init__(self, headers=None):          self.headers = headers or {} diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 4efff5a..6a1fe1a 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -90,6 +90,7 @@ class HTTPResponse(io.IOBase):          self._body = body if body and isinstance(body, basestring) else None          self._fp = None          self._original_response = original_response +        self._fp_bytes_read = 0          self._pool = pool          self._connection = connection @@ -129,6 +130,14 @@ class HTTPResponse(io.IOBase):          if self._fp:              return 
self.read(cache_content=True) +    def tell(self): +        """ +        Obtain the number of bytes pulled over the wire so far. May differ from +        the amount of content returned by :meth:`HTTPResponse.read` if bytes +        are encoded on the wire (e.g., compressed). +        """ +        return self._fp_bytes_read +      def read(self, amt=None, decode_content=None, cache_content=False):          """          Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -183,6 +192,8 @@ class HTTPResponse(io.IOBase):                      self._fp.close()                      flush_decoder = True +            self._fp_bytes_read += len(data) +              try:                  if decode_content and self._decoder:                      data = self._decoder.decompress(data) diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 266c9ed..bd26631 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object):      :type read: integer, float, or None      :param total: -        The maximum amount of time to wait for an HTTP request to connect and -        return. This combines the connect and read timeouts into one. In the +        This combines the connect and read timeouts into one; the read timeout +        will be set to the time leftover from the connect attempt. In the          event that both a connect timeout and a total are specified, or a read          timeout and a total are specified, the shorter timeout will be applied.          Defaults to None. -      :type total: integer, float, or None      .. note:: @@ -101,18 +100,23 @@ class Timeout(object):          `total`.          In addition, the read and total timeouts only measure the time between -        read operations on the socket connecting the client and the server, not -        the total amount of time for the request to return a complete response. 
-        As an example, you may want a request to return within 7 seconds or -        fail, so you set the ``total`` timeout to 7 seconds. If the server -        sends one byte to you every 5 seconds, the request will **not** trigger -        time out. This case is admittedly rare. +        read operations on the socket connecting the client and the server, +        not the total amount of time for the request to return a complete +        response. For most requests, the timeout is raised because the server +        has not sent the first byte in the specified time. This is not always +        the case; if a server streams one byte every fifteen seconds, a timeout +        of 20 seconds will not ever trigger, even though the request will +        take several minutes to complete. + +        If your goal is to cut off any request after a set amount of wall clock +        time, consider having a second "watcher" thread to cut off a slow +        request.      """      #: A sentinel object representing the default timeout value      DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT -    def __init__(self, connect=_Default, read=_Default, total=None): +    def __init__(self, total=None, connect=_Default, read=_Default):          self._connect = self._validate_timeout(connect, 'connect')          self._read = self._validate_timeout(read, 'read')          self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url):      # Auth      if '@' in url: -        auth, url = url.split('@', 1) +        # Last '@' denotes end of auth part +        auth, url = url.rsplit('@', 1)      # IPv6      if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url):          if not host:              host = _host -        if not port.isdigit(): -            raise LocationParseError("Failed to parse: %s" % url) - -        port = int(port) +        if port: +            # If given, ports must be integers. 
+            if not port.isdigit(): +                raise LocationParseError("Failed to parse: %s" % url) +            port = int(port) +        else: +            # Blank ports are cool, too. (rfc3986#section-3.2.3) +            port = None      elif not host and url:          host = url @@ -417,7 +426,7 @@ def get_host(url):  def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, -                 basic_auth=None): +                 basic_auth=None, proxy_basic_auth=None):      """      Shortcuts for generating request headers. @@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,          Colon-separated username:password string for 'authorization: basic ...'          auth header. +    :param proxy_basic_auth: +        Colon-separated username:password string for 'proxy-authorization: basic ...' +        auth header. +      Example: ::          >>> make_headers(keep_alive=True, user_agent="Batman/1.0") @@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,          headers['authorization'] = 'Basic ' + \              b64encode(six.b(basic_auth)).decode('utf-8') +    if proxy_basic_auth: +        headers['proxy-authorization'] = 'Basic ' + \ +            b64encode(six.b(proxy_basic_auth)).decode('utf-8') +      return headers @@ -603,6 +620,11 @@ if SSLContext is not None:  # Python 3.2+          """          context = SSLContext(ssl_version)          context.verify_mode = cert_reqs + +        # Disable TLS compression to mitigate CRIME attack (issue #309) +        OP_NO_COMPRESSION = 0x20000 +        context.options |= OP_NO_COMPRESSION +          if ca_certs:              try:                  context.load_verify_locations(ca_certs) |