diff options
Diffstat (limited to 'requests/packages')
-rw-r--r-- | requests/packages/chardet/__init__.py (renamed from requests/packages/charade/__init__.py) | 64 | ||||
-rw-r--r-- | requests/packages/chardet/big5freq.py (renamed from requests/packages/charade/big5freq.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/big5prober.py (renamed from requests/packages/charade/big5prober.py) | 84 | ||||
-rwxr-xr-x | requests/packages/chardet/chardetect.py | 46 | ||||
-rw-r--r-- | requests/packages/chardet/chardistribution.py (renamed from requests/packages/charade/chardistribution.py) | 462 | ||||
-rw-r--r-- | requests/packages/chardet/charsetgroupprober.py (renamed from requests/packages/charade/charsetgroupprober.py) | 212 | ||||
-rw-r--r-- | requests/packages/chardet/charsetprober.py (renamed from requests/packages/charade/charsetprober.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/codingstatemachine.py (renamed from requests/packages/charade/codingstatemachine.py) | 122 | ||||
-rw-r--r-- | requests/packages/chardet/compat.py (renamed from requests/packages/charade/compat.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/constants.py (renamed from requests/packages/charade/constants.py) | 78 | ||||
-rw-r--r-- | requests/packages/chardet/cp949prober.py (renamed from requests/packages/charade/cp949prober.py) | 88 | ||||
-rw-r--r-- | requests/packages/chardet/escprober.py (renamed from requests/packages/charade/escprober.py) | 172 | ||||
-rw-r--r-- | requests/packages/chardet/escsm.py (renamed from requests/packages/charade/escsm.py) | 484 | ||||
-rw-r--r-- | requests/packages/chardet/eucjpprober.py (renamed from requests/packages/charade/eucjpprober.py) | 180 | ||||
-rw-r--r-- | requests/packages/chardet/euckrfreq.py (renamed from requests/packages/charade/euckrfreq.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/euckrprober.py (renamed from requests/packages/charade/euckrprober.py) | 84 | ||||
-rw-r--r-- | requests/packages/chardet/euctwfreq.py (renamed from requests/packages/charade/euctwfreq.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/euctwprober.py (renamed from requests/packages/charade/euctwprober.py) | 82 | ||||
-rw-r--r-- | requests/packages/chardet/gb2312freq.py (renamed from requests/packages/charade/gb2312freq.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/gb2312prober.py (renamed from requests/packages/charade/gb2312prober.py) | 82 | ||||
-rw-r--r-- | requests/packages/chardet/hebrewprober.py (renamed from requests/packages/charade/hebrewprober.py) | 566 | ||||
-rw-r--r-- | requests/packages/chardet/jisfreq.py (renamed from requests/packages/charade/jisfreq.py) | 0 | ||||
-rw-r--r-- | requests/packages/chardet/jpcntx.py (renamed from requests/packages/charade/jpcntx.py) | 438 | ||||
-rw-r--r-- | requests/packages/chardet/langbulgarianmodel.py (renamed from requests/packages/charade/langbulgarianmodel.py) | 458 | ||||
-rw-r--r-- | requests/packages/chardet/langcyrillicmodel.py (renamed from requests/packages/charade/langcyrillicmodel.py) | 658 | ||||
-rw-r--r-- | requests/packages/chardet/langgreekmodel.py (renamed from requests/packages/charade/langgreekmodel.py) | 450 | ||||
-rw-r--r-- | requests/packages/chardet/langhebrewmodel.py (renamed from requests/packages/charade/langhebrewmodel.py) | 402 | ||||
-rw-r--r-- | requests/packages/chardet/langhungarianmodel.py (renamed from requests/packages/charade/langhungarianmodel.py) | 450 | ||||
-rw-r--r-- | requests/packages/chardet/langthaimodel.py (renamed from requests/packages/charade/langthaimodel.py) | 400 | ||||
-rw-r--r-- | requests/packages/chardet/latin1prober.py (renamed from requests/packages/charade/latin1prober.py) | 278 | ||||
-rw-r--r-- | requests/packages/chardet/mbcharsetprober.py (renamed from requests/packages/charade/mbcharsetprober.py) | 172 | ||||
-rw-r--r-- | requests/packages/chardet/mbcsgroupprober.py (renamed from requests/packages/charade/mbcsgroupprober.py) | 108 | ||||
-rw-r--r-- | requests/packages/chardet/mbcssm.py (renamed from requests/packages/charade/mbcssm.py) | 1150 | ||||
-rw-r--r-- | requests/packages/chardet/sbcharsetprober.py (renamed from requests/packages/charade/sbcharsetprober.py) | 240 | ||||
-rw-r--r-- | requests/packages/chardet/sbcsgroupprober.py (renamed from requests/packages/charade/sbcsgroupprober.py) | 138 | ||||
-rw-r--r-- | requests/packages/chardet/sjisprober.py (renamed from requests/packages/charade/sjisprober.py) | 182 | ||||
-rw-r--r-- | requests/packages/chardet/universaldetector.py (renamed from requests/packages/charade/universaldetector.py) | 342 | ||||
-rw-r--r-- | requests/packages/chardet/utf8prober.py (renamed from requests/packages/charade/utf8prober.py) | 152 | ||||
-rw-r--r-- | requests/packages/urllib3/_collections.py | 11 | ||||
-rw-r--r-- | requests/packages/urllib3/connection.py | 178 | ||||
-rw-r--r-- | requests/packages/urllib3/connectionpool.py | 244 | ||||
-rw-r--r-- | requests/packages/urllib3/contrib/pyopenssl.py | 58 | ||||
-rw-r--r-- | requests/packages/urllib3/filepost.py | 11 | ||||
-rw-r--r-- | requests/packages/urllib3/packages/ssl_match_hostname/__init__.py | 111 | ||||
-rw-r--r-- | requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py | 105 | ||||
-rw-r--r-- | requests/packages/urllib3/poolmanager.py | 11 | ||||
-rw-r--r-- | requests/packages/urllib3/request.py | 1 | ||||
-rw-r--r-- | requests/packages/urllib3/response.py | 11 | ||||
-rw-r--r-- | requests/packages/urllib3/util.py | 54 |
49 files changed, 4944 insertions, 4675 deletions
diff --git a/requests/packages/charade/__init__.py b/requests/packages/chardet/__init__.py index 1aadf3e..e4f0799 100644 --- a/requests/packages/charade/__init__.py +++ b/requests/packages/chardet/__init__.py @@ -1,32 +1,32 @@ -######################## BEGIN LICENSE BLOCK ########################
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-__version__ = "1.0.3"
-from sys import version_info
-
-
-def detect(aBuf):
- if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
- (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
- raise ValueError('Expected a bytes object, not a unicode object')
-
- from . import universaldetector
- u = universaldetector.UniversalDetector()
- u.reset()
- u.feed(aBuf)
- u.close()
- return u.result
+######################## BEGIN LICENSE BLOCK ######################## +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +__version__ = "2.2.1" +from sys import version_info + + +def detect(aBuf): + if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or + (version_info >= (3, 0) and not isinstance(aBuf, bytes))): + raise ValueError('Expected a bytes object, not a unicode object') + + from . import universaldetector + u = universaldetector.UniversalDetector() + u.reset() + u.feed(aBuf) + u.close() + return u.result diff --git a/requests/packages/charade/big5freq.py b/requests/packages/chardet/big5freq.py index 65bffc0..65bffc0 100644 --- a/requests/packages/charade/big5freq.py +++ b/requests/packages/chardet/big5freq.py diff --git a/requests/packages/charade/big5prober.py b/requests/packages/chardet/big5prober.py index 7382f7c..becce81 100644 --- a/requests/packages/charade/big5prober.py +++ b/requests/packages/chardet/big5prober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import Big5DistributionAnalysis
-from .mbcssm import Big5SMModel
-
-
-class Big5Prober(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(Big5SMModel)
- self._mDistributionAnalyzer = Big5DistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "Big5"
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import Big5DistributionAnalysis +from .mbcssm import Big5SMModel + + +class Big5Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(Big5SMModel) + self._mDistributionAnalyzer = Big5DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "Big5" diff --git a/requests/packages/chardet/chardetect.py b/requests/packages/chardet/chardetect.py new file mode 100755 index 0000000..ecd0163 --- /dev/null +++ b/requests/packages/chardet/chardetect.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +""" +Script which takes one or more file paths and reports on their detected +encodings + +Example:: + + % chardetect somefile someotherfile + somefile: windows-1252 with confidence 0.5 + someotherfile: ascii with confidence 1.0 + +If no paths are provided, it takes its input from stdin. + +""" +from io import open +from sys import argv, stdin + +from chardet.universaldetector import UniversalDetector + + +def description_of(file, name='stdin'): + """Return a string describing the probable encoding of a file.""" + u = UniversalDetector() + for line in file: + u.feed(line) + u.close() + result = u.result + if result['encoding']: + return '%s: %s with confidence %s' % (name, + result['encoding'], + result['confidence']) + else: + return '%s: no result' % name + + +def main(): + if len(argv) <= 1: + print(description_of(stdin)) + else: + for path in argv[1:]: + with open(path, 'rb') as f: + print(description_of(f, path)) + + +if __name__ == '__main__': + main() diff --git a/requests/packages/charade/chardistribution.py b/requests/packages/chardet/chardistribution.py index dfd3355..4e64a00 100644 --- a/requests/packages/charade/chardistribution.py +++ b/requests/packages/chardet/chardistribution.py @@ -1,231 +1,231 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
- EUCTW_TYPICAL_DISTRIBUTION_RATIO)
-from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
- EUCKR_TYPICAL_DISTRIBUTION_RATIO)
-from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
- GB2312_TYPICAL_DISTRIBUTION_RATIO)
-from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
- BIG5_TYPICAL_DISTRIBUTION_RATIO)
-from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
- JIS_TYPICAL_DISTRIBUTION_RATIO)
-from .compat import wrap_ord
-
-ENOUGH_DATA_THRESHOLD = 1024
-SURE_YES = 0.99
-SURE_NO = 0.01
-MINIMUM_DATA_THRESHOLD = 3
-
-
-class CharDistributionAnalysis:
- def __init__(self):
- # Mapping table to get frequency order from char order (get from
- # GetOrder())
- self._mCharToFreqOrder = None
- self._mTableSize = None # Size of above table
- # This is a constant value which varies from language to language,
- # used in calculating confidence. See
- # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
- # for further detail.
- self._mTypicalDistributionRatio = None
- self.reset()
-
- def reset(self):
- """reset analyser, clear any state"""
- # If this flag is set to True, detection is done and conclusion has
- # been made
- self._mDone = False
- self._mTotalChars = 0 # Total characters encountered
- # The number of characters whose frequency order is less than 512
- self._mFreqChars = 0
-
- def feed(self, aBuf, aCharLen):
- """feed a character with known length"""
- if aCharLen == 2:
- # we only care about 2-bytes character in our distribution analysis
- order = self.get_order(aBuf)
- else:
- order = -1
- if order >= 0:
- self._mTotalChars += 1
- # order is valid
- if order < self._mTableSize:
- if 512 > self._mCharToFreqOrder[order]:
- self._mFreqChars += 1
-
- def get_confidence(self):
- """return confidence based on existing data"""
- # if we didn't receive any character in our consideration range,
- # return negative answer
- if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
- return SURE_NO
-
- if self._mTotalChars != self._mFreqChars:
- r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
- * self._mTypicalDistributionRatio))
- if r < SURE_YES:
- return r
-
- # normalize confidence (we don't want to be 100% sure)
- return SURE_YES
-
- def got_enough_data(self):
- # It is not necessary to receive all data to draw conclusion.
- # For charset detection, certain amount of data is enough
- return self._mTotalChars > ENOUGH_DATA_THRESHOLD
-
- def get_order(self, aBuf):
- # We do not handle characters based on the original encoding string,
- # but convert this encoding string to a number, here called order.
- # This allows multiple encodings of a language to share one frequency
- # table.
- return -1
-
-
-class EUCTWDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = EUCTWCharToFreqOrder
- self._mTableSize = EUCTW_TABLE_SIZE
- self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for euc-TW encoding, we are interested
- # first byte range: 0xc4 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- first_char = wrap_ord(aBuf[0])
- if first_char >= 0xC4:
- return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
- else:
- return -1
-
-
-class EUCKRDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = EUCKRCharToFreqOrder
- self._mTableSize = EUCKR_TABLE_SIZE
- self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for euc-KR encoding, we are interested
- # first byte range: 0xb0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- first_char = wrap_ord(aBuf[0])
- if first_char >= 0xB0:
- return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
- else:
- return -1
-
-
-class GB2312DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = GB2312CharToFreqOrder
- self._mTableSize = GB2312_TABLE_SIZE
- self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for GB2312 encoding, we are interested
- # first byte range: 0xb0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
- if (first_char >= 0xB0) and (second_char >= 0xA1):
- return 94 * (first_char - 0xB0) + second_char - 0xA1
- else:
- return -1
-
-
-class Big5DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = Big5CharToFreqOrder
- self._mTableSize = BIG5_TABLE_SIZE
- self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for big5 encoding, we are interested
- # first byte range: 0xa4 -- 0xfe
- # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
- if first_char >= 0xA4:
- if second_char >= 0xA1:
- return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
- else:
- return 157 * (first_char - 0xA4) + second_char - 0x40
- else:
- return -1
-
-
-class SJISDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = JISCharToFreqOrder
- self._mTableSize = JIS_TABLE_SIZE
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for sjis encoding, we are interested
- # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
- # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
- # no validation needed here. State machine has done that
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
- if (first_char >= 0x81) and (first_char <= 0x9F):
- order = 188 * (first_char - 0x81)
- elif (first_char >= 0xE0) and (first_char <= 0xEF):
- order = 188 * (first_char - 0xE0 + 31)
- else:
- return -1
- order = order + second_char - 0x40
- if second_char > 0x7F:
- order = -1
- return order
-
-
-class EUCJPDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = JISCharToFreqOrder
- self._mTableSize = JIS_TABLE_SIZE
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aBuf):
- # for euc-JP encoding, we are interested
- # first byte range: 0xa0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- char = wrap_ord(aBuf[0])
- if char >= 0xA0:
- return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
- else:
- return -1
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, + EUCTW_TYPICAL_DISTRIBUTION_RATIO) +from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, + EUCKR_TYPICAL_DISTRIBUTION_RATIO) +from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, + GB2312_TYPICAL_DISTRIBUTION_RATIO) +from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, + BIG5_TYPICAL_DISTRIBUTION_RATIO) +from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, + JIS_TYPICAL_DISTRIBUTION_RATIO) +from .compat import wrap_ord + +ENOUGH_DATA_THRESHOLD = 1024 +SURE_YES = 0.99 +SURE_NO = 0.01 +MINIMUM_DATA_THRESHOLD = 3 + + +class CharDistributionAnalysis: + def __init__(self): + # Mapping table to get frequency order from char order (get from + # GetOrder()) + self._mCharToFreqOrder = None + self._mTableSize = None # Size of above table + # This is a constant value which varies from language to language, + # used in calculating confidence. See + # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html + # for further detail. + self._mTypicalDistributionRatio = None + self.reset() + + def reset(self): + """reset analyser, clear any state""" + # If this flag is set to True, detection is done and conclusion has + # been made + self._mDone = False + self._mTotalChars = 0 # Total characters encountered + # The number of characters whose frequency order is less than 512 + self._mFreqChars = 0 + + def feed(self, aBuf, aCharLen): + """feed a character with known length""" + if aCharLen == 2: + # we only care about 2-bytes character in our distribution analysis + order = self.get_order(aBuf) + else: + order = -1 + if order >= 0: + self._mTotalChars += 1 + # order is valid + if order < self._mTableSize: + if 512 > self._mCharToFreqOrder[order]: + self._mFreqChars += 1 + + def get_confidence(self): + """return confidence based on existing data""" + # if we didn't receive any character in our consideration range, + # return negative answer + if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: + return SURE_NO + + if self._mTotalChars != self._mFreqChars: + r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) + * self._mTypicalDistributionRatio)) + if r < SURE_YES: + return r + + # normalize confidence (we don't want to be 100% sure) + return SURE_YES + + def got_enough_data(self): + # It is not necessary to receive all data to draw conclusion. + # For charset detection, certain amount of data is enough + return self._mTotalChars > ENOUGH_DATA_THRESHOLD + + def get_order(self, aBuf): + # We do not handle characters based on the original encoding string, + # but convert this encoding string to a number, here called order. + # This allows multiple encodings of a language to share one frequency + # table. + return -1 + + +class EUCTWDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCTWCharToFreqOrder + self._mTableSize = EUCTW_TABLE_SIZE + self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-TW encoding, we are interested + # first byte range: 0xc4 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = wrap_ord(aBuf[0]) + if first_char >= 0xC4: + return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 + else: + return -1 + + +class EUCKRDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = EUCKRCharToFreqOrder + self._mTableSize = EUCKR_TABLE_SIZE + self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-KR encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char = wrap_ord(aBuf[0]) + if first_char >= 0xB0: + return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 + else: + return -1 + + +class GB2312DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = GB2312CharToFreqOrder + self._mTableSize = GB2312_TABLE_SIZE + self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for GB2312 encoding, we are interested + # first byte range: 0xb0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if (first_char >= 0xB0) and (second_char >= 0xA1): + return 94 * (first_char - 0xB0) + second_char - 0xA1 + else: + return -1 + + +class Big5DistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = Big5CharToFreqOrder + self._mTableSize = BIG5_TABLE_SIZE + self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for big5 encoding, we are interested + # first byte range: 0xa4 -- 0xfe + # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if first_char >= 0xA4: + if second_char >= 0xA1: + return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 + else: + return 157 * (first_char - 0xA4) + second_char - 0x40 + else: + return -1 + + +class SJISDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for sjis encoding, we are interested + # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe + # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe + # no validation needed here. State machine has done that + first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) + if (first_char >= 0x81) and (first_char <= 0x9F): + order = 188 * (first_char - 0x81) + elif (first_char >= 0xE0) and (first_char <= 0xEF): + order = 188 * (first_char - 0xE0 + 31) + else: + return -1 + order = order + second_char - 0x40 + if second_char > 0x7F: + order = -1 + return order + + +class EUCJPDistributionAnalysis(CharDistributionAnalysis): + def __init__(self): + CharDistributionAnalysis.__init__(self) + self._mCharToFreqOrder = JISCharToFreqOrder + self._mTableSize = JIS_TABLE_SIZE + self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, aBuf): + # for euc-JP encoding, we are interested + # first byte range: 0xa0 -- 0xfe + # second byte range: 0xa1 -- 0xfe + # no validation needed here. State machine has done that + char = wrap_ord(aBuf[0]) + if char >= 0xA0: + return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 + else: + return -1 diff --git a/requests/packages/charade/charsetgroupprober.py b/requests/packages/chardet/charsetgroupprober.py index 2959654..85e7a1c 100644 --- a/requests/packages/charade/charsetgroupprober.py +++ b/requests/packages/chardet/charsetgroupprober.py @@ -1,106 +1,106 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-import sys
-from .charsetprober import CharSetProber
-
-
-class CharSetGroupProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mActiveNum = 0
- self._mProbers = []
- self._mBestGuessProber = None
-
- def reset(self):
- CharSetProber.reset(self)
- self._mActiveNum = 0
- for prober in self._mProbers:
- if prober:
- prober.reset()
- prober.active = True
- self._mActiveNum += 1
- self._mBestGuessProber = None
-
- def get_charset_name(self):
- if not self._mBestGuessProber:
- self.get_confidence()
- if not self._mBestGuessProber:
- return None
-# self._mBestGuessProber = self._mProbers[0]
- return self._mBestGuessProber.get_charset_name()
-
- def feed(self, aBuf):
- for prober in self._mProbers:
- if not prober:
- continue
- if not prober.active:
- continue
- st = prober.feed(aBuf)
- if not st:
- continue
- if st == constants.eFoundIt:
- self._mBestGuessProber = prober
- return self.get_state()
- elif st == constants.eNotMe:
- prober.active = False
- self._mActiveNum -= 1
- if self._mActiveNum <= 0:
- self._mState = constants.eNotMe
- return self.get_state()
- return self.get_state()
-
- def get_confidence(self):
- st = self.get_state()
- if st == constants.eFoundIt:
- return 0.99
- elif st == constants.eNotMe:
- return 0.01
- bestConf = 0.0
- self._mBestGuessProber = None
- for prober in self._mProbers:
- if not prober:
- continue
- if not prober.active:
- if constants._debug:
- sys.stderr.write(prober.get_charset_name()
- + ' not active\n')
- continue
- cf = prober.get_confidence()
- if constants._debug:
- sys.stderr.write('%s confidence = %s\n' %
- (prober.get_charset_name(), cf))
- if bestConf < cf:
- bestConf = cf
- self._mBestGuessProber = prober
- if not self._mBestGuessProber:
- return 0.0
- return bestConf
-# else:
-# self._mBestGuessProber = self._mProbers[0]
-# return self._mBestGuessProber.get_confidence()
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +import sys +from .charsetprober import CharSetProber + + +class CharSetGroupProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mActiveNum = 0 + self._mProbers = [] + self._mBestGuessProber = None + + def reset(self): + CharSetProber.reset(self) + self._mActiveNum = 0 + for prober in self._mProbers: + if prober: + prober.reset() + prober.active = True + self._mActiveNum += 1 + self._mBestGuessProber = None + + def get_charset_name(self): + if not self._mBestGuessProber: + self.get_confidence() + if not self._mBestGuessProber: + return None +# self._mBestGuessProber = self._mProbers[0] + return self._mBestGuessProber.get_charset_name() + + def feed(self, aBuf): + for prober in self._mProbers: + if not prober: + continue + if not prober.active: + continue + st = prober.feed(aBuf) + if not st: + continue + if st == constants.eFoundIt: + self._mBestGuessProber = prober + return self.get_state() + elif st == constants.eNotMe: + prober.active = False + self._mActiveNum -= 1 + if self._mActiveNum <= 0: + self._mState = constants.eNotMe + return self.get_state() + return self.get_state() + + def get_confidence(self): + st = self.get_state() + if st == constants.eFoundIt: + return 0.99 + elif st == constants.eNotMe: + return 0.01 + bestConf = 0.0 + self._mBestGuessProber = None + for prober in self._mProbers: + if not prober: + continue + if not prober.active: + if constants._debug: + sys.stderr.write(prober.get_charset_name() + + ' not active\n') + continue + cf = prober.get_confidence() + if constants._debug: + sys.stderr.write('%s confidence = %s\n' % + (prober.get_charset_name(), cf)) + if bestConf < cf: + bestConf = cf + self._mBestGuessProber = prober + if not self._mBestGuessProber: + return 0.0 + return bestConf +# else: +# self._mBestGuessProber = self._mProbers[0] +# return self._mBestGuessProber.get_confidence() diff --git a/requests/packages/charade/charsetprober.py b/requests/packages/chardet/charsetprober.py index 9758171..9758171 100644 --- a/requests/packages/charade/charsetprober.py +++ b/requests/packages/chardet/charsetprober.py diff --git a/requests/packages/charade/codingstatemachine.py b/requests/packages/chardet/codingstatemachine.py index 1bda9ff..8dd8c91 100644 --- a/requests/packages/charade/codingstatemachine.py +++ b/requests/packages/chardet/codingstatemachine.py @@ -1,61 +1,61 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart
-from .compat import wrap_ord
-
-
-class CodingStateMachine:
- def __init__(self, sm):
- self._mModel = sm
- self._mCurrentBytePos = 0
- self._mCurrentCharLen = 0
- self.reset()
-
- def reset(self):
- self._mCurrentState = eStart
-
- def next_state(self, c):
- # for each byte we get its class
- # if it is first byte, we also get byte length
- # PY3K: aBuf is a byte stream, so c is an int, not a byte
- byteCls = self._mModel['classTable'][wrap_ord(c)]
- if self._mCurrentState == eStart:
- self._mCurrentBytePos = 0
- self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
- # from byte's class and stateTable, we get its next state
- curr_state = (self._mCurrentState * self._mModel['classFactor']
- + byteCls)
- self._mCurrentState = self._mModel['stateTable'][curr_state]
- self._mCurrentBytePos += 1
- return self._mCurrentState
-
- def get_current_charlen(self):
- return self._mCurrentCharLen
-
- def get_coding_state_machine(self):
- return self._mModel['name']
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart +from .compat import wrap_ord + + +class CodingStateMachine: + def __init__(self, sm): + self._mModel = sm + self._mCurrentBytePos = 0 + self._mCurrentCharLen = 0 + self.reset() + + def reset(self): + self._mCurrentState = eStart + + def next_state(self, c): + # for each byte we get its class + # if it is first byte, we also get byte length + # PY3K: aBuf is a byte stream, so c is an int, not a byte + byteCls = self._mModel['classTable'][wrap_ord(c)] + if self._mCurrentState == eStart: + self._mCurrentBytePos = 0 + self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] + # from byte's class and stateTable, we get its next state + curr_state = (self._mCurrentState * self._mModel['classFactor'] + + byteCls) + self._mCurrentState = self._mModel['stateTable'][curr_state] + self._mCurrentBytePos += 1 + return self._mCurrentState + + def get_current_charlen(self): + return self._mCurrentCharLen + + def get_coding_state_machine(self): + return self._mModel['name'] diff --git a/requests/packages/charade/compat.py b/requests/packages/chardet/compat.py index d9e30ad..d9e30ad 100644 --- a/requests/packages/charade/compat.py +++ b/requests/packages/chardet/compat.py diff --git a/requests/packages/charade/constants.py b/requests/packages/chardet/constants.py index a3d27de..e4d148b 100644 --- a/requests/packages/charade/constants.py +++ b/requests/packages/chardet/constants.py @@ -1,39 +1,39 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-_debug = 0
-
-eDetecting = 0
-eFoundIt = 1
-eNotMe = 2
-
-eStart = 0
-eError = 1
-eItsMe = 2
-
-SHORTCUT_THRESHOLD = 0.95
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +_debug = 0 + +eDetecting = 0 +eFoundIt = 1 +eNotMe = 2 + +eStart = 0 +eError = 1 +eItsMe = 2 + +SHORTCUT_THRESHOLD = 0.95 diff --git a/requests/packages/charade/cp949prober.py b/requests/packages/chardet/cp949prober.py index 543501f..ff4272f 100644 --- a/requests/packages/charade/cp949prober.py +++ b/requests/packages/chardet/cp949prober.py @@ -1,44 +1,44 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCKRDistributionAnalysis
-from .mbcssm import CP949SMModel
-
-
-class CP949Prober(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(CP949SMModel)
- # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
- # not different.
- self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "CP949"
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import CP949SMModel + + +class CP949Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(CP949SMModel) + # NOTE: CP949 is a superset of EUC-KR, so the distribution should be + # not different. + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "CP949" diff --git a/requests/packages/charade/escprober.py b/requests/packages/chardet/escprober.py index 0063935..80a844f 100644 --- a/requests/packages/charade/escprober.py +++ b/requests/packages/chardet/escprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
- ISO2022KRSMModel)
-from .charsetprober import CharSetProber
-from .codingstatemachine import CodingStateMachine
-from .compat import wrap_ord
-
-
-class EscCharSetProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mCodingSM = [
- CodingStateMachine(HZSMModel),
- CodingStateMachine(ISO2022CNSMModel),
- CodingStateMachine(ISO2022JPSMModel),
- CodingStateMachine(ISO2022KRSMModel)
- ]
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- codingSM.active = True
- codingSM.reset()
- self._mActiveSM = len(self._mCodingSM)
- self._mDetectedCharset = None
-
- def get_charset_name(self):
- return self._mDetectedCharset
-
- def get_confidence(self):
- if self._mDetectedCharset:
- return 0.99
- else:
- return 0.00
-
- def feed(self, aBuf):
- for c in aBuf:
- # PY3K: aBuf is a byte array, so c is an int, not a byte
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- if not codingSM.active:
- continue
- codingState = codingSM.next_state(wrap_ord(c))
- if codingState == constants.eError:
- codingSM.active = False
- self._mActiveSM -= 1
- if self._mActiveSM <= 0:
- self._mState = constants.eNotMe
- return self.get_state()
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
- return self.get_state()
-
- return self.get_state()
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, + ISO2022KRSMModel) +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .compat import wrap_ord + + +class EscCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = [ + CodingStateMachine(HZSMModel), + CodingStateMachine(ISO2022CNSMModel), + CodingStateMachine(ISO2022JPSMModel), + CodingStateMachine(ISO2022KRSMModel) + ] + self.reset() + + def reset(self): + CharSetProber.reset(self) + for codingSM in self._mCodingSM: + if not codingSM: + continue + codingSM.active = True + codingSM.reset() + self._mActiveSM = len(self._mCodingSM) + self._mDetectedCharset = None + + def get_charset_name(self): + return self._mDetectedCharset + + def get_confidence(self): + if self._mDetectedCharset: + return 0.99 + else: + return 0.00 + + def feed(self, aBuf): + for c in aBuf: + # PY3K: aBuf is a byte array, so c is an int, not a byte + for codingSM in self._mCodingSM: + if not codingSM: + continue + if not codingSM.active: + continue + codingState = codingSM.next_state(wrap_ord(c)) + if codingState == constants.eError: + codingSM.active = False + self._mActiveSM -= 1 + if self._mActiveSM <= 0: + self._mState = constants.eNotMe + return self.get_state() + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 + return self.get_state() + + return self.get_state() diff --git a/requests/packages/charade/escsm.py b/requests/packages/chardet/escsm.py index 1cf3aa6..bd302b4 100644 --- a/requests/packages/charade/escsm.py +++ b/requests/packages/chardet/escsm.py @@ -1,242 +1,242 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart, eError, eItsMe
-
-HZ_cls = (
-1,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,0,0,0,0, # 20 - 27
-0,0,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,0,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,4,0,5,2,0, # 78 - 7f
-1,1,1,1,1,1,1,1, # 80 - 87
-1,1,1,1,1,1,1,1, # 88 - 8f
-1,1,1,1,1,1,1,1, # 90 - 97
-1,1,1,1,1,1,1,1, # 98 - 9f
-1,1,1,1,1,1,1,1, # a0 - a7
-1,1,1,1,1,1,1,1, # a8 - af
-1,1,1,1,1,1,1,1, # b0 - b7
-1,1,1,1,1,1,1,1, # b8 - bf
-1,1,1,1,1,1,1,1, # c0 - c7
-1,1,1,1,1,1,1,1, # c8 - cf
-1,1,1,1,1,1,1,1, # d0 - d7
-1,1,1,1,1,1,1,1, # d8 - df
-1,1,1,1,1,1,1,1, # e0 - e7
-1,1,1,1,1,1,1,1, # e8 - ef
-1,1,1,1,1,1,1,1, # f0 - f7
-1,1,1,1,1,1,1,1, # f8 - ff
-)
-
-HZ_st = (
-eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
-eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
- 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f
- 4,eError, 4, 4, 4,eError, 4,eError,# 20-27
- 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
-)
-
-HZCharLenTable = (0, 0, 0, 0, 0, 0)
-
-HZSMModel = {'classTable': HZ_cls,
- 'classFactor': 6,
- 'stateTable': HZ_st,
- 'charLenTable': HZCharLenTable,
- 'name': "HZ-GB-2312"}
-
-ISO2022CN_cls = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,0,0,0,0, # 20 - 27
-0,3,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,4,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
-)
-
-ISO2022CN_st = (
-eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
-eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
-eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
-eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
- 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
-eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
-)
-
-ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
-
-ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
- 'classFactor': 9,
- 'stateTable': ISO2022CN_st,
- 'charLenTable': ISO2022CNCharLenTable,
- 'name': "ISO-2022-CN"}
-
-ISO2022JP_cls = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,2,2, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,7,0,0,0, # 20 - 27
-3,0,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-6,0,4,0,8,0,0,0, # 40 - 47
-0,9,5,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
-)
-
-ISO2022JP_st = (
-eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
-eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
-eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
-eError, 5,eError,eError,eError, 4,eError,eError,# 20-27
-eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f
-eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
-eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
-)
-
-ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-
-ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
- 'classFactor': 10,
- 'stateTable': ISO2022JP_st,
- 'charLenTable': ISO2022JPCharLenTable,
- 'name': "ISO-2022-JP"}
-
-ISO2022KR_cls = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,3,0,0,0, # 20 - 27
-0,4,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,5,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
-)
-
-ISO2022KR_st = (
-eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
-eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
-eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f
-eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
-)
-
-ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
-
-ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
- 'classFactor': 6,
- 'stateTable': ISO2022KR_st,
- 'charLenTable': ISO2022KRCharLenTable,
- 'name': "ISO-2022-KR"}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +HZ_cls = ( +1,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,0,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,4,0,5,2,0, # 78 - 7f +1,1,1,1,1,1,1,1, # 80 - 87 +1,1,1,1,1,1,1,1, # 88 - 8f +1,1,1,1,1,1,1,1, # 90 - 97 +1,1,1,1,1,1,1,1, # 98 - 9f +1,1,1,1,1,1,1,1, # a0 - a7 +1,1,1,1,1,1,1,1, # a8 - af +1,1,1,1,1,1,1,1, # b0 - b7 +1,1,1,1,1,1,1,1, # b8 - bf +1,1,1,1,1,1,1,1, # c0 - c7 +1,1,1,1,1,1,1,1, # c8 - cf +1,1,1,1,1,1,1,1, # d0 - d7 +1,1,1,1,1,1,1,1, # d8 - df +1,1,1,1,1,1,1,1, # e0 - e7 +1,1,1,1,1,1,1,1, # e8 - ef +1,1,1,1,1,1,1,1, # f0 - f7 +1,1,1,1,1,1,1,1, # f8 - ff +) + +HZ_st = ( +eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 + 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f + 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 + 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f +) + +HZCharLenTable = (0, 0, 0, 0, 0, 0) + +HZSMModel = {'classTable': HZ_cls, + 'classFactor': 6, + 'stateTable': HZ_st, + 'charLenTable': HZCharLenTable, + 'name': "HZ-GB-2312"} + +ISO2022CN_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,0,0,0,0, # 20 - 27 +0,3,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,4,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022CN_st = ( +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 + 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 +eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f +) + +ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022CNSMModel = {'classTable': ISO2022CN_cls, + 'classFactor': 9, + 'stateTable': ISO2022CN_st, + 'charLenTable': ISO2022CNCharLenTable, + 'name': "ISO-2022-CN"} + +ISO2022JP_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,2,2, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,7,0,0,0, # 20 - 27 +3,0,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +6,0,4,0,8,0,0,0, # 40 - 47 +0,9,5,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022JP_st = ( +eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 +eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 +eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f +eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 +eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f +eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 +eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f +eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 +) + +ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +ISO2022JPSMModel = {'classTable': ISO2022JP_cls, + 'classFactor': 10, + 'stateTable': ISO2022JP_st, + 'charLenTable': ISO2022JPCharLenTable, + 'name': "ISO-2022-JP"} + +ISO2022KR_cls = ( +2,0,0,0,0,0,0,0, # 00 - 07 +0,0,0,0,0,0,0,0, # 08 - 0f +0,0,0,0,0,0,0,0, # 10 - 17 +0,0,0,1,0,0,0,0, # 18 - 1f +0,0,0,0,3,0,0,0, # 20 - 27 +0,4,0,0,0,0,0,0, # 28 - 2f +0,0,0,0,0,0,0,0, # 30 - 37 +0,0,0,0,0,0,0,0, # 38 - 3f +0,0,0,5,0,0,0,0, # 40 - 47 +0,0,0,0,0,0,0,0, # 48 - 4f +0,0,0,0,0,0,0,0, # 50 - 57 +0,0,0,0,0,0,0,0, # 58 - 5f +0,0,0,0,0,0,0,0, # 60 - 67 +0,0,0,0,0,0,0,0, # 68 - 6f +0,0,0,0,0,0,0,0, # 70 - 77 +0,0,0,0,0,0,0,0, # 78 - 7f +2,2,2,2,2,2,2,2, # 80 - 87 +2,2,2,2,2,2,2,2, # 88 - 8f +2,2,2,2,2,2,2,2, # 90 - 97 +2,2,2,2,2,2,2,2, # 98 - 9f +2,2,2,2,2,2,2,2, # a0 - a7 +2,2,2,2,2,2,2,2, # a8 - af +2,2,2,2,2,2,2,2, # b0 - b7 +2,2,2,2,2,2,2,2, # b8 - bf +2,2,2,2,2,2,2,2, # c0 - c7 +2,2,2,2,2,2,2,2, # c8 - cf +2,2,2,2,2,2,2,2, # d0 - d7 +2,2,2,2,2,2,2,2, # d8 - df +2,2,2,2,2,2,2,2, # e0 - e7 +2,2,2,2,2,2,2,2, # e8 - ef +2,2,2,2,2,2,2,2, # f0 - f7 +2,2,2,2,2,2,2,2, # f8 - ff +) + +ISO2022KR_st = ( +eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 +eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f +eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 +eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f +eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 +) + +ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) + +ISO2022KRSMModel = {'classTable': ISO2022KR_cls, + 'classFactor': 6, + 'stateTable': ISO2022KR_st, + 'charLenTable': ISO2022KRCharLenTable, + 'name': "ISO-2022-KR"} + +# flake8: noqa diff --git a/requests/packages/charade/eucjpprober.py b/requests/packages/chardet/eucjpprober.py index d70cfbb..8e64fdc 100644 --- a/requests/packages/charade/eucjpprober.py +++ b/requests/packages/chardet/eucjpprober.py @@ -1,90 +1,90 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCJPDistributionAnalysis
-from .jpcntx import EUCJPContextAnalysis
-from .mbcssm import EUCJPSMModel
-
-
-class EUCJPProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCJPSMModel)
- self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
- self._mContextAnalyzer = EUCJPContextAnalysis()
- self.reset()
-
- def reset(self):
- MultiByteCharSetProber.reset(self)
- self._mContextAnalyzer.reset()
-
- def get_charset_name(self):
- return "EUC-JP"
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name()
- + ' prober hit error at byte ' + str(i)
- + '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mContextAnalyzer.feed(self._mLastChar, charLen)
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mContextAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- contxtCf = self._mContextAnalyzer.get_confidence()
- distribCf = self._mDistributionAnalyzer.get_confidence()
- return max(contxtCf, distribCf)
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCJPDistributionAnalysis +from .jpcntx import EUCJPContextAnalysis +from .mbcssm import EUCJPSMModel + + +class EUCJPProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCJPSMModel) + self._mDistributionAnalyzer = EUCJPDistributionAnalysis() + self._mContextAnalyzer = EUCJPContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "EUC-JP" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar, charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mContextAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/requests/packages/charade/euckrfreq.py b/requests/packages/chardet/euckrfreq.py index a179e4c..a179e4c 100644 --- a/requests/packages/charade/euckrfreq.py +++ b/requests/packages/chardet/euckrfreq.py diff --git a/requests/packages/charade/euckrprober.py b/requests/packages/chardet/euckrprober.py index def3e42..5982a46 100644 --- a/requests/packages/charade/euckrprober.py +++ b/requests/packages/chardet/euckrprober.py @@ -1,42 +1,42 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCKRDistributionAnalysis
-from .mbcssm import EUCKRSMModel
-
-
-class EUCKRProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCKRSMModel)
- self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "EUC-KR"
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCKRDistributionAnalysis +from .mbcssm import EUCKRSMModel + + +class EUCKRProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCKRSMModel) + self._mDistributionAnalyzer = EUCKRDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-KR" diff --git a/requests/packages/charade/euctwfreq.py b/requests/packages/chardet/euctwfreq.py index 576e750..576e750 100644 --- a/requests/packages/charade/euctwfreq.py +++ b/requests/packages/chardet/euctwfreq.py diff --git a/requests/packages/charade/euctwprober.py b/requests/packages/chardet/euctwprober.py index e601adf..fe652fe 100644 --- a/requests/packages/charade/euctwprober.py +++ b/requests/packages/chardet/euctwprober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCTWDistributionAnalysis
-from .mbcssm import EUCTWSMModel
-
-class EUCTWProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCTWSMModel)
- self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "EUC-TW"
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import EUCTWDistributionAnalysis +from .mbcssm import EUCTWSMModel + +class EUCTWProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(EUCTWSMModel) + self._mDistributionAnalyzer = EUCTWDistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "EUC-TW" diff --git a/requests/packages/charade/gb2312freq.py b/requests/packages/chardet/gb2312freq.py index 1238f51..1238f51 100644 --- a/requests/packages/charade/gb2312freq.py +++ b/requests/packages/chardet/gb2312freq.py diff --git a/requests/packages/charade/gb2312prober.py b/requests/packages/chardet/gb2312prober.py index 643fe25..0325a2d 100644 --- a/requests/packages/charade/gb2312prober.py +++ b/requests/packages/chardet/gb2312prober.py @@ -1,41 +1,41 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import GB2312DistributionAnalysis
-from .mbcssm import GB2312SMModel
-
-class GB2312Prober(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(GB2312SMModel)
- self._mDistributionAnalyzer = GB2312DistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "GB2312"
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import GB2312DistributionAnalysis +from .mbcssm import GB2312SMModel + +class GB2312Prober(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(GB2312SMModel) + self._mDistributionAnalyzer = GB2312DistributionAnalysis() + self.reset() + + def get_charset_name(self): + return "GB2312" diff --git a/requests/packages/charade/hebrewprober.py b/requests/packages/chardet/hebrewprober.py index 90d171f..ba225c5 100644 --- a/requests/packages/charade/hebrewprober.py +++ b/requests/packages/chardet/hebrewprober.py @@ -1,283 +1,283 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Shy Shalom
-# Portions created by the Initial Developer are Copyright (C) 2005
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetprober import CharSetProber
-from .constants import eNotMe, eDetecting
-from .compat import wrap_ord
-
-# This prober doesn't actually recognize a language or a charset.
-# It is a helper prober for the use of the Hebrew model probers
-
-### General ideas of the Hebrew charset recognition ###
-#
-# Four main charsets exist in Hebrew:
-# "ISO-8859-8" - Visual Hebrew
-# "windows-1255" - Logical Hebrew
-# "ISO-8859-8-I" - Logical Hebrew
-# "x-mac-hebrew" - ?? Logical Hebrew ??
-#
-# Both "ISO" charsets use a completely identical set of code points, whereas
-# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
-# these code points. windows-1255 defines additional characters in the range
-# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
-# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
-# x-mac-hebrew defines similar additional code points but with a different
-# mapping.
-#
-# As far as an average Hebrew text with no diacritics is concerned, all four
-# charsets are identical with respect to code points. Meaning that for the
-# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
-# (including final letters).
-#
-# The dominant difference between these charsets is their directionality.
-# "Visual" directionality means that the text is ordered as if the renderer is
-# not aware of a BIDI rendering algorithm. The renderer sees the text and
-# draws it from left to right. The text itself when ordered naturally is read
-# backwards. A buffer of Visual Hebrew generally looks like so:
-# "[last word of first line spelled backwards] [whole line ordered backwards
-# and spelled backwards] [first word of first line spelled backwards]
-# [end of line] [last word of second line] ... etc' "
-# adding punctuation marks, numbers and English text to visual text is
-# naturally also "visual" and from left to right.
-#
-# "Logical" directionality means the text is ordered "naturally" according to
-# the order it is read. It is the responsibility of the renderer to display
-# the text from right to left. A BIDI algorithm is used to place general
-# punctuation marks, numbers and English text in the text.
-#
-# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
-# what little evidence I could find, it seems that its general directionality
-# is Logical.
-#
-# To sum up all of the above, the Hebrew probing mechanism knows about two
-# charsets:
-# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
-# backwards while line order is natural. For charset recognition purposes
-# the line order is unimportant (In fact, for this implementation, even
-# word order is unimportant).
-# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
-#
-# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
-# specifically identified.
-# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
-# that contain special punctuation marks or diacritics is displayed with
-# some unconverted characters showing as question marks. This problem might
-# be corrected using another model prober for x-mac-hebrew. Due to the fact
-# that x-mac-hebrew texts are so rare, writing another model prober isn't
-# worth the effort and performance hit.
-#
-#### The Prober ####
-#
-# The prober is divided between two SBCharSetProbers and a HebrewProber,
-# all of which are managed, created, fed data, inquired and deleted by the
-# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
-# fact some kind of Hebrew, Logical or Visual. The final decision about which
-# one is it is made by the HebrewProber by combining final-letter scores
-# with the scores of the two SBCharSetProbers to produce a final answer.
-#
-# The SBCSGroupProber is responsible for stripping the original text of HTML
-# tags, English characters, numbers, low-ASCII punctuation characters, spaces
-# and new lines. It reduces any sequence of such characters to a single space.
-# The buffer fed to each prober in the SBCS group prober is pure text in
-# high-ASCII.
-# The two SBCharSetProbers (model probers) share the same language model:
-# Win1255Model.
-# The first SBCharSetProber uses the model normally as any other
-# SBCharSetProber does, to recognize windows-1255, upon which this model was
-# built. The second SBCharSetProber is told to make the pair-of-letter
-# lookup in the language model backwards. This in practice exactly simulates
-# a visual Hebrew model using the windows-1255 logical Hebrew model.
-#
-# The HebrewProber is not using any language model. All it does is look for
-# final-letter evidence suggesting the text is either logical Hebrew or visual
-# Hebrew. Disjointed from the model probers, the results of the HebrewProber
-# alone are meaningless. HebrewProber always returns 0.00 as confidence
-# since it never identifies a charset by itself. Instead, the pointer to the
-# HebrewProber is passed to the model probers as a helper "Name Prober".
-# When the Group prober receives a positive identification from any prober,
-# it asks for the name of the charset identified. If the prober queried is a
-# Hebrew model prober, the model prober forwards the call to the
-# HebrewProber to make the final decision. In the HebrewProber, the
-# decision is made according to the final-letters scores maintained and Both
-# model probers scores. The answer is returned in the form of the name of the
-# charset identified, either "windows-1255" or "ISO-8859-8".
-
-# windows-1255 / ISO-8859-8 code points of interest
-FINAL_KAF = 0xea
-NORMAL_KAF = 0xeb
-FINAL_MEM = 0xed
-NORMAL_MEM = 0xee
-FINAL_NUN = 0xef
-NORMAL_NUN = 0xf0
-FINAL_PE = 0xf3
-NORMAL_PE = 0xf4
-FINAL_TSADI = 0xf5
-NORMAL_TSADI = 0xf6
-
-# Minimum Visual vs Logical final letter score difference.
-# If the difference is below this, don't rely solely on the final letter score
-# distance.
-MIN_FINAL_CHAR_DISTANCE = 5
-
-# Minimum Visual vs Logical model score difference.
-# If the difference is below this, don't rely at all on the model score
-# distance.
-MIN_MODEL_DISTANCE = 0.01
-
-VISUAL_HEBREW_NAME = "ISO-8859-8"
-LOGICAL_HEBREW_NAME = "windows-1255"
-
-
-class HebrewProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mLogicalProber = None
- self._mVisualProber = None
- self.reset()
-
- def reset(self):
- self._mFinalCharLogicalScore = 0
- self._mFinalCharVisualScore = 0
- # The two last characters seen in the previous buffer,
- # mPrev and mBeforePrev are initialized to space in order to simulate
- # a word delimiter at the beginning of the data
- self._mPrev = ' '
- self._mBeforePrev = ' '
- # These probers are owned by the group prober.
-
- def set_model_probers(self, logicalProber, visualProber):
- self._mLogicalProber = logicalProber
- self._mVisualProber = visualProber
-
- def is_final(self, c):
- return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
- FINAL_TSADI]
-
- def is_non_final(self, c):
- # The normal Tsadi is not a good Non-Final letter due to words like
- # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
- # apostrophe is converted to a space in FilterWithoutEnglishLetters
- # causing the Non-Final tsadi to appear at an end of a word even
- # though this is not the case in the original text.
- # The letters Pe and Kaf rarely display a related behavior of not being
- # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
- # for example legally end with a Non-Final Pe or Kaf. However, the
- # benefit of these letters as Non-Final letters outweighs the damage
- # since these words are quite rare.
- return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
-
- def feed(self, aBuf):
- # Final letter analysis for logical-visual decision.
- # Look for evidence that the received buffer is either logical Hebrew
- # or visual Hebrew.
- # The following cases are checked:
- # 1) A word longer than 1 letter, ending with a final letter. This is
- # an indication that the text is laid out "naturally" since the
- # final letter really appears at the end. +1 for logical score.
- # 2) A word longer than 1 letter, ending with a Non-Final letter. In
- # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
- # should not end with the Non-Final form of that letter. Exceptions
- # to this rule are mentioned above in isNonFinal(). This is an
- # indication that the text is laid out backwards. +1 for visual
- # score
- # 3) A word longer than 1 letter, starting with a final letter. Final
- # letters should not appear at the beginning of a word. This is an
- # indication that the text is laid out backwards. +1 for visual
- # score.
- #
- # The visual score and logical score are accumulated throughout the
- # text and are finally checked against each other in GetCharSetName().
- # No checking for final letters in the middle of words is done since
- # that case is not an indication for either Logical or Visual text.
- #
- # We automatically filter out all 7-bit characters (replace them with
- # spaces) so the word boundary detection works properly. [MAP]
-
- if self.get_state() == eNotMe:
- # Both model probers say it's not them. No reason to continue.
- return eNotMe
-
- aBuf = self.filter_high_bit_only(aBuf)
-
- for cur in aBuf:
- if cur == ' ':
- # We stand on a space - a word just ended
- if self._mBeforePrev != ' ':
- # next-to-last char was not a space so self._mPrev is not a
- # 1 letter word
- if self.is_final(self._mPrev):
- # case (1) [-2:not space][-1:final letter][cur:space]
- self._mFinalCharLogicalScore += 1
- elif self.is_non_final(self._mPrev):
- # case (2) [-2:not space][-1:Non-Final letter][
- # cur:space]
- self._mFinalCharVisualScore += 1
- else:
- # Not standing on a space
- if ((self._mBeforePrev == ' ') and
- (self.is_final(self._mPrev)) and (cur != ' ')):
- # case (3) [-2:space][-1:final letter][cur:not space]
- self._mFinalCharVisualScore += 1
- self._mBeforePrev = self._mPrev
- self._mPrev = cur
-
- # Forever detecting, till the end or until both model probers return
- # eNotMe (handled above)
- return eDetecting
-
- def get_charset_name(self):
- # Make the decision: is it Logical or Visual?
- # If the final letter score distance is dominant enough, rely on it.
- finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
- if finalsub >= MIN_FINAL_CHAR_DISTANCE:
- return LOGICAL_HEBREW_NAME
- if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
- return VISUAL_HEBREW_NAME
-
- # It's not dominant enough, try to rely on the model scores instead.
- modelsub = (self._mLogicalProber.get_confidence()
- - self._mVisualProber.get_confidence())
- if modelsub > MIN_MODEL_DISTANCE:
- return LOGICAL_HEBREW_NAME
- if modelsub < -MIN_MODEL_DISTANCE:
- return VISUAL_HEBREW_NAME
-
- # Still no good, back to final letter distance, maybe it'll save the
- # day.
- if finalsub < 0.0:
- return VISUAL_HEBREW_NAME
-
- # (finalsub > 0 - Logical) or (don't know what to do) default to
- # Logical.
- return LOGICAL_HEBREW_NAME
-
- def get_state(self):
- # Remain active as long as any of the model probers are active.
- if (self._mLogicalProber.get_state() == eNotMe) and \
- (self._mVisualProber.get_state() == eNotMe):
- return eNotMe
- return eDetecting
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Shy Shalom +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe, eDetecting +from .compat import wrap_ord + +# This prober doesn't actually recognize a language or a charset. +# It is a helper prober for the use of the Hebrew model probers + +### General ideas of the Hebrew charset recognition ### +# +# Four main charsets exist in Hebrew: +# "ISO-8859-8" - Visual Hebrew +# "windows-1255" - Logical Hebrew +# "ISO-8859-8-I" - Logical Hebrew +# "x-mac-hebrew" - ?? Logical Hebrew ?? +# +# Both "ISO" charsets use a completely identical set of code points, whereas +# "windows-1255" and "x-mac-hebrew" are two different proper supersets of +# these code points. windows-1255 defines additional characters in the range +# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific +# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. +# x-mac-hebrew defines similar additional code points but with a different +# mapping. +# +# As far as an average Hebrew text with no diacritics is concerned, all four +# charsets are identical with respect to code points. Meaning that for the +# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters +# (including final letters). +# +# The dominant difference between these charsets is their directionality. +# "Visual" directionality means that the text is ordered as if the renderer is +# not aware of a BIDI rendering algorithm. The renderer sees the text and +# draws it from left to right. The text itself when ordered naturally is read +# backwards. A buffer of Visual Hebrew generally looks like so: +# "[last word of first line spelled backwards] [whole line ordered backwards +# and spelled backwards] [first word of first line spelled backwards] +# [end of line] [last word of second line] ... etc' " +# adding punctuation marks, numbers and English text to visual text is +# naturally also "visual" and from left to right. +# +# "Logical" directionality means the text is ordered "naturally" according to +# the order it is read. It is the responsibility of the renderer to display +# the text from right to left. A BIDI algorithm is used to place general +# punctuation marks, numbers and English text in the text. +# +# Texts in x-mac-hebrew are almost impossible to find on the Internet. From +# what little evidence I could find, it seems that its general directionality +# is Logical. +# +# To sum up all of the above, the Hebrew probing mechanism knows about two +# charsets: +# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are +# backwards while line order is natural. For charset recognition purposes +# the line order is unimportant (In fact, for this implementation, even +# word order is unimportant). +# Logical Hebrew - "windows-1255" - normal, naturally ordered text. +# +# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be +# specifically identified. +# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew +# that contain special punctuation marks or diacritics is displayed with +# some unconverted characters showing as question marks. This problem might +# be corrected using another model prober for x-mac-hebrew. Due to the fact +# that x-mac-hebrew texts are so rare, writing another model prober isn't +# worth the effort and performance hit. +# +#### The Prober #### +# +# The prober is divided between two SBCharSetProbers and a HebrewProber, +# all of which are managed, created, fed data, inquired and deleted by the +# SBCSGroupProber. The two SBCharSetProbers identify that the text is in +# fact some kind of Hebrew, Logical or Visual. The final decision about which +# one is it is made by the HebrewProber by combining final-letter scores +# with the scores of the two SBCharSetProbers to produce a final answer. +# +# The SBCSGroupProber is responsible for stripping the original text of HTML +# tags, English characters, numbers, low-ASCII punctuation characters, spaces +# and new lines. It reduces any sequence of such characters to a single space. +# The buffer fed to each prober in the SBCS group prober is pure text in +# high-ASCII. +# The two SBCharSetProbers (model probers) share the same language model: +# Win1255Model. +# The first SBCharSetProber uses the model normally as any other +# SBCharSetProber does, to recognize windows-1255, upon which this model was +# built. The second SBCharSetProber is told to make the pair-of-letter +# lookup in the language model backwards. This in practice exactly simulates +# a visual Hebrew model using the windows-1255 logical Hebrew model. +# +# The HebrewProber is not using any language model. All it does is look for +# final-letter evidence suggesting the text is either logical Hebrew or visual +# Hebrew. Disjointed from the model probers, the results of the HebrewProber +# alone are meaningless. HebrewProber always returns 0.00 as confidence +# since it never identifies a charset by itself. Instead, the pointer to the +# HebrewProber is passed to the model probers as a helper "Name Prober". +# When the Group prober receives a positive identification from any prober, +# it asks for the name of the charset identified. If the prober queried is a +# Hebrew model prober, the model prober forwards the call to the +# HebrewProber to make the final decision. In the HebrewProber, the +# decision is made according to the final-letters scores maintained and Both +# model probers scores. The answer is returned in the form of the name of the +# charset identified, either "windows-1255" or "ISO-8859-8". + +# windows-1255 / ISO-8859-8 code points of interest +FINAL_KAF = 0xea +NORMAL_KAF = 0xeb +FINAL_MEM = 0xed +NORMAL_MEM = 0xee +FINAL_NUN = 0xef +NORMAL_NUN = 0xf0 +FINAL_PE = 0xf3 +NORMAL_PE = 0xf4 +FINAL_TSADI = 0xf5 +NORMAL_TSADI = 0xf6 + +# Minimum Visual vs Logical final letter score difference. +# If the difference is below this, don't rely solely on the final letter score +# distance. +MIN_FINAL_CHAR_DISTANCE = 5 + +# Minimum Visual vs Logical model score difference. +# If the difference is below this, don't rely at all on the model score +# distance. +MIN_MODEL_DISTANCE = 0.01 + +VISUAL_HEBREW_NAME = "ISO-8859-8" +LOGICAL_HEBREW_NAME = "windows-1255" + + +class HebrewProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mLogicalProber = None + self._mVisualProber = None + self.reset() + + def reset(self): + self._mFinalCharLogicalScore = 0 + self._mFinalCharVisualScore = 0 + # The two last characters seen in the previous buffer, + # mPrev and mBeforePrev are initialized to space in order to simulate + # a word delimiter at the beginning of the data + self._mPrev = ' ' + self._mBeforePrev = ' ' + # These probers are owned by the group prober. + + def set_model_probers(self, logicalProber, visualProber): + self._mLogicalProber = logicalProber + self._mVisualProber = visualProber + + def is_final(self, c): + return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, + FINAL_TSADI] + + def is_non_final(self, c): + # The normal Tsadi is not a good Non-Final letter due to words like + # 'lechotet' (to chat) containing an apostrophe after the tsadi. This + # apostrophe is converted to a space in FilterWithoutEnglishLetters + # causing the Non-Final tsadi to appear at an end of a word even + # though this is not the case in the original text. + # The letters Pe and Kaf rarely display a related behavior of not being + # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' + # for example legally end with a Non-Final Pe or Kaf. However, the + # benefit of these letters as Non-Final letters outweighs the damage + # since these words are quite rare. + return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] + + def feed(self, aBuf): + # Final letter analysis for logical-visual decision. + # Look for evidence that the received buffer is either logical Hebrew + # or visual Hebrew. + # The following cases are checked: + # 1) A word longer than 1 letter, ending with a final letter. This is + # an indication that the text is laid out "naturally" since the + # final letter really appears at the end. +1 for logical score. + # 2) A word longer than 1 letter, ending with a Non-Final letter. In + # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, + # should not end with the Non-Final form of that letter. Exceptions + # to this rule are mentioned above in isNonFinal(). This is an + # indication that the text is laid out backwards. +1 for visual + # score + # 3) A word longer than 1 letter, starting with a final letter. Final + # letters should not appear at the beginning of a word. This is an + # indication that the text is laid out backwards. +1 for visual + # score. + # + # The visual score and logical score are accumulated throughout the + # text and are finally checked against each other in GetCharSetName(). + # No checking for final letters in the middle of words is done since + # that case is not an indication for either Logical or Visual text. + # + # We automatically filter out all 7-bit characters (replace them with + # spaces) so the word boundary detection works properly. [MAP] + + if self.get_state() == eNotMe: + # Both model probers say it's not them. No reason to continue. + return eNotMe + + aBuf = self.filter_high_bit_only(aBuf) + + for cur in aBuf: + if cur == ' ': + # We stand on a space - a word just ended + if self._mBeforePrev != ' ': + # next-to-last char was not a space so self._mPrev is not a + # 1 letter word + if self.is_final(self._mPrev): + # case (1) [-2:not space][-1:final letter][cur:space] + self._mFinalCharLogicalScore += 1 + elif self.is_non_final(self._mPrev): + # case (2) [-2:not space][-1:Non-Final letter][ + # cur:space] + self._mFinalCharVisualScore += 1 + else: + # Not standing on a space + if ((self._mBeforePrev == ' ') and + (self.is_final(self._mPrev)) and (cur != ' ')): + # case (3) [-2:space][-1:final letter][cur:not space] + self._mFinalCharVisualScore += 1 + self._mBeforePrev = self._mPrev + self._mPrev = cur + + # Forever detecting, till the end or until both model probers return + # eNotMe (handled above) + return eDetecting + + def get_charset_name(self): + # Make the decision: is it Logical or Visual? + # If the final letter score distance is dominant enough, rely on it. + finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore + if finalsub >= MIN_FINAL_CHAR_DISTANCE: + return LOGICAL_HEBREW_NAME + if finalsub <= -MIN_FINAL_CHAR_DISTANCE: + return VISUAL_HEBREW_NAME + + # It's not dominant enough, try to rely on the model scores instead. + modelsub = (self._mLogicalProber.get_confidence() + - self._mVisualProber.get_confidence()) + if modelsub > MIN_MODEL_DISTANCE: + return LOGICAL_HEBREW_NAME + if modelsub < -MIN_MODEL_DISTANCE: + return VISUAL_HEBREW_NAME + + # Still no good, back to final letter distance, maybe it'll save the + # day. + if finalsub < 0.0: + return VISUAL_HEBREW_NAME + + # (finalsub > 0 - Logical) or (don't know what to do) default to + # Logical. + return LOGICAL_HEBREW_NAME + + def get_state(self): + # Remain active as long as any of the model probers are active. + if (self._mLogicalProber.get_state() == eNotMe) and \ + (self._mVisualProber.get_state() == eNotMe): + return eNotMe + return eDetecting diff --git a/requests/packages/charade/jisfreq.py b/requests/packages/chardet/jisfreq.py index 064345b..064345b 100644 --- a/requests/packages/charade/jisfreq.py +++ b/requests/packages/chardet/jisfreq.py diff --git a/requests/packages/charade/jpcntx.py b/requests/packages/chardet/jpcntx.py index b4e6af4..f7f69ba 100644 --- a/requests/packages/charade/jpcntx.py +++ b/requests/packages/chardet/jpcntx.py @@ -1,219 +1,219 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .compat import wrap_ord
-
-NUM_OF_CATEGORY = 6
-DONT_KNOW = -1
-ENOUGH_REL_THRESHOLD = 100
-MAX_REL_THRESHOLD = 1000
-MINIMUM_DATA_THRESHOLD = 4
-
-# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
-jp2CharContext = (
-(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
-(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
-(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
-(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
-(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
-(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
-(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
-(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
-(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
-(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
-(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
-(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
-(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
-(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
-(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
-(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
-(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
-(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
-(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
-(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
-(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
-(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
-(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
-(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
-(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
-(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
-(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
-(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
-(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
-(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
-(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
-(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
-(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
-(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
-(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
-(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
-(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
-(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
-(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
-(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
-(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
-(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
-(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
-(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
-(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
-(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
-(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
-(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
-(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
-(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
-(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
-(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
-(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
-(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
-(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
-(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
-(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
-(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
-(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
-(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
-(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
-(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
-(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
-(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
-(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
-(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
-(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
-(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
-(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
-(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
-(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
-(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
-(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
-(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
-)
-
-class JapaneseContextAnalysis:
- def __init__(self):
- self.reset()
-
- def reset(self):
- self._mTotalRel = 0 # total sequence received
- # category counters, each interger counts sequence in its category
- self._mRelSample = [0] * NUM_OF_CATEGORY
- # if last byte in current buffer is not the last byte of a character,
- # we need to know how many bytes to skip in next buffer
- self._mNeedToSkipCharNum = 0
- self._mLastCharOrder = -1 # The order of previous char
- # If this flag is set to True, detection is done and conclusion has
- # been made
- self._mDone = False
-
- def feed(self, aBuf, aLen):
- if self._mDone:
- return
-
- # The buffer we got is byte oriented, and a character may span in more than one
- # buffers. In case the last one or two byte in last buffer is not
- # complete, we record how many byte needed to complete that character
- # and skip these bytes here. We can choose to record those bytes as
- # well and analyse the character once it is complete, but since a
- # character will not make much difference, by simply skipping
- # this character will simply our logic and improve performance.
- i = self._mNeedToSkipCharNum
- while i < aLen:
- order, charLen = self.get_order(aBuf[i:i + 2])
- i += charLen
- if i > aLen:
- self._mNeedToSkipCharNum = i - aLen
- self._mLastCharOrder = -1
- else:
- if (order != -1) and (self._mLastCharOrder != -1):
- self._mTotalRel += 1
- if self._mTotalRel > MAX_REL_THRESHOLD:
- self._mDone = True
- break
- self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
- self._mLastCharOrder = order
-
- def got_enough_data(self):
- return self._mTotalRel > ENOUGH_REL_THRESHOLD
-
- def get_confidence(self):
- # This is just one way to calculate confidence. It works well for me.
- if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
- return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
- else:
- return DONT_KNOW
-
- def get_order(self, aBuf):
- return -1, 1
-
-class SJISContextAnalysis(JapaneseContextAnalysis):
- def get_order(self, aBuf):
- if not aBuf:
- return -1, 1
- # find out current char's byte length
- first_char = wrap_ord(aBuf[0])
- if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
- charLen = 2
- else:
- charLen = 1
-
- # return its order if it is hiragana
- if len(aBuf) > 1:
- second_char = wrap_ord(aBuf[1])
- if (first_char == 202) and (0x9F <= second_char <= 0xF1):
- return second_char - 0x9F, charLen
-
- return -1, charLen
-
-class EUCJPContextAnalysis(JapaneseContextAnalysis):
- def get_order(self, aBuf):
- if not aBuf:
- return -1, 1
- # find out current char's byte length
- first_char = wrap_ord(aBuf[0])
- if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
- charLen = 2
- elif first_char == 0x8F:
- charLen = 3
- else:
- charLen = 1
-
- # return its order if it is hiragana
- if len(aBuf) > 1:
- second_char = wrap_ord(aBuf[1])
- if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
- return second_char - 0xA1, charLen
-
- return -1, charLen
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .compat import wrap_ord + +NUM_OF_CATEGORY = 6 +DONT_KNOW = -1 +ENOUGH_REL_THRESHOLD = 100 +MAX_REL_THRESHOLD = 1000 +MINIMUM_DATA_THRESHOLD = 4 + +# This is hiragana 2-char sequence table, the number in each cell represents its frequency category +jp2CharContext = ( +(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), +(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), +(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), +(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), +(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), +(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), +(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), +(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), +(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), +(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), +(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), +(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), +(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), +(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), +(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), +(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), +(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), +(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), +(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), +(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), +(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), +(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), +(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), +(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), +(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), +(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), +(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), +(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), +(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), +(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), +(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), +(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), +(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), +(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), +(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), +(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), +(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), +(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), +(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), +(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), +(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), +(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), +(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), +(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), +(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), +(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), +(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), +(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), +(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), +(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), +(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), +(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), +(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), +(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), +(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), +(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), +(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), +(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), +(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), +(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), +(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), +(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), +(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), +(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), +(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), +(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), +(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), +(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), +(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), +(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), +(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), +(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), +(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), +(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), +(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), +(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +) + +class JapaneseContextAnalysis: + def __init__(self): + self.reset() + + def reset(self): + self._mTotalRel = 0 # total sequence received + # category counters, each interger counts sequence in its category + self._mRelSample = [0] * NUM_OF_CATEGORY + # if last byte in current buffer is not the last byte of a character, + # we need to know how many bytes to skip in next buffer + self._mNeedToSkipCharNum = 0 + self._mLastCharOrder = -1 # The order of previous char + # If this flag is set to True, detection is done and conclusion has + # been made + self._mDone = False + + def feed(self, aBuf, aLen): + if self._mDone: + return + + # The buffer we got is byte oriented, and a character may span in more than one + # buffers. In case the last one or two byte in last buffer is not + # complete, we record how many byte needed to complete that character + # and skip these bytes here. We can choose to record those bytes as + # well and analyse the character once it is complete, but since a + # character will not make much difference, by simply skipping + # this character will simply our logic and improve performance. + i = self._mNeedToSkipCharNum + while i < aLen: + order, charLen = self.get_order(aBuf[i:i + 2]) + i += charLen + if i > aLen: + self._mNeedToSkipCharNum = i - aLen + self._mLastCharOrder = -1 + else: + if (order != -1) and (self._mLastCharOrder != -1): + self._mTotalRel += 1 + if self._mTotalRel > MAX_REL_THRESHOLD: + self._mDone = True + break + self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 + self._mLastCharOrder = order + + def got_enough_data(self): + return self._mTotalRel > ENOUGH_REL_THRESHOLD + + def get_confidence(self): + # This is just one way to calculate confidence. It works well for me. + if self._mTotalRel > MINIMUM_DATA_THRESHOLD: + return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel + else: + return DONT_KNOW + + def get_order(self, aBuf): + return -1, 1 + +class SJISContextAnalysis(JapaneseContextAnalysis): + def get_order(self, aBuf): + if not aBuf: + return -1, 1 + # find out current char's byte length + first_char = wrap_ord(aBuf[0]) + if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): + charLen = 2 + else: + charLen = 1 + + # return its order if it is hiragana + if len(aBuf) > 1: + second_char = wrap_ord(aBuf[1]) + if (first_char == 202) and (0x9F <= second_char <= 0xF1): + return second_char - 0x9F, charLen + + return -1, charLen + +class EUCJPContextAnalysis(JapaneseContextAnalysis): + def get_order(self, aBuf): + if not aBuf: + return -1, 1 + # find out current char's byte length + first_char = wrap_ord(aBuf[0]) + if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): + charLen = 2 + elif first_char == 0x8F: + charLen = 3 + else: + charLen = 1 + + # return its order if it is hiragana + if len(aBuf) > 1: + second_char = wrap_ord(aBuf[1]) + if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): + return second_char - 0xA1, charLen + + return -1, charLen + +# flake8: noqa diff --git a/requests/packages/charade/langbulgarianmodel.py b/requests/packages/chardet/langbulgarianmodel.py index ea5a60b..e5788fc 100644 --- a/requests/packages/charade/langbulgarianmodel.py +++ b/requests/packages/chardet/langbulgarianmodel.py @@ -1,229 +1,229 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-# this table is modified base on win1251BulgarianCharToOrderMap, so
-# only number <64 is sure valid
-
-Latin5_BulgarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
-194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
-210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
- 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
- 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
-)
-
-win1251BulgarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
-206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
-221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
- 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
- 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 96.9392%
-# first 1024 sequences:3.0618%
-# rest sequences: 0.2992%
-# negative sequences: 0.0020%
-BulgarianLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
-3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
-0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
-0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
-0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
-0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
-0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
-2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
-3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
-3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
-1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
-3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
-1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
-2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
-2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
-3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
-1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
-2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
-2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
-3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
-1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
-2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
-2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
-2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
-1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
-2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
-1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
-3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
-1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
-3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
-1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
-2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
-1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
-2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
-1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
-2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
-1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
-1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
-1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
-2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
-1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
-2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
-1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
-0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
-1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
-1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
-1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
-0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
-0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
-0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
-1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
-0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
-0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
-1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
-1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
-1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-)
-
-Latin5BulgarianModel = {
- 'charToOrderMap': Latin5_BulgarianCharToOrderMap,
- 'precedenceMatrix': BulgarianLangModel,
- 'mTypicalPositiveRatio': 0.969392,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-5"
-}
-
-Win1251BulgarianModel = {
- 'charToOrderMap': win1251BulgarianCharToOrderMap,
- 'precedenceMatrix': BulgarianLangModel,
- 'mTypicalPositiveRatio': 0.969392,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1251"
-}
-
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +# this table is modified base on win1251BulgarianCharToOrderMap, so +# only number <64 is sure valid + +Latin5_BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 +210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 + 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 +) + +win1251BulgarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 +206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 + 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 + 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 + 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 + 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 96.9392% +# first 1024 sequences:3.0618% +# rest sequences: 0.2992% +# negative sequences: 0.0020% +BulgarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, +3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, +0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, +0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, +0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, +0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, +0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, +2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, +3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, +3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, +1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, +3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, +1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, +2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, +2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, +3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, +1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, +2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, +2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, +1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, +2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, +2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, +2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, +1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, +2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, +1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, +3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, +1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, +3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, +1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, +2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, +1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, +2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, +1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, +2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, +1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, +2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, +1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, +2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, +1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, +0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, +1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, +1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, +1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, +0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, +1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, +1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, +1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +) + +Latin5BulgarianModel = { + 'charToOrderMap': Latin5_BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-5" +} + +Win1251BulgarianModel = { + 'charToOrderMap': win1251BulgarianCharToOrderMap, + 'precedenceMatrix': BulgarianLangModel, + 'mTypicalPositiveRatio': 0.969392, + 'keepEnglishLetter': False, + 'charsetName': "windows-1251" +} + + +# flake8: noqa diff --git a/requests/packages/charade/langcyrillicmodel.py b/requests/packages/chardet/langcyrillicmodel.py index 15e338f..a86f54b 100644 --- a/requests/packages/charade/langcyrillicmodel.py +++ b/requests/packages/chardet/langcyrillicmodel.py @@ -1,329 +1,329 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# KOI8-R language model
-# Character Mapping Table:
-KOI8R_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
-223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
-238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
- 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
- 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
- 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
- 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
-)
-
-win1251_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
-239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-)
-
-latin5_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
-)
-
-macCyrillic_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
-239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
-)
-
-IBM855_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
-206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
- 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
-220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
-230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
- 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
- 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
-250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
-)
-
-IBM866_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 97.6601%
-# first 1024 sequences: 2.3389%
-# rest sequences: 0.1237%
-# negative sequences: 0.0009%
-RussianLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
-0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
-0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
-1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
-1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
-2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
-1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
-3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
-1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
-2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
-1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
-1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
-1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
-2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
-1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
-3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
-1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
-2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
-1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
-2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
-1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
-1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
-1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
-3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
-3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
-1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
-1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
-0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
-1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
-1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
-0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
-1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
-2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
-2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
-1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
-1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
-2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
-1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
-0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
-2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
-1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
-1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
-0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
-0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
-0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
-0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
-2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
-0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
-)
-
-Koi8rModel = {
- 'charToOrderMap': KOI8R_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "KOI8-R"
-}
-
-Win1251CyrillicModel = {
- 'charToOrderMap': win1251_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1251"
-}
-
-Latin5CyrillicModel = {
- 'charToOrderMap': latin5_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-5"
-}
-
-MacCyrillicModel = {
- 'charToOrderMap': macCyrillic_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "MacCyrillic"
-};
-
-Ibm866Model = {
- 'charToOrderMap': IBM866_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "IBM866"
-}
-
-Ibm855Model = {
- 'charToOrderMap': IBM855_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "IBM855"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# KOI8-R language model +# Character Mapping Table: +KOI8R_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 +223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 + 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 + 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 + 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 + 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 +) + +win1251_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +) + +latin5_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +macCyrillic_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, +) + +IBM855_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 +191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, +206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, + 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, +220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, +230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, + 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, + 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, +) + +IBM866_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 + 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, + 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, + 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, +191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, +223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 97.6601% +# first 1024 sequences: 2.3389% +# rest sequences: 0.1237% +# negative sequences: 0.0009% +RussianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, +0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, +0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, +1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, +1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, +2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, +1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, +3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, +1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, +2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, +1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, +1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, +1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, +1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, +3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, +1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, +2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, +1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, +2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, +0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, +1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, +1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, +1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, +3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, +3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, +1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, +1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, +0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, +1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, +1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, +0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, +1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, +2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, +1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, +1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, +2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, +1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, +0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, +2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, +1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, +1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, +0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, +0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, +1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, +0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, +0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, +2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, +0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +) + +Koi8rModel = { + 'charToOrderMap': KOI8R_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "KOI8-R" +} + +Win1251CyrillicModel = { + 'charToOrderMap': win1251_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "windows-1251" +} + +Latin5CyrillicModel = { + 'charToOrderMap': latin5_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-5" +} + +MacCyrillicModel = { + 'charToOrderMap': macCyrillic_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "MacCyrillic" +}; + +Ibm866Model = { + 'charToOrderMap': IBM866_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "IBM866" +} + +Ibm855Model = { + 'charToOrderMap': IBM855_CharToOrderMap, + 'precedenceMatrix': RussianLangModel, + 'mTypicalPositiveRatio': 0.976601, + 'keepEnglishLetter': False, + 'charsetName': "IBM855" +} + +# flake8: noqa diff --git a/requests/packages/charade/langgreekmodel.py b/requests/packages/chardet/langgreekmodel.py index 93241ce..ddb5837 100644 --- a/requests/packages/charade/langgreekmodel.py +++ b/requests/packages/chardet/langgreekmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-Latin7_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
-253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
-253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
-)
-
-win1253_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
-253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
-253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 98.2851%
-# first 1024 sequences:1.7001%
-# rest sequences: 0.0359%
-# negative sequences: 0.0148%
-GreekLangModel = (
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
-3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
-2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
-2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
-2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
-0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
-3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
-3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
-2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
-0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
-0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
-0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
-0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
-0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
-0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
-0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
-0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
-0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
-0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
-0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
-0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
-0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
-0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
-0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
-0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
-0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
-0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
-0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
-0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
-0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
-0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
-0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
-0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
-0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
-0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
-0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
-0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
-0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-)
-
-Latin7GreekModel = {
- 'charToOrderMap': Latin7_CharToOrderMap,
- 'precedenceMatrix': GreekLangModel,
- 'mTypicalPositiveRatio': 0.982851,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-7"
-}
-
-Win1253GreekModel = {
- 'charToOrderMap': win1253_CharToOrderMap,
- 'precedenceMatrix': GreekLangModel,
- 'mTypicalPositiveRatio': 0.982851,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1253"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin7_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +win1253_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 +253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.2851% +# first 1024 sequences:1.7001% +# rest sequences: 0.0359% +# negative sequences: 0.0148% +GreekLangModel = ( +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +Latin7GreekModel = { + 'charToOrderMap': Latin7_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': False, + 'charsetName': "ISO-8859-7" +} + +Win1253GreekModel = { + 'charToOrderMap': win1253_CharToOrderMap, + 'precedenceMatrix': GreekLangModel, + 'mTypicalPositiveRatio': 0.982851, + 'keepEnglishLetter': False, + 'charsetName': "windows-1253" +} + +# flake8: noqa diff --git a/requests/packages/charade/langhebrewmodel.py b/requests/packages/chardet/langhebrewmodel.py index d871324..75f2bc7 100644 --- a/requests/packages/charade/langhebrewmodel.py +++ b/requests/packages/chardet/langhebrewmodel.py @@ -1,201 +1,201 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Simon Montagu
-# Portions created by the Initial Developer are Copyright (C) 2005
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Shoshannah Forbes - original C code (?)
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Windows-1255 language model
-# Character Mapping Table:
-win1255_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
- 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
-253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
- 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
-124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
-215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
- 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
-106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
- 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
-238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
- 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
- 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 98.4004%
-# first 1024 sequences: 1.5981%
-# rest sequences: 0.087%
-# negative sequences: 0.0015%
-HebrewLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
-3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
-1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
-1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
-1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
-1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
-0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
-0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
-0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
-0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
-0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
-0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
-0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
-0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
-0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
-1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
-0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
-0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
-0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
-0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
-0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
-2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
-0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
-0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
-1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
-2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
-1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
-2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
-2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
-0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
-0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
-)
-
-Win1255HebrewModel = {
- 'charToOrderMap': win1255_CharToOrderMap,
- 'precedenceMatrix': HebrewLangModel,
- 'mTypicalPositiveRatio': 0.984004,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1255"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Simon Montagu +# Portions created by the Initial Developer are Copyright (C) 2005 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Shoshannah Forbes - original C code (?) +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Windows-1255 language model +# Character Mapping Table: +win1255_CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, + 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, +106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, + 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, + 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 98.4004% +# first 1024 sequences: 1.5981% +# rest sequences: 0.087% +# negative sequences: 0.0015% +HebrewLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, +3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, +1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, +1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, +1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, +1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, +0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, +1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, +0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, +0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, +0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, +0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, +0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, +0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, +0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, +3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, +0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, +0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, +0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, +1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, +0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, +3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, +0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, +0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, +0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, +0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, +0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, +1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, +0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, +1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, +2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, +2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, +0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, +0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, +) + +Win1255HebrewModel = { + 'charToOrderMap': win1255_CharToOrderMap, + 'precedenceMatrix': HebrewLangModel, + 'mTypicalPositiveRatio': 0.984004, + 'keepEnglishLetter': False, + 'charsetName': "windows-1255" +} + +# flake8: noqa diff --git a/requests/packages/charade/langhungarianmodel.py b/requests/packages/chardet/langhungarianmodel.py index 6f59c61..49d2f0f 100644 --- a/requests/packages/charade/langhungarianmodel.py +++ b/requests/packages/chardet/langhungarianmodel.py @@ -1,225 +1,225 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-Latin2_HungarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
- 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
-159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
-175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
-191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
- 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
-221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
-232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
- 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
-245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
-)
-
-win1250HungarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
- 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
-161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
-177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
-191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
- 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
-221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
-232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
- 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
-245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 94.7368%
-# first 1024 sequences:5.2623%
-# rest sequences: 0.8894%
-# negative sequences: 0.0009%
-HungarianLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
-3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
-3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
-0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
-0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
-1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
-1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
-3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
-2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
-2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
-2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
-2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
-2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
-3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
-2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
-2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
-2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
-1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
-1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
-3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
-1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
-1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
-2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
-2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
-2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
-3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
-2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
-1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
-1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
-2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
-2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
-1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
-1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
-2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
-1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
-2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
-2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
-1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
-1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
-1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
-0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
-2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
-2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
-1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
-2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
-1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
-1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
-2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
-2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
-2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
-1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
-2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
-0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
-)
-
-Latin2HungarianModel = {
- 'charToOrderMap': Latin2_HungarianCharToOrderMap,
- 'precedenceMatrix': HungarianLangModel,
- 'mTypicalPositiveRatio': 0.947368,
- 'keepEnglishLetter': True,
- 'charsetName': "ISO-8859-2"
-}
-
-Win1250HungarianModel = {
- 'charToOrderMap': win1250HungarianCharToOrderMap,
- 'precedenceMatrix': HungarianLangModel,
- 'mTypicalPositiveRatio': 0.947368,
- 'keepEnglishLetter': True,
- 'charsetName': "windows-1250"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# Character Mapping Table: +Latin2_HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +win1250HungarianCharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 94.7368% +# first 1024 sequences:5.2623% +# rest sequences: 0.8894% +# negative sequences: 0.0009% +HungarianLangModel = ( +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +) + +Latin2HungarianModel = { + 'charToOrderMap': Latin2_HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': True, + 'charsetName': "ISO-8859-2" +} + +Win1250HungarianModel = { + 'charToOrderMap': win1250HungarianCharToOrderMap, + 'precedenceMatrix': HungarianLangModel, + 'mTypicalPositiveRatio': 0.947368, + 'keepEnglishLetter': True, + 'charsetName': "windows-1250" +} + +# flake8: noqa diff --git a/requests/packages/charade/langthaimodel.py b/requests/packages/chardet/langthaimodel.py index df343a7..0508b1b 100644 --- a/requests/packages/charade/langthaimodel.py +++ b/requests/packages/chardet/langthaimodel.py @@ -1,200 +1,200 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# The following result for thai was collected from a limited sample (1M).
-
-# Character Mapping Table:
-TIS620CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
-188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
-253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
- 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
-209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
-223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
-236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
- 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
- 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
- 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
- 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
- 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 92.6386%
-# first 1024 sequences:7.3177%
-# rest sequences: 1.0230%
-# negative sequences: 0.0436%
-ThaiLangModel = (
-0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
-0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
-3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
-0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
-3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
-3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
-3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
-3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
-3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
-2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
-3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
-0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
-0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
-1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
-3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
-3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
-1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
-0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
-0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
-3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
-2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
-3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
-0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
-3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
-3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
-2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
-3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
-2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
-3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
-3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
-3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
-3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
-1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
-0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
-0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
-3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
-3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
-1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
-3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
-3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
-0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
-0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
-1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
-1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
-3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
-0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
-3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
-0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
-0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
-0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
-0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
-0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
-0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
-0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
-3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
-0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
-0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
-3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
-2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
-0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
-3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
-1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
-1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-)
-
-TIS620ThaiModel = {
- 'charToOrderMap': TIS620CharToOrderMap,
- 'precedenceMatrix': ThaiLangModel,
- 'mTypicalPositiveRatio': 0.926386,
- 'keepEnglishLetter': False,
- 'charsetName': "TIS-620"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# 255: Control characters that usually does not exist in any text +# 254: Carriage/Return +# 253: symbol (punctuation) that does not belong to word +# 252: 0 - 9 + +# The following result for thai was collected from a limited sample (1M). + +# Character Mapping Table: +TIS620CharToOrderMap = ( +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 +253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +) + +# Model Table: +# total sequences: 100% +# first 512 sequences: 92.6386% +# first 1024 sequences:7.3177% +# rest sequences: 1.0230% +# negative sequences: 0.0436% +ThaiLangModel = ( +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +) + +TIS620ThaiModel = { + 'charToOrderMap': TIS620CharToOrderMap, + 'precedenceMatrix': ThaiLangModel, + 'mTypicalPositiveRatio': 0.926386, + 'keepEnglishLetter': False, + 'charsetName': "TIS-620" +} + +# flake8: noqa diff --git a/requests/packages/charade/latin1prober.py b/requests/packages/chardet/latin1prober.py index bebe1bc..ad695f5 100644 --- a/requests/packages/charade/latin1prober.py +++ b/requests/packages/chardet/latin1prober.py @@ -1,139 +1,139 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetprober import CharSetProber
-from .constants import eNotMe
-from .compat import wrap_ord
-
-FREQ_CAT_NUM = 4
-
-UDF = 0 # undefined
-OTH = 1 # other
-ASC = 2 # ascii capital letter
-ASS = 3 # ascii small letter
-ACV = 4 # accent capital vowel
-ACO = 5 # accent capital other
-ASV = 6 # accent small vowel
-ASO = 7 # accent small other
-CLASS_NUM = 8 # total classes
-
-Latin1_CharToClass = (
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
- OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
- ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
- OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
- ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
- OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
- OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
- UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
- OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
- ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
- ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
- ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
- ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
- ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
- ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
- ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
- ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
-)
-
-# 0 : illegal
-# 1 : very unlikely
-# 2 : normal
-# 3 : very likely
-Latin1ClassModel = (
- # UDF OTH ASC ASS ACV ACO ASV ASO
- 0, 0, 0, 0, 0, 0, 0, 0, # UDF
- 0, 3, 3, 3, 3, 3, 3, 3, # OTH
- 0, 3, 3, 3, 3, 3, 3, 3, # ASC
- 0, 3, 3, 3, 1, 1, 3, 3, # ASS
- 0, 3, 3, 3, 1, 2, 1, 2, # ACV
- 0, 3, 3, 3, 3, 3, 3, 3, # ACO
- 0, 3, 1, 3, 1, 1, 1, 3, # ASV
- 0, 3, 1, 3, 1, 1, 3, 3, # ASO
-)
-
-
-class Latin1Prober(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self.reset()
-
- def reset(self):
- self._mLastCharClass = OTH
- self._mFreqCounter = [0] * FREQ_CAT_NUM
- CharSetProber.reset(self)
-
- def get_charset_name(self):
- return "windows-1252"
-
- def feed(self, aBuf):
- aBuf = self.filter_with_english_letters(aBuf)
- for c in aBuf:
- charClass = Latin1_CharToClass[wrap_ord(c)]
- freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
- + charClass]
- if freq == 0:
- self._mState = eNotMe
- break
- self._mFreqCounter[freq] += 1
- self._mLastCharClass = charClass
-
- return self.get_state()
-
- def get_confidence(self):
- if self.get_state() == eNotMe:
- return 0.01
-
- total = sum(self._mFreqCounter)
- if total < 0.01:
- confidence = 0.0
- else:
- confidence = ((self._mFreqCounter[3] / total)
- - (self._mFreqCounter[1] * 20.0 / total))
- if confidence < 0.0:
- confidence = 0.0
- # lower the confidence of latin1 so that other more accurate
- # detector can take priority.
- confidence = confidence * 0.5
- return confidence
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetprober import CharSetProber +from .constants import eNotMe +from .compat import wrap_ord + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +CLASS_NUM = 8 # total classes + +Latin1_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 + OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F + UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 + OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF + ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF + ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 + ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF + ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 + ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +Latin1ClassModel = ( + # UDF OTH ASC ASS ACV ACO ASV ASO + 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, # ASO +) + + +class Latin1Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self.reset() + + def reset(self): + self._mLastCharClass = OTH + self._mFreqCounter = [0] * FREQ_CAT_NUM + CharSetProber.reset(self) + + def get_charset_name(self): + return "windows-1252" + + def feed(self, aBuf): + aBuf = self.filter_with_english_letters(aBuf) + for c in aBuf: + charClass = Latin1_CharToClass[wrap_ord(c)] + freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + + charClass] + if freq == 0: + self._mState = eNotMe + break + self._mFreqCounter[freq] += 1 + self._mLastCharClass = charClass + + return self.get_state() + + def get_confidence(self): + if self.get_state() == eNotMe: + return 0.01 + + total = sum(self._mFreqCounter) + if total < 0.01: + confidence = 0.0 + else: + confidence = ((self._mFreqCounter[3] / total) + - (self._mFreqCounter[1] * 20.0 / total)) + if confidence < 0.0: + confidence = 0.0 + # lower the confidence of latin1 so that other more accurate + # detector can take priority. + confidence = confidence * 0.5 + return confidence diff --git a/requests/packages/charade/mbcharsetprober.py b/requests/packages/chardet/mbcharsetprober.py index 1eee253..bb42f2f 100644 --- a/requests/packages/charade/mbcharsetprober.py +++ b/requests/packages/chardet/mbcharsetprober.py @@ -1,86 +1,86 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Proofpoint, Inc.
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .charsetprober import CharSetProber
-
-
-class MultiByteCharSetProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mDistributionAnalyzer = None
- self._mCodingSM = None
- self._mLastChar = [0, 0]
-
- def reset(self):
- CharSetProber.reset(self)
- if self._mCodingSM:
- self._mCodingSM.reset()
- if self._mDistributionAnalyzer:
- self._mDistributionAnalyzer.reset()
- self._mLastChar = [0, 0]
-
- def get_charset_name(self):
- pass
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name()
- + ' prober hit error at byte ' + str(i)
- + '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mDistributionAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- return self._mDistributionAnalyzer.get_confidence()
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .charsetprober import CharSetProber + + +class MultiByteCharSetProber(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mDistributionAnalyzer = None + self._mCodingSM = None + self._mLastChar = [0, 0] + + def reset(self): + CharSetProber.reset(self) + if self._mCodingSM: + self._mCodingSM.reset() + if self._mDistributionAnalyzer: + self._mDistributionAnalyzer.reset() + self._mLastChar = [0, 0] + + def get_charset_name(self): + pass + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mDistributionAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + return self._mDistributionAnalyzer.get_confidence() diff --git a/requests/packages/charade/mbcsgroupprober.py b/requests/packages/chardet/mbcsgroupprober.py index 2f6f5e8..03c9dcf 100644 --- a/requests/packages/charade/mbcsgroupprober.py +++ b/requests/packages/chardet/mbcsgroupprober.py @@ -1,54 +1,54 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Proofpoint, Inc.
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetgroupprober import CharSetGroupProber
-from .utf8prober import UTF8Prober
-from .sjisprober import SJISProber
-from .eucjpprober import EUCJPProber
-from .gb2312prober import GB2312Prober
-from .euckrprober import EUCKRProber
-from .cp949prober import CP949Prober
-from .big5prober import Big5Prober
-from .euctwprober import EUCTWProber
-
-
-class MBCSGroupProber(CharSetGroupProber):
- def __init__(self):
- CharSetGroupProber.__init__(self)
- self._mProbers = [
- UTF8Prober(),
- SJISProber(),
- EUCJPProber(),
- GB2312Prober(),
- EUCKRProber(),
- CP949Prober(),
- Big5Prober(),
- EUCTWProber()
- ]
- self.reset()
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# Proofpoint, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .utf8prober import UTF8Prober +from .sjisprober import SJISProber +from .eucjpprober import EUCJPProber +from .gb2312prober import GB2312Prober +from .euckrprober import EUCKRProber +from .cp949prober import CP949Prober +from .big5prober import Big5Prober +from .euctwprober import EUCTWProber + + +class MBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ + UTF8Prober(), + SJISProber(), + EUCJPProber(), + GB2312Prober(), + EUCKRProber(), + CP949Prober(), + Big5Prober(), + EUCTWProber() + ] + self.reset() diff --git a/requests/packages/charade/mbcssm.py b/requests/packages/chardet/mbcssm.py index 55c02f0..3f93cfb 100644 --- a/requests/packages/charade/mbcssm.py +++ b/requests/packages/chardet/mbcssm.py @@ -1,575 +1,575 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart, eError, eItsMe
-
-# BIG5
-
-BIG5_cls = (
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,1, # 78 - 7f
- 4,4,4,4,4,4,4,4, # 80 - 87
- 4,4,4,4,4,4,4,4, # 88 - 8f
- 4,4,4,4,4,4,4,4, # 90 - 97
- 4,4,4,4,4,4,4,4, # 98 - 9f
- 4,3,3,3,3,3,3,3, # a0 - a7
- 3,3,3,3,3,3,3,3, # a8 - af
- 3,3,3,3,3,3,3,3, # b0 - b7
- 3,3,3,3,3,3,3,3, # b8 - bf
- 3,3,3,3,3,3,3,3, # c0 - c7
- 3,3,3,3,3,3,3,3, # c8 - cf
- 3,3,3,3,3,3,3,3, # d0 - d7
- 3,3,3,3,3,3,3,3, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,3,3,3, # e8 - ef
- 3,3,3,3,3,3,3,3, # f0 - f7
- 3,3,3,3,3,3,3,0 # f8 - ff
-)
-
-BIG5_st = (
- eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
- eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
- eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
-)
-
-Big5CharLenTable = (0, 1, 1, 2, 0)
-
-Big5SMModel = {'classTable': BIG5_cls,
- 'classFactor': 5,
- 'stateTable': BIG5_st,
- 'charLenTable': Big5CharLenTable,
- 'name': 'Big5'}
-
-# CP949
-
-CP949_cls = (
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
- 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
- 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
- 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
- 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
- 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
- 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
- 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
- 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
- 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
- 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
-)
-
-CP949_st = (
-#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
- eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
- eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
- eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
- eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
- eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
- eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
-)
-
-CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
-
-CP949SMModel = {'classTable': CP949_cls,
- 'classFactor': 10,
- 'stateTable': CP949_st,
- 'charLenTable': CP949CharLenTable,
- 'name': 'CP949'}
-
-# EUC-JP
-
-EUCJP_cls = (
- 4,4,4,4,4,4,4,4, # 00 - 07
- 4,4,4,4,4,4,5,5, # 08 - 0f
- 4,4,4,4,4,4,4,4, # 10 - 17
- 4,4,4,5,4,4,4,4, # 18 - 1f
- 4,4,4,4,4,4,4,4, # 20 - 27
- 4,4,4,4,4,4,4,4, # 28 - 2f
- 4,4,4,4,4,4,4,4, # 30 - 37
- 4,4,4,4,4,4,4,4, # 38 - 3f
- 4,4,4,4,4,4,4,4, # 40 - 47
- 4,4,4,4,4,4,4,4, # 48 - 4f
- 4,4,4,4,4,4,4,4, # 50 - 57
- 4,4,4,4,4,4,4,4, # 58 - 5f
- 4,4,4,4,4,4,4,4, # 60 - 67
- 4,4,4,4,4,4,4,4, # 68 - 6f
- 4,4,4,4,4,4,4,4, # 70 - 77
- 4,4,4,4,4,4,4,4, # 78 - 7f
- 5,5,5,5,5,5,5,5, # 80 - 87
- 5,5,5,5,5,5,1,3, # 88 - 8f
- 5,5,5,5,5,5,5,5, # 90 - 97
- 5,5,5,5,5,5,5,5, # 98 - 9f
- 5,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,2,2,2, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,2,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,0,5 # f8 - ff
-)
-
-EUCJP_st = (
- 3, 4, 3, 5,eStart,eError,eError,eError,#00-07
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
- eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
- 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
-)
-
-EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
-
-EUCJPSMModel = {'classTable': EUCJP_cls,
- 'classFactor': 6,
- 'stateTable': EUCJP_st,
- 'charLenTable': EUCJPCharLenTable,
- 'name': 'EUC-JP'}
-
-# EUC-KR
-
-EUCKR_cls = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 1,1,1,1,1,1,1,1, # 40 - 47
- 1,1,1,1,1,1,1,1, # 48 - 4f
- 1,1,1,1,1,1,1,1, # 50 - 57
- 1,1,1,1,1,1,1,1, # 58 - 5f
- 1,1,1,1,1,1,1,1, # 60 - 67
- 1,1,1,1,1,1,1,1, # 68 - 6f
- 1,1,1,1,1,1,1,1, # 70 - 77
- 1,1,1,1,1,1,1,1, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,3,3,3, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,3,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 2,2,2,2,2,2,2,2, # e0 - e7
- 2,2,2,2,2,2,2,2, # e8 - ef
- 2,2,2,2,2,2,2,2, # f0 - f7
- 2,2,2,2,2,2,2,0 # f8 - ff
-)
-
-EUCKR_st = (
- eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
- eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
-)
-
-EUCKRCharLenTable = (0, 1, 2, 0)
-
-EUCKRSMModel = {'classTable': EUCKR_cls,
- 'classFactor': 4,
- 'stateTable': EUCKR_st,
- 'charLenTable': EUCKRCharLenTable,
- 'name': 'EUC-KR'}
-
-# EUC-TW
-
-EUCTW_cls = (
- 2,2,2,2,2,2,2,2, # 00 - 07
- 2,2,2,2,2,2,0,0, # 08 - 0f
- 2,2,2,2,2,2,2,2, # 10 - 17
- 2,2,2,0,2,2,2,2, # 18 - 1f
- 2,2,2,2,2,2,2,2, # 20 - 27
- 2,2,2,2,2,2,2,2, # 28 - 2f
- 2,2,2,2,2,2,2,2, # 30 - 37
- 2,2,2,2,2,2,2,2, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,2, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,6,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,3,4,4,4,4,4,4, # a0 - a7
- 5,5,1,1,1,1,1,1, # a8 - af
- 1,1,1,1,1,1,1,1, # b0 - b7
- 1,1,1,1,1,1,1,1, # b8 - bf
- 1,1,3,1,3,3,3,3, # c0 - c7
- 3,3,3,3,3,3,3,3, # c8 - cf
- 3,3,3,3,3,3,3,3, # d0 - d7
- 3,3,3,3,3,3,3,3, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,3,3,3, # e8 - ef
- 3,3,3,3,3,3,3,3, # f0 - f7
- 3,3,3,3,3,3,3,0 # f8 - ff
-)
-
-EUCTW_st = (
- eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
- eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
- eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
- 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
- eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
-)
-
-EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
-
-EUCTWSMModel = {'classTable': EUCTW_cls,
- 'classFactor': 7,
- 'stateTable': EUCTW_st,
- 'charLenTable': EUCTWCharLenTable,
- 'name': 'x-euc-tw'}
-
-# GB2312
-
-GB2312_cls = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 3,3,3,3,3,3,3,3, # 30 - 37
- 3,3,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,4, # 78 - 7f
- 5,6,6,6,6,6,6,6, # 80 - 87
- 6,6,6,6,6,6,6,6, # 88 - 8f
- 6,6,6,6,6,6,6,6, # 90 - 97
- 6,6,6,6,6,6,6,6, # 98 - 9f
- 6,6,6,6,6,6,6,6, # a0 - a7
- 6,6,6,6,6,6,6,6, # a8 - af
- 6,6,6,6,6,6,6,6, # b0 - b7
- 6,6,6,6,6,6,6,6, # b8 - bf
- 6,6,6,6,6,6,6,6, # c0 - c7
- 6,6,6,6,6,6,6,6, # c8 - cf
- 6,6,6,6,6,6,6,6, # d0 - d7
- 6,6,6,6,6,6,6,6, # d8 - df
- 6,6,6,6,6,6,6,6, # e0 - e7
- 6,6,6,6,6,6,6,6, # e8 - ef
- 6,6,6,6,6,6,6,6, # f0 - f7
- 6,6,6,6,6,6,6,0 # f8 - ff
-)
-
-GB2312_st = (
- eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
- eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
- 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
- eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
- eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
-)
-
-# To be accurate, the length of class 6 can be either 2 or 4.
-# But it is not necessary to discriminate between the two since
-# it is used for frequency analysis only, and we are validing
-# each code range there as well. So it is safe to set it to be
-# 2 here.
-GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
-
-GB2312SMModel = {'classTable': GB2312_cls,
- 'classFactor': 7,
- 'stateTable': GB2312_st,
- 'charLenTable': GB2312CharLenTable,
- 'name': 'GB2312'}
-
-# Shift_JIS
-
-SJIS_cls = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,1, # 78 - 7f
- 3,3,3,3,3,3,3,3, # 80 - 87
- 3,3,3,3,3,3,3,3, # 88 - 8f
- 3,3,3,3,3,3,3,3, # 90 - 97
- 3,3,3,3,3,3,3,3, # 98 - 9f
- #0xa0 is illegal in sjis encoding, but some pages does
- #contain such byte. We need to be more error forgiven.
- 2,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,2,2,2, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,2,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,4,4,4, # e8 - ef
- 4,4,4,4,4,4,4,4, # f0 - f7
- 4,4,4,4,4,0,0,0 # f8 - ff
-)
-
-
-SJIS_st = (
- eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
-)
-
-SJISCharLenTable = (0, 1, 1, 2, 0, 0)
-
-SJISSMModel = {'classTable': SJIS_cls,
- 'classFactor': 6,
- 'stateTable': SJIS_st,
- 'charLenTable': SJISCharLenTable,
- 'name': 'Shift_JIS'}
-
-# UCS2-BE
-
-UCS2BE_cls = (
- 0,0,0,0,0,0,0,0, # 00 - 07
- 0,0,1,0,0,2,0,0, # 08 - 0f
- 0,0,0,0,0,0,0,0, # 10 - 17
- 0,0,0,3,0,0,0,0, # 18 - 1f
- 0,0,0,0,0,0,0,0, # 20 - 27
- 0,3,3,3,3,3,0,0, # 28 - 2f
- 0,0,0,0,0,0,0,0, # 30 - 37
- 0,0,0,0,0,0,0,0, # 38 - 3f
- 0,0,0,0,0,0,0,0, # 40 - 47
- 0,0,0,0,0,0,0,0, # 48 - 4f
- 0,0,0,0,0,0,0,0, # 50 - 57
- 0,0,0,0,0,0,0,0, # 58 - 5f
- 0,0,0,0,0,0,0,0, # 60 - 67
- 0,0,0,0,0,0,0,0, # 68 - 6f
- 0,0,0,0,0,0,0,0, # 70 - 77
- 0,0,0,0,0,0,0,0, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,0,0,0,0,0,0,0, # a0 - a7
- 0,0,0,0,0,0,0,0, # a8 - af
- 0,0,0,0,0,0,0,0, # b0 - b7
- 0,0,0,0,0,0,0,0, # b8 - bf
- 0,0,0,0,0,0,0,0, # c0 - c7
- 0,0,0,0,0,0,0,0, # c8 - cf
- 0,0,0,0,0,0,0,0, # d0 - d7
- 0,0,0,0,0,0,0,0, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,4,5 # f8 - ff
-)
-
-UCS2BE_st = (
- 5, 7, 7,eError, 4, 3,eError,eError,#00-07
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
- 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
- 6, 6, 6, 6, 5, 7, 7,eError,#20-27
- 5, 8, 6, 6,eError, 6, 6, 6,#28-2f
- 6, 6, 6, 6,eError,eError,eStart,eStart #30-37
-)
-
-UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
-
-UCS2BESMModel = {'classTable': UCS2BE_cls,
- 'classFactor': 6,
- 'stateTable': UCS2BE_st,
- 'charLenTable': UCS2BECharLenTable,
- 'name': 'UTF-16BE'}
-
-# UCS2-LE
-
-UCS2LE_cls = (
- 0,0,0,0,0,0,0,0, # 00 - 07
- 0,0,1,0,0,2,0,0, # 08 - 0f
- 0,0,0,0,0,0,0,0, # 10 - 17
- 0,0,0,3,0,0,0,0, # 18 - 1f
- 0,0,0,0,0,0,0,0, # 20 - 27
- 0,3,3,3,3,3,0,0, # 28 - 2f
- 0,0,0,0,0,0,0,0, # 30 - 37
- 0,0,0,0,0,0,0,0, # 38 - 3f
- 0,0,0,0,0,0,0,0, # 40 - 47
- 0,0,0,0,0,0,0,0, # 48 - 4f
- 0,0,0,0,0,0,0,0, # 50 - 57
- 0,0,0,0,0,0,0,0, # 58 - 5f
- 0,0,0,0,0,0,0,0, # 60 - 67
- 0,0,0,0,0,0,0,0, # 68 - 6f
- 0,0,0,0,0,0,0,0, # 70 - 77
- 0,0,0,0,0,0,0,0, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,0,0,0,0,0,0,0, # a0 - a7
- 0,0,0,0,0,0,0,0, # a8 - af
- 0,0,0,0,0,0,0,0, # b0 - b7
- 0,0,0,0,0,0,0,0, # b8 - bf
- 0,0,0,0,0,0,0,0, # c0 - c7
- 0,0,0,0,0,0,0,0, # c8 - cf
- 0,0,0,0,0,0,0,0, # d0 - d7
- 0,0,0,0,0,0,0,0, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,4,5 # f8 - ff
-)
-
-UCS2LE_st = (
- 6, 6, 7, 6, 4, 3,eError,eError,#00-07
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
- eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
- 5, 5, 5,eError, 5,eError, 6, 6,#18-1f
- 7, 6, 8, 8, 5, 5, 5,eError,#20-27
- 5, 5, 5,eError,eError,eError, 5, 5,#28-2f
- 5, 5, 5,eError, 5,eError,eStart,eStart #30-37
-)
-
-UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
-
-UCS2LESMModel = {'classTable': UCS2LE_cls,
- 'classFactor': 6,
- 'stateTable': UCS2LE_st,
- 'charLenTable': UCS2LECharLenTable,
- 'name': 'UTF-16LE'}
-
-# UTF-8
-
-UTF8_cls = (
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 1,1,1,1,1,1,1,1, # 40 - 47
- 1,1,1,1,1,1,1,1, # 48 - 4f
- 1,1,1,1,1,1,1,1, # 50 - 57
- 1,1,1,1,1,1,1,1, # 58 - 5f
- 1,1,1,1,1,1,1,1, # 60 - 67
- 1,1,1,1,1,1,1,1, # 68 - 6f
- 1,1,1,1,1,1,1,1, # 70 - 77
- 1,1,1,1,1,1,1,1, # 78 - 7f
- 2,2,2,2,3,3,3,3, # 80 - 87
- 4,4,4,4,4,4,4,4, # 88 - 8f
- 4,4,4,4,4,4,4,4, # 90 - 97
- 4,4,4,4,4,4,4,4, # 98 - 9f
- 5,5,5,5,5,5,5,5, # a0 - a7
- 5,5,5,5,5,5,5,5, # a8 - af
- 5,5,5,5,5,5,5,5, # b0 - b7
- 5,5,5,5,5,5,5,5, # b8 - bf
- 0,0,6,6,6,6,6,6, # c0 - c7
- 6,6,6,6,6,6,6,6, # c8 - cf
- 6,6,6,6,6,6,6,6, # d0 - d7
- 6,6,6,6,6,6,6,6, # d8 - df
- 7,8,8,8,8,8,8,8, # e0 - e7
- 8,8,8,8,8,9,8,8, # e8 - ef
- 10,11,11,11,11,11,11,11, # f0 - f7
- 12,13,13,13,14,15,0,0 # f8 - ff
-)
-
-UTF8_st = (
- eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
- 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
- eError,eError,eError,eError,eError,eError,eError,eError,#10-17
- eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
- eError,eError, 5, 5, 5, 5,eError,eError,#30-37
- eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
- eError,eError,eError, 5, 5, 5,eError,eError,#40-47
- eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
- eError,eError, 7, 7, 7, 7,eError,eError,#50-57
- eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
- eError,eError,eError,eError, 7, 7,eError,eError,#60-67
- eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
- eError,eError, 9, 9, 9, 9,eError,eError,#70-77
- eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
- eError,eError,eError,eError,eError, 9,eError,eError,#80-87
- eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
- eError,eError, 12, 12, 12, 12,eError,eError,#90-97
- eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
- eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7
- eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
- eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
- eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
- eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
- eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
-)
-
-UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
-
-UTF8SMModel = {'classTable': UTF8_cls,
- 'classFactor': 16,
- 'stateTable': UTF8_st,
- 'charLenTable': UTF8CharLenTable,
- 'name': 'UTF-8'}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .constants import eStart, eError, eItsMe + +# BIG5 + +BIG5_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 4,4,4,4,4,4,4,4, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 4,3,3,3,3,3,3,3, # a0 - a7 + 3,3,3,3,3,3,3,3, # a8 - af + 3,3,3,3,3,3,3,3, # b0 - b7 + 3,3,3,3,3,3,3,3, # b8 - bf + 3,3,3,3,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +BIG5_st = ( + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 +) + +Big5CharLenTable = (0, 1, 1, 2, 0) + +Big5SMModel = {'classTable': BIG5_cls, + 'classFactor': 5, + 'stateTable': BIG5_st, + 'charLenTable': Big5CharLenTable, + 'name': 'Big5'} + +# CP949 + +CP949_cls = ( + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f + 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f + 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f + 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f + 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f + 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f + 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f + 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f + 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af + 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf + 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff +) + +CP949_st = ( +#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = + eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart + eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe + eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 + eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 + eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 + eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 +) + +CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) + +CP949SMModel = {'classTable': CP949_cls, + 'classFactor': 10, + 'stateTable': CP949_st, + 'charLenTable': CP949CharLenTable, + 'name': 'CP949'} + +# EUC-JP + +EUCJP_cls = ( + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,5,5, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,5,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,4,4,4,4,4,4,4, # 30 - 37 + 4,4,4,4,4,4,4,4, # 38 - 3f + 4,4,4,4,4,4,4,4, # 40 - 47 + 4,4,4,4,4,4,4,4, # 48 - 4f + 4,4,4,4,4,4,4,4, # 50 - 57 + 4,4,4,4,4,4,4,4, # 58 - 5f + 4,4,4,4,4,4,4,4, # 60 - 67 + 4,4,4,4,4,4,4,4, # 68 - 6f + 4,4,4,4,4,4,4,4, # 70 - 77 + 4,4,4,4,4,4,4,4, # 78 - 7f + 5,5,5,5,5,5,5,5, # 80 - 87 + 5,5,5,5,5,5,1,3, # 88 - 8f + 5,5,5,5,5,5,5,5, # 90 - 97 + 5,5,5,5,5,5,5,5, # 98 - 9f + 5,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,0,5 # f8 - ff +) + +EUCJP_st = ( + 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 + eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f + 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 +) + +EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) + +EUCJPSMModel = {'classTable': EUCJP_cls, + 'classFactor': 6, + 'stateTable': EUCJP_st, + 'charLenTable': EUCJPCharLenTable, + 'name': 'EUC-JP'} + +# EUC-KR + +EUCKR_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,3,3,3, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,3,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 2,2,2,2,2,2,2,2, # e0 - e7 + 2,2,2,2,2,2,2,2, # e8 - ef + 2,2,2,2,2,2,2,2, # f0 - f7 + 2,2,2,2,2,2,2,0 # f8 - ff +) + +EUCKR_st = ( + eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 + eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f +) + +EUCKRCharLenTable = (0, 1, 2, 0) + +EUCKRSMModel = {'classTable': EUCKR_cls, + 'classFactor': 4, + 'stateTable': EUCKR_st, + 'charLenTable': EUCKRCharLenTable, + 'name': 'EUC-KR'} + +# EUC-TW + +EUCTW_cls = ( + 2,2,2,2,2,2,2,2, # 00 - 07 + 2,2,2,2,2,2,0,0, # 08 - 0f + 2,2,2,2,2,2,2,2, # 10 - 17 + 2,2,2,0,2,2,2,2, # 18 - 1f + 2,2,2,2,2,2,2,2, # 20 - 27 + 2,2,2,2,2,2,2,2, # 28 - 2f + 2,2,2,2,2,2,2,2, # 30 - 37 + 2,2,2,2,2,2,2,2, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,2, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,6,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,3,4,4,4,4,4,4, # a0 - a7 + 5,5,1,1,1,1,1,1, # a8 - af + 1,1,1,1,1,1,1,1, # b0 - b7 + 1,1,1,1,1,1,1,1, # b8 - bf + 1,1,3,1,3,3,3,3, # c0 - c7 + 3,3,3,3,3,3,3,3, # c8 - cf + 3,3,3,3,3,3,3,3, # d0 - d7 + 3,3,3,3,3,3,3,3, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,3,3,3, # e8 - ef + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,3,3,0 # f8 - ff +) + +EUCTW_st = ( + eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 + eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f + 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 + eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) + +EUCTWSMModel = {'classTable': EUCTW_cls, + 'classFactor': 7, + 'stateTable': EUCTW_st, + 'charLenTable': EUCTWCharLenTable, + 'name': 'x-euc-tw'} + +# GB2312 + +GB2312_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 3,3,3,3,3,3,3,3, # 30 - 37 + 3,3,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,4, # 78 - 7f + 5,6,6,6,6,6,6,6, # 80 - 87 + 6,6,6,6,6,6,6,6, # 88 - 8f + 6,6,6,6,6,6,6,6, # 90 - 97 + 6,6,6,6,6,6,6,6, # 98 - 9f + 6,6,6,6,6,6,6,6, # a0 - a7 + 6,6,6,6,6,6,6,6, # a8 - af + 6,6,6,6,6,6,6,6, # b0 - b7 + 6,6,6,6,6,6,6,6, # b8 - bf + 6,6,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 6,6,6,6,6,6,6,6, # e0 - e7 + 6,6,6,6,6,6,6,6, # e8 - ef + 6,6,6,6,6,6,6,6, # f0 - f7 + 6,6,6,6,6,6,6,0 # f8 - ff +) + +GB2312_st = ( + eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 + eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 + 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f + eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 + eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f +) + +# To be accurate, the length of class 6 can be either 2 or 4. +# But it is not necessary to discriminate between the two since +# it is used for frequency analysis only, and we are validing +# each code range there as well. So it is safe to set it to be +# 2 here. +GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) + +GB2312SMModel = {'classTable': GB2312_cls, + 'classFactor': 7, + 'stateTable': GB2312_st, + 'charLenTable': GB2312CharLenTable, + 'name': 'GB2312'} + +# Shift_JIS + +SJIS_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 2,2,2,2,2,2,2,2, # 40 - 47 + 2,2,2,2,2,2,2,2, # 48 - 4f + 2,2,2,2,2,2,2,2, # 50 - 57 + 2,2,2,2,2,2,2,2, # 58 - 5f + 2,2,2,2,2,2,2,2, # 60 - 67 + 2,2,2,2,2,2,2,2, # 68 - 6f + 2,2,2,2,2,2,2,2, # 70 - 77 + 2,2,2,2,2,2,2,1, # 78 - 7f + 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,3,3,3, # 88 - 8f + 3,3,3,3,3,3,3,3, # 90 - 97 + 3,3,3,3,3,3,3,3, # 98 - 9f + #0xa0 is illegal in sjis encoding, but some pages does + #contain such byte. We need to be more error forgiven. + 2,2,2,2,2,2,2,2, # a0 - a7 + 2,2,2,2,2,2,2,2, # a8 - af + 2,2,2,2,2,2,2,2, # b0 - b7 + 2,2,2,2,2,2,2,2, # b8 - bf + 2,2,2,2,2,2,2,2, # c0 - c7 + 2,2,2,2,2,2,2,2, # c8 - cf + 2,2,2,2,2,2,2,2, # d0 - d7 + 2,2,2,2,2,2,2,2, # d8 - df + 3,3,3,3,3,3,3,3, # e0 - e7 + 3,3,3,3,3,4,4,4, # e8 - ef + 4,4,4,4,4,4,4,4, # f0 - f7 + 4,4,4,4,4,0,0,0 # f8 - ff +) + + +SJIS_st = ( + eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 +) + +SJISCharLenTable = (0, 1, 1, 2, 0, 0) + +SJISSMModel = {'classTable': SJIS_cls, + 'classFactor': 6, + 'stateTable': SJIS_st, + 'charLenTable': SJISCharLenTable, + 'name': 'Shift_JIS'} + +# UCS2-BE + +UCS2BE_cls = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2BE_st = ( + 5, 7, 7,eError, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 + 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f + 6, 6, 6, 6, 5, 7, 7,eError,#20-27 + 5, 8, 6, 6,eError, 6, 6, 6,#28-2f + 6, 6, 6, 6,eError,eError,eStart,eStart #30-37 +) + +UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) + +UCS2BESMModel = {'classTable': UCS2BE_cls, + 'classFactor': 6, + 'stateTable': UCS2BE_st, + 'charLenTable': UCS2BECharLenTable, + 'name': 'UTF-16BE'} + +# UCS2-LE + +UCS2LE_cls = ( + 0,0,0,0,0,0,0,0, # 00 - 07 + 0,0,1,0,0,2,0,0, # 08 - 0f + 0,0,0,0,0,0,0,0, # 10 - 17 + 0,0,0,3,0,0,0,0, # 18 - 1f + 0,0,0,0,0,0,0,0, # 20 - 27 + 0,3,3,3,3,3,0,0, # 28 - 2f + 0,0,0,0,0,0,0,0, # 30 - 37 + 0,0,0,0,0,0,0,0, # 38 - 3f + 0,0,0,0,0,0,0,0, # 40 - 47 + 0,0,0,0,0,0,0,0, # 48 - 4f + 0,0,0,0,0,0,0,0, # 50 - 57 + 0,0,0,0,0,0,0,0, # 58 - 5f + 0,0,0,0,0,0,0,0, # 60 - 67 + 0,0,0,0,0,0,0,0, # 68 - 6f + 0,0,0,0,0,0,0,0, # 70 - 77 + 0,0,0,0,0,0,0,0, # 78 - 7f + 0,0,0,0,0,0,0,0, # 80 - 87 + 0,0,0,0,0,0,0,0, # 88 - 8f + 0,0,0,0,0,0,0,0, # 90 - 97 + 0,0,0,0,0,0,0,0, # 98 - 9f + 0,0,0,0,0,0,0,0, # a0 - a7 + 0,0,0,0,0,0,0,0, # a8 - af + 0,0,0,0,0,0,0,0, # b0 - b7 + 0,0,0,0,0,0,0,0, # b8 - bf + 0,0,0,0,0,0,0,0, # c0 - c7 + 0,0,0,0,0,0,0,0, # c8 - cf + 0,0,0,0,0,0,0,0, # d0 - d7 + 0,0,0,0,0,0,0,0, # d8 - df + 0,0,0,0,0,0,0,0, # e0 - e7 + 0,0,0,0,0,0,0,0, # e8 - ef + 0,0,0,0,0,0,0,0, # f0 - f7 + 0,0,0,0,0,0,4,5 # f8 - ff +) + +UCS2LE_st = ( + 6, 6, 7, 6, 4, 3,eError,eError,#00-07 + eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f + eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 + 5, 5, 5,eError, 5,eError, 6, 6,#18-1f + 7, 6, 8, 8, 5, 5, 5,eError,#20-27 + 5, 5, 5,eError,eError,eError, 5, 5,#28-2f + 5, 5, 5,eError, 5,eError,eStart,eStart #30-37 +) + +UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) + +UCS2LESMModel = {'classTable': UCS2LE_cls, + 'classFactor': 6, + 'stateTable': UCS2LE_st, + 'charLenTable': UCS2LECharLenTable, + 'name': 'UTF-16LE'} + +# UTF-8 + +UTF8_cls = ( + 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value + 1,1,1,1,1,1,0,0, # 08 - 0f + 1,1,1,1,1,1,1,1, # 10 - 17 + 1,1,1,0,1,1,1,1, # 18 - 1f + 1,1,1,1,1,1,1,1, # 20 - 27 + 1,1,1,1,1,1,1,1, # 28 - 2f + 1,1,1,1,1,1,1,1, # 30 - 37 + 1,1,1,1,1,1,1,1, # 38 - 3f + 1,1,1,1,1,1,1,1, # 40 - 47 + 1,1,1,1,1,1,1,1, # 48 - 4f + 1,1,1,1,1,1,1,1, # 50 - 57 + 1,1,1,1,1,1,1,1, # 58 - 5f + 1,1,1,1,1,1,1,1, # 60 - 67 + 1,1,1,1,1,1,1,1, # 68 - 6f + 1,1,1,1,1,1,1,1, # 70 - 77 + 1,1,1,1,1,1,1,1, # 78 - 7f + 2,2,2,2,3,3,3,3, # 80 - 87 + 4,4,4,4,4,4,4,4, # 88 - 8f + 4,4,4,4,4,4,4,4, # 90 - 97 + 4,4,4,4,4,4,4,4, # 98 - 9f + 5,5,5,5,5,5,5,5, # a0 - a7 + 5,5,5,5,5,5,5,5, # a8 - af + 5,5,5,5,5,5,5,5, # b0 - b7 + 5,5,5,5,5,5,5,5, # b8 - bf + 0,0,6,6,6,6,6,6, # c0 - c7 + 6,6,6,6,6,6,6,6, # c8 - cf + 6,6,6,6,6,6,6,6, # d0 - d7 + 6,6,6,6,6,6,6,6, # d8 - df + 7,8,8,8,8,8,8,8, # e0 - e7 + 8,8,8,8,8,9,8,8, # e8 - ef + 10,11,11,11,11,11,11,11, # f0 - f7 + 12,13,13,13,14,15,0,0 # f8 - ff +) + +UTF8_st = ( + eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 + 9, 11, 8, 7, 6, 5, 4, 3,#08-0f + eError,eError,eError,eError,eError,eError,eError,eError,#10-17 + eError,eError,eError,eError,eError,eError,eError,eError,#18-1f + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 + eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f + eError,eError, 5, 5, 5, 5,eError,eError,#30-37 + eError,eError,eError,eError,eError,eError,eError,eError,#38-3f + eError,eError,eError, 5, 5, 5,eError,eError,#40-47 + eError,eError,eError,eError,eError,eError,eError,eError,#48-4f + eError,eError, 7, 7, 7, 7,eError,eError,#50-57 + eError,eError,eError,eError,eError,eError,eError,eError,#58-5f + eError,eError,eError,eError, 7, 7,eError,eError,#60-67 + eError,eError,eError,eError,eError,eError,eError,eError,#68-6f + eError,eError, 9, 9, 9, 9,eError,eError,#70-77 + eError,eError,eError,eError,eError,eError,eError,eError,#78-7f + eError,eError,eError,eError,eError, 9,eError,eError,#80-87 + eError,eError,eError,eError,eError,eError,eError,eError,#88-8f + eError,eError, 12, 12, 12, 12,eError,eError,#90-97 + eError,eError,eError,eError,eError,eError,eError,eError,#98-9f + eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 + eError,eError,eError,eError,eError,eError,eError,eError,#a8-af + eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 + eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf + eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 + eError,eError,eError,eError,eError,eError,eError,eError #c8-cf +) + +UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) + +UTF8SMModel = {'classTable': UTF8_cls, + 'classFactor': 16, + 'stateTable': UTF8_st, + 'charLenTable': UTF8CharLenTable, + 'name': 'UTF-8'} + +# flake8: noqa diff --git a/requests/packages/charade/sbcharsetprober.py b/requests/packages/chardet/sbcharsetprober.py index da26715..37291bd 100644 --- a/requests/packages/charade/sbcharsetprober.py +++ b/requests/packages/chardet/sbcharsetprober.py @@ -1,120 +1,120 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .charsetprober import CharSetProber
-from .compat import wrap_ord
-
-SAMPLE_SIZE = 64
-SB_ENOUGH_REL_THRESHOLD = 1024
-POSITIVE_SHORTCUT_THRESHOLD = 0.95
-NEGATIVE_SHORTCUT_THRESHOLD = 0.05
-SYMBOL_CAT_ORDER = 250
-NUMBER_OF_SEQ_CAT = 4
-POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
-#NEGATIVE_CAT = 0
-
-
-class SingleByteCharSetProber(CharSetProber):
- def __init__(self, model, reversed=False, nameProber=None):
- CharSetProber.__init__(self)
- self._mModel = model
- # TRUE if we need to reverse every pair in the model lookup
- self._mReversed = reversed
- # Optional auxiliary prober for name decision
- self._mNameProber = nameProber
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- # char order of last character
- self._mLastOrder = 255
- self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
- self._mTotalSeqs = 0
- self._mTotalChar = 0
- # characters that fall in our sampling range
- self._mFreqChar = 0
-
- def get_charset_name(self):
- if self._mNameProber:
- return self._mNameProber.get_charset_name()
- else:
- return self._mModel['charsetName']
-
- def feed(self, aBuf):
- if not self._mModel['keepEnglishLetter']:
- aBuf = self.filter_without_english_letters(aBuf)
- aLen = len(aBuf)
- if not aLen:
- return self.get_state()
- for c in aBuf:
- order = self._mModel['charToOrderMap'][wrap_ord(c)]
- if order < SYMBOL_CAT_ORDER:
- self._mTotalChar += 1
- if order < SAMPLE_SIZE:
- self._mFreqChar += 1
- if self._mLastOrder < SAMPLE_SIZE:
- self._mTotalSeqs += 1
- if not self._mReversed:
- i = (self._mLastOrder * SAMPLE_SIZE) + order
- model = self._mModel['precedenceMatrix'][i]
- else: # reverse the order of the letters in the lookup
- i = (order * SAMPLE_SIZE) + self._mLastOrder
- model = self._mModel['precedenceMatrix'][i]
- self._mSeqCounters[model] += 1
- self._mLastOrder = order
-
- if self.get_state() == constants.eDetecting:
- if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
- cf = self.get_confidence()
- if cf > POSITIVE_SHORTCUT_THRESHOLD:
- if constants._debug:
- sys.stderr.write('%s confidence = %s, we have a'
- 'winner\n' %
- (self._mModel['charsetName'], cf))
- self._mState = constants.eFoundIt
- elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
- if constants._debug:
- sys.stderr.write('%s confidence = %s, below negative'
- 'shortcut threshhold %s\n' %
- (self._mModel['charsetName'], cf,
- NEGATIVE_SHORTCUT_THRESHOLD))
- self._mState = constants.eNotMe
-
- return self.get_state()
-
- def get_confidence(self):
- r = 0.01
- if self._mTotalSeqs > 0:
- r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
- / self._mModel['mTypicalPositiveRatio'])
- r = r * self._mFreqChar / self._mTotalChar
- if r >= 1.0:
- r = 0.99
- return r
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from . import constants +from .charsetprober import CharSetProber +from .compat import wrap_ord + +SAMPLE_SIZE = 64 +SB_ENOUGH_REL_THRESHOLD = 1024 +POSITIVE_SHORTCUT_THRESHOLD = 0.95 +NEGATIVE_SHORTCUT_THRESHOLD = 0.05 +SYMBOL_CAT_ORDER = 250 +NUMBER_OF_SEQ_CAT = 4 +POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 +#NEGATIVE_CAT = 0 + + +class SingleByteCharSetProber(CharSetProber): + def __init__(self, model, reversed=False, nameProber=None): + CharSetProber.__init__(self) + self._mModel = model + # TRUE if we need to reverse every pair in the model lookup + self._mReversed = reversed + # Optional auxiliary prober for name decision + self._mNameProber = nameProber + self.reset() + + def reset(self): + CharSetProber.reset(self) + # char order of last character + self._mLastOrder = 255 + self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT + self._mTotalSeqs = 0 + self._mTotalChar = 0 + # characters that fall in our sampling range + self._mFreqChar = 0 + + def get_charset_name(self): + if self._mNameProber: + return self._mNameProber.get_charset_name() + else: + return self._mModel['charsetName'] + + def feed(self, aBuf): + if not self._mModel['keepEnglishLetter']: + aBuf = self.filter_without_english_letters(aBuf) + aLen = len(aBuf) + if not aLen: + return self.get_state() + for c in aBuf: + order = self._mModel['charToOrderMap'][wrap_ord(c)] + if order < SYMBOL_CAT_ORDER: + self._mTotalChar += 1 + if order < SAMPLE_SIZE: + self._mFreqChar += 1 + if self._mLastOrder < SAMPLE_SIZE: + self._mTotalSeqs += 1 + if not self._mReversed: + i = (self._mLastOrder * SAMPLE_SIZE) + order + model = self._mModel['precedenceMatrix'][i] + else: # reverse the order of the letters in the lookup + i = (order * SAMPLE_SIZE) + self._mLastOrder + model = self._mModel['precedenceMatrix'][i] + self._mSeqCounters[model] += 1 + self._mLastOrder = order + + if self.get_state() == constants.eDetecting: + if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: + cf = self.get_confidence() + if cf > POSITIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, we have a' + 'winner\n' % + (self._mModel['charsetName'], cf)) + self._mState = constants.eFoundIt + elif cf < NEGATIVE_SHORTCUT_THRESHOLD: + if constants._debug: + sys.stderr.write('%s confidence = %s, below negative' + 'shortcut threshhold %s\n' % + (self._mModel['charsetName'], cf, + NEGATIVE_SHORTCUT_THRESHOLD)) + self._mState = constants.eNotMe + + return self.get_state() + + def get_confidence(self): + r = 0.01 + if self._mTotalSeqs > 0: + r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs + / self._mModel['mTypicalPositiveRatio']) + r = r * self._mFreqChar / self._mTotalChar + if r >= 1.0: + r = 0.99 + return r diff --git a/requests/packages/charade/sbcsgroupprober.py b/requests/packages/chardet/sbcsgroupprober.py index b224814..1b6196c 100644 --- a/requests/packages/charade/sbcsgroupprober.py +++ b/requests/packages/chardet/sbcsgroupprober.py @@ -1,69 +1,69 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetgroupprober import CharSetGroupProber
-from .sbcharsetprober import SingleByteCharSetProber
-from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
- Latin5CyrillicModel, MacCyrillicModel,
- Ibm866Model, Ibm855Model)
-from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
-from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
-from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
-from .langthaimodel import TIS620ThaiModel
-from .langhebrewmodel import Win1255HebrewModel
-from .hebrewprober import HebrewProber
-
-
-class SBCSGroupProber(CharSetGroupProber):
- def __init__(self):
- CharSetGroupProber.__init__(self)
- self._mProbers = [
- SingleByteCharSetProber(Win1251CyrillicModel),
- SingleByteCharSetProber(Koi8rModel),
- SingleByteCharSetProber(Latin5CyrillicModel),
- SingleByteCharSetProber(MacCyrillicModel),
- SingleByteCharSetProber(Ibm866Model),
- SingleByteCharSetProber(Ibm855Model),
- SingleByteCharSetProber(Latin7GreekModel),
- SingleByteCharSetProber(Win1253GreekModel),
- SingleByteCharSetProber(Latin5BulgarianModel),
- SingleByteCharSetProber(Win1251BulgarianModel),
- SingleByteCharSetProber(Latin2HungarianModel),
- SingleByteCharSetProber(Win1250HungarianModel),
- SingleByteCharSetProber(TIS620ThaiModel),
- ]
- hebrewProber = HebrewProber()
- logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
- False, hebrewProber)
- visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
- hebrewProber)
- hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
- self._mProbers.extend([hebrewProber, logicalHebrewProber,
- visualHebrewProber])
-
- self.reset()
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, + Latin5CyrillicModel, MacCyrillicModel, + Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber + + +class SBCSGroupProber(CharSetGroupProber): + def __init__(self): + CharSetGroupProber.__init__(self) + self._mProbers = [ + SingleByteCharSetProber(Win1251CyrillicModel), + SingleByteCharSetProber(Koi8rModel), + SingleByteCharSetProber(Latin5CyrillicModel), + SingleByteCharSetProber(MacCyrillicModel), + SingleByteCharSetProber(Ibm866Model), + SingleByteCharSetProber(Ibm855Model), + SingleByteCharSetProber(Latin7GreekModel), + SingleByteCharSetProber(Win1253GreekModel), + SingleByteCharSetProber(Latin5BulgarianModel), + SingleByteCharSetProber(Win1251BulgarianModel), + SingleByteCharSetProber(Latin2HungarianModel), + SingleByteCharSetProber(Win1250HungarianModel), + SingleByteCharSetProber(TIS620ThaiModel), + ] + hebrewProber = HebrewProber() + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrewProber) + hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) + self._mProbers.extend([hebrewProber, logicalHebrewProber, + visualHebrewProber]) + + self.reset() diff --git a/requests/packages/charade/sjisprober.py b/requests/packages/chardet/sjisprober.py index 9bb0cdc..b173614 100644 --- a/requests/packages/charade/sjisprober.py +++ b/requests/packages/chardet/sjisprober.py @@ -1,91 +1,91 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import SJISDistributionAnalysis
-from .jpcntx import SJISContextAnalysis
-from .mbcssm import SJISSMModel
-from . import constants
-
-
-class SJISProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(SJISSMModel)
- self._mDistributionAnalyzer = SJISDistributionAnalysis()
- self._mContextAnalyzer = SJISContextAnalysis()
- self.reset()
-
- def reset(self):
- MultiByteCharSetProber.reset(self)
- self._mContextAnalyzer.reset()
-
- def get_charset_name(self):
- return "SHIFT_JIS"
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name()
- + ' prober hit error at byte ' + str(i)
- + '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
- charLen)
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
- - charLen], charLen)
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mContextAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- contxtCf = self._mContextAnalyzer.get_confidence()
- distribCf = self._mDistributionAnalyzer.get_confidence()
- return max(contxtCf, distribCf)
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +import sys +from .mbcharsetprober import MultiByteCharSetProber +from .codingstatemachine import CodingStateMachine +from .chardistribution import SJISDistributionAnalysis +from .jpcntx import SJISContextAnalysis +from .mbcssm import SJISSMModel +from . import constants + + +class SJISProber(MultiByteCharSetProber): + def __init__(self): + MultiByteCharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(SJISSMModel) + self._mDistributionAnalyzer = SJISDistributionAnalysis() + self._mContextAnalyzer = SJISContextAnalysis() + self.reset() + + def reset(self): + MultiByteCharSetProber.reset(self) + self._mContextAnalyzer.reset() + + def get_charset_name(self): + return "SHIFT_JIS" + + def feed(self, aBuf): + aLen = len(aBuf) + for i in range(0, aLen): + codingState = self._mCodingSM.next_state(aBuf[i]) + if codingState == constants.eError: + if constants._debug: + sys.stderr.write(self.get_charset_name() + + ' prober hit error at byte ' + str(i) + + '\n') + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + charLen = self._mCodingSM.get_current_charlen() + if i == 0: + self._mLastChar[1] = aBuf[0] + self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], + charLen) + self._mDistributionAnalyzer.feed(self._mLastChar, charLen) + else: + self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 + - charLen], charLen) + self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], + charLen) + + self._mLastChar[0] = aBuf[aLen - 1] + + if self.get_state() == constants.eDetecting: + if (self._mContextAnalyzer.got_enough_data() and + (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + contxtCf = self._mContextAnalyzer.get_confidence() + distribCf = self._mDistributionAnalyzer.get_confidence() + return max(contxtCf, distribCf) diff --git a/requests/packages/charade/universaldetector.py b/requests/packages/chardet/universaldetector.py index 6175bfb..9a03ad3 100644 --- a/requests/packages/charade/universaldetector.py +++ b/requests/packages/chardet/universaldetector.py @@ -1,172 +1,170 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-import sys
-import codecs
-from .latin1prober import Latin1Prober # windows-1252
-from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
-from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
-from .escprober import EscCharSetProber # ISO-2122, etc.
-import re
-
-MINIMUM_THRESHOLD = 0.20
-ePureAscii = 0
-eEscAscii = 1
-eHighbyte = 2
-
-
-class UniversalDetector:
- def __init__(self):
- self._highBitDetector = re.compile(b'[\x80-\xFF]')
- self._escDetector = re.compile(b'(\033|~{)')
- self._mEscCharSetProber = None
- self._mCharSetProbers = []
- self.reset()
-
- def reset(self):
- self.result = {'encoding': None, 'confidence': 0.0}
- self.done = False
- self._mStart = True
- self._mGotData = False
- self._mInputState = ePureAscii
- self._mLastChar = b''
- if self._mEscCharSetProber:
- self._mEscCharSetProber.reset()
- for prober in self._mCharSetProbers:
- prober.reset()
-
- def feed(self, aBuf):
- if self.done:
- return
-
- aLen = len(aBuf)
- if not aLen:
- return
-
- if not self._mGotData:
- # If the data starts with BOM, we know it is UTF
- if aBuf[:3] == codecs.BOM:
- # EF BB BF UTF-8 with BOM
- self.result = {'encoding': "UTF-8", 'confidence': 1.0}
- elif aBuf[:4] == codecs.BOM_UTF32_LE:
- # FF FE 00 00 UTF-32, little-endian BOM
- self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
- elif aBuf[:4] == codecs.BOM_UTF32_BE:
- # 00 00 FE FF UTF-32, big-endian BOM
- self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
- elif aBuf[:4] == b'\xFE\xFF\x00\x00':
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
- self.result = {
- 'encoding': "X-ISO-10646-UCS-4-3412",
- 'confidence': 1.0
- }
- elif aBuf[:4] == b'\x00\x00\xFF\xFE':
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
- self.result = {
- 'encoding': "X-ISO-10646-UCS-4-2143",
- 'confidence': 1.0
- }
- elif aBuf[:2] == codecs.BOM_LE:
- # FF FE UTF-16, little endian BOM
- self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
- elif aBuf[:2] == codecs.BOM_BE:
- # FE FF UTF-16, big endian BOM
- self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
-
- self._mGotData = True
- if self.result['encoding'] and (self.result['confidence'] > 0.0):
- self.done = True
- return
-
- if self._mInputState == ePureAscii:
- if self._highBitDetector.search(aBuf):
- self._mInputState = eHighbyte
- elif ((self._mInputState == ePureAscii) and
- self._escDetector.search(self._mLastChar + aBuf)):
- self._mInputState = eEscAscii
-
- self._mLastChar = aBuf[-1:]
-
- if self._mInputState == eEscAscii:
- if not self._mEscCharSetProber:
- self._mEscCharSetProber = EscCharSetProber()
- if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
- self.result = {
- 'encoding': self._mEscCharSetProber.get_charset_name(),
- 'confidence': self._mEscCharSetProber.get_confidence()
- }
- self.done = True
- elif self._mInputState == eHighbyte:
- if not self._mCharSetProbers:
- self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
- Latin1Prober()]
- for prober in self._mCharSetProbers:
- if prober.feed(aBuf) == constants.eFoundIt:
- self.result = {'encoding': prober.get_charset_name(),
- 'confidence': prober.get_confidence()}
- self.done = True
- break
-
- def close(self):
- if self.done:
- return
- if not self._mGotData:
- if constants._debug:
- sys.stderr.write('no data received!\n')
- return
- self.done = True
-
- if self._mInputState == ePureAscii:
- self.result = {'encoding': 'ascii', 'confidence': 1.0}
- return self.result
-
- if self._mInputState == eHighbyte:
- proberConfidence = None
- maxProberConfidence = 0.0
- maxProber = None
- for prober in self._mCharSetProbers:
- if not prober:
- continue
- proberConfidence = prober.get_confidence()
- if proberConfidence > maxProberConfidence:
- maxProberConfidence = proberConfidence
- maxProber = prober
- if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
- self.result = {'encoding': maxProber.get_charset_name(),
- 'confidence': maxProber.get_confidence()}
- return self.result
-
- if constants._debug:
- sys.stderr.write('no probers hit minimum threshhold\n')
- for prober in self._mCharSetProbers[0].mProbers:
- if not prober:
- continue
- sys.stderr.write('%s confidence = %s\n' %
- (prober.get_charset_name(),
- prober.get_confidence()))
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +import sys +import codecs +from .latin1prober import Latin1Prober # windows-1252 +from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets +from .sbcsgroupprober import SBCSGroupProber # single-byte character sets +from .escprober import EscCharSetProber # ISO-2122, etc. +import re + +MINIMUM_THRESHOLD = 0.20 +ePureAscii = 0 +eEscAscii = 1 +eHighbyte = 2 + + +class UniversalDetector: + def __init__(self): + self._highBitDetector = re.compile(b'[\x80-\xFF]') + self._escDetector = re.compile(b'(\033|~{)') + self._mEscCharSetProber = None + self._mCharSetProbers = [] + self.reset() + + def reset(self): + self.result = {'encoding': None, 'confidence': 0.0} + self.done = False + self._mStart = True + self._mGotData = False + self._mInputState = ePureAscii + self._mLastChar = b'' + if self._mEscCharSetProber: + self._mEscCharSetProber.reset() + for prober in self._mCharSetProbers: + prober.reset() + + def feed(self, aBuf): + if self.done: + return + + aLen = len(aBuf) + if not aLen: + return + + if not self._mGotData: + # If the data starts with BOM, we know it is UTF + if aBuf[:3] == codecs.BOM: + # EF BB BF UTF-8 with BOM + self.result = {'encoding': "UTF-8", 'confidence': 1.0} + elif aBuf[:4] == codecs.BOM_UTF32_LE: + # FF FE 00 00 UTF-32, little-endian BOM + self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} + elif aBuf[:4] == codecs.BOM_UTF32_BE: + # 00 00 FE FF UTF-32, big-endian BOM + self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} + elif aBuf[:4] == b'\xFE\xFF\x00\x00': + # FE FF 00 00 UCS-4, unusual octet order BOM (3412) + self.result = { + 'encoding': "X-ISO-10646-UCS-4-3412", + 'confidence': 1.0 + } + elif aBuf[:4] == b'\x00\x00\xFF\xFE': + # 00 00 FF FE UCS-4, unusual octet order BOM (2143) + self.result = { + 'encoding': "X-ISO-10646-UCS-4-2143", + 'confidence': 1.0 + } + elif aBuf[:2] == codecs.BOM_LE: + # FF FE UTF-16, little endian BOM + self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} + elif aBuf[:2] == codecs.BOM_BE: + # FE FF UTF-16, big endian BOM + self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + + self._mGotData = True + if self.result['encoding'] and (self.result['confidence'] > 0.0): + self.done = True + return + + if self._mInputState == ePureAscii: + if self._highBitDetector.search(aBuf): + self._mInputState = eHighbyte + elif ((self._mInputState == ePureAscii) and + self._escDetector.search(self._mLastChar + aBuf)): + self._mInputState = eEscAscii + + self._mLastChar = aBuf[-1:] + + if self._mInputState == eEscAscii: + if not self._mEscCharSetProber: + self._mEscCharSetProber = EscCharSetProber() + if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), + 'confidence': self._mEscCharSetProber.get_confidence()} + self.done = True + elif self._mInputState == eHighbyte: + if not self._mCharSetProbers: + self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), + Latin1Prober()] + for prober in self._mCharSetProbers: + if prober.feed(aBuf) == constants.eFoundIt: + self.result = {'encoding': prober.get_charset_name(), + 'confidence': prober.get_confidence()} + self.done = True + break + + def close(self): + if self.done: + return + if not self._mGotData: + if constants._debug: + sys.stderr.write('no data received!\n') + return + self.done = True + + if self._mInputState == ePureAscii: + self.result = {'encoding': 'ascii', 'confidence': 1.0} + return self.result + + if self._mInputState == eHighbyte: + proberConfidence = None + maxProberConfidence = 0.0 + maxProber = None + for prober in self._mCharSetProbers: + if not prober: + continue + proberConfidence = prober.get_confidence() + if proberConfidence > maxProberConfidence: + maxProberConfidence = proberConfidence + maxProber = prober + if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): + self.result = {'encoding': maxProber.get_charset_name(), + 'confidence': maxProber.get_confidence()} + return self.result + + if constants._debug: + sys.stderr.write('no probers hit minimum threshhold\n') + for prober in self._mCharSetProbers[0].mProbers: + if not prober: + continue + sys.stderr.write('%s confidence = %s\n' % + (prober.get_charset_name(), + prober.get_confidence())) diff --git a/requests/packages/charade/utf8prober.py b/requests/packages/chardet/utf8prober.py index 72c8d3d..1c0bb5d 100644 --- a/requests/packages/charade/utf8prober.py +++ b/requests/packages/chardet/utf8prober.py @@ -1,76 +1,76 @@ -######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-from .charsetprober import CharSetProber
-from .codingstatemachine import CodingStateMachine
-from .mbcssm import UTF8SMModel
-
-ONE_CHAR_PROB = 0.5
-
-
-class UTF8Prober(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(UTF8SMModel)
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- self._mCodingSM.reset()
- self._mNumOfMBChar = 0
-
- def get_charset_name(self):
- return "utf-8"
-
- def feed(self, aBuf):
- for c in aBuf:
- codingState = self._mCodingSM.next_state(c)
- if codingState == constants.eError:
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- if self._mCodingSM.get_current_charlen() >= 2:
- self._mNumOfMBChar += 1
-
- if self.get_state() == constants.eDetecting:
- if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- unlike = 0.99
- if self._mNumOfMBChar < 6:
- for i in range(0, self._mNumOfMBChar):
- unlike = unlike * ONE_CHAR_PROB
- return 1.0 - unlike
- else:
- return unlike
+######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from . import constants +from .charsetprober import CharSetProber +from .codingstatemachine import CodingStateMachine +from .mbcssm import UTF8SMModel + +ONE_CHAR_PROB = 0.5 + + +class UTF8Prober(CharSetProber): + def __init__(self): + CharSetProber.__init__(self) + self._mCodingSM = CodingStateMachine(UTF8SMModel) + self.reset() + + def reset(self): + CharSetProber.reset(self) + self._mCodingSM.reset() + self._mNumOfMBChar = 0 + + def get_charset_name(self): + return "utf-8" + + def feed(self, aBuf): + for c in aBuf: + codingState = self._mCodingSM.next_state(c) + if codingState == constants.eError: + self._mState = constants.eNotMe + break + elif codingState == constants.eItsMe: + self._mState = constants.eFoundIt + break + elif codingState == constants.eStart: + if self._mCodingSM.get_current_charlen() >= 2: + self._mNumOfMBChar += 1 + + if self.get_state() == constants.eDetecting: + if self.get_confidence() > constants.SHORTCUT_THRESHOLD: + self._mState = constants.eFoundIt + + return self.get_state() + + def get_confidence(self): + unlike = 0.99 + if self._mNumOfMBChar < 6: + for i in range(0, self._mNumOfMBChar): + unlike = unlike * ONE_CHAR_PROB + return 1.0 - unlike + else: + return unlike diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 282b8d5..5907b0d 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -5,7 +5,16 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php from collections import MutableMapping -from threading import RLock +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + try: # Python 2.7+ from collections import OrderedDict diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py new file mode 100644 index 0000000..2124774 --- /dev/null +++ b/requests/packages/urllib3/connection.py @@ -0,0 +1,178 @@ +# urllib3/connection.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import socket +from socket import timeout as SocketTimeout + +try: # Python 3 + from http.client import HTTPConnection as _HTTPConnection, HTTPException +except ImportError: + from httplib import HTTPConnection as _HTTPConnection, HTTPException + +class DummyConnection(object): + "Used to detect a failed ConnectionCls import." + pass + +try: # Compiled with SSL? + ssl = None + HTTPSConnection = DummyConnection + + class BaseSSLError(BaseException): + pass + + try: # Python 3 + from http.client import HTTPSConnection as _HTTPSConnection + except ImportError: + from httplib import HTTPSConnection as _HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except (ImportError, AttributeError): # Platform-specific: No SSL. + pass + +from .exceptions import ( + ConnectTimeoutError, +) +from .packages.ssl_match_hostname import match_hostname +from .util import ( + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) + + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + + +class HTTPConnection(_HTTPConnection, object): + default_port = port_by_scheme['http'] + + # By default, disable Nagle's Algorithm. + tcp_nodelay = 1 + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it + + :return: a new socket connection + """ + try: + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + self.source_address, + ) + except AttributeError: # Python 2.6 + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + ) + conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + return conn + + def _prepare_conn(self, conn): + self.sock = conn + if self._tunnel_host: + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + try: + HTTPConnection.__init__(self, host, port, strict, timeout, source_address) + except TypeError: # Python 2.6 + HTTPConnection.__init__(self, host, port, strict, timeout) + self.key_file = key_file + self.cert_file = cert_file + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ssl_version = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def connect(self): + # Add certificate verification + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout, + ) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) + + +if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 691d4e2..243d700 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -11,39 +11,12 @@ from socket import error as SocketError, timeout as SocketTimeout import socket try: # Python 3 - from http.client import HTTPConnection, HTTPException - from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: - from httplib import HTTPConnection, HTTPException - from httplib import HTTP_PORT, HTTPS_PORT - -try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full import Queue as _ # Platform-specific: Windows -try: # Compiled with SSL? - HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass - - from .exceptions import ( ClosedPoolError, ConnectTimeoutError, @@ -51,94 +24,34 @@ from .exceptions import ( HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +131,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +164,24 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) + extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) - + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -341,7 +260,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -362,7 +281,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +290,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +302,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +317,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +334,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +371,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -531,6 +463,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -559,36 +498,36 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): except Empty: # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") - - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") + raise EmptyPoolError(self, "No pool connections are available.") except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: # Name mismatch raise SSLError(e) - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) + except TimeoutError as e: + # Connection broken, discard. + conn = None + # Save the error off for retry logic. + err = e + if retries == 0: + raise + + except (HTTPException, SocketError) as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e if retries == 0: - raise MaxRetryError(self, url, e) + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + else: + raise MaxRetryError(self, url, e) finally: if release_conn: @@ -626,7 +565,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +578,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +598,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. """ - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. - connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +634,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + + return self._prepare_conn(conn) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index d43bcd6..d9bda15 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. This needs the following packages installed: @@ -18,17 +18,37 @@ your application begins using ``urllib3``, like this:: Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +encryption in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 + EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES + !MD5 !EXP !PSK !SRP !DSS'`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder +from pyasn1.type import univ, constraint from socket import _fileobject import ssl +import select from cStringIO import StringIO -from .. import connectionpool +from .. import connection from .. import util __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -49,26 +69,44 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# Default SSL/TLS cipher list. +# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ +# configuring-apache-nginx-and-openssl-for-forward-secrecy +DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ + 'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ + 'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' + orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket def inject_into_urllib3(): 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - connectionpool.ssl_wrap_socket = ssl_wrap_socket + connection.ssl_wrap_socket = ssl_wrap_socket util.HAS_SNI = HAS_SNI def extract_from_urllib3(): 'Undo monkey-patching by :func:`inject_into_urllib3`.' - connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket util.HAS_SNI = orig_util_HAS_SNI ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many SAN certificates a certificate may have, + # however this needs to have some limit so we'll set an arbitrarily high + # limit. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions dns_name = [] @@ -329,6 +367,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set list of supported ciphersuites. + ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) + cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() @@ -336,6 +381,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: + select.select([sock], [], []) continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py index 4575582..e8b30bd 100644 --- a/requests/packages/urllib3/filepost.py +++ b/requests/packages/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields): def iter_fields(fields): """ - Iterate over fields. + .. deprecated:: 1.6 - .. deprecated :: + Iterate over fields. - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts. - """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2..3aa5b2e 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" - -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): - pass - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - parts = dn.split(r'.') - leftmost = parts[0] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in parts[1:]: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") +try: + # Python 3.2+ + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from _implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py b/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..52f4287 --- /dev/null +++ b/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index e7f8667..f18ff2b 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. :param proxy_headers: @@ -245,12 +245,11 @@ class ProxyManager(PoolManager): u = parse_url(url) if u.scheme == "http": - # It's too late to set proxy headers on per-request basis for - # tunnelled HTTPS connections, should use - # constructor's proxy_headers instead. + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. kw['headers'] = self._set_proxy_headers(url, kw.get('headers', self.headers)) - kw['headers'].update(self.proxy_headers) return super(ProxyManager, self).urlopen(method, url, redirect, **kw) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index 66a9a0e..2a92cc2 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -45,7 +45,6 @@ class RequestMethods(object): """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) def __init__(self, headers=None): self.headers = headers or {} diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 4efff5a..6a1fe1a 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -90,6 +90,7 @@ class HTTPResponse(io.IOBase): self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response + self._fp_bytes_read = 0 self._pool = pool self._connection = connection @@ -129,6 +130,14 @@ class HTTPResponse(io.IOBase): if self._fp: return self.read(cache_content=True) + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -183,6 +192,8 @@ class HTTPResponse(io.IOBase): self._fp.close() flush_decoder = True + self._fp_bytes_read += len(data) + try: if decode_content and self._decoder: data = self._decoder.decompress(data) diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 266c9ed..bd26631 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object): :type read: integer, float, or None :param total: - The maximum amount of time to wait for an HTTP request to connect and - return. This combines the connect and read timeouts into one. In the + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the event that both a connect timeout and a total are specified, or a read timeout and a total are specified, the shorter timeout will be applied. Defaults to None. - :type total: integer, float, or None .. note:: @@ -101,18 +100,23 @@ class Timeout(object): `total`. In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, not - the total amount of time for the request to return a complete response. - As an example, you may want a request to return within 7 seconds or - fail, so you set the ``total`` timeout to 7 seconds. If the server - sends one byte to you every 5 seconds, the request will **not** trigger - time out. This case is admittedly rare. + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. """ #: A sentinel object representing the default timeout value DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - def __init__(self, connect=_Default, read=_Default, total=None): + def __init__(self, total=None, connect=_Default, read=_Default): self._connect = self._validate_timeout(connect, 'connect') self._read = self._validate_timeout(read, 'read') self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url): # Auth if '@' in url: - auth, url = url.split('@', 1) + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) # IPv6 if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url): if not host: host = _host - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None elif not host and url: host = url @@ -417,7 +426,7 @@ def get_host(url): def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): + basic_auth=None, proxy_basic_auth=None): """ Shortcuts for generating request headers. @@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, Colon-separated username:password string for 'authorization: basic ...' auth header. + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + Example: :: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") @@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['authorization'] = 'Basic ' + \ b64encode(six.b(basic_auth)).decode('utf-8') + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + return headers @@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+ """ context = SSLContext(ssl_version) context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + if ca_certs: try: context.load_verify_locations(ca_certs) |