diff options
Diffstat (limited to 'lib/testtools/testtools/compat.py')
-rw-r--r-- | lib/testtools/testtools/compat.py | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py new file mode 100644 index 0000000000..0dd2fe8bf9 --- /dev/null +++ b/lib/testtools/testtools/compat.py @@ -0,0 +1,246 @@ +# Copyright (c) 2008-2010 testtools developers. See LICENSE for details. + +"""Compatibility support for python 2 and 3.""" + + +import codecs +import linecache +import locale +import os +import re +import sys +import traceback + +__metaclass__ = type +__all__ = [ + '_b', + '_u', + 'advance_iterator', + 'str_is_unicode', + 'unicode_output_stream', + ] + + +__u_doc = """A function version of the 'u' prefix. + +This is needed becayse the u prefix is not usable in Python 3 but is required +in Python 2 to get a unicode object. + +To migrate code that was written as u'\u1234' in Python 2 to 2+3 change +it to be _u('\u1234'). The Python 3 interpreter will decode it +appropriately and the no-op _u for Python 3 lets it through, in Python +2 we then call unicode-escape in the _u function. +""" + +if sys.version_info > (3, 0): + def _u(s): + return s + _r = ascii + def _b(s): + """A byte literal.""" + return s.encode("latin-1") + advance_iterator = next + def istext(x): + return isinstance(x, str) + def classtypes(): + return (type,) + str_is_unicode = True +else: + def _u(s): + # The double replace mangling going on prepares the string for + # unicode-escape - \foo is preserved, \u and \U are decoded. + return (s.replace("\\", "\\\\").replace("\\\\u", "\\u") + .replace("\\\\U", "\\U").decode("unicode-escape")) + _r = repr + def _b(s): + return s + advance_iterator = lambda it: it.next() + def istext(x): + return isinstance(x, basestring) + def classtypes(): + import types + return (type, types.ClassType) + str_is_unicode = sys.platform == "cli" + +_u.__doc__ = __u_doc + + +def unicode_output_stream(stream): + """Get wrapper for given stream that writes any unicode without exception + + Characters that can't be coerced to the encoding of the stream, or 'ascii' + if valid encoding is not found, will be replaced. The original stream may + be returned in situations where a wrapper is determined unneeded. + + The wrapper only allows unicode to be written, not non-ascii bytestrings, + which is a good thing to ensure sanity and sanitation. + """ + if sys.platform == "cli": + # Best to never encode before writing in IronPython + return stream + try: + writer = codecs.getwriter(stream.encoding or "") + except (AttributeError, LookupError): + # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs + # different handling as it doesn't want bytestrings + return codecs.getwriter("ascii")(stream, "replace") + if writer.__module__.rsplit(".", 1)[1].startswith("utf"): + # The current stream has a unicode encoding so no error handler is needed + return stream + if sys.version_info > (3, 0): + # Python 3 doesn't seem to make this easy, handle a common case + try: + return stream.__class__(stream.buffer, stream.encoding, "replace", + stream.newlines, stream.line_buffering) + except AttributeError: + pass + return writer(stream, "replace") + + +# The default source encoding is actually "iso-8859-1" until Python 2.5 but +# using non-ascii causes a deprecation warning in 2.4 and it's cleaner to +# treat all versions the same way +_default_source_encoding = "ascii" + +# Pattern specified in <http://www.python.org/dev/peps/pep-0263/> +_cookie_search=re.compile("coding[:=]\s*([-\w.]+)").search + +def _detect_encoding(lines): + """Get the encoding of a Python source file from a list of lines as bytes + + This function does less than tokenize.detect_encoding added in Python 3 as + it does not attempt to raise a SyntaxError when the interpreter would, it + just wants the encoding of a source file Python has already compiled and + determined is valid. + """ + if not lines: + return _default_source_encoding + if lines[0].startswith("\xef\xbb\xbf"): + # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError + return "utf-8" + # Only the first two lines of the source file are examined + magic = _cookie_search("".join(lines[:2])) + if magic is None: + return _default_source_encoding + encoding = magic.group(1) + try: + codecs.lookup(encoding) + except LookupError: + # Some codecs raise something other than LookupError if they don't + # support the given error handler, but not the text ones that could + # actually be used for Python source code + return _default_source_encoding + return encoding + + +class _EncodingTuple(tuple): + """A tuple type that can have an encoding attribute smuggled on""" + + +def _get_source_encoding(filename): + """Detect, cache and return the encoding of Python source at filename""" + try: + return linecache.cache[filename].encoding + except (AttributeError, KeyError): + encoding = _detect_encoding(linecache.getlines(filename)) + if filename in linecache.cache: + newtuple = _EncodingTuple(linecache.cache[filename]) + newtuple.encoding = encoding + linecache.cache[filename] = newtuple + return encoding + + +def _get_exception_encoding(): + """Return the encoding we expect messages from the OS to be encoded in""" + if os.name == "nt": + # GZ 2010-05-24: Really want the codepage number instead, the error + # handling of standard codecs is more deterministic + return "mbcs" + # GZ 2010-05-23: We need this call to be after initialisation, but there's + # no benefit in asking more than once as it's a global + # setting that can change after the message is formatted. + return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii" + + +def _exception_to_text(evalue): + """Try hard to get a sensible text value out of an exception instance""" + try: + return unicode(evalue) + except KeyboardInterrupt: + raise + except: + # Apparently this is what traceback._some_str does. Sigh - RBC 20100623 + pass + try: + return str(evalue).decode(_get_exception_encoding(), "replace") + except KeyboardInterrupt: + raise + except: + # Apparently this is what traceback._some_str does. Sigh - RBC 20100623 + pass + # Okay, out of ideas, let higher level handle it + return None + + +# GZ 2010-05-23: This function is huge and horrible and I welcome suggestions +# on the best way to break it up +_TB_HEADER = _u('Traceback (most recent call last):\n') +def _format_exc_info(eclass, evalue, tb, limit=None): + """Format a stack trace and the exception information as unicode + + Compatibility function for Python 2 which ensures each component of a + traceback is correctly decoded according to its origins. + + Based on traceback.format_exception and related functions. + """ + fs_enc = sys.getfilesystemencoding() + if tb: + list = [_TB_HEADER] + extracted_list = [] + for filename, lineno, name, line in traceback.extract_tb(tb, limit): + extracted_list.append(( + filename.decode(fs_enc, "replace"), + lineno, + name.decode("ascii", "replace"), + line and line.decode( + _get_source_encoding(filename), "replace"))) + list.extend(traceback.format_list(extracted_list)) + else: + list = [] + if evalue is None: + # Is a (deprecated) string exception + list.append(eclass.decode("ascii", "replace")) + elif isinstance(evalue, SyntaxError) and len(evalue.args) > 1: + # Avoid duplicating the special formatting for SyntaxError here, + # instead create a new instance with unicode filename and line + # Potentially gives duff spacing, but that's a pre-existing issue + filename, lineno, offset, line = evalue.args[1] + if line: + # Errors during parsing give the line from buffer encoded as + # latin-1 or utf-8 or the encoding of the file depending on the + # coding and whether the patch for issue #1031213 is applied, so + # give up on trying to decode it and just read the file again + bytestr = linecache.getline(filename, lineno) + if bytestr: + if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"): + bytestr = bytestr[3:] + line = bytestr.decode(_get_source_encoding(filename), "replace") + del linecache.cache[filename] + else: + line = line.decode("ascii", "replace") + if filename: + filename = filename.decode(fs_enc, "replace") + evalue = eclass(evalue.args[0], (filename, lineno, offset, line)) + list.extend(traceback.format_exception_only(eclass, evalue)) + else: + sclass = eclass.__name__ + svalue = _exception_to_text(evalue) + if svalue: + list.append("%s: %s\n" % (sclass, svalue)) + elif svalue is None: + # GZ 2010-05-24: Not a great fallback message, but keep for the + # the same for compatibility for the moment + list.append("%s: <unprintable %s object>\n" % (sclass, sclass)) + else: + list.append("%s\n" % sclass) + return list |