diff options
Diffstat (limited to 'lib/testtools/testtools/compat.py')
-rw-r--r-- | lib/testtools/testtools/compat.py | 95 |
1 files changed, 94 insertions, 1 deletions
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py index c8a641be23..b7e23c8fec 100644 --- a/lib/testtools/testtools/compat.py +++ b/lib/testtools/testtools/compat.py @@ -25,6 +25,7 @@ import os import re import sys import traceback +import unicodedata from testtools.helpers import try_imports @@ -52,6 +53,7 @@ appropriately and the no-op _u for Python 3 lets it through, in Python """ if sys.version_info > (3, 0): + import builtins def _u(s): return s _r = ascii @@ -59,12 +61,14 @@ if sys.version_info > (3, 0): """A byte literal.""" return s.encode("latin-1") advance_iterator = next + # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code. def istext(x): return isinstance(x, str) def classtypes(): return (type,) str_is_unicode = True else: + import __builtin__ as builtins def _u(s): # The double replace mangling going on prepares the string for # unicode-escape - \foo is preserved, \u and \U are decoded. @@ -112,6 +116,95 @@ else: return isinstance(exception, (KeyboardInterrupt, SystemExit)) +# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces, +# there should be better ways to write code needing this. +if not issubclass(getattr(builtins, "bytes", str), str): + def _isbytes(x): + return isinstance(x, bytes) +else: + # Never return True on Pythons that provide the name but not the real type + def _isbytes(x): + return False + + +def _slow_escape(text): + """Escape unicode `text` leaving printable characters unmodified + + The behaviour emulates the Python 3 implementation of repr, see + unicode_repr in unicodeobject.c and isprintable definition. + + Because this iterates over the input a codepoint at a time, it's slow, and + does not handle astral characters correctly on Python builds with 16 bit + rather than 32 bit unicode type. + """ + output = [] + for c in text: + o = ord(c) + if o < 256: + if o < 32 or 126 < o < 161: + output.append(c.encode("unicode-escape")) + elif o == 92: + # Separate due to bug in unicode-escape codec in Python 2.4 + output.append("\\\\") + else: + output.append(c) + else: + # To get correct behaviour would need to pair up surrogates here + if unicodedata.category(c)[0] in "CZ": + output.append(c.encode("unicode-escape")) + else: + output.append(c) + return "".join(output) + + +def text_repr(text, multiline=None): + """Rich repr for `text` returning unicode, triple quoted if `multiline`""" + is_py3k = sys.version_info > (3, 0) + nl = _isbytes(text) and bytes((0xA,)) or "\n" + if multiline is None: + multiline = nl in text + if not multiline and (is_py3k or not str_is_unicode and type(text) is str): + # Use normal repr for single line of unicode on Python 3 or bytes + return repr(text) + prefix = repr(text[:0])[:-2] + if multiline: + # To escape multiline strings, split and process each line in turn, + # making sure that quotes are not escaped. + if is_py3k: + offset = len(prefix) + 1 + lines = [] + for l in text.split(nl): + r = repr(l) + q = r[-1] + lines.append(r[offset:-1].replace("\\" + q, q)) + elif not str_is_unicode and isinstance(text, str): + lines = [l.encode("string-escape").replace("\\'", "'") + for l in text.split("\n")] + else: + lines = [_slow_escape(l) for l in text.split("\n")] + # Combine the escaped lines and append two of the closing quotes, + # then iterate over the result to escape triple quotes correctly. + _semi_done = "\n".join(lines) + "''" + p = 0 + while True: + p = _semi_done.find("'''", p) + if p == -1: + break + _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]]) + p += 2 + return "".join([prefix, "'''\\\n", _semi_done, "'"]) + escaped_text = _slow_escape(text) + # Determine which quote character to use and if one gets prefixed with a + # backslash following the same logic Python uses for repr() on strings + quote = "'" + if "'" in text: + if '"' in text: + escaped_text = escaped_text.replace("'", "\\'") + else: + quote = '"' + return "".join([prefix, quote, escaped_text, quote]) + + def unicode_output_stream(stream): """Get wrapper for given stream that writes any unicode without exception @@ -143,7 +236,7 @@ def unicode_output_stream(stream): stream.newlines, stream.line_buffering) except AttributeError: pass - return writer(stream, "replace") + return writer(stream, "replace") # The default source encoding is actually "iso-8859-1" until Python 2.5 but |