summaryrefslogtreecommitdiff
path: root/lib/testtools/testtools/compat.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/testtools/testtools/compat.py')
-rw-r--r--lib/testtools/testtools/compat.py95
1 files changed, 94 insertions, 1 deletions
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py
index c8a641be23..b7e23c8fec 100644
--- a/lib/testtools/testtools/compat.py
+++ b/lib/testtools/testtools/compat.py
@@ -25,6 +25,7 @@ import os
import re
import sys
import traceback
+import unicodedata
from testtools.helpers import try_imports
@@ -52,6 +53,7 @@ appropriately and the no-op _u for Python 3 lets it through, in Python
"""
if sys.version_info > (3, 0):
+ import builtins
def _u(s):
return s
_r = ascii
@@ -59,12 +61,14 @@ if sys.version_info > (3, 0):
"""A byte literal."""
return s.encode("latin-1")
advance_iterator = next
+ # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
def istext(x):
return isinstance(x, str)
def classtypes():
return (type,)
str_is_unicode = True
else:
+ import __builtin__ as builtins
def _u(s):
# The double replace mangling going on prepares the string for
# unicode-escape - \foo is preserved, \u and \U are decoded.
@@ -112,6 +116,95 @@ else:
return isinstance(exception, (KeyboardInterrupt, SystemExit))
+# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
+# there should be better ways to write code needing this.
+if not issubclass(getattr(builtins, "bytes", str), str):
+ def _isbytes(x):
+ return isinstance(x, bytes)
+else:
+ # Never return True on Pythons that provide the name but not the real type
+ def _isbytes(x):
+ return False
+
+
+def _slow_escape(text):
+ """Escape unicode `text` leaving printable characters unmodified
+
+ The behaviour emulates the Python 3 implementation of repr, see
+ unicode_repr in unicodeobject.c and isprintable definition.
+
+ Because this iterates over the input a codepoint at a time, it's slow, and
+ does not handle astral characters correctly on Python builds with 16 bit
+ rather than 32 bit unicode type.
+ """
+ output = []
+ for c in text:
+ o = ord(c)
+ if o < 256:
+ if o < 32 or 126 < o < 161:
+ output.append(c.encode("unicode-escape"))
+ elif o == 92:
+ # Separate due to bug in unicode-escape codec in Python 2.4
+ output.append("\\\\")
+ else:
+ output.append(c)
+ else:
+ # To get correct behaviour would need to pair up surrogates here
+ if unicodedata.category(c)[0] in "CZ":
+ output.append(c.encode("unicode-escape"))
+ else:
+ output.append(c)
+ return "".join(output)
+
+
+def text_repr(text, multiline=None):
+ """Rich repr for `text` returning unicode, triple quoted if `multiline`"""
+ is_py3k = sys.version_info > (3, 0)
+ nl = _isbytes(text) and bytes((0xA,)) or "\n"
+ if multiline is None:
+ multiline = nl in text
+ if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
+ # Use normal repr for single line of unicode on Python 3 or bytes
+ return repr(text)
+ prefix = repr(text[:0])[:-2]
+ if multiline:
+ # To escape multiline strings, split and process each line in turn,
+ # making sure that quotes are not escaped.
+ if is_py3k:
+ offset = len(prefix) + 1
+ lines = []
+ for l in text.split(nl):
+ r = repr(l)
+ q = r[-1]
+ lines.append(r[offset:-1].replace("\\" + q, q))
+ elif not str_is_unicode and isinstance(text, str):
+ lines = [l.encode("string-escape").replace("\\'", "'")
+ for l in text.split("\n")]
+ else:
+ lines = [_slow_escape(l) for l in text.split("\n")]
+ # Combine the escaped lines and append two of the closing quotes,
+ # then iterate over the result to escape triple quotes correctly.
+ _semi_done = "\n".join(lines) + "''"
+ p = 0
+ while True:
+ p = _semi_done.find("'''", p)
+ if p == -1:
+ break
+ _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
+ p += 2
+ return "".join([prefix, "'''\\\n", _semi_done, "'"])
+ escaped_text = _slow_escape(text)
+ # Determine which quote character to use and if one gets prefixed with a
+ # backslash following the same logic Python uses for repr() on strings
+ quote = "'"
+ if "'" in text:
+ if '"' in text:
+ escaped_text = escaped_text.replace("'", "\\'")
+ else:
+ quote = '"'
+ return "".join([prefix, quote, escaped_text, quote])
+
+
def unicode_output_stream(stream):
"""Get wrapper for given stream that writes any unicode without exception
@@ -143,7 +236,7 @@ def unicode_output_stream(stream):
stream.newlines, stream.line_buffering)
except AttributeError:
pass
- return writer(stream, "replace")
+ return writer(stream, "replace")
# The default source encoding is actually "iso-8859-1" until Python 2.5 but