1 files changed, 94 insertions, 1 deletions
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py
index c8a641be23..b7e23c8fec 100644
--- a/lib/testtools/testtools/compat.py
+++ b/lib/testtools/testtools/compat.py
@@ -25,6 +25,7 @@ import os
 import re
 import sys
 import traceback
+import unicodedata
 
 from testtools.helpers import try_imports
 
@@ -52,6 +53,7 @@ appropriately and the no-op _u for Python 3 lets it through, in Python
 """
 
 if sys.version_info > (3, 0):
+    import builtins
     def _u(s):
         return s
     _r = ascii
@@ -59,12 +61,14 @@ if sys.version_info > (3, 0):
         """A byte literal."""
         return s.encode("latin-1")
     advance_iterator = next
+    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
     def istext(x):
         return isinstance(x, str)
     def classtypes():
         return (type,)
     str_is_unicode = True
 else:
+    import __builtin__ as builtins
     def _u(s):
         # The double replace mangling going on prepares the string for
         # unicode-escape - \foo is preserved, \u and \U are decoded.
@@ -112,6 +116,95 @@ else:
         return isinstance(exception, (KeyboardInterrupt, SystemExit))
 
 
+# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
+#                there should be better ways to write code needing this.
+if not issubclass(getattr(builtins, "bytes", str), str):
+    def _isbytes(x):
+        return isinstance(x, bytes)
+else:
+    # Never return True on Pythons where bytes is missing or is just str
+    def _isbytes(x):
+        return False
+
+
+def _slow_escape(text):
+    """Escape unicode `text` leaving printable characters unmodified
+
+    The behaviour emulates the Python 3 implementation of repr, see
+    unicode_repr in unicodeobject.c and isprintable definition.
+
+    Because this iterates over the input a codepoint at a time, it's slow, and
+    does not handle astral characters correctly on Python builds with 16 bit
+    rather than 32 bit unicode type.
+    """
+    output = []
+    for c in text:
+        o = ord(c)
+        if o < 256:
+            if o < 32 or 126 < o < 161:
+                output.append(c.encode("unicode-escape"))
+            elif o == 92:
+                # Separate due to bug in unicode-escape codec in Python 2.4
+                output.append("\\\\")
+            else:
+                output.append(c)
+        else:
+            # Correct behaviour would require pairing up surrogates here
+            if unicodedata.category(c)[0] in "CZ":
+                output.append(c.encode("unicode-escape"))
+            else:
+                output.append(c)
+    return "".join(output)
+
+
+def text_repr(text, multiline=None):
+    """Rich repr for `text` returning unicode, triple quoted if `multiline`"""
+    is_py3k = sys.version_info > (3, 0)
+    nl = _isbytes(text) and bytes((0xA,)) or "\n"
+    if multiline is None:
+        multiline = nl in text
+    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
+        # Use plain repr for any single line on Python 3, or Python 2 bytes
+        return repr(text)
+    prefix = repr(text[:0])[:-2]
+    if multiline:
+        # To escape multiline strings, split and process each line in turn,
+        # making sure that quotes are not escaped.
+        if is_py3k:
+            offset = len(prefix) + 1
+            lines = []
+            for l in text.split(nl):
+                r = repr(l)
+                q = r[-1]
+                lines.append(r[offset:-1].replace("\\" + q, q))
+        elif not str_is_unicode and isinstance(text, str):
+            lines = [l.encode("string-escape").replace("\\'", "'")
+                for l in text.split("\n")]
+        else:
+            lines = [_slow_escape(l) for l in text.split("\n")]
+        # Combine the escaped lines and append two of the closing quotes,
+        # then iterate over the result to escape triple quotes correctly.
+        _semi_done = "\n".join(lines) + "''"
+        p = 0
+        while True:
+            p = _semi_done.find("'''", p)
+            if p == -1:
+                break
+            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
+            p += 2
+        return "".join([prefix, "'''\\\n", _semi_done, "'"])
+    escaped_text = _slow_escape(text)
+    # Determine which quote character to use and if one gets prefixed with a
+    # backslash following the same logic Python uses for repr() on strings
+    quote = "'"
+    if "'" in text:
+        if '"' in text:
+            escaped_text = escaped_text.replace("'", "\\'")
+        else:
+            quote = '"'
+    return "".join([prefix, quote, escaped_text, quote])
+
+
 def unicode_output_stream(stream):
     """Get wrapper for given stream that writes any unicode without exception
 
@@ -143,7 +236,7 @@ def unicode_output_stream(stream):
                 stream.newlines, stream.line_buffering)
         except AttributeError:
             pass
-    return writer(stream, "replace")    
+    return writer(stream, "replace")
 
 
 # The default source encoding is actually "iso-8859-1" until Python 2.5 but