10 files changed, 597 insertions, 33 deletions
diff --git a/lib/testtools/NEWS b/lib/testtools/NEWS
index 6588b8d438..5896b84c38 100644
--- a/lib/testtools/NEWS
+++ b/lib/testtools/NEWS
@@ -6,6 +6,22 @@ Changes and improvements to testtools_, grouped by release.
 NEXT
 ~~~~
 
+
+0.9.12
+~~~~~~
+
+This is a very big release.  We've made huge improvements on three fronts:
+ 1. Test failures are way nicer and easier to read
+ 2. Matchers and ``assertThat`` are much more convenient to use
+ 3. Correct handling of extended unicode characters
+
+We've trimmed off the fat from the stack trace you get when tests fail, we've
+cut out the bits of error messages that just didn't help, we've made it easier
+to annotate mismatch failures, to compare complex objects and to match raised
+exceptions.
+
+Testing code was never this fun.
+
 Changes
 -------
 
@@ -14,6 +30,12 @@ Changes
   now deprecated.  Please stop using it.
   (Jonathan Lange, #813460)
 
+* ``assertThat`` raises ``MismatchError`` instead of
+  ``TestCase.failureException``.  ``MismatchError`` is a subclass of
+  ``AssertionError``, so in most cases this change will not matter. However,
+  if ``self.failureException`` has been set to a non-default value, then
+  mismatches will become test errors rather than test failures.
+
 * ``gather_details`` takes two dicts, rather than two detailed objects.
   (Jonathan Lange, #801027)
 
@@ -30,12 +52,16 @@ Improvements
 * All public matchers are now in ``testtools.matchers.__all__``.
   (Jonathan Lange, #784859)
 
-* assertThat output is much less verbose, displaying only what the mismatch
+* ``assertThat`` can actually display mismatches and matchers that contain
+  extended unicode characters. (Jonathan Lange, Martin [gz], #804127)
+
+* ``assertThat`` output is much less verbose, displaying only what the mismatch
   tells us to display. Old-style verbose output can be had by passing
   ``verbose=True`` to assertThat. (Jonathan Lange, #675323, #593190)
 
-* assertThat accepts a message which will be used to annotate the matcher. This
-  can be given as a third parameter or as a keyword parameter. (Robert Collins)
+* ``assertThat`` accepts a message which will be used to annotate the matcher.
+  This can be given as a third parameter or as a keyword parameter.
+  (Robert Collins)
 
 * Automated the Launchpad part of the release process.
   (Jonathan Lange, #623486)
diff --git a/lib/testtools/doc/for-test-authors.rst b/lib/testtools/doc/for-test-authors.rst
index eec98b14f8..04c4be6b0d 100644
--- a/lib/testtools/doc/for-test-authors.rst
+++ b/lib/testtools/doc/for-test-authors.rst
@@ -717,7 +717,7 @@ generates.  Here's an example mismatch::
           self.remainder = remainder
 
       def describe(self):
-          return "%s is not divisible by %s, %s remains" % (
+          return "%r is not divisible by %r, %r remains" % (
               self.number, self.divider, self.remainder)
 
       def get_details(self):
@@ -738,11 +738,19 @@ in the Matcher itself like this::
       remainder = actual % self.divider
       if remainder != 0:
           return Mismatch(
-              "%s is not divisible by %s, %s remains" % (
+              "%r is not divisible by %r, %r remains" % (
                   actual, self.divider, remainder))
       else:
           return None
 
+When writing a ``describe`` method or constructing a ``Mismatch`` object the
+code should ensure it only emits printable unicode.  As this output must be
+combined with other text and forwarded for presentation, letting through
+non-ascii bytes of ambiguous encoding or control characters could throw an
+exception or mangle the display.  In most cases simply avoiding the ``%s``
+format specifier and using ``%r`` instead will be enough.  For examples of
+more complex formatting see the ``testtools.matchers`` implementations.
+
 
 Details
 =======
diff --git a/lib/testtools/scripts/all-pythons b/lib/testtools/scripts/all-pythons
index aecc9495a6..5a0c415708 100755
--- a/lib/testtools/scripts/all-pythons
+++ b/lib/testtools/scripts/all-pythons
@@ -29,7 +29,9 @@ from testtools.content import text_content
 ROOT = os.path.dirname(os.path.dirname(__file__))
 
 
-def run_for_python(version, result):
+def run_for_python(version, result, tests):
+    if not tests:
+        tests = ['testtools.tests.test_suite']
     # XXX: This could probably be broken up and put into subunit.
     python = 'python%s' % (version,)
     # XXX: Correct API, but subunit doesn't support it. :(
@@ -58,7 +60,8 @@ def run_for_python(version, result):
     cmd = [
         python,
         '-W', 'ignore:Module testtools was already imported',
-        subunit_path, 'testtools.tests.test_suite']
+        subunit_path]
+    cmd.extend(tests)
     process = subprocess.Popen(
         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
     _make_stream_binary(process.stdout)
@@ -87,4 +90,4 @@ if __name__ == '__main__':
     sys.path.append(ROOT)
     result = TestProtocolClient(sys.stdout)
     for version in '2.4 2.5 2.6 2.7 3.0 3.1 3.2'.split():
-        run_for_python(version, result)
+        run_for_python(version, result, sys.argv[1:])
diff --git a/lib/testtools/testtools/__init__.py b/lib/testtools/testtools/__init__.py
index 2a2f4d65e0..f518b70373 100644
--- a/lib/testtools/testtools/__init__.py
+++ b/lib/testtools/testtools/__init__.py
@@ -80,4 +80,4 @@ from testtools.distutilscmd import (
 # If the releaselevel is 'final', then the tarball will be major.minor.micro.
 # Otherwise it is major.minor.micro~$(revno).
 
-__version__ = (0, 9, 12, 'dev', 0)
+__version__ = (0, 9, 13, 'dev', 0)
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py
index c8a641be23..b7e23c8fec 100644
--- a/lib/testtools/testtools/compat.py
+++ b/lib/testtools/testtools/compat.py
@@ -25,6 +25,7 @@ import os
 import re
 import sys
 import traceback
+import unicodedata
 
 from testtools.helpers import try_imports
 
@@ -52,6 +53,7 @@ appropriately and the no-op _u for Python 3 lets it through, in Python
 """
 
 if sys.version_info > (3, 0):
+    import builtins
     def _u(s):
         return s
     _r = ascii
@@ -59,12 +61,14 @@ if sys.version_info > (3, 0):
         """A byte literal."""
         return s.encode("latin-1")
     advance_iterator = next
+    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
     def istext(x):
         return isinstance(x, str)
     def classtypes():
         return (type,)
     str_is_unicode = True
 else:
+    import __builtin__ as builtins
     def _u(s):
         # The double replace mangling going on prepares the string for
         # unicode-escape - \foo is preserved, \u and \U are decoded.
@@ -112,6 +116,95 @@ else:
         return isinstance(exception, (KeyboardInterrupt, SystemExit))
 
 
+# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
+#                there should be better ways to write code needing this.
+if not issubclass(getattr(builtins, "bytes", str), str):
+    def _isbytes(x):
+        return isinstance(x, bytes)
+else:
+    # Never return True on Pythons that provide the name but not the real type
+    def _isbytes(x):
+        return False
+
+
+def _slow_escape(text):
+    """Escape unicode `text` leaving printable characters unmodified
+
+    The behaviour emulates the Python 3 implementation of repr, see
+    unicode_repr in unicodeobject.c and isprintable definition.
+
+    Because this iterates over the input a codepoint at a time, it's slow, and
+    does not handle astral characters correctly on Python builds with 16 bit
+    rather than 32 bit unicode type.
+    """
+    output = []
+    for c in text:
+        o = ord(c)
+        if o < 256:
+            if o < 32 or 126 < o < 161:
+                output.append(c.encode("unicode-escape"))
+            elif o == 92:
+                # Separate due to bug in unicode-escape codec in Python 2.4
+                output.append("\\\\")
+            else:
+                output.append(c)
+        else:
+            # To get correct behaviour would need to pair up surrogates here
+            if unicodedata.category(c)[0] in "CZ":
+                output.append(c.encode("unicode-escape"))
+            else:
+                output.append(c)
+    return "".join(output)
+
+
+def text_repr(text, multiline=None):
+    """Rich repr for `text` returning unicode, triple quoted if `multiline`"""
+    is_py3k = sys.version_info > (3, 0)
+    nl = _isbytes(text) and bytes((0xA,)) or "\n"
+    if multiline is None:
+        multiline = nl in text
+    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
+        # Use normal repr for single line of unicode on Python 3 or bytes
+        return repr(text)
+    prefix = repr(text[:0])[:-2]
+    if multiline:
+        # To escape multiline strings, split and process each line in turn,
+        # making sure that quotes are not escaped. 
+        if is_py3k:
+            offset = len(prefix) + 1
+            lines = []
+            for l in text.split(nl):
+                r = repr(l)
+                q = r[-1]
+                lines.append(r[offset:-1].replace("\\" + q, q))
+        elif not str_is_unicode and isinstance(text, str):
+            lines = [l.encode("string-escape").replace("\\'", "'")
+                for l in text.split("\n")]
+        else:
+            lines = [_slow_escape(l) for l in text.split("\n")]
+        # Combine the escaped lines and append two of the closing quotes,
+        # then iterate over the result to escape triple quotes correctly.
+        _semi_done = "\n".join(lines) + "''"
+        p = 0
+        while True:
+            p = _semi_done.find("'''", p)
+            if p == -1:
+                break
+            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
+            p += 2
+        return "".join([prefix, "'''\\\n", _semi_done, "'"])
+    escaped_text = _slow_escape(text)
+    # Determine which quote character to use and if one gets prefixed with a
+    # backslash following the same logic Python uses for repr() on strings
+    quote = "'"
+    if "'" in text:
+        if '"' in text:
+            escaped_text = escaped_text.replace("'", "\\'")
+        else:
+            quote = '"'
+    return "".join([prefix, quote, escaped_text, quote])
+
+
 def unicode_output_stream(stream):
     """Get wrapper for given stream that writes any unicode without exception
 
@@ -143,7 +236,7 @@ def unicode_output_stream(stream):
                 stream.newlines, stream.line_buffering)
         except AttributeError:
             pass
-    return writer(stream, "replace")    
+    return writer(stream, "replace")
 
 
 # The default source encoding is actually "iso-8859-1" until Python 2.5 but
diff --git a/lib/testtools/testtools/matchers.py b/lib/testtools/testtools/matchers.py
index 6ee33f0fd8..693a20befa 100644
--- a/lib/testtools/testtools/matchers.py
+++ b/lib/testtools/testtools/matchers.py
@@ -49,7 +49,10 @@ from testtools.compat import (
     classtypes,
     _error_repr,
     isbaseexception,
+    _isbytes,
     istext,
+    str_is_unicode,
+    text_repr
     )
 
 
@@ -102,6 +105,8 @@ class Mismatch(object):
         """Describe the mismatch.
 
         This should be either a human-readable string or castable to a string.
+        In particular, it should either be plain ascii or unicode on Python 2,
+        and care should be taken to escape control characters.
         """
         try:
             return self._description
@@ -131,6 +136,46 @@ class Mismatch(object):
             id(self), self.__dict__)
 
 
+class MismatchError(AssertionError):
+    """Raised when a mismatch occurs."""
+
+    # This class exists to work around
+    # <https://bugs.launchpad.net/testtools/+bug/804127>.  It provides a
+    # guaranteed way of getting a readable exception, no matter what crazy
+    # characters are in the matchee, matcher or mismatch.
+
+    def __init__(self, matchee, matcher, mismatch, verbose=False):
+        # Have to use old-style upcalling for Python 2.4 and 2.5
+        # compatibility.
+        AssertionError.__init__(self)
+        self.matchee = matchee
+        self.matcher = matcher
+        self.mismatch = mismatch
+        self.verbose = verbose
+
+    def __str__(self):
+        difference = self.mismatch.describe()
+        if self.verbose:
+            # GZ 2011-08-24: Smelly API? Better to take any object and special
+            #                case text inside?
+            if istext(self.matchee) or _isbytes(self.matchee):
+                matchee = text_repr(self.matchee, multiline=False)
+            else:
+                matchee = repr(self.matchee)
+            return (
+                'Match failed. Matchee: %s\nMatcher: %s\nDifference: %s\n'
+                % (matchee, self.matcher, difference))
+        else:
+            return difference
+
+    if not str_is_unicode:
+
+        __unicode__ = __str__
+
+        def __str__(self):
+            return self.__unicode__().encode("ascii", "backslashreplace")
+
+
 class MismatchDecorator(object):
     """Decorate a ``Mismatch``.
 
@@ -241,7 +286,12 @@ class DocTestMismatch(Mismatch):
         self.with_nl = with_nl
 
     def describe(self):
-        return self.matcher._describe_difference(self.with_nl)
+        s = self.matcher._describe_difference(self.with_nl)
+        if str_is_unicode or isinstance(s, unicode):
+            return s
+        # GZ 2011-08-24: This is actually pretty bogus, most C0 codes should
+        #                be escaped, in addition to non-ascii bytes.
+        return s.decode("latin1").encode("ascii", "backslashreplace")
 
 
 class DoesNotContain(Mismatch):
@@ -271,8 +321,8 @@ class DoesNotStartWith(Mismatch):
         self.expected = expected
 
     def describe(self):
-        return "'%s' does not start with '%s'." % (
-            self.matchee, self.expected)
+        return "%s does not start with %s." % (
+            text_repr(self.matchee), text_repr(self.expected))
 
 
 class DoesNotEndWith(Mismatch):
@@ -287,8 +337,8 @@ class DoesNotEndWith(Mismatch):
         self.expected = expected
 
     def describe(self):
-        return "'%s' does not end with '%s'." % (
-            self.matchee, self.expected)
+        return "%s does not end with %s." % (
+            text_repr(self.matchee), text_repr(self.expected))
 
 
 class _BinaryComparison(object):
@@ -320,8 +370,8 @@ class _BinaryMismatch(Mismatch):
     def _format(self, thing):
         # Blocks of text with newlines are formatted as triple-quote
         # strings. Everything else is pretty-printed.
-        if istext(thing) and '\n' in thing:
-            return '"""\\\n%s"""' % (thing,)
+        if istext(thing) or _isbytes(thing):
+            return text_repr(thing)
         return pformat(thing)
 
     def describe(self):
@@ -332,7 +382,7 @@ class _BinaryMismatch(Mismatch):
                 self._mismatch_string, self._format(self.expected),
                 self._format(self.other))
         else:
-            return "%s %s %s" % (left, self._mismatch_string,right)
+            return "%s %s %s" % (left, self._mismatch_string, right)
 
 
 class Equals(_BinaryComparison):
@@ -572,7 +622,7 @@ class StartsWith(Matcher):
         self.expected = expected
 
     def __str__(self):
-        return "Starts with '%s'." % self.expected
+        return "StartsWith(%r)" % (self.expected,)
 
     def match(self, matchee):
         if not matchee.startswith(self.expected):
@@ -591,7 +641,7 @@ class EndsWith(Matcher):
         self.expected = expected
 
     def __str__(self):
-        return "Ends with '%s'." % self.expected
+        return "EndsWith(%r)" % (self.expected,)
 
     def match(self, matchee):
         if not matchee.endswith(self.expected):
@@ -848,8 +898,12 @@ class MatchesRegex(object):
 
     def match(self, value):
         if not re.match(self.pattern, value, self.flags):
+            pattern = self.pattern
+            if not isinstance(pattern, str_is_unicode and str or unicode):
+                pattern = pattern.decode("latin1")
+            pattern = pattern.encode("unicode_escape").decode("ascii")
             return Mismatch("%r does not match /%s/" % (
-                    value, self.pattern))
+                    value, pattern.replace("\\\\", "\\")))
 
 
 class MatchesSetwise(object):
diff --git a/lib/testtools/testtools/testcase.py b/lib/testtools/testtools/testcase.py
index 9370b29e57..ee5e296cd4 100644
--- a/lib/testtools/testtools/testcase.py
+++ b/lib/testtools/testtools/testcase.py
@@ -34,6 +34,7 @@ from testtools.matchers import (
     Equals,
     MatchesAll,
     MatchesException,
+    MismatchError,
     Is,
     IsInstance,
     Not,
@@ -393,7 +394,7 @@ class TestCase(unittest.TestCase):
 
         :param matchee: An object to match with matcher.
         :param matcher: An object meeting the testtools.Matcher protocol.
-        :raises self.failureException: When matcher does not match thing.
+        :raises MismatchError: When matcher does not match thing.
         """
         matcher = Annotate.if_message(message, matcher)
         mismatch = matcher.match(matchee)
@@ -407,13 +408,7 @@ class TestCase(unittest.TestCase):
                 full_name = "%s-%d" % (name, suffix)
                 suffix += 1
             self.addDetail(full_name, content)
-        if verbose:
-            message = (
-                'Match failed. Matchee: "%s"\nMatcher: %s\nDifference: %s\n'
-                % (matchee, matcher, mismatch.describe()))
-        else:
-            message = mismatch.describe()
-        self.fail(message)
+        raise MismatchError(matchee, matcher, mismatch, verbose)
 
     def defaultTestResult(self):
         return TestResult()
diff --git a/lib/testtools/testtools/tests/test_compat.py b/lib/testtools/testtools/tests/test_compat.py
index a33c071aaa..5e385bf48c 100644
--- a/lib/testtools/testtools/tests/test_compat.py
+++ b/lib/testtools/testtools/tests/test_compat.py
@@ -16,6 +16,7 @@ from testtools.compat import (
     _get_source_encoding,
     _u,
     str_is_unicode,
+    text_repr,
     unicode_output_stream,
     )
 from testtools.matchers import (
@@ -262,6 +263,132 @@ class TestUnicodeOutputStream(testtools.TestCase):
         self.assertEqual("pa???n", sout.getvalue())
 
 
+class TestTextRepr(testtools.TestCase):
+    """Ensure in extending repr, basic behaviours are not being broken"""
+
+    ascii_examples = (
+        # Single character examples
+        #  C0 control codes should be escaped except multiline \n
+        ("\x00", "'\\x00'", "'''\\\n\\x00'''"),
+        ("\b", "'\\x08'", "'''\\\n\\x08'''"),
+        ("\t", "'\\t'", "'''\\\n\\t'''"),
+        ("\n", "'\\n'", "'''\\\n\n'''"),
+        ("\r", "'\\r'", "'''\\\n\\r'''"),
+        #  Quotes and backslash should match normal repr behaviour
+        ('"', "'\"'", "'''\\\n\"'''"),
+        ("'", "\"'\"", "'''\\\n\\''''"),
+        ("\\", "'\\\\'", "'''\\\n\\\\'''"),
+        #  DEL is also unprintable and should be escaped
+        ("\x7F", "'\\x7f'", "'''\\\n\\x7f'''"),
+
+        # Character combinations that need double checking
+        ("\r\n", "'\\r\\n'", "'''\\\n\\r\n'''"),
+        ("\"'", "'\"\\''", "'''\\\n\"\\''''"),
+        ("'\"", "'\\'\"'", "'''\\\n'\"'''"),
+        ("\\n", "'\\\\n'", "'''\\\n\\\\n'''"),
+        ("\\\n", "'\\\\\\n'", "'''\\\n\\\\\n'''"),
+        ("\\' ", "\"\\\\' \"", "'''\\\n\\\\' '''"),
+        ("\\'\n", "\"\\\\'\\n\"", "'''\\\n\\\\'\n'''"),
+        ("\\'\"", "'\\\\\\'\"'", "'''\\\n\\\\'\"'''"),
+        ("\\'''", "\"\\\\'''\"", "'''\\\n\\\\\\'\\'\\''''"),
+        )
+
+    # Bytes with the high bit set should always be escaped
+    bytes_examples = (
+        (_b("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
+        (_b("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
+        (_b("\xC0"), "'\\xc0'", "'''\\\n\\xc0'''"),
+        (_b("\xFF"), "'\\xff'", "'''\\\n\\xff'''"),
+        (_b("\xC2\xA7"), "'\\xc2\\xa7'", "'''\\\n\\xc2\\xa7'''"),
+        )
+
+    # Unicode doesn't escape printable characters as per the Python 3 model
+    unicode_examples = (
+        # C1 codes are unprintable
+        (_u("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
+        (_u("\x9F"), "'\\x9f'", "'''\\\n\\x9f'''"),
+        # No-break space is unprintable
+        (_u("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
+        # Letters in latin alphabets are printable
+        (_u("\xA1"), _u("'\xa1'"), _u("'''\\\n\xa1'''")),
+        (_u("\xFF"), _u("'\xff'"), _u("'''\\\n\xff'''")),
+        (_u("\u0100"), _u("'\u0100'"), _u("'''\\\n\u0100'''")),
+        # Line and paragraph separators are unprintable
+        (_u("\u2028"), "'\\u2028'", "'''\\\n\\u2028'''"),
+        (_u("\u2029"), "'\\u2029'", "'''\\\n\\u2029'''"),
+        # Unpaired surrogates are unprintable
+        (_u("\uD800"), "'\\ud800'", "'''\\\n\\ud800'''"),
+        (_u("\uDFFF"), "'\\udfff'", "'''\\\n\\udfff'''"),
+        # Unprintable general categories not fully tested: Cc, Cf, Co, Cn, Zs
+        )
+
+    b_prefix = repr(_b(""))[:-2]
+    u_prefix = repr(_u(""))[:-2]
+
+    def test_ascii_examples_oneline_bytes(self):
+        for s, expected, _ in self.ascii_examples:
+            b = _b(s)
+            actual = text_repr(b, multiline=False)
+            # Add self.assertIsInstance check?
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_ascii_examples_oneline_unicode(self):
+        for s, expected, _ in self.ascii_examples:
+            u = _u(s)
+            actual = text_repr(u, multiline=False)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_ascii_examples_multiline_bytes(self):
+        for s, _, expected in self.ascii_examples:
+            b = _b(s)
+            actual = text_repr(b, multiline=True)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_ascii_examples_multiline_unicode(self):
+        for s, _, expected in self.ascii_examples:
+            u = _u(s)
+            actual = text_repr(u, multiline=True)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_ascii_examples_defaultline_bytes(self):
+        for s, one, multi in self.ascii_examples:
+            expected = "\n" in s and multi or one
+            self.assertEqual(text_repr(_b(s)), self.b_prefix + expected)
+
+    def test_ascii_examples_defaultline_unicode(self):
+        for s, one, multi in self.ascii_examples:
+            expected = "\n" in s and multi or one
+            self.assertEqual(text_repr(_u(s)), self.u_prefix + expected)
+
+    def test_bytes_examples_oneline(self):
+        for b, expected, _ in self.bytes_examples:
+            actual = text_repr(b, multiline=False)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_bytes_examples_multiline(self):
+        for b, _, expected in self.bytes_examples:
+            actual = text_repr(b, multiline=True)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_unicode_examples_oneline(self):
+        for u, expected, _ in self.unicode_examples:
+            actual = text_repr(u, multiline=False)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_unicode_examples_multiline(self):
+        for u, _, expected in self.unicode_examples:
+            actual = text_repr(u, multiline=True)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+
 def test_suite():
     from unittest import TestLoader
     return TestLoader().loadTestsFromName(__name__)
diff --git a/lib/testtools/testtools/tests/test_matchers.py b/lib/testtools/testtools/tests/test_matchers.py
index feca41a4e6..ebdd4a9510 100644
--- a/lib/testtools/testtools/tests/test_matchers.py
+++ b/lib/testtools/testtools/tests/test_matchers.py
@@ -12,6 +12,9 @@ from testtools import (
     )
 from testtools.compat import (
     StringIO,
+    str_is_unicode,
+    text_repr,
+    _b,
     _u,
     )
 from testtools.matchers import (
@@ -19,6 +22,7 @@ from testtools.matchers import (
     AllMatch,
     Annotate,
     AnnotatedMismatch,
+    _BinaryMismatch,
     Contains,
     Equals,
     DocTestMatches,
@@ -39,6 +43,7 @@ from testtools.matchers import (
     MatchesStructure,
     Mismatch,
     MismatchDecorator,
+    MismatchError,
     Not,
     NotEquals,
     Raises,
@@ -67,6 +72,125 @@ class TestMismatch(TestCase):
         self.assertEqual({}, mismatch.get_details())
 
 
+class TestMismatchError(TestCase):
+
+    def test_is_assertion_error(self):
+        # MismatchError is an AssertionError, so that most of the time, it
+        # looks like a test failure, rather than an error.
+        def raise_mismatch_error():
+            raise MismatchError(2, Equals(3), Equals(3).match(2))
+        self.assertRaises(AssertionError, raise_mismatch_error)
+
+    def test_default_description_is_mismatch(self):
+        mismatch = Equals(3).match(2)
+        e = MismatchError(2, Equals(3), mismatch)
+        self.assertEqual(mismatch.describe(), str(e))
+
+    def test_default_description_unicode(self):
+        matchee = _u('\xa7')
+        matcher = Equals(_u('a'))
+        mismatch = matcher.match(matchee)
+        e = MismatchError(matchee, matcher, mismatch)
+        self.assertEqual(mismatch.describe(), str(e))
+
+    def test_verbose_description(self):
+        matchee = 2
+        matcher = Equals(3)
+        mismatch = matcher.match(2)
+        e = MismatchError(matchee, matcher, mismatch, True)
+        expected = (
+            'Match failed. Matchee: %r\n'
+            'Matcher: %s\n'
+            'Difference: %s\n' % (
+                matchee,
+                matcher,
+                matcher.match(matchee).describe(),
+                ))
+        self.assertEqual(expected, str(e))
+
+    def test_verbose_unicode(self):
+        # When assertThat is given matchees or matchers that contain non-ASCII
+        # unicode strings, we can still provide a meaningful error.
+        matchee = _u('\xa7')
+        matcher = Equals(_u('a'))
+        mismatch = matcher.match(matchee)
+        expected = (
+            'Match failed. Matchee: %s\n'
+            'Matcher: %s\n'
+            'Difference: %s\n' % (
+                text_repr(matchee),
+                matcher,
+                mismatch.describe(),
+                ))
+        e = MismatchError(matchee, matcher, mismatch, True)
+        if str_is_unicode:
+            actual = str(e)
+        else:
+            actual = unicode(e)
+            # Using str() should still work, and return ascii only
+            self.assertEqual(
+                expected.replace(matchee, matchee.encode("unicode-escape")),
+                str(e).decode("ascii"))
+        self.assertEqual(expected, actual)
+
+
+class Test_BinaryMismatch(TestCase):
+    """Mismatches from binary comparisons need useful describe output"""
+
+    _long_string = "This is a longish multiline non-ascii string\n\xa7"
+    _long_b = _b(_long_string)
+    _long_u = _u(_long_string)
+
+    def test_short_objects(self):
+        o1, o2 = object(), object()
+        mismatch = _BinaryMismatch(o1, "!~", o2)
+        self.assertEqual(mismatch.describe(), "%r !~ %r" % (o1, o2))
+
+    def test_short_mixed_strings(self):
+        b, u = _b("\xa7"), _u("\xa7")
+        mismatch = _BinaryMismatch(b, "!~", u)
+        self.assertEqual(mismatch.describe(), "%r !~ %r" % (b, u))
+
+    def test_long_bytes(self):
+        one_line_b = self._long_b.replace(_b("\n"), _b(" "))
+        mismatch = _BinaryMismatch(one_line_b, "!~", self._long_b)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(one_line_b),
+                text_repr(self._long_b, multiline=True)))
+
+    def test_long_unicode(self):
+        one_line_u = self._long_u.replace("\n", " ")
+        mismatch = _BinaryMismatch(one_line_u, "!~", self._long_u)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(one_line_u),
+                text_repr(self._long_u, multiline=True)))
+
+    def test_long_mixed_strings(self):
+        mismatch = _BinaryMismatch(self._long_b, "!~", self._long_u)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_b, multiline=True),
+                text_repr(self._long_u, multiline=True)))
+
+    def test_long_bytes_and_object(self):
+        obj = object()
+        mismatch = _BinaryMismatch(self._long_b, "!~", obj)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_b, multiline=True),
+                repr(obj)))
+
+    def test_long_unicode_and_object(self):
+        obj = object()
+        mismatch = _BinaryMismatch(self._long_u, "!~", obj)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_u, multiline=True),
+                repr(obj)))
+
+
 class TestMatchersInterface(object):
 
     run_tests_with = FullStackRunTest
@@ -150,6 +274,23 @@ class TestDocTestMatchesSpecific(TestCase):
         self.assertEqual("bar\n", matcher.want)
         self.assertEqual(doctest.ELLIPSIS, matcher.flags)
 
+    def test_describe_non_ascii_bytes(self):
+        """Even with bytestrings, the mismatch should be coercible to unicode
+
+        DocTestMatches is intended for text, but the Python 2 str type also
+        permits arbitrary binary inputs. This is a slightly bogus thing to do,
+        and under Python 3 using bytes objects will reasonably raise an error.
+        """
+        header = _b("\x89PNG\r\n\x1a\n...")
+        if str_is_unicode:
+            self.assertRaises(TypeError,
+                DocTestMatches, header, doctest.ELLIPSIS)
+            return
+        matcher = DocTestMatches(header, doctest.ELLIPSIS)
+        mismatch = matcher.match(_b("GIF89a\1\0\1\0\0\0\0;"))
+        # Must be treatable as unicode text, the exact output matters less
+        self.assertTrue(unicode(mismatch.describe()))
+
 
 class TestEqualsInterface(TestCase, TestMatchersInterface):
 
@@ -552,6 +693,21 @@ class DoesNotStartWithTests(TestCase):
         mismatch = DoesNotStartWith("fo", "bo")
         self.assertEqual("'fo' does not start with 'bo'.", mismatch.describe())
 
+    def test_describe_non_ascii_unicode(self):
+        string = _u("A\xA7")
+        suffix = _u("B\xA7")
+        mismatch = DoesNotStartWith(string, suffix)
+        self.assertEqual("%s does not start with %s." % (
+            text_repr(string), text_repr(suffix)),
+            mismatch.describe())
+
+    def test_describe_non_ascii_bytes(self):
+        string = _b("A\xA7")
+        suffix = _b("B\xA7")
+        mismatch = DoesNotStartWith(string, suffix)
+        self.assertEqual("%r does not start with %r." % (string, suffix),
+            mismatch.describe())
+
 
 class StartsWithTests(TestCase):
 
@@ -559,7 +715,17 @@ class StartsWithTests(TestCase):
 
     def test_str(self):
         matcher = StartsWith("bar")
-        self.assertEqual("Starts with 'bar'.", str(matcher))
+        self.assertEqual("StartsWith('bar')", str(matcher))
+
+    def test_str_with_bytes(self):
+        b = _b("\xA7")
+        matcher = StartsWith(b)
+        self.assertEqual("StartsWith(%r)" % (b,), str(matcher))
+
+    def test_str_with_unicode(self):
+        u = _u("\xA7")
+        matcher = StartsWith(u)
+        self.assertEqual("StartsWith(%r)" % (u,), str(matcher))
 
     def test_match(self):
         matcher = StartsWith("bar")
@@ -588,6 +754,21 @@ class DoesNotEndWithTests(TestCase):
         mismatch = DoesNotEndWith("fo", "bo")
         self.assertEqual("'fo' does not end with 'bo'.", mismatch.describe())
 
+    def test_describe_non_ascii_unicode(self):
+        string = _u("A\xA7")
+        suffix = _u("B\xA7")
+        mismatch = DoesNotEndWith(string, suffix)
+        self.assertEqual("%s does not end with %s." % (
+            text_repr(string), text_repr(suffix)),
+            mismatch.describe())
+
+    def test_describe_non_ascii_bytes(self):
+        string = _b("A\xA7")
+        suffix = _b("B\xA7")
+        mismatch = DoesNotEndWith(string, suffix)
+        self.assertEqual("%r does not end with %r." % (string, suffix),
+            mismatch.describe())
+
 
 class EndsWithTests(TestCase):
 
@@ -595,7 +776,17 @@ class EndsWithTests(TestCase):
 
     def test_str(self):
         matcher = EndsWith("bar")
-        self.assertEqual("Ends with 'bar'.", str(matcher))
+        self.assertEqual("EndsWith('bar')", str(matcher))
+
+    def test_str_with_bytes(self):
+        b = _b("\xA7")
+        matcher = EndsWith(b)
+        self.assertEqual("EndsWith(%r)" % (b,), str(matcher))
+
+    def test_str_with_unicode(self):
+        u = _u("\xA7")
+        matcher = EndsWith(u)
+        self.assertEqual("EndsWith(%r)" % (u,), str(matcher))
 
     def test_match(self):
         matcher = EndsWith("arf")
@@ -712,11 +903,17 @@ class TestMatchesRegex(TestCase, TestMatchersInterface):
         ("MatchesRegex('a|b')", MatchesRegex('a|b')),
         ("MatchesRegex('a|b', re.M)", MatchesRegex('a|b', re.M)),
         ("MatchesRegex('a|b', re.I|re.M)", MatchesRegex('a|b', re.I|re.M)),
+        ("MatchesRegex(%r)" % (_b("\xA7"),), MatchesRegex(_b("\xA7"))),
+        ("MatchesRegex(%r)" % (_u("\xA7"),), MatchesRegex(_u("\xA7"))),
         ]
 
     describe_examples = [
         ("'c' does not match /a|b/", 'c', MatchesRegex('a|b')),
         ("'c' does not match /a\d/", 'c', MatchesRegex(r'a\d')),
+        ("%r does not match /\\s+\\xa7/" % (_b('c'),),
+            _b('c'), MatchesRegex(_b("\\s+\xA7"))),
+        ("%r does not match /\\s+\\xa7/" % (_u('c'),),
+            _u('c'), MatchesRegex(_u("\\s+\xA7"))),
         ]
 
 
diff --git a/lib/testtools/testtools/tests/test_testcase.py b/lib/testtools/testtools/tests/test_testcase.py
index 03457310a7..52f93c3c52 100644
--- a/lib/testtools/testtools/tests/test_testcase.py
+++ b/lib/testtools/testtools/tests/test_testcase.py
@@ -19,7 +19,10 @@ from testtools import (
     skipUnless,
     testcase,
     )
-from testtools.compat import _b
+from testtools.compat import (
+    _b,
+    _u,
+    )
 from testtools.matchers import (
     Annotate,
     DocTestMatches,
@@ -32,6 +35,7 @@ from testtools.testresult.doubles import (
     Python27TestResult,
     ExtendedTestResult,
     )
+from testtools.testresult.real import TestResult
 from testtools.tests.helpers import (
     an_exc_info,
     FullStackRunTest,
@@ -484,7 +488,7 @@ class TestAssertions(TestCase):
         matchee = 'foo'
         matcher = Equals('bar')
         expected = (
-            'Match failed. Matchee: "%s"\n'
+            'Match failed. Matchee: %r\n'
             'Matcher: %s\n'
             'Difference: %s\n' % (
                 matchee,
@@ -494,6 +498,48 @@ class TestAssertions(TestCase):
         self.assertFails(
             expected, self.assertThat, matchee, matcher, verbose=True)
 
+    def get_error_string(self, e):
+        """Get the string showing how 'e' would be formatted in test output.
+
+        This is a little bit hacky, since it's designed to give consistent
+        output regardless of Python version.
+
+        In testtools, TestResult._exc_info_to_unicode is the point of dispatch
+        between various different implementations of methods that format
+        exceptions, so that's what we have to call. However, that method cares
+        about stack traces and formats the exception class. We don't care
+        about either of these, so we take its output and parse it a little.
+        """
+        error = TestResult()._exc_info_to_unicode((e.__class__, e, None), self)
+        # We aren't at all interested in the traceback.
+        if error.startswith('Traceback (most recent call last):\n'):
+            lines = error.splitlines(True)[1:]
+            for i, line in enumerate(lines):
+                if not line.startswith(' '):
+                    break
+            error = ''.join(lines[i:])
+        # We aren't interested in how the exception type is formatted.
+        exc_class, error = error.split(': ', 1)
+        return error
+
+    def test_assertThat_verbose_unicode(self):
+        # When assertThat is given matchees or matchers that contain non-ASCII
+        # unicode strings, we can still provide a meaningful error.
+        matchee = _u('\xa7')
+        matcher = Equals(_u('a'))
+        expected = (
+            'Match failed. Matchee: %s\n'
+            'Matcher: %s\n'
+            'Difference: %s\n\n' % (
+                repr(matchee).replace("\\xa7", matchee),
+                matcher,
+                matcher.match(matchee).describe(),
+                ))
+        e = self.assertRaises(
+            self.failureException, self.assertThat, matchee, matcher,
+            verbose=True)
+        self.assertEqual(expected, self.get_error_string(e))
+
     def test_assertEqual_nice_formatting(self):
         message = "These things ought not be equal."
         a = ['apple', 'banana', 'cherry']
@@ -519,6 +565,21 @@ class TestAssertions(TestCase):
         self.assertFails(expected_error, self.assertEquals, a, b)
         self.assertFails(expected_error, self.failUnlessEqual, a, b)
 
+    def test_assertEqual_non_ascii_str_with_newlines(self):
+        message = _u("Be careful mixing unicode and bytes")
+        a = "a\n\xa7\n"
+        b = "Just a longish string so the more verbose output form is used."
+        expected_error = '\n'.join([
+            '!=:',
+            "reference = '''\\",
+            'a',
+            repr('\xa7')[1:-1],
+            "'''",
+            'actual = %r' % (b,),
+            ': ' + message,
+            ])
+        self.assertFails(expected_error, self.assertEqual, a, b, message)
+
     def test_assertIsNone(self):
         self.assertIsNone(None)