1 files changed, 104 insertions, 0 deletions
diff --git a/lib/testtools/testtools/matchers/_doctest.py b/lib/testtools/testtools/matchers/_doctest.py
new file mode 100644
index 0000000000..41f3c003e5
--- /dev/null
+++ b/lib/testtools/testtools/matchers/_doctest.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2009-2012 testtools developers. See LICENSE for details.
+
+__all__ = [
+    'DocTestMatches',
+    ]
+
+import doctest
+import re
+
+from ..compat import str_is_unicode
+from ._impl import Mismatch
+
+
+class _NonManglingOutputChecker(doctest.OutputChecker):
+    """Doctest checker that works with unicode rather than mangling strings
+
+    This is needed because current Python versions have tried to fix string
+    encoding related problems, but regressed the default behaviour with
+    unicode inputs in the process.
+
+    In Python 2.6 and 2.7 ``OutputChecker.output_difference`` was changed
+    to return a bytestring encoded as per ``sys.stdout.encoding``, or utf-8 if
+    that can't be determined. Worse, that encoding process happens in the
+    innocent looking `_indent` global function. Because the
+    `DocTestMismatch.describe` result may well not be destined for printing to
+    stdout, this is no good for us. To get a unicode return as before, the
+    method is rebuilt with a replacement `_indent` if ``doctest._encoding`` exists.
+
+    Python 3 has a different problem. For some reason both inputs are encoded
+    to ascii with 'backslashreplace', making an escaped string match its
+    unescaped form. Overriding the offending ``OutputChecker._toAscii`` method
+    is sufficient to revert this.
+    """
+
+    def _toAscii(self, s):
+        """Return ``s`` unchanged rather than mangling it to ascii"""
+        return s
+
+    # Only do this overriding hackery if doctest has a broken _indent function
+    if getattr(doctest, "_encoding", None) is not None:
+        from types import FunctionType as __F
+        __f = doctest.OutputChecker.output_difference.im_func  # underlying function
+        __g = dict(__f.func_globals)  # copy, so doctest's own globals are untouched
+        def _indent(s, indent=4, _pattern=re.compile("^(?!$)", re.MULTILINE)):
+            """Prepend non-empty lines in ``s`` with ``indent`` number of spaces"""
+            return _pattern.sub(indent*" ", s)
+        __g["_indent"] = _indent
+        output_difference = __F(__f.func_code, __g, "output_difference")  # same code, new globals
+        del __F, __f, __g, _indent  # drop the temporaries from the class body
+
+
+class DocTestMatches(object):
+    """Matcher that checks a string against a doctest example."""
+
+    def __init__(self, example, flags=0):
+        """Build a matcher for ``example``.
+
+        :param example: Expected text, e.g. 'foo bar baz'. A trailing
+            newline is appended when it is missing.
+        :param flags: doctest comparison flags, e.g. doctest.ELLIPSIS.
+        """
+        if not example.endswith('\n'):
+            example += '\n'
+        # doctest's output_difference reads the attribute named ``want``.
+        self.want, self.flags = example, flags
+        self._checker = _NonManglingOutputChecker()
+
+    def __str__(self):
+        """Render like the constructor call, showing flags only if set."""
+        suffix = ", flags=%d" % self.flags if self.flags else ""
+        return 'DocTestMatches(%r%s)' % (self.want, suffix)
+
+    def _with_nl(self, actual):
+        """Coerce ``actual`` to the type of ``want`` with a final newline."""
+        text = self.want.__class__(actual)
+        return text if text.endswith('\n') else text + '\n'
+
+    def match(self, actual):
+        """Return None on a match, otherwise a DocTestMismatch."""
+        candidate = self._with_nl(actual)
+        matched = self._checker.check_output(self.want, candidate, self.flags)
+        return None if matched else DocTestMismatch(self, candidate)
+
+    def _describe_difference(self, with_nl):
+        """Return the checker's description of how ``with_nl`` differs."""
+        # The matcher itself stands in for the doctest example (see ``want``).
+        checker = self._checker
+        return checker.output_difference(self, with_nl, self.flags)
+
+
+class DocTestMismatch(Mismatch):
+    """Records a failed DocTestMatches comparison."""
+
+    def __init__(self, matcher, with_nl):
+        self.matcher, self.with_nl = matcher, with_nl
+
+    def describe(self):
+        """Return the difference text reported by the matcher."""
+        diff = self.matcher._describe_difference(self.with_nl)
+        if not str_is_unicode and not isinstance(diff, unicode):
+            # GZ 2011-08-24: This is actually pretty bogus, most C0 codes
+            #                should be escaped, in addition to non-ascii bytes.
+            return diff.decode("latin1").encode("ascii", "backslashreplace")
+        return diff