diff options
Diffstat (limited to 'lib/testtools/testtools/matchers/_doctest.py')
-rw-r--r-- | lib/testtools/testtools/matchers/_doctest.py | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/lib/testtools/testtools/matchers/_doctest.py b/lib/testtools/testtools/matchers/_doctest.py new file mode 100644 index 0000000000..41f3c003e5 --- /dev/null +++ b/lib/testtools/testtools/matchers/_doctest.py @@ -0,0 +1,104 @@ +# Copyright (c) 2009-2012 testtools developers. See LICENSE for details. + +__all__ = [ + 'DocTestMatches', + ] + +import doctest +import re + +from ..compat import str_is_unicode +from ._impl import Mismatch + + +class _NonManglingOutputChecker(doctest.OutputChecker): + """Doctest checker that works with unicode rather than mangling strings + + This is needed because current Python versions have tried to fix string + encoding related problems, but regressed the default behaviour with + unicode inputs in the process. + + In Python 2.6 and 2.7 ``OutputChecker.output_difference`` is was changed + to return a bytestring encoded as per ``sys.stdout.encoding``, or utf-8 if + that can't be determined. Worse, that encoding process happens in the + innocent looking `_indent` global function. Because the + `DocTestMismatch.describe` result may well not be destined for printing to + stdout, this is no good for us. To get a unicode return as before, the + method is monkey patched if ``doctest._encoding`` exists. + + Python 3 has a different problem. For some reason both inputs are encoded + to ascii with 'backslashreplace', making an escaped string matches its + unescaped form. Overriding the offending ``OutputChecker._toAscii`` method + is sufficient to revert this. + """ + + def _toAscii(self, s): + """Return ``s`` unchanged rather than mangling it to ascii""" + return s + + # Only do this overriding hackery if doctest has a broken _input function + if getattr(doctest, "_encoding", None) is not None: + from types import FunctionType as __F + __f = doctest.OutputChecker.output_difference.im_func + __g = dict(__f.func_globals) + def _indent(s, indent=4, _pattern=re.compile("^(?!$)", re.MULTILINE)): + """Prepend non-empty lines in ``s`` with ``indent`` number of spaces""" + return _pattern.sub(indent*" ", s) + __g["_indent"] = _indent + output_difference = __F(__f.func_code, __g, "output_difference") + del __F, __f, __g, _indent + + +class DocTestMatches(object): + """See if a string matches a doctest example.""" + + def __init__(self, example, flags=0): + """Create a DocTestMatches to match example. + + :param example: The example to match e.g. 'foo bar baz' + :param flags: doctest comparison flags to match on. e.g. + doctest.ELLIPSIS. + """ + if not example.endswith('\n'): + example += '\n' + self.want = example # required variable name by doctest. + self.flags = flags + self._checker = _NonManglingOutputChecker() + + def __str__(self): + if self.flags: + flagstr = ", flags=%d" % self.flags + else: + flagstr = "" + return 'DocTestMatches(%r%s)' % (self.want, flagstr) + + def _with_nl(self, actual): + result = self.want.__class__(actual) + if not result.endswith('\n'): + result += '\n' + return result + + def match(self, actual): + with_nl = self._with_nl(actual) + if self._checker.check_output(self.want, with_nl, self.flags): + return None + return DocTestMismatch(self, with_nl) + + def _describe_difference(self, with_nl): + return self._checker.output_difference(self, with_nl, self.flags) + + +class DocTestMismatch(Mismatch): + """Mismatch object for DocTestMatches.""" + + def __init__(self, matcher, with_nl): + self.matcher = matcher + self.with_nl = with_nl + + def describe(self): + s = self.matcher._describe_difference(self.with_nl) + if str_is_unicode or isinstance(s, unicode): + return s + # GZ 2011-08-24: This is actually pretty bogus, most C0 codes should + # be escaped, in addition to non-ascii bytes. + return s.decode("latin1").encode("ascii", "backslashreplace") |