summaryrefslogtreecommitdiff
path: root/lib/testtools/testtools/matchers/_doctest.py
blob: 41f3c003e537892a6fddad8df3cc8fe6d763827b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Copyright (c) 2009-2012 testtools developers. See LICENSE for details.

__all__ = [
    'DocTestMatches',
    ]

import doctest
import re

from ..compat import str_is_unicode
from ._impl import Mismatch


class _NonManglingOutputChecker(doctest.OutputChecker):
    """Doctest checker that works with unicode rather than mangling strings

    This is needed because current Python versions have tried to fix string
    encoding related problems, but regressed the default behaviour with
    unicode inputs in the process.

    In Python 2.6 and 2.7 ``OutputChecker.output_difference`` is was changed
    to return a bytestring encoded as per ``sys.stdout.encoding``, or utf-8 if
    that can't be determined. Worse, that encoding process happens in the
    innocent looking `_indent` global function. Because the
    `DocTestMismatch.describe` result may well not be destined for printing to
    stdout, this is no good for us. To get a unicode return as before, the
    method is monkey patched if ``doctest._encoding`` exists.

    Python 3 has a different problem. For some reason both inputs are encoded
    to ascii with 'backslashreplace', making an escaped string matches its
    unescaped form. Overriding the offending ``OutputChecker._toAscii`` method
    is sufficient to revert this.
    """

    def _toAscii(self, s):
        """Return ``s`` unchanged rather than mangling it to ascii"""
        return s

    # Only do this overriding hackery if doctest has a broken _input function
    if getattr(doctest, "_encoding", None) is not None:
        from types import FunctionType as __F
        __f = doctest.OutputChecker.output_difference.im_func
        __g = dict(__f.func_globals)
        def _indent(s, indent=4, _pattern=re.compile("^(?!$)", re.MULTILINE)):
            """Prepend non-empty lines in ``s`` with ``indent`` number of spaces"""
            return _pattern.sub(indent*" ", s)
        __g["_indent"] = _indent
        output_difference = __F(__f.func_code, __g, "output_difference")
        del __F, __f, __g, _indent


class DocTestMatches(object):
    """See if a string matches a doctest example."""

    def __init__(self, example, flags=0):
        """Create a DocTestMatches to match example.

        :param example: The example to match e.g. 'foo bar baz'
        :param flags: doctest comparison flags to match on. e.g.
            doctest.ELLIPSIS.
        """
        if not example.endswith('\n'):
            example += '\n'
        self.want = example # required variable name by doctest.
        self.flags = flags
        self._checker = _NonManglingOutputChecker()

    def __str__(self):
        if self.flags:
            flagstr = ", flags=%d" % self.flags
        else:
            flagstr = ""
        return 'DocTestMatches(%r%s)' % (self.want, flagstr)

    def _with_nl(self, actual):
        result = self.want.__class__(actual)
        if not result.endswith('\n'):
            result += '\n'
        return result

    def match(self, actual):
        with_nl = self._with_nl(actual)
        if self._checker.check_output(self.want, with_nl, self.flags):
            return None
        return DocTestMismatch(self, with_nl)

    def _describe_difference(self, with_nl):
        return self._checker.output_difference(self, with_nl, self.flags)


class DocTestMismatch(Mismatch):
    """Mismatch object for DocTestMatches."""

    def __init__(self, matcher, with_nl):
        self.matcher = matcher
        self.with_nl = with_nl

    def describe(self):
        s = self.matcher._describe_difference(self.with_nl)
        if str_is_unicode or isinstance(s, unicode):
            return s
        # GZ 2011-08-24: This is actually pretty bogus, most C0 codes should
        #                be escaped, in addition to non-ascii bytes.
        return s.decode("latin1").encode("ascii", "backslashreplace")