From eae3e6230facf84f59b7d3a6582131d664372f1f Mon Sep 17 00:00:00 2001 From: Jelmer Vernooij Date: Thu, 8 Dec 2011 21:22:35 +0100 Subject: Add pep8. --- lib/pep8/pep8.py | 1360 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1360 insertions(+) create mode 100755 lib/pep8/pep8.py (limited to 'lib/pep8') diff --git a/lib/pep8/pep8.py b/lib/pep8/pep8.py new file mode 100755 index 0000000000..227a9a3ac4 --- /dev/null +++ b/lib/pep8/pep8.py @@ -0,0 +1,1360 @@ +#!/usr/bin/python +# pep8.py - Check Python source code formatting, according to PEP 8 +# Copyright (C) 2006 Johann C. Rocholl +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +""" +Check Python source code formatting, according to PEP 8: +http://www.python.org/dev/peps/pep-0008/ + +For usage and a list of options, try this: +$ python pep8.py -h + +This program and its regression test suite live here: +http://github.com/jcrocholl/pep8 + +Groups of errors and warnings: +E errors +W warnings +100 indentation +200 whitespace +300 blank lines +400 imports +500 line length +600 deprecation +700 statements + +You can add checks to this program by writing plugins. Each plugin is +a simple function that is called for each line of source code, either +physical or logical. + +Physical line: +- Raw line of text from the input file. + +Logical line: +- Multi-line statements converted to a single line. +- Stripped left and right. +- Contents of strings replaced with 'xxx' of same length. +- Comments removed. + +The check function requests physical or logical lines by the name of +the first argument: + +def maximum_line_length(physical_line) +def extraneous_whitespace(logical_line) +def blank_lines(logical_line, blank_lines, indent_level, line_number) + +The last example above demonstrates how check plugins can request +additional information with extra arguments. All attributes of the +Checker object are available. Some examples: + +lines: a list of the raw lines from the input file +tokens: the tokens that contribute to this logical line +line_number: line number in the input file +blank_lines: blank lines before this one +indent_char: first indentation character in this file (' ' or '\t') +indent_level: indentation (with tabs expanded to multiples of 8) +previous_indent_level: indentation on previous line +previous_logical: previous logical line + +The docstring of each check function shall be the relevant part of +text from PEP 8. It is printed if the user enables --show-pep8. +Several docstrings contain examples directly from the PEP 8 document. 
+ +Okay: spam(ham[1], {eggs: 2}) +E201: spam( ham[1], {eggs: 2}) + +These examples are verified automatically when pep8.py is run with the +--doctest option. You can add examples for your own check functions. +The format is simple: "Okay" or error/warning code followed by colon +and space, the rest of the line is example source code. If you put 'r' +before the docstring, you can use \n for newline, \t for tab and \s +for space. + +""" + +__version__ = '0.5.1dev' + +import os +import sys +import re +import time +import inspect +import keyword +import tokenize +from optparse import OptionParser +from fnmatch import fnmatch +try: + frozenset +except NameError: + from sets import ImmutableSet as frozenset + + +DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' +DEFAULT_IGNORE = 'E24' +MAX_LINE_LENGTH = 79 + +INDENT_REGEX = re.compile(r'([ \t]*)') +RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') +SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') +ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') +DOCSTRING_REGEX = re.compile(r'u?r?["\']') +WHITESPACE_AROUND_OPERATOR_REGEX = \ + re.compile('([^\w\s]*)\s*(\t| )\s*([^\w\s]*)') +EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') +WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \ + re.compile(r'[()]|\s=[^=]|[^=!<>]=\s') + + +WHITESPACE = ' \t' + +BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>', + '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=', + '%', '^', '&', '|', '=', '/', '//', '<', '>', '<<']) +UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) +OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS +SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT, + tokenize.DEDENT, tokenize.NEWLINE]) +E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) - + frozenset(['False', 'None', 'True'])) +BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines') + +options = None +args = None + + 
+############################################################################## +# Plugins (check functions) for physical lines +############################################################################## + + +def tabs_or_spaces(physical_line, indent_char): + r""" + Never mix tabs and spaces. + + The most popular way of indenting Python is with spaces only. The + second-most popular way is with tabs only. Code indented with a mixture + of tabs and spaces should be converted to using spaces exclusively. When + invoking the Python command line interpreter with the -t option, it issues + warnings about code that illegally mixes tabs and spaces. When using -tt + these warnings become errors. These options are highly recommended! + + Okay: if a == 0:\n a = 1\n b = 1 + E101: if a == 0:\n a = 1\n\tb = 1 + """ + indent = INDENT_REGEX.match(physical_line).group(1) + for offset, char in enumerate(indent): + if char != indent_char: + return offset, "E101 indentation contains mixed spaces and tabs" + + +def tabs_obsolete(physical_line): + r""" + For new projects, spaces-only are strongly recommended over tabs. Most + editors have features that make this easy to do. + + Okay: if True:\n return + W191: if True:\n\treturn + """ + indent = INDENT_REGEX.match(physical_line).group(1) + if indent.count('\t'): + return indent.index('\t'), "W191 indentation contains tabs" + + +def trailing_whitespace(physical_line): + r""" + JCR: Trailing whitespace is superfluous. + FBM: Except when it occurs as part of a blank line (i.e. the line is + nothing but whitespace). According to Python docs[1] a line with only + whitespace is considered a blank line, and is to be ignored. However, + matching a blank line to its indentation level avoids mistakenly + terminating a multi-line statement (e.g. class declaration) when + pasting code into the standard Python interpreter. 
+ + [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines + + The warning returned varies on whether the line itself is blank, for easier + filtering for those who want to indent their blank lines. + + Okay: spam(1) + W291: spam(1)\s + W293: class Foo(object):\n \n bang = 12 + """ + physical_line = physical_line.rstrip('\n') # chr(10), newline + physical_line = physical_line.rstrip('\r') # chr(13), carriage return + physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L + stripped = physical_line.rstrip() + if physical_line != stripped: + if stripped: + return len(stripped), "W291 trailing whitespace" + else: + return 0, "W293 blank line contains whitespace" + + +def trailing_blank_lines(physical_line, lines, line_number): + r""" + JCR: Trailing blank lines are superfluous. + + Okay: spam(1) + W391: spam(1)\n + """ + if physical_line.strip() == '' and line_number == len(lines): + return 0, "W391 blank line at end of file" + + +def missing_newline(physical_line): + """ + JCR: The last line should have a newline. + """ + if physical_line.rstrip() == physical_line: + return len(physical_line), "W292 no newline at end of file" + + +def maximum_line_length(physical_line): + """ + Limit all lines to a maximum of 79 characters. + + There are still many devices around that are limited to 80 character + lines; plus, limiting windows to 80 characters makes it possible to have + several windows side-by-side. The default wrapping on such devices looks + ugly. Therefore, please limit all lines to a maximum of 79 characters. + For flowing long blocks of text (docstrings or comments), limiting the + length to 72 characters is recommended. 
+ """ + line = physical_line.rstrip() + length = len(line) + if length > MAX_LINE_LENGTH: + try: + # The line could contain multi-byte characters + if not hasattr(line, 'decode'): # Python 3 + line = line.encode('latin-1') + length = len(line.decode('utf-8')) + except UnicodeDecodeError: + pass + if length > MAX_LINE_LENGTH: + return MAX_LINE_LENGTH, "E501 line too long (%d characters)" % length + + +############################################################################## +# Plugins (check functions) for logical lines +############################################################################## + + +def blank_lines(logical_line, blank_lines, indent_level, line_number, + previous_logical, previous_indent_level, + blank_lines_before_comment): + r""" + Separate top-level function and class definitions with two blank lines. + + Method definitions inside a class are separated by a single blank line. + + Extra blank lines may be used (sparingly) to separate groups of related + functions. Blank lines may be omitted between a bunch of related + one-liners (e.g. a set of dummy implementations). + + Use blank lines in functions, sparingly, to indicate logical sections. 
+ + Okay: def a():\n pass\n\n\ndef b():\n pass + Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass + + E301: class Foo:\n b = 0\n def bar():\n pass + E302: def a():\n pass\n\ndef b(n):\n pass + E303: def a():\n pass\n\n\n\ndef b(n):\n pass + E303: def a():\n\n\n\n pass + E304: @decorator\n\ndef a():\n pass + """ + if line_number == 1: + return # Don't expect blank lines before the first line + max_blank_lines = max(blank_lines, blank_lines_before_comment) + if previous_logical.startswith('@'): + if max_blank_lines: + return 0, "E304 blank lines found after function decorator" + elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2): + return 0, "E303 too many blank lines (%d)" % max_blank_lines + elif (logical_line.startswith('def ') or + logical_line.startswith('class ') or + logical_line.startswith('@')): + if indent_level: + if not (max_blank_lines or previous_indent_level < indent_level or + DOCSTRING_REGEX.match(previous_logical)): + return 0, "E301 expected 1 blank line, found 0" + elif max_blank_lines != 2: + return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines + + +def extraneous_whitespace(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - Immediately inside parentheses, brackets or braces. + + - Immediately before a comma, semicolon, or colon. 
+ + Okay: spam(ham[1], {eggs: 2}) + E201: spam( ham[1], {eggs: 2}) + E201: spam(ham[ 1], {eggs: 2}) + E201: spam(ham[1], { eggs: 2}) + E202: spam(ham[1], {eggs: 2} ) + E202: spam(ham[1 ], {eggs: 2}) + E202: spam(ham[1], {eggs: 2 }) + + E203: if x == 4: print x, y; x, y = y , x + E203: if x == 4: print x, y ; x, y = y, x + E203: if x == 4 : print x, y; x, y = y, x + """ + line = logical_line + for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line): + text = match.group() + char = text.strip() + found = match.start() + if text == char + ' ' and char in '([{': + return found + 1, "E201 whitespace after '%s'" % char + if text == ' ' + char and line[found - 1] != ',': + if char in '}])': + return found, "E202 whitespace before '%s'" % char + if char in ',;:': + return found, "E203 whitespace before '%s'" % char + + +def missing_whitespace(logical_line): + """ + JCR: Each comma, semicolon or colon should be followed by whitespace. + + Okay: [a, b] + Okay: (3,) + Okay: a[1:4] + Okay: a[:4] + Okay: a[1:] + Okay: a[1:4:2] + E231: ['a','b'] + E231: foo(bar,baz) + """ + line = logical_line + for index in range(len(line) - 1): + char = line[index] + if char in ',;:' and line[index + 1] not in WHITESPACE: + before = line[:index] + if char == ':' and before.count('[') > before.count(']'): + continue # Slice syntax, no space required + if char == ',' and line[index + 1] == ')': + continue # Allow tuple with only one element: (3,) + return index, "E231 missing whitespace after '%s'" % char + + +def indentation(logical_line, previous_logical, indent_char, + indent_level, previous_indent_level): + r""" + Use 4 spaces per indentation level. + + For really old code that you don't want to mess up, you can continue to + use 8-space tabs. 
+ + Okay: a = 1 + Okay: if a == 0:\n a = 1 + E111: a = 1 + + Okay: for item in items:\n pass + E112: for item in items:\npass + + Okay: a = 1\nb = 2 + E113: a = 1\n b = 2 + """ + if indent_char == ' ' and indent_level % 4: + return 0, "E111 indentation is not a multiple of four" + indent_expect = previous_logical.endswith(':') + if indent_expect and indent_level <= previous_indent_level: + return 0, "E112 expected an indented block" + if indent_level > previous_indent_level and not indent_expect: + return 0, "E113 unexpected indentation" + + +def whitespace_before_parameters(logical_line, tokens): + """ + Avoid extraneous whitespace in the following situations: + + - Immediately before the open parenthesis that starts the argument + list of a function call. + + - Immediately before the open parenthesis that starts an indexing or + slicing. + + Okay: spam(1) + E211: spam (1) + + Okay: dict['key'] = list[index] + E211: dict ['key'] = list[index] + E211: dict['key'] = list [index] + """ + prev_type = tokens[0][0] + prev_text = tokens[0][1] + prev_end = tokens[0][3] + for index in range(1, len(tokens)): + token_type, text, start, end, line = tokens[index] + if (token_type == tokenize.OP and + text in '([' and + start != prev_end and + (prev_type == tokenize.NAME or prev_text in '}])') and + # Syntax "class A (B):" is allowed, but avoid it + (index < 2 or tokens[index - 2][1] != 'class') and + # Allow "return (a.foo for a in range(5))" + (not keyword.iskeyword(prev_text))): + return prev_end, "E211 whitespace before '%s'" % text + prev_type = token_type + prev_text = text + prev_end = end + + +def whitespace_around_operator(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - More than one space around an assignment (or other) operator to + align it with another. 
+ + Okay: a = 12 + 3 + E221: a = 4 + 5 + E222: a = 4 + 5 + E223: a = 4\t+ 5 + E224: a = 4 +\t5 + """ + for match in WHITESPACE_AROUND_OPERATOR_REGEX.finditer(logical_line): + before, whitespace, after = match.groups() + tab = whitespace == '\t' + offset = match.start(2) + if before in OPERATORS: + return offset, (tab and "E224 tab after operator" or + "E222 multiple spaces after operator") + elif after in OPERATORS: + return offset, (tab and "E223 tab before operator" or + "E221 multiple spaces before operator") + + +def missing_whitespace_around_operator(logical_line, tokens): + r""" + - Always surround these binary operators with a single space on + either side: assignment (=), augmented assignment (+=, -= etc.), + comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), + Booleans (and, or, not). + + - Use spaces around arithmetic operators. + + Okay: i = i + 1 + Okay: submitted += 1 + Okay: x = x * 2 - 1 + Okay: hypot2 = x * x + y * y + Okay: c = (a + b) * (a - b) + Okay: foo(bar, key='word', *args, **kwargs) + Okay: baz(**kwargs) + Okay: negative = -1 + Okay: spam(-1) + Okay: alpha[:-i] + Okay: if not -5 < x < +5:\n pass + Okay: lambda *args, **kw: (args, kw) + + E225: i=i+1 + E225: submitted +=1 + E225: x = x*2 - 1 + E225: hypot2 = x*x + y*y + E225: c = (a+b) * (a-b) + E225: c = alpha -4 + E225: z = x **y + """ + parens = 0 + need_space = False + prev_type = tokenize.OP + prev_text = prev_end = None + for token_type, text, start, end, line in tokens: + if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): + # ERRORTOKEN is triggered by backticks in Python 3000 + continue + if text in ('(', 'lambda'): + parens += 1 + elif text == ')': + parens -= 1 + if need_space: + if start != prev_end: + need_space = False + elif text == '>' and prev_text == '<': + # Tolerate the "<>" operator, even if running Python 3 + pass + else: + return prev_end, "E225 missing whitespace around operator" + elif token_type == tokenize.OP and prev_end is not 
None: + if text == '=' and parens: + # Allow keyword args or defaults: foo(bar=None). + pass + elif text in BINARY_OPERATORS: + need_space = True + elif text in UNARY_OPERATORS: + # Allow unary operators: -123, -x, +1. + # Allow argument unpacking: foo(*args, **kwargs). + if prev_type == tokenize.OP: + if prev_text in '}])': + need_space = True + elif prev_type == tokenize.NAME: + if prev_text not in E225NOT_KEYWORDS: + need_space = True + else: + need_space = True + if need_space and start == prev_end: + return prev_end, "E225 missing whitespace around operator" + prev_type = token_type + prev_text = text + prev_end = end + + +def whitespace_around_comma(logical_line): + """ + Avoid extraneous whitespace in the following situations: + + - More than one space around an assignment (or other) operator to + align it with another. + + JCR: This should also be applied around comma etc. + Note: these checks are disabled by default + + Okay: a = (1, 2) + E241: a = (1, 2) + E242: a = (1,\t2) + """ + line = logical_line + for separator in ',;:': + found = line.find(separator + ' ') + if found > -1: + return found + 1, "E241 multiple spaces after '%s'" % separator + found = line.find(separator + '\t') + if found > -1: + return found + 1, "E242 tab after '%s'" % separator + + +def whitespace_around_named_parameter_equals(logical_line): + """ + Don't use spaces around the '=' sign when used to indicate a + keyword argument or a default parameter value. 
+ + Okay: def complex(real, imag=0.0): + Okay: return magic(r=real, i=imag) + Okay: boolean(a == b) + Okay: boolean(a != b) + Okay: boolean(a <= b) + Okay: boolean(a >= b) + + E251: def complex(real, imag = 0.0): + E251: return magic(r = real, i = imag) + """ + parens = 0 + for match in WHITESPACE_AROUND_NAMED_PARAMETER_REGEX.finditer( + logical_line): + text = match.group() + if parens and len(text) == 3: + issue = "E251 no spaces around keyword / parameter equals" + return match.start(), issue + if text == '(': + parens += 1 + elif text == ')': + parens -= 1 + + +def whitespace_before_inline_comment(logical_line, tokens): + """ + Separate inline comments by at least two spaces. + + An inline comment is a comment on the same line as a statement. Inline + comments should be separated by at least two spaces from the statement. + They should start with a # and a single space. + + Okay: x = x + 1 # Increment x + Okay: x = x + 1 # Increment x + E261: x = x + 1 # Increment x + E262: x = x + 1 #Increment x + E262: x = x + 1 # Increment x + """ + prev_end = (0, 0) + for token_type, text, start, end, line in tokens: + if token_type == tokenize.NL: + continue + if token_type == tokenize.COMMENT: + if not line[:start[1]].strip(): + continue + if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: + return (prev_end, + "E261 at least two spaces before inline comment") + if (len(text) > 1 and text.startswith('# ') + or not text.startswith('# ')): + return start, "E262 inline comment should start with '# '" + else: + prev_end = end + + +def imports_on_separate_lines(logical_line): + r""" + Imports should usually be on separate lines. 
+ + Okay: import os\nimport sys + E401: import sys, os + + Okay: from subprocess import Popen, PIPE + Okay: from myclas import MyClass + Okay: from foo.bar.yourclass import YourClass + Okay: import myclass + Okay: import foo.bar.yourclass + """ + line = logical_line + if line.startswith('import '): + found = line.find(',') + if found > -1: + return found, "E401 multiple imports on one line" + + +def compound_statements(logical_line): + r""" + Compound statements (multiple statements on the same line) are + generally discouraged. + + While sometimes it's okay to put an if/for/while with a small body + on the same line, never do this for multi-clause statements. Also + avoid folding such long lines! + + Okay: if foo == 'blah':\n do_blah_thing() + Okay: do_one() + Okay: do_two() + Okay: do_three() + + E701: if foo == 'blah': do_blah_thing() + E701: for x in lst: total += x + E701: while t < 10: t = delay() + E701: if foo == 'blah': do_blah_thing() + E701: else: do_non_blah_thing() + E701: try: something() + E701: finally: cleanup() + E701: if foo == 'blah': one(); two(); three() + + E702: do_one(); do_two(); do_three() + """ + line = logical_line + found = line.find(':') + if -1 < found < len(line) - 1: + before = line[:found] + if (before.count('{') <= before.count('}') and # {'a': 1} (dict) + before.count('[') <= before.count(']') and # [1:2] (slice) + not re.search(r'\blambda\b', before)): # lambda x: x + return found, "E701 multiple statements on one line (colon)" + found = line.find(';') + if -1 < found: + return found, "E702 multiple statements on one line (semicolon)" + + +def python_3000_has_key(logical_line): + """ + The {}.has_key() method will be removed in the future version of + Python. 
Use the 'in' operation instead, like: + d = {"a": 1, "b": 2} + if "b" in d: + print d["b"] + """ + pos = logical_line.find('.has_key(') + if pos > -1: + return pos, "W601 .has_key() is deprecated, use 'in'" + + +def python_3000_raise_comma(logical_line): + """ + When raising an exception, use "raise ValueError('message')" + instead of the older form "raise ValueError, 'message'". + + The paren-using form is preferred because when the exception arguments + are long or include string formatting, you don't need to use line + continuation characters thanks to the containing parentheses. The older + form will be removed in Python 3000. + """ + match = RAISE_COMMA_REGEX.match(logical_line) + if match: + return match.start(1), "W602 deprecated form of raising exception" + + +def python_3000_not_equal(logical_line): + """ + != can also be written <>, but this is an obsolete usage kept for + backwards compatibility only. New code should always use !=. + The older syntax is removed in Python 3000. + """ + pos = logical_line.find('<>') + if pos > -1: + return pos, "W603 '<>' is deprecated, use '!='" + + +def python_3000_backticks(logical_line): + """ + Backticks are removed in Python 3000. + Use repr() instead. + """ + pos = logical_line.find('`') + if pos > -1: + return pos, "W604 backticks are deprecated, use 'repr()'" + + +############################################################################## +# Helper functions +############################################################################## + + +if '' == ''.encode(): + # Python 2: implicit encoding. + def readlines(filename): + return open(filename).readlines() +else: + # Python 3: decode to latin-1. + # This function is lazy, it does not read the encoding declaration. + # XXX: use tokenize.detect_encoding() + def readlines(filename): + return open(filename, encoding='latin-1').readlines() + + +def expand_indent(line): + """ + Return the amount of indentation. + Tabs are expanded to the next multiple of 8. 
+ + >>> expand_indent(' ') + 4 + >>> expand_indent('\\t') + 8 + >>> expand_indent(' \\t') + 8 + >>> expand_indent(' \\t') + 8 + >>> expand_indent(' \\t') + 16 + """ + result = 0 + for char in line: + if char == '\t': + result = result // 8 * 8 + 8 + elif char == ' ': + result += 1 + else: + break + return result + + +def mute_string(text): + """ + Replace contents with 'xxx' to prevent syntax matching. + + >>> mute_string('"abc"') + '"xxx"' + >>> mute_string("'''abc'''") + "'''xxx'''" + >>> mute_string("r'abc'") + "r'xxx'" + """ + start = 1 + end = len(text) - 1 + # String modifiers (e.g. u or r) + if text.endswith('"'): + start += text.index('"') + elif text.endswith("'"): + start += text.index("'") + # Triple quotes + if text.endswith('"""') or text.endswith("'''"): + start += 2 + end -= 2 + return text[:start] + 'x' * (end - start) + text[end:] + + +def message(text): + """Print a message.""" + # print >> sys.stderr, options.prog + ': ' + text + # print >> sys.stderr, text + print(text) + + +############################################################################## +# Framework to run all checks +############################################################################## + + +def find_checks(argument_name): + """ + Find all globally visible functions where the first argument name + starts with argument_name. + """ + checks = [] + for name, function in globals().items(): + if not inspect.isfunction(function): + continue + args = inspect.getargspec(function)[0] + if args and args[0].startswith(argument_name): + codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '') + for code in codes or ['']: + if not code or not ignore_code(code): + checks.append((name, function, args)) + break + checks.sort() + return checks + + +class Checker(object): + """ + Load a Python source file, tokenize it, check coding style. 
+ """ + + def __init__(self, filename, lines=None): + self.filename = filename + if filename is None: + self.filename = 'stdin' + self.lines = lines or [] + elif lines is None: + self.lines = readlines(filename) + else: + self.lines = lines + options.counters['physical lines'] += len(self.lines) + + def readline(self): + """ + Get the next line from the input buffer. + """ + self.line_number += 1 + if self.line_number > len(self.lines): + return '' + return self.lines[self.line_number - 1] + + def readline_check_physical(self): + """ + Check and return the next physical line. This method can be + used to feed tokenize.generate_tokens. + """ + line = self.readline() + if line: + self.check_physical(line) + return line + + def run_check(self, check, argument_names): + """ + Run a check plugin. + """ + arguments = [] + for name in argument_names: + arguments.append(getattr(self, name)) + return check(*arguments) + + def check_physical(self, line): + """ + Run all physical checks on a raw input line. + """ + self.physical_line = line + if self.indent_char is None and len(line) and line[0] in ' \t': + self.indent_char = line[0] + for name, check, argument_names in options.physical_checks: + result = self.run_check(check, argument_names) + if result is not None: + offset, text = result + self.report_error(self.line_number, offset, text, check) + + def build_tokens_line(self): + """ + Build a logical line from tokens. 
+ """ + self.mapping = [] + logical = [] + length = 0 + previous = None + for token in self.tokens: + token_type, text = token[0:2] + if token_type in SKIP_TOKENS: + continue + if token_type == tokenize.STRING: + text = mute_string(text) + if previous: + end_line, end = previous[3] + start_line, start = token[2] + if end_line != start_line: # different row + prev_text = self.lines[end_line - 1][end - 1] + if prev_text == ',' or (prev_text not in '{[(' + and text not in '}])'): + logical.append(' ') + length += 1 + elif end != start: # different column + fill = self.lines[end_line - 1][end:start] + logical.append(fill) + length += len(fill) + self.mapping.append((length, token)) + logical.append(text) + length += len(text) + previous = token + self.logical_line = ''.join(logical) + assert self.logical_line.lstrip() == self.logical_line + assert self.logical_line.rstrip() == self.logical_line + + def check_logical(self): + """ + Build a line from tokens and run all logical checks on it. + """ + options.counters['logical lines'] += 1 + self.build_tokens_line() + first_line = self.lines[self.mapping[0][1][2][0] - 1] + indent = first_line[:self.mapping[0][1][2][1]] + self.previous_indent_level = self.indent_level + self.indent_level = expand_indent(indent) + if options.verbose >= 2: + print(self.logical_line[:80].rstrip()) + for name, check, argument_names in options.logical_checks: + if options.verbose >= 4: + print(' ' + name) + result = self.run_check(check, argument_names) + if result is not None: + offset, text = result + if isinstance(offset, tuple): + original_number, original_offset = offset + else: + for token_offset, token in self.mapping: + if offset >= token_offset: + original_number = token[2][0] + original_offset = (token[2][1] + + offset - token_offset) + self.report_error(original_number, original_offset, + text, check) + self.previous_logical = self.logical_line + + def check_all(self, expected=None, line_offset=0): + """ + Run all checks on the input 
def report_error(self, line_number, offset, text, check):
    """
    Report an error, according to options.

    NOTE(review): ``self`` is presumably the Checker instance -- the class
    header lies outside this excerpt, so confirm the nesting (and indent
    this function accordingly) when re-integrating.

    line_number -- 1-based index into ``self.lines``; ``self.line_offset``
                   is added to it for display.
    offset      -- 0-based column of the problem (printed 1-based).
    text        -- message made of a four character code, one separator
                   character and the description, e.g. 'E501 line too long'.
    check       -- the check function that found the problem; its docstring
                   is printed when ``options.show_pep8`` is set.
    """
    code = text[:4]
    if ignore_code(code):
        return
    if options.quiet == 1 and not self.file_errors:
        # -q: print the file name once, before its first counted problem.
        message(self.filename)
    if code in options.counters:
        options.counters[code] += 1
    else:
        options.counters[code] = 1
        options.messages[code] = text[5:]
    if options.quiet or code in self.expected:
        # Don't care about expected errors or warnings
        return
    self.file_errors += 1
    if options.counters[code] == 1 or options.repeat:
        message("%s:%s:%d: %s" %
                (self.filename, self.line_offset + line_number,
                 offset + 1, text))
        if options.show_source:
            line = self.lines[line_number - 1]
            message(line.rstrip())
            message(' ' * offset + '^')
        if options.show_pep8:
            # __doc__ is None for an undocumented check and whenever
            # docstrings are stripped (python -OO); do not crash on it.
            message((check.__doc__ or '').lstrip('\n').rstrip())


def input_file(filename):
    """
    Run all checks on a Python source file.

    Returns the value of ``Checker(filename).check_all()``.
    """
    if options.verbose:
        message('checking ' + filename)
    return Checker(filename).check_all()


def input_dir(dirname, runner=None):
    """
    Check all Python source files in this directory and all subdirectories.

    dirname -- directory to walk; nothing happens if it is excluded.
    runner  -- callable invoked with the path of every matching file
               (defaults to input_file).
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    if runner is None:
        runner = input_file
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] += 1
        # Prune excluded subdirectories *in place* so that os.walk() does
        # not descend into them.  The list is rebuilt in one step because
        # removing entries while iterating over it skips the entry that
        # follows each removal.
        dirs[:] = sorted([subdir for subdir in dirs if not excluded(subdir)])
        files.sort()
        for filename in files:
            if filename_match(filename) and not excluded(filename):
                options.counters['files'] += 1
                runner(os.path.join(root, filename))


def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the base name of ``filename`` is compared with the patterns.
    """
    basename = os.path.basename(filename)
    for pattern in options.exclude:
        if fnmatch(basename, pattern):
            return True
    return False


def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.
    """
    if not options.filename:
        return True
    for pattern in options.filename:
        if fnmatch(filename, pattern):
            return True
    return False


def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.
    If options.select contains a prefix of the error code, do not ignore it.
    """
    # options.select is consulted first, so it wins over options.ignore.
    for select in options.select:
        if code.startswith(select):
            return False
    for ignore in options.ignore:
        if code.startswith(ignore):
            return True
    return False


def reset_counters():
    """
    Forget all counted errors and warnings.

    Counters listed in BENCHMARK_KEYS are kept; all stored message texts
    are dropped.
    """
    for key in list(options.counters.keys()):
        if key not in BENCHMARK_KEYS:
            del options.counters[key]
    options.messages = {}


def get_error_statistics():
    """Get error statistics."""
    return get_statistics("E")


def get_warning_statistics():
    """Get warning statistics."""
    return get_statistics("W")


def get_statistics(prefix=''):
    """
    Get statistics for message codes that start with the prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports

    Returns a list of 'count code message' strings, sorted by code.
    """
    return ['%-7s %s %s' %
            (options.counters[key], key, options.messages[key])
            for key in sorted(options.messages)
            if key.startswith(prefix)]


def get_count(prefix=''):
    """Return the total count of errors and warnings."""
    return sum([options.counters[key] for key in options.messages
                if key.startswith(prefix)])


def print_statistics(prefix=''):
    """Print overall statistics (number of errors and warnings)."""
    for line in get_statistics(prefix):
        print(line)


def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed -- run time in seconds.  A value of 0 is reported as a rate
               of 0 instead of raising ZeroDivisionError.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in BENCHMARK_KEYS:
        total = options.counters[key]
        if elapsed:
            rate = total / elapsed
        else:
            rate = 0
        print('%-7d %s per second (%d total)' % (rate, key, total))


def run_tests(filename):
    """
    Run all the tests from a file.

    A test file can provide many tests.  Each test starts with a declaration.
    This declaration is a single line starting with '#:'.
    It declares the codes of the expected failures, separated by spaces, or
    'Okay' if no failure is expected.
    If the file does not contain such a declaration, it should pass all
    tests.
    If the declaration is empty, the following lines are not checked, until
    the next declaration.

    Examples:

     * Only E224 and W701 are expected:         #: E224 W701
     * The following example conforms:          #: Okay
     * Don't check these lines:                 #:
    """
    # The extra empty declaration flushes the last test case of the file.
    lines = readlines(filename) + ['#:\n']
    line_offset = 0
    codes = ['Okay']
    testcase = []
    for index, line in enumerate(lines):
        if not line.startswith('#:'):
            if codes:
                # Collect the lines of the test case
                testcase.append(line)
            continue
        if codes and index > 0:
            label = '%s:%s:1' % (filename, line_offset + 1)
            codes = [c for c in codes if c != 'Okay']
            # Run the checker
            errors = Checker(filename, testcase).check_all(codes, line_offset)
            # Check if the expected errors were found
            for code in codes:
                if not options.counters.get(code):
                    errors += 1
                    message('%s: error %s not found' % (label, code))
            if options.verbose and not errors:
                message('%s: passed (%s)' % (label, ' '.join(codes)))
            # Keep showing errors for multiple tests
            reset_counters()
        # output the real line numbers
        line_offset = index
        # configure the expected errors
        codes = line.split()[1:]
        # empty the test case buffer
        del testcase[:]


def selftest():
    """
    Test all check functions with test cases in docstrings.

    Every docstring line matching SELFTEST_REGEX yields an expected code
    ('Okay' for none) and a source snippet, in which the two character
    sequences backslash-n, backslash-t and backslash-s stand for a line
    break, a tab and a space.
    """
    count_passed = 0
    count_failed = 0
    checks = options.physical_checks + options.logical_checks
    # Only the counters are inspected here, so regular reporting is
    # silenced for the duration of the self test and restored afterwards;
    # otherwise files checked later in the same run would stay silent.
    saved_quiet = options.quiet
    options.quiet = 2
    try:
        for name, check, argument_names in checks:
            # A missing docstring (e.g. python -OO) just means no test case.
            for doc_line in (check.__doc__ or '').splitlines():
                match = SELFTEST_REGEX.match(doc_line.lstrip())
                if match is None:
                    continue
                code, source = match.groups()
                checker = Checker(None)
                for part in source.split(r'\n'):
                    part = part.replace(r'\t', '\t')
                    part = part.replace(r'\s', ' ')
                    checker.lines.append(part + '\n')
                checker.check_all()
                error = None
                if code == 'Okay':
                    if len(options.counters) > len(BENCHMARK_KEYS):
                        codes = [key for key in options.counters.keys()
                                 if key not in BENCHMARK_KEYS]
                        error = "incorrectly found %s" % ', '.join(codes)
                elif not options.counters.get(code):
                    error = "failed to find %s" % code
                # Reset the counters
                reset_counters()
                if not error:
                    count_passed += 1
                else:
                    count_failed += 1
                    if len(checker.lines) == 1:
                        print("pep8.py: %s: %s" %
                              (error, checker.lines[0].rstrip()))
                    else:
                        print("pep8.py: %s:" % error)
                        for source_line in checker.lines:
                            print(source_line.rstrip())
    finally:
        options.quiet = saved_quiet
    if options.verbose:
        print("%d passed and %d failed." % (count_passed, count_failed))
        if count_failed:
            print("Test failed.")
        else:
            print("Test passed.")


def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.

    Stores the result in the module level ``options`` and ``args`` and
    returns them as the tuple (options, args).
    """
    global options, args
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('-r', '--repeat', action='store_true',
                      help="show all occurrences of the same error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                        "comma separated patterns (default: %s)" %
                        DEFAULT_EXCLUDE)
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                        "matching these comma separated patterns (default: "
                        "*.py)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                        "to standard error and set exit code to 1 if "
                        "total is not null")
    parser.add_option('--benchmark', action='store_true',
                      help="measure processing speed")
    parser.add_option('--testsuite', metavar='dir',
                      help="run regression tests from dir")
    parser.add_option('--doctest', action='store_true',
                      help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        args.append(options.testsuite)
    if not args and not options.doctest:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    # Patterns are compared with base names, so a trailing '/' is dropped.
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    if options.select:
        options.select = options.select.split(',')
    else:
        options.select = []
    if options.ignore:
        options.ignore = options.ignore.split(',')
    elif options.select:
        # Ignore all checks which are not explicitly selected
        # ('' is a prefix of every code).
        options.ignore = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        options.ignore = []
    else:
        # The default choice: ignore controversial checks
        options.ignore = DEFAULT_IGNORE.split(',')
    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)
    options.messages = {}
    return options, args


def _main():
    """
    Parse options and run checks on Python source.

    Exits with status 1 if any error or warning was counted.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    if options.testsuite:
        runner = run_tests
    else:
        runner = input_file
    start_time = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path, runner=runner)
        elif not excluded(path):
            options.counters['files'] += 1
            runner(path)
    elapsed = time.time() - start_time
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    count = get_count()
    if count:
        # --count only adds the total on stderr; the exit status signals
        # problems either way.
        if options.count:
            sys.stderr.write(str(count) + '\n')
        sys.exit(1)


if __name__ == '__main__':
    _main()