krb5 commit: Add Python scripts to check for C style issues
Greg Hudson
ghudson at MIT.EDU
Thu Oct 4 10:35:43 EDT 2012
https://github.com/krb5/krb5/commit/70a119d4dc7ed7a94cfc32c523352af1d000e1c7
commit 70a119d4dc7ed7a94cfc32c523352af1d000e1c7
Author: Greg Hudson <ghudson at mit.edu>
Date: Mon Sep 24 16:39:39 2012 -0400
Add Python scripts to check for C style issues
util/cstyle-file.py checks a file for C style issues and displays
line-by-line output. It is not terribly sophisticated, and can
probably be improved upon (e.g. by doing an emacs batch-reindent of
the file and checking for differences in indentation).
util/cstyle.py produces diffs using git, runs the file checker on each
modified C source file in each diff, and displays the output lines
attribute to the diff.
src/util/cstyle-file.py | 262 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/cstyle.py | 188 +++++++++++++++++++++++++++++++++
2 files changed, 450 insertions(+), 0 deletions(-)
diff --git a/src/util/cstyle-file.py b/src/util/cstyle-file.py
new file mode 100644
index 0000000..edf8a39
--- /dev/null
+++ b/src/util/cstyle-file.py
@@ -0,0 +1,262 @@
+# Copyright (C) 2012 by the Massachusetts Institute of Technology.
+# All rights reserved.
+#
+# Export of this software from the United States of America may
+# require a specific license from the United States Government.
+# It is the responsibility of any person or organization contemplating
+# export to obtain such a license before exporting.
+#
+# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+# distribute this software and its documentation for any purpose and
+# without fee is hereby granted, provided that the above copyright
+# notice appear in all copies and that both that copyright notice and
+# this permission notice appear in supporting documentation, and that
+# the name of M.I.T. not be used in advertising or publicity pertaining
+# to distribution of the software without specific, written prior
+# permission. Furthermore if you modify this software you must label
+# your software as modified software and not distribute it in such a
+# fashion that it might be confused with the original M.I.T. software.
+# M.I.T. makes no representations about the suitability of
+# this software for any purpose. It is provided "as is" without express
+# or implied warranty.
+
+# This program checks for some kinds of MIT krb5 coding style
+# violations in a single file. Checked violations include:
+#
+# Line is too long
+# Tabs violations
+# Trailing whitespace and final blank lines
+# Comment formatting errors
+# Preprocessor statements in function bodies
+# Misplaced braces
+# Space before paren in function call, or no space after if/for/while
+# Parenthesized return expression
+# Space after cast operator, or no space before * in cast operator
+# Line broken before binary operator
+# Lack of spaces around binary operator (sometimes)
+# Assignment at the beginning of an if conditional
+# Use of prohibited string functions
+# Lack of braces around 2+ line flow control body
+#
+# This program does not check for the following:
+#
+# Anything outside of a function body except line length/whitespace
+# Anything non-syntactic (proper cleanup flow control, naming, etc.)
+# Indentation or alignment of continuation lines
+# UTF-8 violations
+# Implicit tests against NULL or '\0'
+# Inner-scope variable declarations
+# Over- or under-parenthesization
+# Long or deeply nested function bodies
+# Syntax of function calls through pointers
+
+import re
+import sys
+
+def warn(ln, msg):
+ print '%5d %s' % (ln, msg)
+
+
+def check_length(line, ln):
+ if len(line) > 79 and not line.startswith(' * Copyright'):
+ warn(ln, 'Length exceeds 79 characters')
+
+
+def check_tabs(line, ln, allow_tabs, seen_tab):
+ if not allow_tabs:
+ if '\t' in line:
+ warn(ln, 'Tab character in file which does not allow tabs')
+ else:
+ if ' \t' in line:
+ warn(ln, 'Tab character immediately following space')
+ if ' ' in line and seen_tab:
+ warn(ln, '8+ spaces in file which uses tabs')
+
+
+def check_trailing_whitespace(line, ln):
+ if line and line[-1] in ' \t':
+ warn(ln, 'Trailing whitespace')
+
+
+def check_comment(lines, ln):
+ align = lines[0].index('/*') + 1
+ if not lines[0].lstrip().startswith('/*'):
+ warn(ln, 'Multi-line comment begins after code')
+ for line in lines[1:]:
+ ln += 1
+ if len(line) <= align or line[align] != '*':
+ warn(ln, 'Comment line does not have * aligned with top')
+ elif line[:align].lstrip() != '':
+ warn(ln, 'Garbage before * in comment line')
+ if not lines[-1].rstrip().endswith('*/'):
+ warn(ln, 'Code after end of multi-line comment')
+ if len(lines) > 2 and (lines[0].strip() not in ('/*', '/**') or
+ lines[-1].strip() != '*/'):
+ warn(ln, 'Comment is 3+ lines but is not formatted as block comment')
+
+
+def check_preprocessor(line, ln):
+ if line.startswith('#'):
+ warn(ln, 'Preprocessor statement in function body')
+
+
+def check_braces(line, ln):
+ # Strip out one-line initializer expressions.
+ line = re.sub(r'=\s*{.*}', '', line)
+ if line.lstrip().startswith('{') and not line.startswith('{'):
+ warn(ln, 'Un-cuddled open brace')
+ if re.search(r'{\s*\S', line):
+ warn(ln, 'Code on line after open brace')
+ if re.search(r'\S.*}', line):
+ warn(ln, 'Code on line before close brace')
+
+
+# This test gives false positives on function pointer type
+# declarations or casts. Avoid this by using typedefs.
+def check_space_before_paren(line, ln):
+ for m in re.finditer(r'([\w]+)(\s*)\(', line):
+ ident, ws = m.groups()
+ if ident in ('if', 'for', 'while', 'switch'):
+ if not ws:
+ warn(ln, 'No space after flow control keyword')
+ elif ident != 'return':
+ if ws:
+ warn(ln, 'Space before paren in function call')
+
+
+def check_parenthesized_return(line, ln):
+ if re.search(r'return\s*\(.*\);', line):
+ warn(ln, 'Parenthesized return expression')
+
+
+def check_cast(line, ln):
+ # We can't reliably distinguish cast operators from parenthesized
+ # expressions or function call parameters without a real C parser,
+ # so we use some heuristics. A cast operator is followed by an
+ # expression, which usually begins with an identifier or an open
+ # paren. A function call or parenthesized expression is never
+ # followed by an identifier and only rarely by an open paren. We
+ # won't detect a cast operator when it's followed by an expression
+ # beginning with '*', since it's hard to distinguish that from a
+ # multiplication operator. We will get false positives from
+ # "(*fp) (args)" and "if (condition) statement", but both of those
+ # are erroneous anyway.
+ for m in re.finditer(r'\(([^(]+)\)(\s+)[a-zA-Z_]', line):
+ if m.group(2):
+ warn(ln, 'Space after cast operator (or inline if/while body)')
+ # Check for casts like (char*) which should have a space.
+ if re.search(r'[^\s\*]\*+$', m.group(1)):
+ warn(ln, 'No space before * in cast operator')
+
+
+def check_binary_operator(line, ln):
+ binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'
+ if re.match(r'\s*' + binop + r'\s', line):
+ warn(ln - 1, 'Line broken before binary operator')
+ if re.search(r'\w' + binop + r'\w', line):
+ warn(ln, 'No space before or after binary operator')
+
+
+def check_assignment_in_conditional(line, ln):
+ # Check specifically for if statements; we allow assignments in
+ # loop expressions.
+ if re.search(r'if\s*\(+\w+\s*=[^=]', line):
+ warn(ln, 'Assignment in if conditional')
+
+
+def check_unbraced_do(line, ln):
+ if re.match(r'\s*do$', line):
+ warn(ln, 'do statement without braces')
+
+
+def check_bad_string_fn(line, ln):
+ # This is intentionally pretty fuzzy so that we catch the whole scanf
+ if re.search(r'\W(strcpy|strcat|sprintf|\w*scanf)\W', line):
+ warn(ln, 'Prohibited string function')
+
+
+def check_file(lines):
+ # Check if this file allows tabs.
+ if len(lines) == 0:
+ return
+ allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
+ seen_tab = False
+
+ in_function = False
+ unbraced_flow_body_count = -1
+ comment = []
+ ln = 0
+ for line in lines:
+ ln += 1
+ line = line.rstrip('\r\n')
+ indent = len(re.match('\s*', line).group(0).expandtabs())
+ seen_tab = seen_tab or ('\t' in line)
+
+ # Check line structure issues before altering the line.
+ check_length(line, ln)
+ check_tabs(line, ln, allow_tabs, seen_tab)
+ check_trailing_whitespace(line, ln)
+
+ # Strip out single-line comments the contents of string literals.
+ if not comment:
+ line = re.sub(r'/\*.*?\*/', '', line)
+ line = re.sub(r'"(\\.|[^"])*"', '""', line)
+
+ # Parse out and check multi-line comments. (Ignore code on
+ # the first or last line; check_comment will warn about it.)
+ if comment or '/*' in line:
+ comment.append(line)
+ if '*/' in line:
+ check_comment(comment, ln - len(comment) + 1)
+ comment = []
+ continue
+
+ # Warn if we see a // comment and ignore anything following.
+ if '//' in line:
+ warn(ln, '// comment')
+ line = re.sub(r'//.*/', '', line)
+
+ if line.startswith('{'):
+ in_function = True
+ elif line.startswith('}'):
+ in_function = False
+
+ if in_function:
+ check_preprocessor(line, ln)
+ check_braces(line, ln)
+ check_space_before_paren(line, ln)
+ check_parenthesized_return(line, ln)
+ check_cast(line, ln)
+ check_binary_operator(line, ln)
+ check_assignment_in_conditional(line, ln)
+ check_unbraced_do(line, ln)
+ check_bad_string_fn(line, ln)
+
+ # Check for unbraced flow statement bodies.
+ if unbraced_flow_body_count != -1:
+ if indent > flow_statement_indent:
+ unbraced_flow_body_count += 1
+ else:
+ if unbraced_flow_body_count > 1:
+ warn(ln - 1, 'Body is 2+ lines but has no braces')
+ unbraced_flow_body_count = -1
+ if (re.match('(\s*)(if|else if|for|while)\s*\(.*\)$', line) or
+ re.match('(\s*)else$', line)):
+ unbraced_flow_body_count = 0
+ flow_statement_indent = indent
+
+ if lines[-1] == '':
+ warn(ln, 'Blank line at end of file')
+
+
+if len(sys.argv) == 1:
+ lines = sys.stdin.readlines()
+elif len(sys.argv) == 2:
+ f = open(sys.argv[1])
+ lines = f.readlines()
+ f.close()
+else:
+ sys.stderr.write('Usage: cstyle-file [filename]\n')
+ sys.exit(1)
+
+check_file(lines)
diff --git a/src/util/cstyle.py b/src/util/cstyle.py
new file mode 100644
index 0000000..7c45335
--- /dev/null
+++ b/src/util/cstyle.py
@@ -0,0 +1,188 @@
+# Copyright (C) 2012 by the Massachusetts Institute of Technology.
+# All rights reserved.
+#
+# Export of this software from the United States of America may
+# require a specific license from the United States Government.
+# It is the responsibility of any person or organization contemplating
+# export to obtain such a license before exporting.
+#
+# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+# distribute this software and its documentation for any purpose and
+# without fee is hereby granted, provided that the above copyright
+# notice appear in all copies and that both that copyright notice and
+# this permission notice appear in supporting documentation, and that
+# the name of M.I.T. not be used in advertising or publicity pertaining
+# to distribution of the software without specific, written prior
+# permission. Furthermore if you modify this software you must label
+# your software as modified software and not distribute it in such a
+# fashion that it might be confused with the original M.I.T. software.
+# M.I.T. makes no representations about the suitability of
+# this software for any purpose. It is provided "as is" without express
+# or implied warranty.
+
+# This program attempts to detect MIT krb5 coding style violations
+# attributable to the changes a series of git commits. It can be run
+# from anywhere within a git working tree.
+
+import getopt
+import os
+import re
+import sys
+from subprocess import Popen, PIPE, call
+
+def usage():
+ u = ['Usage: cstyle [-w] [rev|rev1..rev2]',
+ '',
+ 'By default, checks working tree against HEAD, or checks changes in',
+ 'HEAD if the working tree is clean. With a revision option, checks',
+ 'changes in rev or the series rev1..rev2. With the -w option,',
+ 'checks working tree against rev (defaults to HEAD).']
+ sys.stderr.write('\n'.join(u) + '\n')
+ sys.exit(1)
+
+
+# Run a command and return a list of its output lines.
+def run(args):
+ # subprocess.check_output would be ideal here, but requires Python 2.7.
+ p = Popen(args, stdout=PIPE, stderr=PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ sys.stderr.write('Failed command: ' + ' '.join(args) + '\n')
+ if err != '':
+ sys.stderr.write('stderr:\n' + err)
+ sys.stderr.write('Unexpected command failure, exiting\n')
+ sys.exit(1)
+ return out.splitlines()
+
+
+# Find the top level of the git working tree, or None if we're not in
+# one.
+def find_toplevel():
+ # git doesn't seem to have a way to do this, so we search by hand.
+ dir = os.getcwd()
+ while True:
+ if os.path.exists(os.path.join(dir, '.git')):
+ break
+ parent = os.path.dirname(dir)
+ if (parent == dir):
+ return None
+ dir = parent
+ return dir
+
+
+# Check for style issues in a file within rev (or within the current
+# checkout if rev is None). Report only problems on line numbers in
+# new_lines.
+line_re = re.compile(r'^\s*(\d+) (.*)$')
+def check_file(filename, rev, new_lines):
+ # Process only C source files under src.
+ root, ext = os.path.splitext(filename)
+ if not filename.startswith('src/') or ext not in ('.c', '.h', '.hin'):
+ return
+ dispname = filename[4:]
+
+ if rev is None:
+ p1 = Popen(['cat', filename], stdout=PIPE)
+ else:
+ p1 = Popen(['git', 'show', rev + ':' + filename], stdout=PIPE)
+ p2 = Popen(['python', 'src/util/cstyle-file.py'], stdin=p1.stdout,
+ stdout=PIPE)
+ p1.stdout.close()
+ out, err = p2.communicate()
+ if p2.returncode != 0:
+ sys.exit(1)
+
+ first = True
+ for line in out.splitlines():
+ m = line_re.match(line)
+ if int(m.group(1)) in new_lines:
+ if first:
+ print ' ' + dispname + ':'
+ first = False
+ print ' ' + line
+
+
+# Determine the lines of each file modified by diff (a sequence of
+# strings) and check for style violations in those lines. rev
+# indicates the version in which the new contents of each file can be
+# found, or is None if the current contents are in the working copy.
+chunk_header_re = re.compile(r'^@@ -\d+(,(\d+))? \+(\d+)(,(\d+))? @@')
+def check_diff(diff, rev):
+ old_count, new_count, lineno = 0, 0, 0
+ filename = None
+ for line in diff:
+ if not line or line.startswith('\\ No newline'):
+ continue
+ if old_count > 0 or new_count > 0:
+ # We're in a chunk.
+ if line[0] == '+':
+ new_lines.append(lineno)
+ if line[0] in ('+', ' '):
+ new_count = new_count - 1
+ lineno = lineno + 1
+ if line[0] in ('-', ' '):
+ old_count = old_count - 1
+ elif line.startswith('+++ b/'):
+ # We're starting a new file. Check the last one.
+ if filename:
+ check_file(filename, rev, new_lines)
+ filename = line[6:]
+ new_lines = []
+ else:
+ m = chunk_header_re.match(line)
+ if m:
+ old_count = int(m.group(2) or '1')
+ lineno = int(m.group(3))
+ new_count = int(m.group(5) or '1')
+
+ # Check the last file in the diff.
+ if filename:
+ check_file(filename, rev, new_lines)
+
+
+# Check a sequence of revisions for style issues.
+def check_series(revlist):
+ for rev in revlist:
+ sys.stdout.flush()
+ call(['git', 'show', '-s', '--oneline', rev])
+ diff = run(['git', 'diff-tree', '--no-commit-id', '--root', '-M',
+ '--cc', rev])
+ check_diff(diff, rev)
+
+
+# Parse arguments.
+try:
+ opts, args = getopt.getopt(sys.argv[1:], 'w')
+except getopt.GetoptError, err:
+ print str(err)
+ usage()
+if len(args) > 1:
+ usage()
+
+# Change to the top level of the working tree so we easily run the file
+# checker and refer to working tree files.
+toplevel = find_toplevel()
+if toplevel is None:
+ sys.stderr.write('%s must be run within a git working tree')
+os.chdir(toplevel)
+
+if ('-w', '') in opts:
+ # Check the working tree against a base revision.
+ arg = 'HEAD'
+ if args:
+ arg = args[0]
+ check_diff(run(['git', 'diff', arg]), None)
+elif args:
+ # Check the differences in a rev or a series of revs.
+ if '..' in args[0]:
+ check_series(run(['git', 'rev-list', '--reverse', args[0]]))
+ else:
+ check_series([args[0]])
+else:
+ # No options or arguments. Check the differences against HEAD, or
+ # the differences in HEAD if the working tree is clean.
+ diff = run(['git', 'diff', 'HEAD'])
+ if diff:
+ check_diff(diff, None)
+ else:
+ check_series(['HEAD'])
More information about the cvs-krb5
mailing list