Blame - tools/cpplint.py - aom

blob: 25fbef73d8edac6526704db76dacd5b7f70b65df [file] [log] [blame]

John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1	#!/usr/bin/python
				2	#
				3	# Copyright (c) 2009 Google Inc. All rights reserved.
				4	#
				5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
				8	#
				9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
				18	#
				19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	31	"""Does google-lint on c++ files.
				32
				33	The goal of this script is to identify places in the code that may
				34	be in non-compliance with google style. It does not attempt to fix
				35	up these problems -- the point is to educate. It does also not
				36	attempt to find all problems, or to ensure that everything it does
				37	find is legitimately a problem.
				38
				39	In particular, we can get very confused by /* and // inside strings!
				40	We do a small hack, which is to ignore //'s with "'s after them on the
				41	same line, but it is far from perfect (in either direction).
				42	"""
				43
				44	import codecs
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	45	import copy
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	46	import getopt
				47	import math # for log
				48	import os
				49	import re
				50	import sre_compile
				51	import string
				52	import sys
				53	import unicodedata
				54
				55
				56	_USAGE = """
				57	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	58	[--counting=total|toplevel|detailed] [--root=subdir]
				59	[--linelength=digits]
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	60	<file> [file] ...
				61
				62	The style guidelines this tries to follow are those in
				63	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				64
				65	Every problem is given a confidence score from 1-5, with 5 meaning we are
				66	certain of the problem, and 1 meaning it could be a legitimate construct.
				67	This will miss some errors, and is not a substitute for a code review.
				68
				69	To suppress false-positive errors of a certain category, add a
				70	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				71	suppresses errors of all categories on that line.
				72
				73	The files passed in will be linted; at least one file must be provided.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	74	Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the
				75	extensions with the --extensions flag.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	76
				77	Flags:
				78
				79	output=vs7
				80	By default, the output is formatted to ease emacs parsing. Visual Studio
				81	compatible output (vs7) may also be used. Other formats are unsupported.
				82
				83	verbose=#
				84	Specify a number 0-5 to restrict errors to certain verbosity levels.
				85
				86	filter=-x,+y,...
				87	Specify a comma-separated list of category-filters to apply: only
				88	error messages whose category names pass the filters will be printed.
				89	(Category names are printed with the message and look like
				90	"[whitespace/indent]".) Filters are evaluated left to right.
				91	"-FOO" and "FOO" means "do not print categories that start with FOO".
				92	"+FOO" means "do print categories that start with FOO".
				93
				94	Examples: --filter=-whitespace,+whitespace/braces
				95	--filter=whitespace,runtime/printf,+runtime/printf_format
				96	--filter=-,+build/include_what_you_use
				97
				98	To see a list of all the categories used in cpplint, pass no arg:
				99	--filter=
				100
				101	counting=total|toplevel|detailed
				102	The total number of errors found is always printed. If
				103	'toplevel' is provided, then the count of errors in each of
				104	the top-level categories like 'build' and 'whitespace' will
				105	also be printed. If 'detailed' is provided, then a count
				106	is provided for each category like 'build/class'.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	107
				108	root=subdir
				109	The root directory used for deriving header guard CPP variable.
				110	By default, the header guard CPP variable is calculated as the relative
				111	path to the directory that contains .git, .hg, or .svn. When this flag
				112	is specified, the relative path is calculated from the specified
				113	directory. If the specified directory does not exist, this flag is
				114	ignored.
				115
				116	Examples:
				117	Assuming that src/.git exists, the header guard CPP variables for
				118	src/chrome/browser/ui/browser.h are:
				119
				120	No flag => CHROME_BROWSER_UI_BROWSER_H_
				121	--root=chrome => BROWSER_UI_BROWSER_H_
				122	--root=chrome/browser => UI_BROWSER_H_
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	123
				124	linelength=digits
				125	This is the allowed line length for the project. The default value is
				126	80 characters.
				127
				128	Examples:
				129	--linelength=120
				130
				131	extensions=extension,extension,...
				132	The allowed file extensions that cpplint will check
				133
				134	Examples:
				135	--extensions=hpp,cpp
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	136	"""
				137
				138	# We categorize each error message we print. Here are the categories.
				139	# We want an explicit list so we can list them all in cpplint --filter=.
				140	# If you add a new error message with a new category, add it to the list
				141	# here! cpplint_unittest.py should tell you if you forget to do this.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	142	_ERROR_CATEGORIES = [
				143	'build/class',
				144	'build/deprecated',
				145	'build/endif_comment',
				146	'build/explicit_make_pair',
				147	'build/forward_decl',
				148	'build/header_guard',
				149	'build/include',
				150	'build/include_alpha',
				151	'build/include_order',
				152	'build/include_what_you_use',
				153	'build/namespaces',
				154	'build/printf_format',
				155	'build/storage_class',
				156	'legal/copyright',
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	157	'readability/alt_tokens',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	158	'readability/braces',
				159	'readability/casting',
				160	'readability/check',
				161	'readability/constructors',
				162	'readability/fn_size',
				163	'readability/function',
				164	'readability/multiline_comment',
				165	'readability/multiline_string',
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	166	'readability/namespace',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	167	'readability/nolint',
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	168	'readability/nul',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	169	'readability/streams',
				170	'readability/todo',
				171	'readability/utf8',
				172	'runtime/arrays',
				173	'runtime/casting',
				174	'runtime/explicit',
				175	'runtime/int',
				176	'runtime/init',
				177	'runtime/invalid_increment',
				178	'runtime/member_string_references',
				179	'runtime/memset',
				180	'runtime/operator',
				181	'runtime/printf',
				182	'runtime/printf_format',
				183	'runtime/references',
James Zern	3fcaf97	2014-01-21 17:56:04 -0800	[diff] [blame]	184	'runtime/sizeof',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	185	'runtime/string',
				186	'runtime/threadsafe_fn',
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	187	'runtime/vlog',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	188	'whitespace/blank_line',
				189	'whitespace/braces',
				190	'whitespace/comma',
				191	'whitespace/comments',
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	192	'whitespace/empty_conditional_body',
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	193	'whitespace/empty_loop_body',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	194	'whitespace/end_of_line',
				195	'whitespace/ending_newline',
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	196	'whitespace/forcolon',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	197	'whitespace/indent',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	198	'whitespace/line_length',
				199	'whitespace/newline',
				200	'whitespace/operators',
				201	'whitespace/parens',
				202	'whitespace/semicolon',
				203	'whitespace/tab',
				204	'whitespace/todo'
				205	]
				206
				207	# The default state of the category filter. This is overridden by the --filter=
				208	# flag. By default all errors are on, so only add here categories that should be
				209	# off by default (i.e., categories that must be enabled by the --filter= flags).
				210	# All entries here should start with a '-' or '+', as in the --filter= flag.
				211	_DEFAULT_FILTERS = ['-build/include_alpha']
				212
				213	# We used to check for high-bit characters, but after much discussion we
				214	# decided those were OK, as long as they were in UTF-8 and didn't represent
				215	# hard-coded international strings, which belong in a separate i18n file.
				216
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	217
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	218	# C++ headers
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	219	_CPP_HEADERS = frozenset([
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	220	# Legacy
				221	'algobase.h',
				222	'algo.h',
				223	'alloc.h',
				224	'builtinbuf.h',
				225	'bvector.h',
				226	'complex.h',
				227	'defalloc.h',
				228	'deque.h',
				229	'editbuf.h',
				230	'fstream.h',
				231	'function.h',
				232	'hash_map',
				233	'hash_map.h',
				234	'hash_set',
				235	'hash_set.h',
				236	'hashtable.h',
				237	'heap.h',
				238	'indstream.h',
				239	'iomanip.h',
				240	'iostream.h',
				241	'istream.h',
				242	'iterator.h',
				243	'list.h',
				244	'map.h',
				245	'multimap.h',
				246	'multiset.h',
				247	'ostream.h',
				248	'pair.h',
				249	'parsestream.h',
				250	'pfstream.h',
				251	'procbuf.h',
				252	'pthread_alloc',
				253	'pthread_alloc.h',
				254	'rope',
				255	'rope.h',
				256	'ropeimpl.h',
				257	'set.h',
				258	'slist',
				259	'slist.h',
				260	'stack.h',
				261	'stdiostream.h',
				262	'stl_alloc.h',
				263	'stl_relops.h',
				264	'streambuf.h',
				265	'stream.h',
				266	'strfile.h',
				267	'strstream.h',
				268	'tempbuf.h',
				269	'tree.h',
				270	'type_traits.h',
				271	'vector.h',
				272	# 17.6.1.2 C++ library headers
				273	'algorithm',
				274	'array',
				275	'atomic',
				276	'bitset',
				277	'chrono',
				278	'codecvt',
				279	'complex',
				280	'condition_variable',
				281	'deque',
				282	'exception',
				283	'forward_list',
				284	'fstream',
				285	'functional',
				286	'future',
				287	'initializer_list',
				288	'iomanip',
				289	'ios',
				290	'iosfwd',
				291	'iostream',
				292	'istream',
				293	'iterator',
				294	'limits',
				295	'list',
				296	'locale',
				297	'map',
				298	'memory',
				299	'mutex',
				300	'new',
				301	'numeric',
				302	'ostream',
				303	'queue',
				304	'random',
				305	'ratio',
				306	'regex',
				307	'set',
				308	'sstream',
				309	'stack',
				310	'stdexcept',
				311	'streambuf',
				312	'string',
				313	'strstream',
				314	'system_error',
				315	'thread',
				316	'tuple',
				317	'typeindex',
				318	'typeinfo',
				319	'type_traits',
				320	'unordered_map',
				321	'unordered_set',
				322	'utility',
				323	'valarray',
				324	'vector',
				325	# 17.6.1.2 C++ headers for C library facilities
				326	'cassert',
				327	'ccomplex',
				328	'cctype',
				329	'cerrno',
				330	'cfenv',
				331	'cfloat',
				332	'cinttypes',
				333	'ciso646',
				334	'climits',
				335	'clocale',
				336	'cmath',
				337	'csetjmp',
				338	'csignal',
				339	'cstdalign',
				340	'cstdarg',
				341	'cstdbool',
				342	'cstddef',
				343	'cstdint',
				344	'cstdio',
				345	'cstdlib',
				346	'cstring',
				347	'ctgmath',
				348	'ctime',
				349	'cuchar',
				350	'cwchar',
				351	'cwctype',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	352	])
				353
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Each _M variant is listed ahead of its plain
# counterpart so that substring matching sees the longer name first.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> suggested replacement macro}.
_CHECK_REPLACEMENT = {}
for m in _CHECK_MACROS:
  _CHECK_REPLACEMENT[m] = {}

# Macros asserting that a condition holds: the operator maps directly onto
# the suffix of the specialised macro.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for macro, template in [('DCHECK', 'DCHECK_%s'),
                          ('CHECK', 'CHECK_%s'),
                          ('EXPECT_TRUE', 'EXPECT_%s'),
                          ('ASSERT_TRUE', 'ASSERT_%s'),
                          ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                          ('ASSERT_TRUE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % replacement

# Macros asserting that a condition does NOT hold: the operator maps onto
# the suffix of the logically inverted comparison.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for macro, template in [('EXPECT_FALSE', 'EXPECT_%s'),
                          ('ASSERT_FALSE', 'ASSERT_%s'),
                          ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                          ('ASSERT_FALSE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % inv_replacement
				385
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
#
# The keywords are joined with a bare '|' so that they form a regex
# alternation; an escaped '\|' would only match a literal pipe character.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
				412
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	413
				414	# These constants define types of headers for use with
				415	# _IncludeState.CheckNextIncludeOrder().
				416	_C_SYS_HEADER = 1
				417	_CPP_SYS_HEADER = 2
				418	_LIKELY_MY_HEADER = 3
				419	_POSSIBLE_MY_HEADER = 4
				420	_OTHER_HEADER = 5
				421
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	422	# These constants define the current inline assembly state
				423	_NO_ASM = 0 # Outside of inline assembly block
				424	_INSIDE_ASM = 1 # Inside inline assembly block
				425	_END_ASM = 2 # Last line of inline assembly block
				426	_BLOCK_ASM = 3 # The whole block is an inline assembly block
				427
				428	# Match start of assembly blocks
				429	_MATCH_ASM = re.compile(r'^\s*(?:asm\|_asm\|__asm\|__asm__)'
				430	r'(?:\s+(volatile\|__volatile__))?'
				431	r'\s*[{(]')
				432
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	433
				434	_regexp_compile_cache = {}
				435
				436	# Finds occurrences of NOLINT or NOLINT(...).
				437	_RE_SUPPRESSION = re.compile(r'\bNOLINT\b($[^)]*$)?')
				438
				439	# {str, set(int)}: a map from error categories to sets of linenumbers
				440	# on which those errors are expected and should be suppressed.
				441	_error_suppressions = {}
				442
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	443	# The root directory used for deriving header guard CPP variable.
				444	# This is set by --root flag.
				445	_root = None
				446
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	447	# The allowed line length of files.
				448	# This is set by --linelength flag.
				449	_line_length = 80
				450
				451	# The allowed extensions for file names
				452	# This is set by --extensions flag.
				453	_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
				454
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records any NOLINT suppression found on one line of input.

  Looks for a NOLINT comment in raw_line and, when one is present, adds
  linenum to the global _error_suppressions store under the named category
  (or under None when every category is suppressed).  A category that is
  not listed in _ERROR_CATEGORIES is reported through the error callback.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  nolint = _RE_SUPPRESSION.search(raw_line)
  if not nolint:
    return

  category = nolint.group(1)
  if category in (None, '(*)'):  # => "suppress all"
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (category.startswith('(') and category.endswith(')')):
    return

  # Drop the surrounding parentheses to get the bare category name.
  category = category[1:-1]
  if category not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
    return
  _error_suppressions.setdefault(category, set()).add(linenum)
				482
				483
				484	def ResetNolintSuppressions():
				485	"Resets the set of NOLINT suppressions to empty."
				486	_error_suppressions.clear()
				487
				488
				489	def IsErrorSuppressedByNolint(category, linenum):
				490	"""Returns true if the specified error category is suppressed on this line.
				491
				492	Consults the global error_suppressions map populated by
				493	ParseNolintSuppressions/ResetNolintSuppressions.
				494
				495	Args:
				496	category: str, the category of the error.
				497	linenum: int, the current line number.
				498	Returns:
				499	bool, True iff the error should be suppressed due to a NOLINT comment.
				500	"""
				501	return (linenum in _error_suppressions.get(category, set()) or
				502	linenum in _error_suppressions.get(None, set()))
				503
				504	def Match(pattern, s):
				505	"""Matches the string with the pattern, caching the compiled regexp."""
				506	# The regexp compilation caching is inlined in both Match and Search for
				507	# performance reasons; factoring it out into a separate function turns out
				508	# to be noticeably expensive.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	509	if pattern not in _regexp_compile_cache:
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	510	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				511	return _regexp_compile_cache[pattern].match(s)
				512
				513
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile rather than the private sre_compile module.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
				530
				531
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern, looked for anywhere in s.
    s: string to search.

  Returns:
    The first match object, or None when the pattern does not occur in s.
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile rather than the private sre_compile module.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
				537
				538
				539	class _IncludeState(dict):
				540	"""Tracks line numbers for includes, and the order in which includes appear.
				541
				542	As a dict, an _IncludeState object serves as a mapping between include
				543	filename and line number on which that file was included.
				544
				545	Call CheckNextIncludeOrder() once for each header in the file, passing
				546	in the type constants defined above. Calls in an illegal order will
				547	raise an _IncludeError with an appropriate error message.
				548
				549	"""
				550	# self._section will move monotonically through this set. If it ever
				551	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				552	_INITIAL_SECTION = 0
				553	_MY_H_SECTION = 1
				554	_C_SECTION = 2
				555	_CPP_SECTION = 3
				556	_OTHER_H_SECTION = 4
				557
				558	_TYPE_NAMES = {
				559	_C_SYS_HEADER: 'C system header',
				560	_CPP_SYS_HEADER: 'C++ system header',
				561	_LIKELY_MY_HEADER: 'header this file implements',
				562	_POSSIBLE_MY_HEADER: 'header this file may implement',
				563	_OTHER_HEADER: 'other header',
				564	}
				565	_SECTION_NAMES = {
				566	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				567	_MY_H_SECTION: 'a header this file implements',
				568	_C_SECTION: 'C system header',
				569	_CPP_SECTION: 'C++ system header',
				570	_OTHER_H_SECTION: 'other header',
				571	}
				572
				573	def __init__(self):
				574	dict.__init__(self)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	575	self.ResetSection()
				576
				577	def ResetSection(self):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	578	# The name of the current section.
				579	self._section = self._INITIAL_SECTION
				580	# The path of last found header.
				581	self._last_header = ''
				582
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	583	def SetLastHeader(self, header_path):
				584	self._last_header = header_path
				585
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	586	def CanonicalizeAlphabeticalOrder(self, header_path):
				587	"""Returns a path canonicalized for alphabetical comparison.
				588
				589	- replaces "-" with "_" so they both cmp the same.
				590	- removes '-inl' since we don't require them to be after the main header.
				591	- lowercase everything, just in case.
				592
				593	Args:
				594	header_path: Path to be canonicalized.
				595
				596	Returns:
				597	Canonicalized path.
				598	"""
				599	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				600
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	601	def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	602	"""Check if a header is in alphabetical order with the previous header.
				603
				604	Args:
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	605	clean_lines: A CleansedLines instance containing the file.
				606	linenum: The number of the line to check.
				607	header_path: Canonicalized header to be checked.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	608
				609	Returns:
				610	Returns true if the header is in alphabetical order.
				611	"""
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	612	# If previous section is different from current section, _last_header will
				613	# be reset to empty string, so it's always less than current header.
				614	#
				615	# If previous line was a blank line, assume that the headers are
				616	# intentionally sorted the way they are.
				617	if (self._last_header > header_path and
				618	not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	619	return False
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	620	return True
				621
				622	def CheckNextIncludeOrder(self, header_type):
				623	"""Returns a non-empty error message if the next header is out of order.
				624
				625	This function also updates the internal state to be ready to check
				626	the next include.
				627
				628	Args:
				629	header_type: One of the _XXX_HEADER constants defined above.
				630
				631	Returns:
				632	The empty string if the header is in the right order, or an
				633	error message describing what's wrong.
				634
				635	"""
				636	error_message = ('Found %s after %s' %
				637	(self._TYPE_NAMES[header_type],
				638	self._SECTION_NAMES[self._section]))
				639
				640	last_section = self._section
				641
				642	if header_type == _C_SYS_HEADER:
				643	if self._section <= self._C_SECTION:
				644	self._section = self._C_SECTION
				645	else:
				646	self._last_header = ''
				647	return error_message
				648	elif header_type == _CPP_SYS_HEADER:
				649	if self._section <= self._CPP_SECTION:
				650	self._section = self._CPP_SECTION
				651	else:
				652	self._last_header = ''
				653	return error_message
				654	elif header_type == _LIKELY_MY_HEADER:
				655	if self._section <= self._MY_H_SECTION:
				656	self._section = self._MY_H_SECTION
				657	else:
				658	self._section = self._OTHER_H_SECTION
				659	elif header_type == _POSSIBLE_MY_HEADER:
				660	if self._section <= self._MY_H_SECTION:
				661	self._section = self._MY_H_SECTION
				662	else:
				663	# This will always be the fallback because we're not sure
				664	# enough that the header is associated with this file.
				665	self._section = self._OTHER_H_SECTION
				666	else:
				667	assert header_type == _OTHER_HEADER
				668	self._section = self._OTHER_H_SECTION
				669
				670	if last_section != self._section:
				671	self._last_header = ''
				672
				673	return ''
				674
				675
				676	class _CppLintState(object):
				677	"""Maintains module-wide state.."""
				678
				679	def __init__(self):
				680	self.verbose_level = 1 # global setting.
				681	self.error_count = 0 # global count of reported errors
				682	# filters to apply when emitting error messages
				683	self.filters = _DEFAULT_FILTERS[:]
				684	self.counting = 'total' # In what way are we counting errors?
				685	self.errors_by_category = {} # string to int dict storing error counts
				686
				687	# output format:
				688	# "emacs" - format that emacs can parse (default)
				689	# "vs7" - format that Microsoft Visual Studio 7 can parse
				690	self.output_format = 'emacs'
				691
				692	def SetOutputFormat(self, output_format):
				693	"""Sets the output format for errors."""
				694	self.output_format = output_format
				695
				696	def SetVerboseLevel(self, level):
				697	"""Sets the module's verbosity, and returns the previous setting."""
				698	last_verbose_level = self.verbose_level
				699	self.verbose_level = level
				700	return last_verbose_level
				701
				702	def SetCountingStyle(self, counting_style):
				703	"""Sets the module's counting options."""
				704	self.counting = counting_style
				705
				706	def SetFilters(self, filters):
				707	"""Sets the error-message filters.
				708
				709	These filters are applied when deciding whether to emit a given
				710	error message.
				711
				712	Args:
				713	filters: A string of comma-separated filters (eg "+whitespace/indent").
				714	Each filter should start with + or -; else we die.
				715
				716	Raises:
				717	ValueError: The comma-separated filters did not all start with '+' or '-'.
				718	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
				719	"""
				720	# Default filters always have less priority than the flag ones.
				721	self.filters = _DEFAULT_FILTERS[:]
				722	for filt in filters.split(','):
				723	clean_filt = filt.strip()
				724	if clean_filt:
				725	self.filters.append(clean_filt)
				726	for filt in self.filters:
				727	if not (filt.startswith('+') or filt.startswith('-')):
				728	raise ValueError('Every filter in --filters must start with + or -'
				729	' (%s does not)' % filt)
				730
				731	def ResetErrorCounts(self):
				732	"""Sets the module's error statistic back to zero."""
				733	self.error_count = 0
				734	self.errors_by_category = {}
				735
				736	def IncrementErrorCount(self, category):
				737	"""Bumps the module's error statistic."""
				738	self.error_count += 1
				739	if self.counting in ('toplevel', 'detailed'):
				740	if self.counting != 'detailed':
				741	category = category.split('/')[0]
				742	if category not in self.errors_by_category:
				743	self.errors_by_category[category] = 0
				744	self.errors_by_category[category] += 1
				745
				746	def PrintErrorCounts(self):
				747	"""Print a summary of errors by category, and the total."""
				748	for category, count in self.errors_by_category.iteritems():
				749	sys.stderr.write('Category \'%s\' errors found: %d\n' %
				750	(category, count))
				751	sys.stderr.write('Total errors found: %d\n' % self.error_count)
				752
				753	_cpplint_state = _CppLintState()
				754
				755
				756	def _OutputFormat():
				757	"""Gets the module's output format."""
				758	return _cpplint_state.output_format
				759
				760
				761	def _SetOutputFormat(output_format):
				762	"""Sets the module's output format."""
				763	_cpplint_state.SetOutputFormat(output_format)
				764
				765
				766	def _VerboseLevel():
				767	"""Returns the module's verbosity setting."""
				768	return _cpplint_state.verbose_level
				769
				770
				771	def _SetVerboseLevel(level):
				772	"""Sets the module's verbosity, and returns the previous setting."""
				773	return _cpplint_state.SetVerboseLevel(level)
				774
				775
				776	def _SetCountingStyle(level):
				777	"""Sets the module's counting options."""
				778	_cpplint_state.SetCountingStyle(level)
				779
				780
				781	def _Filters():
				782	"""Returns the module's list of output filters, as a list."""
				783	return _cpplint_state.filters
				784
				785
				786	def _SetFilters(filters):
				787	"""Sets the module's error-message filters.
				788
				789	These filters are applied when deciding whether to emit a given
				790	error message.
				791
				792	Args:
				793	filters: A string of comma-separated filters (eg "whitespace/indent").
				794	Each filter should start with + or -; else we die.
				795	"""
				796	_cpplint_state.SetFilters(filters)
				797
				798
				799	class _FunctionState(object):
				800	"""Tracks current function name and the number of lines in its body."""
				801
				802	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				803	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				804
				805	def __init__(self):
				806	self.in_a_function = False
				807	self.lines_in_function = 0
				808	self.current_function = ''
				809
				810	def Begin(self, function_name):
				811	"""Start analyzing function body.
				812
				813	Args:
				814	function_name: The name of the function being tracked.
				815	"""
				816	self.in_a_function = True
				817	self.lines_in_function = 0
				818	self.current_function = function_name
				819
				820	def Count(self):
				821	"""Count line in current function body."""
				822	if self.in_a_function:
				823	self.lines_in_function += 1
				824
				825	def Check(self, error, filename, linenum):
				826	"""Report if too many lines in function body.
				827
				828	Args:
				829	error: The function to call with any errors found.
				830	filename: The name of the current file.
				831	linenum: The number of the line to check.
				832	"""
				833	if Match(r'T(EST\|est)', self.current_function):
				834	base_trigger = self._TEST_TRIGGER
				835	else:
				836	base_trigger = self._NORMAL_TRIGGER
				837	trigger = base_trigger * 2**_VerboseLevel()
				838
				839	if self.lines_in_function > trigger:
				840	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				841	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				842	if error_level > 5:
				843	error_level = 5
				844	error(filename, linenum, 'readability/fn_size', error_level,
				845	'Small and focused functions are preferred:'
				846	' %s has %d non-comment lines'
				847	' (error triggered by exceeding %d lines).' % (
				848	self.current_function, self.lines_in_function, trigger))
				849
				850	def End(self):
				851	"""Stop analyzing function body."""
				852	self.in_a_function = False
				853
				854
				855	class _IncludeError(Exception):
				856	"""Indicates a problem with the include order in a file."""
				857	pass
				858
				859
				860	class FileInfo:
				861	"""Provides utility functions for filenames.
				862
				863	FileInfo provides easy access to the components of a file's path
				864	relative to the project root.
				865	"""
				866
				867	def __init__(self, filename):
				868	self._filename = filename
				869
				870	def FullName(self):
				871	"""Make Windows paths like Unix."""
				872	return os.path.abspath(self._filename).replace('\\', '/')
				873
				874	def RepositoryName(self):
				875	"""FullName after removing the local path to the repository.
				876
				877	If we have a real absolute path name here we can try to do something smart:
				878	detecting the root of the checkout and truncating /path/to/checkout from
				879	the name so that we get header guards that don't include things like
				880	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				881	people on different computers who have checked the source out to different
				882	locations won't see bogus errors.
				883	"""
				884	fullname = self.FullName()
				885
				886	if os.path.exists(fullname):
				887	project_dir = os.path.dirname(fullname)
				888
				889	if os.path.exists(os.path.join(project_dir, ".svn")):
				890	# If there's a .svn file in the current directory, we recursively look
				891	# up the directory tree for the top of the SVN checkout
				892	root_dir = project_dir
				893	one_up_dir = os.path.dirname(root_dir)
				894	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				895	root_dir = os.path.dirname(root_dir)
				896	one_up_dir = os.path.dirname(one_up_dir)
				897
				898	prefix = os.path.commonprefix([root_dir, project_dir])
				899	return fullname[len(prefix) + 1:]
				900
				901	# Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
				902	# searching up from the current path.
				903	root_dir = os.path.dirname(fullname)
				904	while (root_dir != os.path.dirname(root_dir) and
				905	not os.path.exists(os.path.join(root_dir, ".git")) and
				906	not os.path.exists(os.path.join(root_dir, ".hg")) and
				907	not os.path.exists(os.path.join(root_dir, ".svn"))):
				908	root_dir = os.path.dirname(root_dir)
				909
				910	if (os.path.exists(os.path.join(root_dir, ".git")) or
				911	os.path.exists(os.path.join(root_dir, ".hg")) or
				912	os.path.exists(os.path.join(root_dir, ".svn"))):
				913	prefix = os.path.commonprefix([root_dir, project_dir])
				914	return fullname[len(prefix) + 1:]
				915
				916	# Don't know what to do; header guard warnings may be wrong...
				917	return fullname
				918
				919	def Split(self):
				920	"""Splits the file into the directory, basename, and extension.
				921
				922	For 'chrome/browser/browser.cc', Split() would
				923	return ('chrome/browser', 'browser', '.cc')
				924
				925	Returns:
				926	A tuple of (directory, basename, extension).
				927	"""
				928
				929	googlename = self.RepositoryName()
				930	project, rest = os.path.split(googlename)
				931	return (project,) + os.path.splitext(rest)
				932
				933	def BaseName(self):
				934	"""File base name - text after the final slash, before the final period."""
				935	return self.Split()[1]
				936
				937	def Extension(self):
				938	"""File extension - text following the final period."""
				939	return self.Split()[2]
				940
				941	def NoExtension(self):
				942	"""File has no source file extension."""
				943	return '/'.join(self.Split()[0:2])
				944
				945	def IsSource(self):
				946	"""File has a source file extension."""
				947	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				948
				949
				950	def _ShouldPrintError(category, confidence, linenum):
				951	"""If confidence >= verbose, category passes filter and is not suppressed."""
				952
				953	# There are three ways we might decide not to print an error message:
				954	# a "NOLINT(category)" comment appears in the source,
				955	# the verbosity level isn't high enough, or the filters filter it out.
				956	if IsErrorSuppressedByNolint(category, linenum):
				957	return False
				958	if confidence < _cpplint_state.verbose_level:
				959	return False
				960
				961	is_filtered = False
				962	for one_filter in _Filters():
				963	if one_filter.startswith('-'):
				964	if category.startswith(one_filter[1:]):
				965	is_filtered = True
				966	elif one_filter.startswith('+'):
				967	if category.startswith(one_filter[1:]):
				968	is_filtered = False
				969	else:
				970	assert False # should have been checked for in SetFilter.
				971	if is_filtered:
				972	return False
				973
				974	return True
				975
				976
				977	def Error(filename, linenum, category, confidence, message):
				978	"""Logs the fact we've found a lint error.
				979
				980	We log where the error was found, and also our confidence in the error,
				981	that is, how certain we are this is a legitimate style regression, and
				982	not a misidentification or a use that's sometimes justified.
				983
				984	False positives can be suppressed by the use of
				985	"cpplint(category)" comments on the offending line. These are
				986	parsed into _error_suppressions.
				987
				988	Args:
				989	filename: The name of the file containing the error.
				990	linenum: The number of the line containing the error.
				991	category: A string used to describe the "category" this bug
				992	falls under: "whitespace", say, or "runtime". Categories
				993	may have a hierarchy separated by slashes: "whitespace/indent".
				994	confidence: A number from 1-5 representing a confidence score for
				995	the error, with 5 meaning that we are certain of the problem,
				996	and 1 meaning that it could be a legitimate construct.
				997	message: The error message.
				998	"""
				999	if _ShouldPrintError(category, confidence, linenum):
				1000	_cpplint_state.IncrementErrorCount(category)
				1001	if _cpplint_state.output_format == 'vs7':
				1002	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				1003	filename, linenum, message, category, confidence))
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1004	elif _cpplint_state.output_format == 'eclipse':
				1005	sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
				1006	filename, linenum, message, category, confidence))
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1007	else:
				1008	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				1009	filename, linenum, message, category, confidence))
				1010
				1011
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1012	# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1013	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				1014	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				1015	# Matches strings. Escape codes should already be removed by ESCAPES.
				1016	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				1017	# Matches characters. Escape codes should already be removed by ESCAPES.
				1018	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				1019	# Matches multi-line C++ comments.
				1020	# This RE is a little bit more complicated than one might expect, because we
				1021	# have to take care of space removals tools so we can handle comments inside
				1022	# statements better.
				1023	# The current rule is: We only clear spaces from both sides when we're at the
				1024	# end of the line. Otherwise, we try to remove spaces from the right side,
				1025	# if this doesn't work we try on left side but only if there's a non-character
				1026	# on the right.
				1027	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				1028	r"""(\s/\.\/\s*$\|
				1029	/\.\*/\s+\|
				1030	\s+/\.\*/(?=\W)\|
				1031	/\.\*/)""", re.VERBOSE)
				1032
				1033
				1034	def IsCppString(line):
				1035	"""Does line terminate so, that the next symbol is in string constant.
				1036
				1037	This function does not consider single-line nor multi-line comments.
				1038
				1039	Args:
				1040	line: is a partial line of code starting from the 0..n.
				1041
				1042	Returns:
				1043	True, if next character appended to 'line' is inside a
				1044	string constant.
				1045	"""
				1046
				1047	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				1048	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				1049
				1050
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """

  # While inside a raw string, 'delimiter' holds the text that terminates it,
  # i.e. ')' + the (possibly empty) user delimiter + '"'.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, append a blank line.
        line = ''

    else:
      # Look for beginning of a raw string.
      # See [lex.string] in the C++11 standard for the syntax.
      # group(1) is everything before the raw-string prefix, group(2) the
      # optional delimiter (zero or more characters) and group(3) the rest
      # of the line after the opening parenthesis.
      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if matched:
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
				1110
				1111
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  Args:
    lines: The list of lines to scan.
    lineix: The index at which to start scanning.

  Returns:
    The index of the first line at or after lineix that begins with '/*'
    and does not close the comment itself, or len(lines) if there is none.
  """
  total = len(lines)
  for index in range(lineix, total):
    stripped = lines[index].strip()
    # A comment that is opened and closed on one line is not a start marker.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return total
				1121
				1122
				1123	def FindNextMultiLineCommentEnd(lines, lineix):
				1124	"""We are inside a comment, find the end marker."""
				1125	while lineix < len(lines):
				1126	if lines[lineix].strip().endswith('*/'):
				1127	return lineix
				1128	lineix += 1
				1129	return len(lines)
				1130
				1131
				1132	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				1133	"""Clears a range of lines for multi-line comments."""
				1134	# Having // dummy comments makes the lines non-empty, so we will not get
				1135	# unnecessary blank line warnings later in the code.
				1136	for i in range(begin, end):
				1137	lines[i] = '// dummy'
				1138
				1139
				1140	def RemoveMultiLineComments(filename, lines, error):
				1141	"""Removes multiline (c-style) comments from lines."""
				1142	lineix = 0
				1143	while lineix < len(lines):
				1144	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				1145	if lineix_begin >= len(lines):
				1146	return
				1147	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				1148	if lineix_end >= len(lines):
				1149	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				1150	'Could not find end of multi-line comment')
				1151	return
				1152	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				1153	lineix = lineix_end + 1
				1154
				1155
				1156	def CleanseComments(line):
				1157	"""Removes //-comments and single-line C-style /* */ comments.
				1158
				1159	Args:
				1160	line: A line of C++ source.
				1161
				1162	Returns:
				1163	The line with single-line comments removed.
				1164	"""
				1165	commentpos = line.find('//')
				1166	if commentpos != -1 and not IsCppString(line[:commentpos]):
				1167	line = line[:commentpos].rstrip()
				1168	# get rid of /* ... */
				1169	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				1170
				1171
				1172	class CleansedLines(object):
				1173	"""Holds 3 copies of all lines with different preprocessing applied to them.
				1174
				1175	1) elided member contains lines without strings and comments,
				1176	2) lines member contains lines without comments, and
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1177	3) raw_lines member contains all the lines without processing.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1178	All these three members are of <type 'list'>, and of the same length.
				1179	"""
				1180
				1181	def __init__(self, lines):
				1182	self.elided = []
				1183	self.lines = []
				1184	self.raw_lines = lines
				1185	self.num_lines = len(lines)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1186	self.lines_without_raw_strings = CleanseRawStrings(lines)
				1187	for linenum in range(len(self.lines_without_raw_strings)):
				1188	self.lines.append(CleanseComments(
				1189	self.lines_without_raw_strings[linenum]))
				1190	elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1191	self.elided.append(CleanseComments(elided))
				1192
				1193	def NumLines(self):
				1194	"""Returns the number of lines represented."""
				1195	return self.num_lines
				1196
				1197	@staticmethod
				1198	def _CollapseStrings(elided):
				1199	"""Collapses strings and chars on a line to simple "" or '' blocks.
				1200
				1201	We nix strings first so we're not fooled by text like '"http://"'
				1202
				1203	Args:
				1204	elided: The line being processed.
				1205
				1206	Returns:
				1207	The line with collapsed strings.
				1208	"""
				1209	if not _RE_PATTERN_INCLUDE.match(elided):
				1210	# Remove escaped characters first to make quote/single quote collapsing
				1211	# basic. Things that look like escaped characters shouldn't occur
				1212	# outside of strings and chars.
				1213	elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
				1214	elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
				1215	elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
				1216	return elided
				1217
				1218
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() is used instead of xrange() so the function also runs on
  # Python 3, where xrange no longer exists.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1241
				1242
def CloseExpression(clean_lines, linenum, pos):
  """Locates the bracket that closes the one at the given position.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  opener = line[pos]
  closer = {'(': ')', '[': ']', '{': '}', '<': '>'}.get(opener)
  if closer is None:
    # Not positioned on an opening bracket.
    return (line, clean_lines.NumLines(), -1)

  # Scan the starting line first, then keep going line by line, carrying
  # the nesting depth across lines.
  (end_pos, depth) = FindEndOfExpressionInLine(line, pos, 0, opener, closer)
  while end_pos <= -1 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    (end_pos, depth) = FindEndOfExpressionInLine(
        line, 0, depth, opener, closer)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1287
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1288
				1289	def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
				1290	"""Find position at the matching startchar.
				1291
				1292	This is almost the reverse of FindEndOfExpressionInLine, but note
				1293	that the input position and returned position differs by 1.
				1294
				1295	Args:
				1296	line: a CleansedLines line.
				1297	endpos: start searching at this position.
				1298	depth: nesting level at endpos.
				1299	startchar: expression opening character.
				1300	endchar: expression closing character.
				1301
				1302	Returns:
				1303	On finding matching startchar: (index at matching startchar, 0)
				1304	Otherwise: (-1, new depth at beginning of this line)
				1305	"""
				1306	for i in xrange(endpos, -1, -1):
				1307	if line[i] == endchar:
				1308	depth += 1
				1309	elif line[i] == startchar:
				1310	depth -= 1
				1311	if depth == 0:
				1312	return (i, 0)
				1313	return (-1, depth)
				1314
				1315
				1316	def ReverseCloseExpression(clean_lines, linenum, pos):
				1317	"""If input points to ) or } or ] or >, finds the position that opens it.
				1318
				1319	If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
				1320	linenum/pos that correspond to the opening of the expression.
				1321
				1322	Args:
				1323	clean_lines: A CleansedLines instance containing the file.
				1324	linenum: The number of the line to check.
				1325	pos: A position on the line.
				1326
				1327	Returns:
				1328	A tuple (line, linenum, pos) pointer at the opening brace, or
				1329	(line, 0, -1) if we never find the matching opening brace. Note
				1330	we ignore strings and comments when matching; and the line we
				1331	return is the 'cleansed' line at linenum.
				1332	"""
				1333	line = clean_lines.elided[linenum]
				1334	endchar = line[pos]
				1335	if endchar not in ')}]>':
				1336	return (line, 0, -1)
				1337	if endchar == ')': startchar = '('
				1338	if endchar == ']': startchar = '['
				1339	if endchar == '}': startchar = '{'
				1340	if endchar == '>': startchar = '<'
				1341
				1342	# Check last line
				1343	(start_pos, num_open) = FindStartOfExpressionInLine(
				1344	line, pos, 0, startchar, endchar)
				1345	if start_pos > -1:
				1346	return (line, linenum, start_pos)
				1347
				1348	# Continue scanning backward
				1349	while linenum > 0:
				1350	linenum -= 1
				1351	line = clean_lines.elided[linenum]
				1352	(start_pos, num_open) = FindStartOfExpressionInLine(
				1353	line, len(line) - 1, num_open, startchar, endchar)
				1354	if start_pos > -1:
				1355	return (line, linenum, start_pos)
				1356
				1357	# Did not find startchar before beginning of file, give up
				1358	return (line, 0, -1)
				1359
				1360
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  # range() is used instead of xrange() so this also runs on Python 3.
  for line in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[line], re.I):
      break
  else:                       # means no copyright line was found
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
				1372
				1373
				1374	def GetHeaderGuardCPPVariable(filename):
				1375	"""Returns the CPP variable that should be used as a header guard.
				1376
				1377	Args:
				1378	filename: The name of a C++ header file.
				1379
				1380	Returns:
				1381	The CPP variable that should be used as a header guard in the
				1382	named file.
				1383
				1384	"""
				1385
				1386	# Restores original filename in case that cpplint is invoked from Emacs's
				1387	# flymake.
				1388	filename = re.sub(r'_flymake\.h$', '.h', filename)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1389	filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1390
				1391	fileinfo = FileInfo(filename)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1392	file_path_from_root = fileinfo.RepositoryName()
				1393	if _root:
				1394	file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
				1395	return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1396
				1397
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used.

  The expected guard name comes from GetHeaderGuardCPPVariable(). The first
  #ifndef, the first #define and the last #endif line are compared against
  it; a missing or mismatched #ifndef/#define stops the check early.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    linesplit = line.split()
    if len(linesplit) >= 2:
      # find the first occurrence of #ifndef and #define, save arg
      if not ifndef and linesplit[0] == '#ifndef':
        # set ifndef to the header guard presented on the #ifndef line.
        ifndef = linesplit[1]
        ifndef_linenum = linenum
      if not define and linesplit[0] == '#define':
        define = linesplit[1]
    # find the last occurrence of #endif, save entire line
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility.
  if ifndef != cppvar:
    # Confidence 0 when the only difference is one extra trailing
    # underscore, 5 for any other mismatch.
    error_level = 0
    if ifndef != cppvar + '_':
      error_level = 5

    # NOTE(review): presumably this registers any NOLINT comment on the
    # #ifndef line so the error below can be suppressed -- confirm against
    # ParseNolintSuppressions.
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  # NOTE(review): the spacing inside the '#endif // %s' literals is compared
  # exactly; confirm it matches the intended canonical form.
  if endif != ('#endif // %s' % cppvar):
    # Same leniency as for #ifndef: one extra underscore is confidence 0.
    error_level = 0
    if endif != ('#endif // %s' % (cppvar + '_')):
      error_level = 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
				1471
				1472
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a bad character.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  linenum = 0
  for text in lines:
    has_replacement_char = u'\ufffd' in text
    has_nul = '\0' in text
    if has_replacement_char:
      error(filename, linenum, 'readability/utf8', 5,
            'Line contains invalid UTF-8 (or Unicode replacement character).')
    if has_nul:
      error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
    linenum += 1
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1496
				1497
				1498	def CheckForNewlineAtEOF(filename, lines, error):
				1499	"""Logs an error if there is no newline char at the end of the file.
				1500
				1501	Args:
				1502	filename: The name of the current file.
				1503	lines: An array of strings, each representing a line of the file.
				1504	error: The function to call with any errors found.
				1505	"""
				1506
				1507	# The array lines() was created by adding two newlines to the
				1508	# original file (go figure), then splitting on \n.
				1509	# To verify that the file ends in \n, we just have to make sure the
				1510	# last-but-two element of lines() exists and is empty.
				1511	if len(lines) < 3 or lines[-2]:
				1512	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1513	'Could not find a newline character at the end of the file.')
				1514
				1515
				1516	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1517	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1518
				1519	/* ... */ comments are legit inside macros, for one line.
				1520	Otherwise, we prefer // comments, so it's ok to warn about the
				1521	other. Likewise, it's ok for strings to extend across multiple
				1522	lines, as long as a line continuation character (backslash)
				1523	terminates each line. Although not currently prohibited by the C++
				1524	style guide, it's ugly and unnecessary. We don't do well with either
				1525	in this lint program, so we warn about both.
				1526
				1527	Args:
				1528	filename: The name of the current file.
				1529	clean_lines: A CleansedLines instance containing the file.
				1530	linenum: The number of the line to check.
				1531	error: The function to call with any errors found.
				1532	"""
				1533	line = clean_lines.elided[linenum]
				1534
				1535	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1536	# second (escaped) slash may trigger later \" detection erroneously.
				1537	line = line.replace('\\\\', '')
				1538
				1539	if line.count('/') > line.count('/'):
				1540	error(filename, linenum, 'readability/multiline_comment', 5,
				1541	'Complex multi-line /.../-style comment found. '
				1542	'Lint may give bogus warnings. '
				1543	'Consider replacing these with //-style comments, '
				1544	'with #if 0...#endif, '
				1545	'or with more clearly structured multi-line comments.')
				1546
				1547	if (line.count('"') - line.count('\\"')) % 2:
				1548	error(filename, linenum, 'readability/multiline_string', 5,
				1549	'Multi-line string ("...") found. This lint script doesn\'t '
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1550	'do well with such strings, and may give bogus warnings. '
				1551	'Use C++11 raw strings or concatenation instead.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1552
				1553
				1554	threading_list = (
				1555	('asctime(', 'asctime_r('),
				1556	('ctime(', 'ctime_r('),
				1557	('getgrgid(', 'getgrgid_r('),
				1558	('getgrnam(', 'getgrnam_r('),
				1559	('getlogin(', 'getlogin_r('),
				1560	('getpwnam(', 'getpwnam_r('),
				1561	('getpwuid(', 'getpwuid_r('),
				1562	('gmtime(', 'gmtime_r('),
				1563	('localtime(', 'localtime_r('),
				1564	('rand(', 'rand_r('),
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1565	('strtok(', 'strtok_r('),
				1566	('ttyname(', 'ttyname_r('),
				1567	)
				1568
				1569
				1570	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1571	"""Checks for calls to thread-unsafe functions.
				1572
				1573	Much code has been originally written without consideration of
				1574	multi-threading. Also, engineers are relying on their old experience;
				1575	they have learned posix before threading extensions were added. These
				1576	tests guide the engineers to use thread-safe functions (when using
				1577	posix directly).
				1578
				1579	Args:
				1580	filename: The name of the current file.
				1581	clean_lines: A CleansedLines instance containing the file.
				1582	linenum: The number of the line to check.
				1583	error: The function to call with any errors found.
				1584	"""
				1585	line = clean_lines.elided[linenum]
				1586	for single_thread_function, multithread_safe_function in threading_list:
				1587	ix = line.find(single_thread_function)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1588	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1589	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1590	line[ix - 1] not in ('_', '.', '>'))):
				1591	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1592	'Consider using ' + multithread_safe_function +
				1593	'...) instead of ' + single_thread_function +
				1594	'...) for improved thread safety.')
				1595
				1596
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # Flag VLOG(<symbolic severity>); the parentheses are matched literally.
  if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
    error(filename, linenum, 'runtime/vlog', 5,
          'VLOG() should be used with numeric verbosity level. '
          'Use LOG() if you want symbolic severity levels.')
				1614
				1615
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1616	# Matches invalid increment: *count++, which moves pointer instead of
				1617	# incrementing a value.
				1618	_RE_PATTERN_INVALID_INCREMENT = re.compile(
				1619	r'^\s\\w+(\+\+\|--);')
				1620
				1621
				1622	def CheckInvalidIncrement(filename, clean_lines, linenum, error):
				1623	"""Checks for invalid increment *count++.
				1624
				1625	For example following function:
				1626	void increment_counter(int* count) {
				1627	*count++;
				1628	}
				1629	is invalid, because it effectively does count++, moving pointer, and should
				1630	be replaced with ++count, (count)++ or *count += 1.
				1631
				1632	Args:
				1633	filename: The name of the current file.
				1634	clean_lines: A CleansedLines instance containing the file.
				1635	linenum: The number of the line to check.
				1636	error: The function to call with any errors found.
				1637	"""
				1638	line = clean_lines.elided[linenum]
				1639	if _RE_PATTERN_INVALID_INCREMENT.match(line):
				1640	error(filename, linenum, 'runtime/invalid_increment', 5,
				1641	'Changing pointer instead of value (or unused value of operator*).')
				1642
				1643
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1644	class _BlockInfo(object):
				1645	"""Stores information about a generic block of code."""
				1646
				1647	def __init__(self, seen_open_brace):
				1648	self.seen_open_brace = seen_open_brace
				1649	self.open_parentheses = 0
				1650	self.inline_asm = _NO_ASM
				1651
				1652	def CheckBegin(self, filename, clean_lines, linenum, error):
				1653	"""Run checks that applies to text up to the opening brace.
				1654
				1655	This is mostly for checking the text after the class identifier
				1656	and the "{", usually where the base class is specified. For other
				1657	blocks, there isn't much to check, so we always pass.
				1658
				1659	Args:
				1660	filename: The name of the current file.
				1661	clean_lines: A CleansedLines instance containing the file.
				1662	linenum: The number of the line to check.
				1663	error: The function to call with any errors found.
				1664	"""
				1665	pass
				1666
				1667	def CheckEnd(self, filename, clean_lines, linenum, error):
				1668	"""Run checks that applies to text after the closing brace.
				1669
				1670	This is mostly used for checking end of namespace comments.
				1671
				1672	Args:
				1673	filename: The name of the current file.
				1674	clean_lines: A CleansedLines instance containing the file.
				1675	linenum: The number of the line to check.
				1676	error: The function to call with any errors found.
				1677	"""
				1678	pass
				1679
				1680
				1681	class _ClassInfo(_BlockInfo):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1682	"""Stores information about a class."""
				1683
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1684	def __init__(self, name, class_or_struct, clean_lines, linenum):
				1685	_BlockInfo.__init__(self, False)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1686	self.name = name
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1687	self.starting_linenum = linenum
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1688	self.is_derived = False
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1689	if class_or_struct == 'struct':
				1690	self.access = 'public'
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1691	self.is_struct = True
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1692	else:
				1693	self.access = 'private'
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1694	self.is_struct = False
				1695
				1696	# Remember initial indentation level for this class. Using raw_lines here
				1697	# instead of elided to account for leading comments.
				1698	initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
				1699	if initial_indent:
				1700	self.class_indent = len(initial_indent.group(1))
				1701	else:
				1702	self.class_indent = 0
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1703
				1704	# Try to find the end of the class. This will be confused by things like:
				1705	# class A {
				1706	# } *x = { ...
				1707	#
				1708	# But it's still good enough for CheckSectionSpacing.
				1709	self.last_line = 0
				1710	depth = 0
				1711	for i in range(linenum, clean_lines.NumLines()):
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1712	line = clean_lines.elided[i]
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1713	depth += line.count('{') - line.count('}')
				1714	if not depth:
				1715	self.last_line = i
				1716	break
				1717
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1718	def CheckBegin(self, filename, clean_lines, linenum, error):
				1719	# Look for a bare ':'
				1720	if Search('(^\|[^:]):($\|[^:])', clean_lines.elided[linenum]):
				1721	self.is_derived = True
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1722
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1723	def CheckEnd(self, filename, clean_lines, linenum, error):
				1724	# Check that closing brace is aligned with beginning of the class.
				1725	# Only do this if the closing brace is indented by only whitespaces.
				1726	# This means we will not check single-line class definitions.
				1727	indent = Match(r'^( *)\}', clean_lines.elided[linenum])
				1728	if indent and len(indent.group(1)) != self.class_indent:
				1729	if self.is_struct:
				1730	parent = 'struct ' + self.name
				1731	else:
				1732	parent = 'class ' + self.name
				1733	error(filename, linenum, 'whitespace/indent', 3,
				1734	'Closing brace should be aligned with beginning of %s' % parent)
				1735
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1736
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records a namespace declaration.

    Args:
      name: The namespace name, or a false value for an anonymous namespace.
      linenum: The number of the line where the namespace starts.
    """
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace. Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines. However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations). There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    if (linenum - self.starting_linenum < 10
        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                    r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
				1789
				1790
				1791	class _PreprocessorInfo(object):
				1792	"""Stores checkpoints of nesting stacks when #if/#else is seen."""
				1793
				1794	def __init__(self, stack_before_if):
				1795	# The entire nesting stack before #if
				1796	self.stack_before_if = stack_before_if
				1797
				1798	# The entire nesting stack up to #else
				1799	self.stack_before_else = []
				1800
				1801	# Whether we have already seen #else or #elif
				1802	self.seen_else = False
				1803
				1804
				1805	class _NestingState(object):
				1806	"""Holds states related to parsing braces."""
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1807
				1808	def __init__(self):
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1809	# Stack for tracking all braces. An object is pushed whenever we
				1810	# see a "{", and popped when we see a "}". Only 3 types of
				1811	# objects are possible:
				1812	# - _ClassInfo: a class or struct.
				1813	# - _NamespaceInfo: a namespace.
				1814	# - _BlockInfo: some other type of block.
				1815	self.stack = []
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	1816
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1817	# Stack of _PreprocessorInfo objects.
				1818	self.pp_stack = []
				1819
				1820	def SeenOpenBrace(self):
				1821	"""Check if we have seen the opening brace for the innermost block.
				1822
				1823	Returns:
				1824	True if we have seen the opening brace, False if the innermost
				1825	block is still expecting an opening brace.
				1826	"""
				1827	return (not self.stack) or self.stack[-1].seen_open_brace
				1828
				1829	def InNamespaceBody(self):
				1830	"""Check if we are currently one level inside a namespace body.
				1831
				1832	Returns:
				1833	True if top of the stack is a namespace block, False otherwise.
				1834	"""
				1835	return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
				1836
				1837	def UpdatePreprocessor(self, line):
				1838	"""Update preprocessor stack.
				1839
				1840	We need to handle preprocessors due to classes like this:
				1841	#ifdef SWIG
				1842	struct ResultDetailsPageElementExtensionPoint {
				1843	#else
				1844	struct ResultDetailsPageElementExtensionPoint : public Extension {
				1845	#endif
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1846
				1847	We make the following assumptions (good enough for most files):
				1848	- Preprocessor condition evaluates to true from #if up to first
				1849	#else/#elif/#endif.
				1850
				1851	- Preprocessor condition evaluates to false from #else/#elif up
				1852	to #endif. We still perform lint checks on these lines, but
				1853	these do not affect nesting stack.
				1854
				1855	Args:
				1856	line: current line to check.
				1857	"""
				1858	if Match(r'^\s#\s(if\|ifdef\|ifndef)\b', line):
				1859	# Beginning of #if block, save the nesting stack here. The saved
				1860	# stack will allow us to restore the parsing state in the #else case.
				1861	self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
				1862	elif Match(r'^\s#\s(else\|elif)\b', line):
				1863	# Beginning of #else block
				1864	if self.pp_stack:
				1865	if not self.pp_stack[-1].seen_else:
				1866	# This is the first #else or #elif block. Remember the
				1867	# whole nesting stack up to this point. This is what we
				1868	# keep after the #endif.
				1869	self.pp_stack[-1].seen_else = True
				1870	self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
				1871
				1872	# Restore the stack to how it was before the #if
				1873	self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
				1874	else:
				1875	# TODO(unknown): unexpected #else, issue warning?
				1876	pass
				1877	elif Match(r'^\s#\sendif\b', line):
				1878	# End of #if or #else blocks.
				1879	if self.pp_stack:
				1880	# If we saw an #else, we will need to restore the nesting
				1881	# stack to its former state before the #else, otherwise we
				1882	# will just continue from where we left off.
				1883	if self.pp_stack[-1].seen_else:
				1884	# Here we can just use a shallow copy since we are the last
				1885	# reference to it.
				1886	self.stack = self.pp_stack[-1].stack_before_else
				1887	# Drop the corresponding #if
				1888	self.pp_stack.pop()
				1889	else:
				1890	# TODO(unknown): unexpected #endif, issue warning?
				1891	pass
				1892
				1893	def Update(self, filename, clean_lines, linenum, error):
				1894	"""Update nesting state with current line.
				1895
				1896	Args:
				1897	filename: The name of the current file.
				1898	clean_lines: A CleansedLines instance containing the file.
				1899	linenum: The number of the line to check.
				1900	error: The function to call with any errors found.
				1901	"""
				1902	line = clean_lines.elided[linenum]
				1903
				1904	# Update pp_stack first
				1905	self.UpdatePreprocessor(line)
				1906
				1907	# Count parentheses. This is to avoid adding struct arguments to
				1908	# the nesting stack.
				1909	if self.stack:
				1910	inner_block = self.stack[-1]
				1911	depth_change = line.count('(') - line.count(')')
				1912	inner_block.open_parentheses += depth_change
				1913
				1914	# Also check if we are starting or ending an inline assembly block.
				1915	if inner_block.inline_asm in (_NO_ASM, _END_ASM):
				1916	if (depth_change != 0 and
				1917	inner_block.open_parentheses == 1 and
				1918	_MATCH_ASM.match(line)):
				1919	# Enter assembly block
				1920	inner_block.inline_asm = _INSIDE_ASM
				1921	else:
				1922	# Not entering assembly block. If previous line was _END_ASM,
				1923	# we will now shift to _NO_ASM state.
				1924	inner_block.inline_asm = _NO_ASM
				1925	elif (inner_block.inline_asm == _INSIDE_ASM and
				1926	inner_block.open_parentheses == 0):
				1927	# Exit assembly block
				1928	inner_block.inline_asm = _END_ASM
				1929
				1930	# Consume namespace declaration at the beginning of the line. Do
				1931	# this in a loop so that we catch same line declarations like this:
				1932	# namespace proto2 { namespace bridge { class MessageSet; } }
				1933	while True:
				1934	# Match start of namespace. The "\b\s*" below catches namespace
				1935	# declarations even if it weren't followed by a whitespace, this
				1936	# is so that we don't confuse our namespace checker. The
				1937	# missing spaces will be flagged by CheckSpacing.
				1938	namespace_decl_match = Match(r'^\snamespace\b\s([:\w]+)?(.*)$', line)
				1939	if not namespace_decl_match:
				1940	break
				1941
				1942	new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
				1943	self.stack.append(new_namespace)
				1944
				1945	line = namespace_decl_match.group(2)
				1946	if line.find('{') != -1:
				1947	new_namespace.seen_open_brace = True
				1948	line = line[line.find('{') + 1:]
				1949
				1950	# Look for a class declaration in whatever is left of the line
				1951	# after parsing namespaces. The regexp accounts for decorated classes
				1952	# such as in:
				1953	# class LOCKABLE API Object {
				1954	# };
				1955	#
				1956	# Templates with class arguments may confuse the parser, for example:
				1957	# template <class T
				1958	# class Comparator = less<T>,
				1959	# class Vector = vector<T> >
				1960	# class HeapQueue {
				1961	#
				1962	# Because this parser has no nesting state about templates, by the
				1963	# time it saw "class Comparator", it may think that it's a new class.
				1964	# Nested templates have a similar problem:
				1965	# template <
				1966	# typename ExportedType,
				1967	# typename TupleType,
				1968	# template <typename, typename> class ImplTemplate>
				1969	#
				1970	# To avoid these cases, we ignore classes that are followed by '=' or '>'
				1971	class_decl_match = Match(
				1972	r'\s(template\s<[\w\s<>,:]>\s)?'
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1973	r'(class\|struct)\s+([A-Z_]+\s+)(\w+(?:::\w+))'
				1974	r'(([^=>]\|<[^<>]>\|<[^<>]<[^<>]>\s>)*)$', line)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1975	if (class_decl_match and
				1976	(not self.stack or self.stack[-1].open_parentheses == 0)):
				1977	self.stack.append(_ClassInfo(
				1978	class_decl_match.group(4), class_decl_match.group(2),
				1979	clean_lines, linenum))
				1980	line = class_decl_match.group(5)
				1981
				1982	# If we have not yet seen the opening brace for the innermost block,
				1983	# run checks here.
				1984	if not self.SeenOpenBrace():
				1985	self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
				1986
				1987	# Update access control if we are inside a class/struct
				1988	if self.stack and isinstance(self.stack[-1], _ClassInfo):
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1989	classinfo = self.stack[-1]
				1990	access_match = Match(
				1991	r'^(.)\b(public\|private\|protected\|signals)(\s+(?:slots\s)?)?'
				1992	r':(?:[^:]\|$)',
				1993	line)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	1994	if access_match:
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	1995	classinfo.access = access_match.group(2)
				1996
				1997	# Check that access keywords are indented +1 space. Skip this
				1998	# check if the keywords are not preceded by whitespaces.
				1999	indent = access_match.group(1)
				2000	if (len(indent) != classinfo.class_indent + 1 and
				2001	Match(r'^\s*$', indent)):
				2002	if classinfo.is_struct:
				2003	parent = 'struct ' + classinfo.name
				2004	else:
				2005	parent = 'class ' + classinfo.name
				2006	slots = ''
				2007	if access_match.group(3):
				2008	slots = access_match.group(3)
				2009	error(filename, linenum, 'whitespace/indent', 3,
				2010	'%s%s: should be indented +1 space inside %s' % (
				2011	access_match.group(2), slots, parent))
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2012
				2013	# Consume braces or semicolons from what's left of the line
				2014	while True:
				2015	# Match first brace, semicolon, or closed parenthesis.
				2016	matched = Match(r'^[^{;)}]([{;)}])(.)$', line)
				2017	if not matched:
				2018	break
				2019
				2020	token = matched.group(1)
				2021	if token == '{':
				2022	# If namespace or class hasn't seen a opening brace yet, mark
				2023	# namespace/class head as complete. Push a new block onto the
				2024	# stack otherwise.
				2025	if not self.SeenOpenBrace():
				2026	self.stack[-1].seen_open_brace = True
				2027	else:
				2028	self.stack.append(_BlockInfo(True))
				2029	if _MATCH_ASM.match(line):
				2030	self.stack[-1].inline_asm = _BLOCK_ASM
				2031	elif token == ';' or token == ')':
				2032	# If we haven't seen an opening brace yet, but we already saw
				2033	# a semicolon, this is probably a forward declaration. Pop
				2034	# the stack for these.
				2035	#
				2036	# Similarly, if we haven't seen an opening brace yet, but we
				2037	# already saw a closing parenthesis, then these are probably
				2038	# function arguments with extra "class" or "struct" keywords.
				2039	# Also pop these stack for these.
				2040	if not self.SeenOpenBrace():
				2041	self.stack.pop()
				2042	else: # token == '}'
				2043	# Perform end of block checks and pop the stack.
				2044	if self.stack:
				2045	self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
				2046	self.stack.pop()
				2047	line = matched.group(2)
				2048
				2049	def InnermostClass(self):
				2050	"""Get class info on the top of the stack.
				2051
				2052	Returns:
				2053	A _ClassInfo object if we are inside a class, or None otherwise.
				2054	"""
				2055	for i in range(len(self.stack), 0, -1):
				2056	classinfo = self.stack[i - 1]
				2057	if isinstance(classinfo, _ClassInfo):
				2058	return classinfo
				2059	return None
				2060
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2061	def CheckCompletedBlocks(self, filename, error):
				2062	"""Checks that all classes and namespaces have been completely parsed.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2063
				2064	Call this when all lines in a file have been processed.
				2065	Args:
				2066	filename: The name of the current file.
				2067	error: The function to call with any errors found.
				2068	"""
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2069	# Note: This test can result in false positives if #ifdef constructs
				2070	# get in the way of brace matching. See the testBuildClass test in
				2071	# cpplint_unittest.py for an example of this.
				2072	for obj in self.stack:
				2073	if isinstance(obj, _ClassInfo):
				2074	error(filename, obj.starting_linenum, 'build/class', 5,
				2075	'Failed to find complete declaration of class %s' %
				2076	obj.name)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2077	elif isinstance(obj, _NamespaceInfo):
				2078	error(filename, obj.starting_linenum, 'build/namespaces', 5,
				2079	'Failed to find complete declaration of namespace %s' %
				2080	obj.name)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2081
				2082
				2083	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2084	nesting_state, error):
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2085	r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2086
				2087	Complain about several constructs which gcc-2 accepts, but which are
				2088	not standard C++. Warning about these in lint is one way to ease the
				2089	transition to new compilers.
				2090	- put storage class first (e.g. "static const" instead of "const static").
				2091	- "%lld" instead of %qd" in printf-type functions.
				2092	- "%1$d" is non-standard in printf-type functions.
				2093	- "\%" is an undefined character escape sequence.
				2094	- text after #endif is not allowed.
				2095	- invalid inner-style forward declaration.
				2096	- >? and <? operators, and their >?= and <?= cousins.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2097
				2098	Additionally, check for constructor/destructor style violations and reference
				2099	members, as it is very convenient to do so while checking for
				2100	gcc-2 compliance.
				2101
				2102	Args:
				2103	filename: The name of the current file.
				2104	clean_lines: A CleansedLines instance containing the file.
				2105	linenum: The number of the line to check.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2106	nesting_state: A _NestingState instance which maintains information about
				2107	the current stack of nested blocks being parsed.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2108	error: A callable to which errors are reported, which takes 4 arguments:
				2109	filename, line number, error level, and message
				2110	"""
				2111
				2112	# Remove comments from the line, but leave in strings for now.
				2113	line = clean_lines.lines[linenum]
				2114
				2115	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				2116	error(filename, linenum, 'runtime/printf_format', 3,
				2117	'%q in format strings is deprecated. Use %ll instead.')
				2118
				2119	if Search(r'printf\s\(.".*%\d+\$', line):
				2120	error(filename, linenum, 'runtime/printf_format', 2,
				2121	'%N$ formats are unconventional. Try rewriting to avoid them.')
				2122
				2123	# Remove escaped backslashes before looking for undefined escapes.
				2124	line = line.replace('\\\\', '')
				2125
				2126	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				2127	error(filename, linenum, 'build/printf_format', 3,
				2128	'%, [, (, and { are undefined character escapes. Unescape them.')
				2129
				2130	# For the rest, work with both comments and strings removed.
				2131	line = clean_lines.elided[linenum]
				2132
				2133	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				2134	r'\|float\|double\|signed\|unsigned'
				2135	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2136	r'\s+(register\|static\|extern\|typedef)\b',
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2137	line):
				2138	error(filename, linenum, 'build/storage_class', 5,
				2139	'Storage class (static, extern, typedef, etc) should be first.')
				2140
				2141	if Match(r'\s#\sendif\s*[^/\s]+', line):
				2142	error(filename, linenum, 'build/endif_comment', 5,
				2143	'Uncommented text after #endif is non-standard. Use a comment.')
				2144
				2145	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				2146	error(filename, linenum, 'build/forward_decl', 5,
				2147	'Inner-style forward declarations are invalid. Remove this line.')
				2148
				2149	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				2150	line):
				2151	error(filename, linenum, 'build/deprecated', 3,
				2152	'>? and <? (max and min) operators are non-standard and deprecated.')
				2153
				2154	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				2155	# TODO(unknown): Could it be expanded safely to arbitrary references,
				2156	# without triggering too many false positives? The first
				2157	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				2158	# the restriction.
				2159	# Here's the original regexp, for the reference:
				2160	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				2161	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				2162	error(filename, linenum, 'runtime/member_string_references', 2,
				2163	'const string& members are dangerous. It is much better to use '
				2164	'alternatives, such as pointers or simple constants.')
				2165
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2166	# Everything else in this function operates on class declarations.
				2167	# Return early if the top of the nesting stack is not a class, or if
				2168	# the class head is not completed yet.
				2169	classinfo = nesting_state.InnermostClass()
				2170	if not classinfo or not classinfo.seen_open_brace:
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2171	return
				2172
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2173	# The class may have been declared with namespace or classname qualifiers.
				2174	# The constructor and destructor will not have those qualifiers.
				2175	base_classname = classinfo.name.split('::')[-1]
				2176
				2177	# Look for single-argument constructors that aren't marked explicit.
				2178	# Technically a valid construct, but against style.
				2179	args = Match(r'\s+(?:inline\s+)?%s\s*$([^,()]+)$'
				2180	% re.escape(base_classname),
				2181	line)
				2182	if (args and
				2183	args.group(1) != 'void' and
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2184	not Match(r'(const\s+)?%s(\s+const)?\s(?:<\w+>\s)?&'
				2185	% re.escape(base_classname), args.group(1).strip())):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2186	error(filename, linenum, 'runtime/explicit', 5,
				2187	'Single-argument constructors should be marked explicit.')
				2188
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2189
				2190	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				2191	"""Checks for the correctness of various spacing around function calls.
				2192
				2193	Args:
				2194	filename: The name of the current file.
				2195	line: The text of the line to check.
				2196	linenum: The number of the line to check.
				2197	error: The function to call with any errors found.
				2198	"""
				2199
				2200	# Since function calls often occur inside if/for/while/switch
				2201	# expressions - which have their own, more liberal conventions - we
				2202	# first see if we should be looking inside such an expression for a
				2203	# function call, to which we can apply more strict standards.
				2204	fncall = line # if there's no control flow construct, look at whole line
				2205	for pattern in (r'\bif\s$(.)$\s*{',
				2206	r'\bfor\s$(.)$\s*{',
				2207	r'\bwhile\s$(.)$\s*[{;]',
				2208	r'\bswitch\s$(.)$\s*{'):
				2209	match = Search(pattern, line)
				2210	if match:
				2211	fncall = match.group(1) # look inside the parens for function calls
				2212	break
				2213
				2214	# Except in if/for/while/switch, there should never be space
				2215	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				2216	# for nested parens ( (a+b) + c ). Likewise, there should never be
				2217	# a space before a ( when it's a function argument. I assume it's a
				2218	# function argument when the char before the whitespace is legal in
				2219	# a function name (alnum + _) and we're not starting a macro. Also ignore
				2220	# pointers and references to arrays and functions coz they're too tricky:
				2221	# we use a very simple way to recognize these:
				2222	# " (something)(maybe-something)" or
				2223	# " (something)(maybe-something," or
				2224	# " (something)[something]"
				2225	# Note that we assume the contents of [] to be short enough that
				2226	# they'll never need to wrap.
				2227	if ( # Ignore control structures.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2228	not Search(r'\b(if\|for\|while\|switch\|return\|new\|delete\|catch\|sizeof)\b',
				2229	fncall) and
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2230	# Ignore pointers/references to functions.
				2231	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				2232	# Ignore pointers/references to arrays.
				2233	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
				2234	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
				2235	error(filename, linenum, 'whitespace/parens', 4,
				2236	'Extra space after ( in function call')
				2237	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
				2238	error(filename, linenum, 'whitespace/parens', 2,
				2239	'Extra space after (')
				2240	if (Search(r'\w\s+\(', fncall) and
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2241	not Search(r'#\s*define\|typedef', fncall) and
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2242	not Search(r'\w\s+$(\w+::)\\w+$\(', fncall)):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2243	error(filename, linenum, 'whitespace/parens', 4,
				2244	'Extra space before ( in function call')
				2245	# If the ) is followed only by a newline or a { + newline, assume it's
				2246	# part of a control statement (if/while/etc), and don't complain
				2247	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
				2248	# If the closing parenthesis is preceded by only whitespaces,
				2249	# try to give a more descriptive error message.
				2250	if Search(r'^\s+\)', fncall):
				2251	error(filename, linenum, 'whitespace/parens', 2,
				2252	'Closing ) should be moved to the previous line')
				2253	else:
				2254	error(filename, linenum, 'whitespace/parens', 2,
				2255	'Extra space before )')
				2256
				2257
				2258	def IsBlankLine(line):
				2259	"""Returns true if the given line is blank.
				2260
				2261	We consider a line to be blank if the line is empty or consists of
				2262	only white spaces.
				2263
				2264	Args:
				2265	line: A line of a string.
				2266
				2267	Returns:
				2268	True, if the given line is blank.
				2269	"""
				2270	return not line or line.isspace()
				2271
				2272
				2273	def CheckForFunctionLengths(filename, clean_lines, linenum,
				2274	function_state, error):
				2275	"""Reports for long function bodies.
				2276
				2277	For an overview why this is done, see:
				2278	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				2279
				2280	Uses a simplistic algorithm assuming other style guidelines
				2281	(especially spacing) are followed.
				2282	Only checks unindented functions, so class members are unchecked.
				2283	Trivial bodies are unchecked, so constructors with huge initializer lists
				2284	may be missed.
				2285	Blank/comment lines are not counted so as to avoid encouraging the removal
				2286	of vertical space and comments just to get through a lint check.
				2287	NOLINT on the last line of a function disables this check.
				2288
				2289	Args:
				2290	filename: The name of the current file.
				2291	clean_lines: A CleansedLines instance containing the file.
				2292	linenum: The number of the line to check.
				2293	function_state: Current function name and lines in body so far.
				2294	error: The function to call with any errors found.
				2295	"""
				2296	lines = clean_lines.lines
				2297	line = lines[linenum]
				2298	raw = clean_lines.raw_lines
				2299	raw_line = raw[linenum]
				2300	joined_line = ''
				2301
				2302	starting_func = False
				2303	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
				2304	match_result = Match(regexp, line)
				2305	if match_result:
				2306	# If the name is all caps and underscores, figure it's a macro and
				2307	# ignore it, unless it's TEST or TEST_F.
				2308	function_name = match_result.group(1).split()[-1]
				2309	if function_name == 'TEST' or function_name == 'TEST_F' or (
				2310	not Match(r'[A-Z_]+$', function_name)):
				2311	starting_func = True
				2312
				2313	if starting_func:
				2314	body_found = False
				2315	for start_linenum in xrange(linenum, clean_lines.NumLines()):
				2316	start_line = lines[start_linenum]
				2317	joined_line += ' ' + start_line.lstrip()
				2318	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				2319	body_found = True
				2320	break # ... ignore
				2321	elif Search(r'{', start_line):
				2322	body_found = True
				2323	function = Search(r'((\w\|:)*)\(', line).group(1)
				2324	if Match(r'TEST', function): # Handle TEST... macros
				2325	parameter_regexp = Search(r'($.*$)', joined_line)
				2326	if parameter_regexp: # Ignore bad syntax
				2327	function += parameter_regexp.group(1)
				2328	else:
				2329	function += '()'
				2330	function_state.Begin(function)
				2331	break
				2332	if not body_found:
				2333	# No body for the function (or evidence of a non-function) was found.
				2334	error(filename, linenum, 'readability/fn_size', 5,
				2335	'Lint failed to find start of function body.')
				2336	elif Match(r'^\}\s*$', line): # function end
				2337	function_state.Check(error, filename, linenum)
				2338	function_state.End()
				2339	elif not Match(r'^\s*$', line):
				2340	function_state.Count() # Count non-blank/non-comment lines.
				2341
				2342
				2343	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				2344
				2345
				2346	def CheckComment(comment, filename, linenum, error):
				2347	"""Checks for common mistakes in TODO comments.
				2348
				2349	Args:
				2350	comment: The text of the comment from the line in question.
				2351	filename: The name of the current file.
				2352	linenum: The number of the line to check.
				2353	error: The function to call with any errors found.
				2354	"""
				2355	match = _RE_PATTERN_TODO.match(comment)
				2356	if match:
				2357	# One whitespace is correct; zero whitespace is handled elsewhere.
				2358	leading_whitespace = match.group(1)
				2359	if len(leading_whitespace) > 1:
				2360	error(filename, linenum, 'whitespace/todo', 2,
				2361	'Too many spaces before TODO')
				2362
				2363	username = match.group(2)
				2364	if not username:
				2365	error(filename, linenum, 'readability/todo', 2,
				2366	'Missing username in TODO; it should look like '
				2367	'"// TODO(my_username): Stuff."')
				2368
				2369	middle_whitespace = match.group(3)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2370	# Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2371	if middle_whitespace != ' ' and middle_whitespace != '':
				2372	error(filename, linenum, 'whitespace/todo', 2,
				2373	'TODO(my_username) should be followed by a space')
				2374
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Reports 'readability/constructors' when a DISALLOW_COPY_AND_ASSIGN,
  DISALLOW_EVIL_CONSTRUCTORS or DISALLOW_IMPLICIT_CONSTRUCTORS macro starts
  the line while the innermost block is a class whose current access level
  is not 'private'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return
  if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
    if nesting_state.stack[-1].access != 'private':
      error(filename, linenum, 'readability/constructors', 3,
            '%s must be in the private: section' % matched.group(1))

  else:
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration.  We could issue a warning here, but it
    # probably resulted in a compiler error already.
    pass
				2404
				2405
				2406	def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
				2407	"""Find the corresponding > to close a template.
				2408
				2409	Args:
				2410	clean_lines: A CleansedLines instance containing the file.
				2411	linenum: Current line number.
				2412	init_suffix: Remainder of the current line after the initial <.
				2413
				2414	Returns:
				2415	True if a matching bracket exists.
				2416	"""
				2417	line = init_suffix
				2418	nesting_stack = ['<']
				2419	while True:
				2420	# Find the next operator that can tell us whether < is used as an
				2421	# opening bracket or as a less-than operator. We only want to
				2422	# warn on the latter case.
				2423	#
				2424	# We could also check all other operators and terminate the search
				2425	# early, e.g. if we got something like this "a<b+c", the "<" is
				2426	# most likely a less-than operator, but then we will get false
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2427	# positives for default arguments and other template expressions.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2428	match = Search(r'^[^<>(),;\[\]]([<>(),;\[\]])(.)$', line)
				2429	if match:
				2430	# Found an operator, update nesting stack
				2431	operator = match.group(1)
				2432	line = match.group(2)
				2433
				2434	if nesting_stack[-1] == '<':
				2435	# Expecting closing angle bracket
				2436	if operator in ('<', '(', '['):
				2437	nesting_stack.append(operator)
				2438	elif operator == '>':
				2439	nesting_stack.pop()
				2440	if not nesting_stack:
				2441	# Found matching angle bracket
				2442	return True
				2443	elif operator == ',':
				2444	# Got a comma after a bracket, this is most likely a template
				2445	# argument. We have not seen a closing angle bracket yet, but
				2446	# it's probably a few lines later if we look for it, so just
				2447	# return early here.
				2448	return True
				2449	else:
				2450	# Got some other operator.
				2451	return False
				2452
				2453	else:
				2454	# Expecting closing parenthesis or closing bracket
				2455	if operator in ('<', '(', '['):
				2456	nesting_stack.append(operator)
				2457	elif operator in (')', ']'):
				2458	# We don't bother checking for matching () or []. If we got
				2459	# something like (] or [), it would have been a syntax error.
				2460	nesting_stack.pop()
				2461
				2462	else:
				2463	# Scan the next line
				2464	linenum += 1
				2465	if linenum >= len(clean_lines.elided):
				2466	break
				2467	line = clean_lines.elided[linenum]
				2468
				2469	# Exhausted all remaining lines and still no matching angle bracket.
				2470	# Most likely the input was incomplete, otherwise we should have
				2471	# seen a semicolon and returned early.
				2472	return True
				2473
				2474
				2475	def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
				2476	"""Find the corresponding < that started a template.
				2477
				2478	Args:
				2479	clean_lines: A CleansedLines instance containing the file.
				2480	linenum: Current line number.
				2481	init_prefix: Part of the current line before the initial >.
				2482
				2483	Returns:
				2484	True if a matching bracket exists.
				2485	"""
				2486	line = init_prefix
				2487	nesting_stack = ['>']
				2488	while True:
				2489	# Find the previous operator
				2490	match = Search(r'^(.)([<>(),;\[\]])[^<>(),;\[\]]$', line)
				2491	if match:
				2492	# Found an operator, update nesting stack
				2493	operator = match.group(2)
				2494	line = match.group(1)
				2495
				2496	if nesting_stack[-1] == '>':
				2497	# Expecting opening angle bracket
				2498	if operator in ('>', ')', ']'):
				2499	nesting_stack.append(operator)
				2500	elif operator == '<':
				2501	nesting_stack.pop()
				2502	if not nesting_stack:
				2503	# Found matching angle bracket
				2504	return True
				2505	elif operator == ',':
				2506	# Got a comma before a bracket, this is most likely a
				2507	# template argument. The opening angle bracket is probably
				2508	# there if we look for it, so just return early here.
				2509	return True
				2510	else:
				2511	# Got some other operator.
				2512	return False
				2513
				2514	else:
				2515	# Expecting opening parenthesis or opening bracket
				2516	if operator in ('>', ')', ']'):
				2517	nesting_stack.append(operator)
				2518	elif operator in ('(', '['):
				2519	nesting_stack.pop()
				2520
				2521	else:
				2522	# Scan the previous line
				2523	linenum -= 1
				2524	if linenum < 0:
				2525	break
				2526	line = clean_lines.elided[linenum]
				2527
				2528	# Exhausted all earlier lines and still no matching angle bracket.
				2529	return False
				2530
				2531
				2532	def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2533	"""Checks for the correctness of various spacing issues in the code.
				2534
				2535	Things we check for: spaces around operators, spaces after
				2536	if/for/while/switch, no spaces around parens in function calls, two
				2537	spaces between code and comment, don't start a block with a blank
				2538	line, don't end a function with a blank line, don't add a blank line
				2539	after public/protected/private, don't have too many blank lines in a row.
				2540
				2541	Args:
				2542	filename: The name of the current file.
				2543	clean_lines: A CleansedLines instance containing the file.
				2544	linenum: The number of the line to check.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2545	nesting_state: A _NestingState instance which maintains information about
				2546	the current stack of nested blocks being parsed.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2547	error: The function to call with any errors found.
				2548	"""
				2549
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2550	# Don't use "elided" lines here, otherwise we can't check commented lines.
				2551	# Don't want to use "raw" either, because we don't want to check inside C++11
				2552	# raw strings,
				2553	raw = clean_lines.lines_without_raw_strings
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2554	line = raw[linenum]
				2555
				2556	# Before nixing comments, check if the line is blank for no good
				2557	# reason. This includes the first line after a block is opened, and
				2558	# blank lines at the end of a function (ie, right before a line like '}'
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2559	#
				2560	# Skip all the blank line checks if we are immediately inside a
				2561	# namespace body. In other words, don't issue blank line warnings
				2562	# for this block:
				2563	# namespace {
				2564	#
				2565	# }
				2566	#
				2567	# A warning about missing end of namespace comments will be issued instead.
				2568	if IsBlankLine(line) and not nesting_state.InNamespaceBody():
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2569	elided = clean_lines.elided
				2570	prev_line = elided[linenum - 1]
				2571	prevbrace = prev_line.rfind('{')
				2572	# TODO(unknown): Don't complain if line before blank line, and line after,
				2573	# both start with alnums and are indented the same amount.
				2574	# This ignores whitespace at the start of a namespace block
				2575	# because those are not usually indented.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2576	if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2577	# OK, we have a blank line at the start of a code block. Before we
				2578	# complain, we check if it is an exception to the rule: The previous
				2579	# non-empty line has the parameters of a function header that are indented
				2580	# 4 spaces (because they did not fit in a 80 column line when placed on
				2581	# the same line as the function name). We also check for the case where
				2582	# the previous line is indented 6 spaces, which may happen when the
				2583	# initializers of a constructor do not fit into a 80 column line.
				2584	exception = False
				2585	if Match(r' {6}\w', prev_line): # Initializer list?
				2586	# We are looking for the opening column of initializer list, which
				2587	# should be indented 4 spaces to cause 6 space indentation afterwards.
				2588	search_position = linenum-2
				2589	while (search_position >= 0
				2590	and Match(r' {6}\w', elided[search_position])):
				2591	search_position -= 1
				2592	exception = (search_position >= 0
				2593	and elided[search_position][:5] == ' :')
				2594	else:
				2595	# Search for the function arguments or an initializer list. We use a
				2596	# simple heuristic here: If the line is indented 4 spaces; and we have a
				2597	# closing paren, without the opening paren, followed by an opening brace
				2598	# or colon (for initializer lists) we assume that it is the last line of
				2599	# a function header. If we have a colon indented 4 spaces, it is an
				2600	# initializer list.
				2601	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				2602	prev_line)
				2603	or Match(r' {4}:', prev_line))
				2604
				2605	if not exception:
				2606	error(filename, linenum, 'whitespace/blank_line', 2,
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2607	'Redundant blank line at the start of a code block '
				2608	'should be deleted.')
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2609	# Ignore blank lines at the end of a block in a long if-else
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2610	# chain, like this:
				2611	# if (condition1) {
				2612	# // Something followed by a blank line
				2613	#
				2614	# } else if (condition2) {
				2615	# // Something else
				2616	# }
				2617	if linenum + 1 < clean_lines.NumLines():
				2618	next_line = raw[linenum + 1]
				2619	if (next_line
				2620	and Match(r'\s*}', next_line)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2621	and next_line.find('} else ') == -1):
				2622	error(filename, linenum, 'whitespace/blank_line', 3,
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2623	'Redundant blank line at the end of a code block '
				2624	'should be deleted.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2625
				2626	matched = Match(r'\s*(public\|protected\|private):', prev_line)
				2627	if matched:
				2628	error(filename, linenum, 'whitespace/blank_line', 3,
				2629	'Do not leave a blank line after "%s:"' % matched.group(1))
				2630
				2631	# Next, we complain if there's a comment too near the text
				2632	commentpos = line.find('//')
				2633	if commentpos != -1:
				2634	# Check if the // may be in quotes. If so, ignore it
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2635	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2636	if (line.count('"', 0, commentpos) -
				2637	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				2638	# Allow one space for new scopes, two spaces otherwise:
				2639	if (not Match(r'^\s*{ //', line) and
				2640	((commentpos >= 1 and
				2641	line[commentpos-1] not in string.whitespace) or
				2642	(commentpos >= 2 and
				2643	line[commentpos-2] not in string.whitespace))):
				2644	error(filename, linenum, 'whitespace/comments', 2,
				2645	'At least two spaces is best between code and comments')
				2646	# There should always be a space between the // and the comment
				2647	commentend = commentpos + 2
				2648	if commentend < len(line) and not line[commentend] == ' ':
				2649	# but some lines are exceptions -- e.g. if they're big
				2650	# comment delimiters like:
				2651	# //----------------------------------------------------------
				2652	# or are an empty C++ style Doxygen comment, like:
				2653	# ///
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2654	# or C++ style Doxygen comments placed after the variable:
				2655	# ///< Header comment
				2656	# //!< Header comment
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2657	# or they begin with multiple slashes followed by a space:
				2658	# //////// Header comment
				2659	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
				2660	Search(r'^/$', line[commentend:]) or
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2661	Search(r'^!< ', line[commentend:]) or
				2662	Search(r'^/< ', line[commentend:]) or
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2663	Search(r'^/+ ', line[commentend:]))
				2664	if not match:
				2665	error(filename, linenum, 'whitespace/comments', 4,
				2666	'Should have a space between // and comment')
				2667	CheckComment(line[commentpos:], filename, linenum, error)
				2668
				2669	line = clean_lines.elided[linenum] # get rid of comments and strings
				2670
				2671	# Don't try to do spacing checks for operator methods
				2672	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				2673
				2674	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				2675	# Otherwise not. Note we only check for non-spaces on both sides;
				2676	# sometimes people put non-spaces on one side when aligning ='s among
				2677	# many lines (not that this is behavior that I approve of...)
				2678	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				2679	error(filename, linenum, 'whitespace/operators', 4,
				2680	'Missing spaces around =')
				2681
				2682	# It's ok not to have spaces around binary operators like + - * /, but if
				2683	# there's too little whitespace, we get concerned. It's hard to tell,
				2684	# though, so we punt on this one for now. TODO.
				2685
				2686	# You should always have whitespace around binary operators.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2687	#
				2688	# Check <= and >= first to avoid false positives with < and >, then
				2689	# check non-include lines for spacing around < and >.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2690	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2691	if match:
				2692	error(filename, linenum, 'whitespace/operators', 3,
				2693	'Missing spaces around %s' % match.group(1))
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2694	# We allow no-spaces around << when used like this: 10<<20, but
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2695	# not otherwise (particularly, not when used as streams)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2696	# Also ignore using ns::operator<<;
				2697	match = Search(r'(operator\|\S)(?:L\|UL\|ULL\|l\|ul\|ull)?<<(\S)', line)
				2698	if (match and
				2699	not (match.group(1).isdigit() and match.group(2).isdigit()) and
				2700	not (match.group(1) == 'operator' and match.group(2) == ';')):
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2701	error(filename, linenum, 'whitespace/operators', 3,
				2702	'Missing spaces around <<')
				2703	elif not Match(r'#.*include', line):
				2704	# Avoid false positives on ->
				2705	reduced_line = line.replace('->', '')
				2706
				2707	# Look for < that is not surrounded by spaces. This is only
				2708	# triggered if both sides are missing spaces, even though
				2709	# technically should should flag if at least one side is missing a
				2710	# space. This is done to avoid some false positives with shifts.
				2711	match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
				2712	if (match and
				2713	not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
				2714	error(filename, linenum, 'whitespace/operators', 3,
				2715	'Missing spaces around <')
				2716
				2717	# Look for > that is not surrounded by spaces. Similar to the
				2718	# above, we only trigger if both sides are missing spaces to avoid
				2719	# false positives with shifts.
				2720	match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
				2721	if (match and
				2722	not FindPreviousMatchingAngleBracket(clean_lines, linenum,
				2723	match.group(1))):
				2724	error(filename, linenum, 'whitespace/operators', 3,
				2725	'Missing spaces around >')
				2726
				2727	# We allow no-spaces around >> for almost anything. This is because
				2728	# C++11 allows ">>" to close nested templates, which accounts for
				2729	# most cases when ">>" is not followed by a space.
				2730	#
				2731	# We still warn on ">>" followed by alpha character, because that is
				2732	# likely due to ">>" being used for right shifts, e.g.:
				2733	# value >> alpha
				2734	#
				2735	# When ">>" is used to close templates, the alphanumeric letter that
				2736	# follows would be part of an identifier, and there should still be
				2737	# a space separating the template type and the identifier.
				2738	# type<type<type>> alpha
				2739	match = Search(r'>>[a-zA-Z_]', line)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2740	if match:
				2741	error(filename, linenum, 'whitespace/operators', 3,
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2742	'Missing spaces around >>')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2743
				2744	# There shouldn't be space around unary operators
				2745	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				2746	if match:
				2747	error(filename, linenum, 'whitespace/operators', 4,
				2748	'Extra space for operator %s' % match.group(1))
				2749
				2750	# A pet peeve of mine: no spaces after an if, while, switch, or for
				2751	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				2752	if match:
				2753	error(filename, linenum, 'whitespace/parens', 5,
				2754	'Missing space before ( in %s' % match.group(1))
				2755
				2756	# For if/for/while/switch, the left and right parens should be
				2757	# consistent about how many spaces are inside the parens, and
				2758	# there should either be zero or one spaces inside the parens.
				2759	# We don't want: "if ( foo)" or "if ( foo )".
				2760	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
				2761	match = Search(r'\b(if\|for\|while\|switch)\s*'
				2762	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				2763	line)
				2764	if match:
				2765	if len(match.group(2)) != len(match.group(4)):
				2766	if not (match.group(3) == ';' and
				2767	len(match.group(2)) == 1 + len(match.group(4)) or
				2768	not match.group(2) and Search(r'\bfor\s$.; $', line)):
				2769	error(filename, linenum, 'whitespace/parens', 5,
				2770	'Mismatching spaces inside () in %s' % match.group(1))
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2771	if len(match.group(2)) not in [0, 1]:
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2772	error(filename, linenum, 'whitespace/parens', 5,
				2773	'Should have zero or one spaces inside ( and ) in %s' %
				2774	match.group(1))
				2775
				2776	# You should always have a space after a comma (either as fn arg or operator)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2777	#
				2778	# This does not apply when the non-space character following the
				2779	# comma is another comma, since the only time when that happens is
				2780	# for empty macro arguments.
				2781	#
				2782	# We run this check in two passes: first pass on elided lines to
				2783	# verify that lines contain missing whitespaces, second pass on raw
				2784	# lines to confirm that those missing whitespaces are not due to
				2785	# elided comments.
				2786	if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2787	error(filename, linenum, 'whitespace/comma', 3,
				2788	'Missing space after ,')
				2789
				2790	# You should always have a space after a semicolon
				2791	# except for few corner cases
				2792	# TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
				2793	# space after ;
				2794	if Search(r';[^\s};\\)/]', line):
				2795	error(filename, linenum, 'whitespace/semicolon', 3,
				2796	'Missing space after ;')
				2797
				2798	# Next we will look for issues with function calls.
				2799	CheckSpacingForFunctionCall(filename, line, linenum, error)
				2800
				2801	# Except after an opening paren, or after another opening brace (in case of
				2802	# an initializer list, for instance), you should have spaces before your
				2803	# braces. And since you should never have braces at the beginning of a line,
				2804	# this is an easy test.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2805	match = Match(r'^(.*[^ ({]){', line)
				2806	if match:
				2807	# Try a bit harder to check for brace initialization. This
				2808	# happens in one of the following forms:
				2809	# Constructor() : initializer_list_{} { ... }
				2810	# Constructor{}.MemberFunction()
				2811	# Type variable{};
				2812	# FunctionCall(type{}, ...);
				2813	# LastArgument(..., type{});
				2814	# LOG(INFO) << type{} << " ...";
				2815	# map_of_type[{...}] = ...;
				2816	#
				2817	# We check for the character following the closing brace, and
				2818	# silence the warning if it's one of those listed above, i.e.
				2819	# "{.;,)<]".
				2820	#
				2821	# To account for nested initializer list, we allow any number of
				2822	# closing braces up to "{;,)<". We can't simply silence the
				2823	# warning on first sight of closing brace, because that would
				2824	# cause false negatives for things that are not initializer lists.
				2825	# Silence this: But not this:
				2826	# Outer{ if (...) {
				2827	# Inner{...} if (...){ // Missing space before {
				2828	# }; }
				2829	#
				2830	# There is a false negative with this approach if people inserted
				2831	# spurious semicolons, e.g. "if (cond){};", but we will catch the
				2832	# spurious semicolon with a separate check.
				2833	(endline, endlinenum, endpos) = CloseExpression(
				2834	clean_lines, linenum, len(match.group(1)))
				2835	trailing_text = ''
				2836	if endpos > -1:
				2837	trailing_text = endline[endpos:]
				2838	for offset in xrange(endlinenum + 1,
				2839	min(endlinenum + 3, clean_lines.NumLines() - 1)):
				2840	trailing_text += clean_lines.elided[offset]
				2841	if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
				2842	error(filename, linenum, 'whitespace/braces', 5,
				2843	'Missing space before {')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2844
				2845	# Make sure '} else {' has spaces.
				2846	if Search(r'}else', line):
				2847	error(filename, linenum, 'whitespace/braces', 5,
				2848	'Missing space before else')
				2849
				2850	# You shouldn't have spaces before your brackets, except maybe after
				2851	# 'delete []' or 'new char * []'.
				2852	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				2853	error(filename, linenum, 'whitespace/braces', 5,
				2854	'Extra space before [')
				2855
				2856	# You shouldn't have a space before a semicolon at the end of the line.
				2857	# There's a special case for "for" since the style guide allows space before
				2858	# the semicolon there.
				2859	if Search(r':\s;\s$', line):
				2860	error(filename, linenum, 'whitespace/semicolon', 5,
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2861	'Semicolon defining empty statement. Use {} instead.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2862	elif Search(r'^\s;\s$', line):
				2863	error(filename, linenum, 'whitespace/semicolon', 5,
				2864	'Line contains only semicolon. If this should be an empty statement, '
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2865	'use {} instead.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2866	elif (Search(r'\s+;\s*$', line) and
				2867	not Search(r'\bfor\b', line)):
				2868	error(filename, linenum, 'whitespace/semicolon', 5,
				2869	'Extra space before last semicolon. If this should be an empty '
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2870	'statement, use {} instead.')
				2871
				2872	# In range-based for, we wanted spaces before and after the colon, but
				2873	# not around "::" tokens that might appear.
				2874	if (Search('for \(.[^:]:[^: ]', line) or
				2875	Search('for \(.[^: ]:[^:]', line)):
				2876	error(filename, linenum, 'whitespace/forcolon', 2,
				2877	'Missing space around colon in range-based for loop')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2878
				2879
				2880	def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
				2881	"""Checks for additional blank line issues related to sections.
				2882
				2883	Currently the only thing checked here is blank line before protected/private.
				2884
				2885	Args:
				2886	filename: The name of the current file.
				2887	clean_lines: A CleansedLines instance containing the file.
				2888	class_info: A _ClassInfo objects.
				2889	linenum: The number of the line to check.
				2890	error: The function to call with any errors found.
				2891	"""
				2892	# Skip checks if the class is small, where small means 25 lines or less.
				2893	# 25 lines seems like a good cutoff since that's the usual height of
				2894	# terminals, and any class that can't fit in one screen can't really
				2895	# be considered "small".
				2896	#
				2897	# Also skip checks if we are on the first line. This accounts for
				2898	# classes that look like
				2899	# class Foo { public: ... };
				2900	#
				2901	# If we didn't find the end of the class, last_line would be zero,
				2902	# and the check will be skipped by the first condition.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2903	if (class_info.last_line - class_info.starting_linenum <= 24 or
				2904	linenum <= class_info.starting_linenum):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2905	return
				2906
				2907	matched = Match(r'\s*(public\|protected\|private):', clean_lines.lines[linenum])
				2908	if matched:
				2909	# Issue warning if the line before public/protected/private was
				2910	# not a blank line, but don't do this if the previous line contains
				2911	# "class" or "struct". This can happen two ways:
				2912	# - We are at the beginning of the class.
				2913	# - We are forward-declaring an inner class that is semantically
				2914	# private, but needed to be public for implementation reasons.
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2915	# Also ignores cases where the previous line ends with a backslash as can be
				2916	# common when defining classes in C macros.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2917	prev_line = clean_lines.lines[linenum - 1]
				2918	if (not IsBlankLine(prev_line) and
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2919	not Search(r'\b(class\|struct)\b', prev_line) and
				2920	not Search(r'\\$', prev_line)):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2921	# Try a bit harder to find the beginning of the class. This is to
				2922	# account for multi-line base-specifier lists, e.g.:
				2923	# class Derived
				2924	# : public Base {
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2925	end_class_head = class_info.starting_linenum
				2926	for i in range(class_info.starting_linenum, linenum):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2927	if Search(r'\{\s*$', clean_lines.lines[i]):
				2928	end_class_head = i
				2929	break
				2930	if end_class_head < linenum - 1:
				2931	error(filename, linenum, 'whitespace/blank_line', 3,
				2932	'"%s:" should be preceded by a blank line' % matched.group(1))
				2933
				2934
				2935	def GetPreviousNonBlankLine(clean_lines, linenum):
				2936	"""Return the most recent non-blank line and its line number.
				2937
				2938	Args:
				2939	clean_lines: A CleansedLines instance containing the file contents.
				2940	linenum: The number of the line to check.
				2941
				2942	Returns:
				2943	A tuple with two elements. The first element is the contents of the last
				2944	non-blank line before the current line, or the empty string if this is the
				2945	first non-blank line. The second is the line number of that line, or -1
				2946	if this is the first non-blank line.
				2947	"""
				2948
				2949	prevlinenum = linenum - 1
				2950	while prevlinenum >= 0:
				2951	prevline = clean_lines.elided[prevlinenum]
				2952	if not IsBlankLine(prevline): # if not a blank line...
				2953	return (prevline, prevlinenum)
				2954	prevlinenum -= 1
				2955	return ('', -1)
				2956
				2957
				2958	def CheckBraces(filename, clean_lines, linenum, error):
				2959	"""Looks for misplaced braces (e.g. at the end of line).
				2960
				2961	Args:
				2962	filename: The name of the current file.
				2963	clean_lines: A CleansedLines instance containing the file.
				2964	linenum: The number of the line to check.
				2965	error: The function to call with any errors found.
				2966	"""
				2967
				2968	line = clean_lines.elided[linenum] # get rid of comments and strings
				2969
				2970	if Match(r'\s{\s$', line):
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2971	# We allow an open brace to start a line in the case where someone is using
				2972	# braces in a block to explicitly create a new scope, which is commonly used
				2973	# to control the lifetime of stack-allocated variables. Braces are also
				2974	# used for brace initializers inside function calls. We don't detect this
				2975	# perfectly: we just don't complain if the last non-whitespace character on
				2976	# the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
				2977	# previous line starts a preprocessor block.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2978	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	2979	if (not Search(r'[,;:}{(]\s*$', prevline) and
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	2980	not Match(r'\s*#', prevline)):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	2981	error(filename, linenum, 'whitespace/braces', 4,
				2982	'{ should almost always be at the end of the previous line')
				2983
				2984	# An else clause should be on the same line as the preceding closing brace.
				2985	if Match(r'\selse\s', line):
				2986	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2987	if Match(r'\s}\s$', prevline):
				2988	error(filename, linenum, 'whitespace/newline', 4,
				2989	'An else should appear on the same line as the preceding }')
				2990
				2991	# If braces come on one side of an else, they should be on both.
				2992	# However, we have to worry about "else if" that spans multiple lines!
				2993	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				2994	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				2995	# find the ( after the if
				2996	pos = line.find('else if')
				2997	pos = line.find('(', pos)
				2998	if pos > 0:
				2999	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				3000	if endline[endpos:].find('{') == -1: # must be brace after if
				3001	error(filename, linenum, 'readability/braces', 5,
				3002	'If an else has a brace on one side, it should have it on both')
				3003	else: # common case: else not followed by a multi-line if
				3004	error(filename, linenum, 'readability/braces', 5,
				3005	'If an else has a brace on one side, it should have it on both')
				3006
				3007	# Likewise, an else should never have the else clause on the same line
				3008	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				3009	error(filename, linenum, 'whitespace/newline', 4,
				3010	'Else clause should never be on same line as else (use 2 lines)')
				3011
				3012	# In the same way, a do/while should never be on one line
				3013	if Match(r'\s*do [^\s{]', line):
				3014	error(filename, linenum, 'whitespace/newline', 4,
				3015	'do/while clauses should not be on a single line')
				3016
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3017	# Block bodies should not be followed by a semicolon. Due to C++11
				3018	# brace initialization, there are more places where semicolons are
				3019	# required than not, so we use a whitelist approach to check these
				3020	# rather than a blacklist. These are the places where "};" should
				3021	# be replaced by just "}":
				3022	# 1. Some flavor of block following closing parenthesis:
				3023	# for (;;) {};
				3024	# while (...) {};
				3025	# switch (...) {};
				3026	# Function(...) {};
				3027	# if (...) {};
				3028	# if (...) else if (...) {};
				3029	#
				3030	# 2. else block:
				3031	# if (...) else {};
				3032	#
				3033	# 3. const member function:
				3034	# Function(...) const {};
				3035	#
				3036	# 4. Block following some statement:
				3037	# x = 42;
				3038	# {};
				3039	#
				3040	# 5. Block at the beginning of a function:
				3041	# Function(...) {
				3042	# {};
				3043	# }
				3044	#
				3045	# Note that naively checking for the preceding "{" will also match
				3046	# braces inside multi-dimensional arrays, but this is fine since
				3047	# that expression will not contain semicolons.
				3048	#
				3049	# 6. Block following another block:
				3050	# while (true) {}
				3051	# {};
				3052	#
				3053	# 7. End of namespaces:
				3054	# namespace {};
				3055	#
				3056	# These semicolons seems far more common than other kinds of
				3057	# redundant semicolons, possibly due to people converting classes
				3058	# to namespaces. For now we do not warn for this case.
				3059	#
				3060	# Try matching case 1 first.
				3061	match = Match(r'^(.\)\s)\{', line)
				3062	if match:
				3063	# Matched closing parenthesis (case 1). Check the token before the
				3064	# matching opening parenthesis, and don't warn if it looks like a
				3065	# macro. This avoids these false positives:
				3066	# - macro that defines a base class
				3067	# - multi-line macro that defines a base class
				3068	# - macro that defines the whole class-head
				3069	#
				3070	# But we still issue warnings for macros that we know are safe to
				3071	# warn, specifically:
				3072	# - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
				3073	# - TYPED_TEST
				3074	# - INTERFACE_DEF
				3075	# - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
				3076	#
				3077	# We implement a whitelist of safe macros instead of a blacklist of
				3078	# unsafe macros, even though the latter appears less frequently in
				3079	# google code and would have been easier to implement. This is because
				3080	# the downside for getting the whitelist wrong means some extra
				3081	# semicolons, while the downside for getting the blacklist wrong
				3082	# would result in compile errors.
				3083	#
				3084	# In addition to macros, we also don't want to warn on compound
				3085	# literals.
				3086	closing_brace_pos = match.group(1).rfind(')')
				3087	opening_parenthesis = ReverseCloseExpression(
				3088	clean_lines, linenum, closing_brace_pos)
				3089	if opening_parenthesis[2] > -1:
				3090	line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
				3091	macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
				3092	if ((macro and
				3093	macro.group(1) not in (
				3094	'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
				3095	'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
				3096	'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
				3097	Search(r'\s+=\s*$', line_prefix)):
				3098	match = None
				3099
				3100	else:
				3101	# Try matching cases 2-3.
				3102	match = Match(r'^(.(?:else\|\)\sconst)\s*)\{', line)
				3103	if not match:
				3104	# Try matching cases 4-6. These are always matched on separate lines.
				3105	#
				3106	# Note that we can't simply concatenate the previous line to the
				3107	# current line and do a single match, otherwise we may output
				3108	# duplicate warnings for the blank line case:
				3109	# if (cond) {
				3110	# // blank line
				3111	# }
				3112	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				3113	if prevline and Search(r'[;{}]\s*$', prevline):
				3114	match = Match(r'^(\s*)\{', line)
				3115
				3116	# Check matching closing brace
				3117	if match:
				3118	(endline, endlinenum, endpos) = CloseExpression(
				3119	clean_lines, linenum, len(match.group(1)))
				3120	if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
				3121	# Current {} pair is eligible for semicolon check, and we have found
				3122	# the redundant semicolon, output warning here.
				3123	#
				3124	# Note: because we are scanning forward for opening braces, and
				3125	# outputting warnings for the matching closing brace, if there are
				3126	# nested blocks with trailing semicolons, we will get the error
				3127	# messages in reversed order.
				3128	error(filename, endlinenum, 'readability/braces', 4,
				3129	"You don't need a ; after a }")
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3130
				3131
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Search for loop keywords at the beginning of the line.  Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return

  # Find the end of the conditional expression
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Output warning if what follows the condition expression is a semicolon.
  # No warning for all other cases, including whitespace or newline, since we
  # have a separate check for semicolons preceded by whitespace.
  if end_pos >= 0 and Match(r';', end_line[end_pos:]):
    if matched.group(1) == 'if':
      error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
            'Empty conditional bodies should use {}')
    else:
      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
            'Empty loop bodies should use {} or continue')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3165
				3166
				3167	def CheckCheck(filename, clean_lines, linenum, error):
				3168	"""Checks the use of CHECK and EXPECT macros.
				3169
				3170	Args:
				3171	filename: The name of the current file.
				3172	clean_lines: A CleansedLines instance containing the file.
				3173	linenum: The number of the line to check.
				3174	error: The function to call with any errors found.
				3175	"""
				3176
				3177	# Decide the set of replacement macros that should be suggested
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3178	lines = clean_lines.elided
				3179	check_macro = None
				3180	start_pos = -1
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3181	for macro in _CHECK_MACROS:
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3182	i = lines[linenum].find(macro)
				3183	if i >= 0:
				3184	check_macro = macro
				3185
				3186	# Find opening parenthesis. Do a regular expression match here
				3187	# to make sure that we are matching the expected CHECK macro, as
				3188	# opposed to some other macro that happens to contain the CHECK
				3189	# substring.
				3190	matched = Match(r'^(.\b' + check_macro + r'\s)\(', lines[linenum])
				3191	if not matched:
				3192	continue
				3193	start_pos = len(matched.group(1))
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3194	break
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3195	if not check_macro or start_pos < 0:
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3196	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				3197	return
				3198
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3199	# Find end of the boolean expression by matching parentheses
				3200	(last_line, end_line, end_pos) = CloseExpression(
				3201	clean_lines, linenum, start_pos)
				3202	if end_pos < 0:
				3203	return
				3204	if linenum == end_line:
				3205	expression = lines[linenum][start_pos + 1:end_pos - 1]
				3206	else:
				3207	expression = lines[linenum][start_pos + 1:]
				3208	for i in xrange(linenum + 1, end_line):
				3209	expression += lines[i]
				3210	expression += last_line[0:end_pos - 1]
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3211
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3212	# Parse expression so that we can take parentheses into account.
				3213	# This avoids false positives for inputs like "CHECK((a < 4) == b)",
				3214	# which is not replaceable by CHECK_LE.
				3215	lhs = ''
				3216	rhs = ''
				3217	operator = None
				3218	while expression:
				3219	matched = Match(r'^\s(<<\|<<=\|>>\|>>=\|->\\|->\|&&\|\\|\\|\|'
				3220	r'==\|!=\|>=\|>\|<=\|<\|\()(.*)$', expression)
				3221	if matched:
				3222	token = matched.group(1)
				3223	if token == '(':
				3224	# Parenthesized operand
				3225	expression = matched.group(2)
				3226	(end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
				3227	if end < 0:
				3228	return # Unmatched parenthesis
				3229	lhs += '(' + expression[0:end]
				3230	expression = expression[end:]
				3231	elif token in ('&&', '\|\|'):
				3232	# Logical and/or operators. This means the expression
				3233	# contains more than one term, for example:
				3234	# CHECK(42 < a && a < b);
				3235	#
				3236	# These are not replaceable with CHECK_LE, so bail out early.
				3237	return
				3238	elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
				3239	# Non-relational operator
				3240	lhs += token
				3241	expression = matched.group(2)
				3242	else:
				3243	# Relational operator
				3244	operator = token
				3245	rhs = matched.group(2)
				3246	break
				3247	else:
				3248	# Unparenthesized operand. Instead of appending to lhs one character
				3249	# at a time, we do another regular expression match to consume several
				3250	# characters at once if possible. Trivial benchmark shows that this
				3251	# is more efficient when the operands are longer than a single
				3252	# character, which is generally the case.
				3253	matched = Match(r'^([^-=!<>()&\|]+)(.*)$', expression)
				3254	if not matched:
				3255	matched = Match(r'^(\s\S)(.)$', expression)
				3256	if not matched:
				3257	break
				3258	lhs += matched.group(1)
				3259	expression = matched.group(2)
				3260
				3261	# Only apply checks if we got all parts of the boolean expression
				3262	if not (lhs and operator and rhs):
				3263	return
				3264
				3265	# Check that rhs do not contain logical operators. We already know
				3266	# that lhs is fine since the loop above parses out && and \|\|.
				3267	if rhs.find('&&') > -1 or rhs.find('\|\|') > -1:
				3268	return
				3269
				3270	# At least one of the operands must be a constant literal. This is
				3271	# to avoid suggesting replacements for unprintable things like
				3272	# CHECK(variable != iterator)
				3273	#
				3274	# The following pattern matches decimal, hex integers, strings, and
				3275	# characters (in that order).
				3276	lhs = lhs.strip()
				3277	rhs = rhs.strip()
				3278	match_constant = r'^([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')$'
				3279	if Match(match_constant, lhs) or Match(match_constant, rhs):
				3280	# Note: since we know both lhs and rhs, we can provide a more
				3281	# descriptive error message like:
				3282	# Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
				3283	# Instead of:
				3284	# Consider using CHECK_EQ instead of CHECK(a == b)
				3285	#
				3286	# We are still keeping the less descriptive message because if lhs
				3287	# or rhs gets long, the error message might become unreadable.
				3288	error(filename, linenum, 'readability/check', 2,
				3289	'Consider using %s instead of %s(a %s b)' % (
				3290	_CHECK_REPLACEMENT[check_macro][operator],
				3291	check_macro, operator))
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3292
				3293
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives.  At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # Only the comment delimiters are tested: looking for a bare '/' would
  # also skip every line that contains a division.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if line.find('/*') >= 0 or line.find('*/') >= 0:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
				3324
				3325
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-text input (a Python 2
    byte string, or bytes on Python 3) is measured with len().
  """
  # 'unicode' only exists on Python 2.  On Python 3 every str is Unicode
  # text, so fall back to str instead of raising NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # NFC first, so that a base character followed by a combining mark is
  # composed into a single code point wherever a composed form exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2   # wide / fullwidth characters occupy two columns
    elif not unicodedata.combining(uc):
      width += 1   # remaining combining marks occupy no column
  return width
				3346
				3347
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section labels
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # The closing guard is matched with two spaces before the comment,
    # following the two-spaces-before-trailing-comment style rule.
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    # Lines beyond 125% of the limit are reported at a higher confidence.
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    # for loops are allowed two ;'s (and may run over two lines), so also
    # look at the previous non-blank line before complaining.
    prev_nonblank = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_loop = (prev_nonblank.find('for') != -1 and
                          prev_nonblank.find(';') == -1)
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = ((cleansed_line.find('case ') != -1 or
                      cleansed_line.find('default:') != -1) and
                     cleansed_line.find('break;') != -1)
    if not continues_for_loop and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3453
				3454
				3455	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				3456	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				3457	# Matches the first component of a filename delimited by -s and _s. That is:
				3458	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				3459	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				3460	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				3461	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				3462	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				3463
				3464
				3465	def _DropCommonSuffixes(filename):
				3466	"""Drops common suffixes like _test.cc or -inl.h from filename.
				3467
				3468	For example:
				3469	>>> _DropCommonSuffixes('foo/foo-inl.h')
				3470	'foo/foo'
				3471	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				3472	'foo/bar/foo'
				3473	>>> _DropCommonSuffixes('foo/foo_internal.h')
				3474	'foo/foo'
				3475	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				3476	'foo/foo_unusualinternal'
				3477
				3478	Args:
				3479	filename: The input filename.
				3480
				3481	Returns:
				3482	The filename with the common suffix removed.
				3483	"""
				3484	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				3485	'inl.h', 'impl.h', 'internal.h'):
				3486	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				3487	filename[-len(suffix) - 1] in ('-', '_')):
				3488	return filename[:-len(suffix) - 1]
				3489	return os.path.splitext(filename)[0]
				3490
				3491
				3492	def _IsTestFilename(filename):
				3493	"""Determines if the given filename has a suffix that identifies it as a test.
				3494
				3495	Args:
				3496	filename: The input filename.
				3497
				3498	Returns:
				3499	True if 'filename' looks like a test, False otherwise.
				3500	"""
				3501	if (filename.endswith('_test.cc') or
				3502	filename.endswith('_unittest.cc') or
				3503	filename.endswith('_regtest.cc')):
				3504	return True
				3505	else:
				3506	return False
				3507
				3508
				3509	def _ClassifyInclude(fileinfo, include, is_system):
				3510	"""Figures out what kind of header 'include' is.
				3511
				3512	Args:
				3513	fileinfo: The current file cpplint is running over. A FileInfo instance.
				3514	include: The path to a #included file.
				3515	is_system: True if the #include used <> rather than "".
				3516
				3517	Returns:
				3518	One of the _XXX_HEADER constants.
				3519
				3520	For example:
				3521	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				3522	_C_SYS_HEADER
				3523	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				3524	_CPP_SYS_HEADER
				3525	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				3526	_LIKELY_MY_HEADER
				3527	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				3528	... 'bar/foo_other_ext.h', False)
				3529	_POSSIBLE_MY_HEADER
				3530	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				3531	_OTHER_HEADER
				3532	"""
				3533	# This is a list of all standard c++ header files, except
				3534	# those already checked for above.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3535	is_cpp_h = include in _CPP_HEADERS
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3536
				3537	if is_system:
				3538	if is_cpp_h:
				3539	return _CPP_SYS_HEADER
				3540	else:
				3541	return _C_SYS_HEADER
				3542
				3543	# If the target file and the include we're checking share a
				3544	# basename when we drop common extensions, and the include
				3545	# lives in . , then it's likely to be owned by the target file.
				3546	target_dir, target_base = (
				3547	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				3548	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				3549	if target_base == include_base and (
				3550	include_dir == target_dir or
				3551	include_dir == os.path.normpath(target_dir + '/../public')):
				3552	return _LIKELY_MY_HEADER
				3553
				3554	# If the target and include share some initial basename
				3555	# component, it's possible the target is implementing the
				3556	# include, so it's allowed to be first, but we'll never
				3557	# complain if it's not there.
				3558	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				3559	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				3560	if (target_first_component and include_first_component and
				3561	target_first_component.group(0) ==
				3562	include_first_component.group(0)):
				3563	return _POSSIBLE_MY_HEADER
				3564
				3565	return _OTHER_HEADER
				3566
				3567
				3568
				3569	def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
				3570	"""Check rules that are applicable to #include lines.
				3571
				3572	Strings on #include lines are NOT removed from elided line, to make
				3573	certain tasks easier. However, to prevent false positives, checks
				3574	applicable to #include lines in CheckLanguage must be put here.
				3575
				3576	Args:
				3577	filename: The name of the current file.
				3578	clean_lines: A CleansedLines instance containing the file.
				3579	linenum: The number of the line to check.
				3580	include_state: An _IncludeState instance in which the headers are inserted.
				3581	error: The function to call with any errors found.
				3582	"""
				3583	fileinfo = FileInfo(filename)
				3584
				3585	line = clean_lines.lines[linenum]
				3586
				3587	# "include" should use the new style "foo/bar.h" instead of just "bar.h"
				3588	if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
				3589	error(filename, linenum, 'build/include', 4,
				3590	'Include the directory when naming .h files')
				3591
				3592	# we shouldn't include a file more than once. actually, there are a
				3593	# handful of instances where doing so is okay, but in general it's
				3594	# not.
				3595	match = _RE_PATTERN_INCLUDE.search(line)
				3596	if match:
				3597	include = match.group(2)
				3598	is_system = (match.group(1) == '<')
				3599	if include in include_state:
				3600	error(filename, linenum, 'build/include', 4,
				3601	'"%s" already included at %s:%s' %
				3602	(include, filename, include_state[include]))
				3603	else:
				3604	include_state[include] = linenum
				3605
				3606	# We want to ensure that headers appear in the right order:
				3607	# 1) for foo.cc, foo.h (preferred location)
				3608	# 2) c system files
				3609	# 3) cpp system files
				3610	# 4) for foo.cc, foo.h (deprecated location)
				3611	# 5) other google headers
				3612	#
				3613	# We classify each include statement as one of those 5 types
				3614	# using a number of techniques. The include_state object keeps
				3615	# track of the highest type seen, and complains if we see a
				3616	# lower type after that.
				3617	error_message = include_state.CheckNextIncludeOrder(
				3618	_ClassifyInclude(fileinfo, include, is_system))
				3619	if error_message:
				3620	error(filename, linenum, 'build/include_order', 4,
				3621	'%s. Should be: %s.h, c system, c++ system, other.' %
				3622	(error_message, fileinfo.BaseName()))
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3623	canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
				3624	if not include_state.IsInAlphabeticalOrder(
				3625	clean_lines, linenum, canonical_include):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3626	error(filename, linenum, 'build/include_alpha', 4,
				3627	'Include "%s" not in alphabetical order' % include)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3628	include_state.SetLastHeader(canonical_include)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3629
				3630	# Look for any of the stream classes that are part of standard C++.
				3631	match = _RE_PATTERN_INCLUDE.match(line)
				3632	if match:
				3633	include = match.group(2)
				3634	if Match(r'(f\|ind\|io\|i\|o\|parse\|pf\|stdio\|str\|)?stream$', include):
				3635	# Many unit tests use cout, so we exempt them.
				3636	if not _IsTestFilename(filename):
				3637	error(filename, linenum, 'readability/streams', 3,
				3638	'Streams are highly discouraged.')
				3639
				3640
				3641	def _GetTextInside(text, start_pattern):
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3642	r"""Retrieves all the text between matching open and close parentheses.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3643
				3644	Given a string of lines and a regular expression string, retrieve all the text
				3645	following the expression and between opening punctuation symbols like
				3646	(, [, or {, and the matching close-punctuation symbol. This properly nested
				3647	occurrences of the punctuations, so for the text like
				3648	printf(a(), b(c()));
				3649	a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
				3650	start_pattern must match string having an open punctuation symbol at the end.
				3651
				3652	Args:
				3653	text: The lines to extract text. Its comments and strings must be elided.
				3654	It can be single line and can span multiple lines.
				3655	start_pattern: The regexp string indicating where to start extracting
				3656	the text.
				3657	Returns:
				3658	The extracted text.
				3659	None if either the opening string or ending punctuation could not be found.
				3660	"""
				3661	# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
				3662	# rewritten to use _GetTextInside (and use inferior regexp matching today).
				3663
				3664	# Give opening punctuations to get the matching close-punctuations.
				3665	matching_punctuation = {'(': ')', '{': '}', '[': ']'}
				3666	closing_punctuation = set(matching_punctuation.itervalues())
				3667
				3668	# Find the position to start extracting text.
				3669	match = re.search(start_pattern, text, re.M)
				3670	if not match: # start_pattern not found in text.
				3671	return None
				3672	start_position = match.end(0)
				3673
				3674	assert start_position > 0, (
				3675	'start_pattern must ends with an opening punctuation.')
				3676	assert text[start_position - 1] in matching_punctuation, (
				3677	'start_pattern must ends with an opening punctuation.')
				3678	# Stack of closing punctuations we expect to have in text after position.
				3679	punctuation_stack = [matching_punctuation[text[start_position - 1]]]
				3680	position = start_position
				3681	while punctuation_stack and position < len(text):
				3682	if text[position] == punctuation_stack[-1]:
				3683	punctuation_stack.pop()
				3684	elif text[position] in closing_punctuation:
				3685	# A closing punctuation without matching opening punctuations.
				3686	return None
				3687	elif text[position] in matching_punctuation:
				3688	punctuation_stack.append(matching_punctuation[text[position]])
				3689	position += 1
				3690	if punctuation_stack:
				3691	# Opening punctuations left without matching close-punctuations.
				3692	return None
				3693	# punctuations match.
				3694	return text[start_position:position - 1]
				3695
				3696
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
				3721
				3722
				3723	def CheckLanguage(filename, clean_lines, linenum, file_extension,
				3724	include_state, nesting_state, error):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3725	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				3726
				3727	Some of these rules are hard to test (function overloading, using
				3728	uint32 inappropriately), but we do the best we can.
				3729
				3730	Args:
				3731	filename: The name of the current file.
				3732	clean_lines: A CleansedLines instance containing the file.
				3733	linenum: The number of the line to check.
				3734	file_extension: The extension (without the dot) of the filename.
				3735	include_state: An _IncludeState instance in which the headers are inserted.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3736	nesting_state: A _NestingState instance which maintains information about
				3737	the current stack of nested blocks being parsed.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3738	error: The function to call with any errors found.
				3739	"""
				3740	# If the line is empty or consists of entirely a comment, no need to
				3741	# check it.
				3742	line = clean_lines.elided[linenum]
				3743	if not line:
				3744	return
				3745
				3746	match = _RE_PATTERN_INCLUDE.search(line)
				3747	if match:
				3748	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				3749	return
				3750
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3751	# Reset include state across preprocessor directives. This is meant
				3752	# to silence warnings for conditional includes.
				3753	if Match(r'^\s#\s(?:ifdef\|elif\|else\|endif)\b', line):
				3754	include_state.ResetSection()
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3755
				3756	# Make Windows paths like Unix.
				3757	fullname = os.path.abspath(filename).replace('\\', '/')
				3758
				3759	# TODO(unknown): figure out if they're using default arguments in fn proto.
				3760
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3761	# Check to see if they're using an conversion function cast.
				3762	# I just try to capture the most common basic types, though there are more.
				3763	# Parameterless conversion functions, such as bool(), are allowed as they are
				3764	# probably a member operator declaration or default constructor.
				3765	match = Search(
				3766	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3767	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)'
				3768	r'(\([^)].*)', line)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3769	if match:
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3770	matched_new = match.group(1)
				3771	matched_type = match.group(2)
				3772	matched_funcptr = match.group(3)
				3773
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3774	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				3775	# where type may be float(), int(string), etc. Without context they are
				3776	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				3777	# MockCallback takes a template parameter of the form return_type(arg_type),
				3778	# which looks much like the cast we're trying to detect.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3779	#
				3780	# std::function<> wrapper has a similar problem.
				3781	#
				3782	# Return types for function pointers also look like casts if they
				3783	# don't have an extra space.
				3784	if (matched_new is None and # If new operator, then this isn't a cast
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3785	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3786	Search(r'\bMockCallback<.*>', line) or
				3787	Search(r'\bstd::function<.*>', line)) and
				3788	not (matched_funcptr and
				3789	Match(r'$(?:[^() ]+::\s\\s)?[^() ]+$\s\(',
				3790	matched_funcptr))):
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	3791	# Try a bit harder to catch gmock lines: the only place where
				3792	# something looks like an old-style cast is where we declare the
				3793	# return type of the mocked method, and the only time when we
				3794	# are missing context is if MOCK_METHOD was split across
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3795	# multiple lines. The missing MOCK_METHOD is usually one or two
				3796	# lines back, so scan back one or two lines.
				3797	#
				3798	# It's not possible for gmock macros to appear in the first 2
				3799	# lines, since the class head + section name takes up 2 lines.
				3800	if (linenum < 2 or
				3801	not (Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s$',
				3802	clean_lines.elided[linenum - 1]) or
				3803	Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s$',
				3804	clean_lines.elided[linenum - 2]))):
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	3805	error(filename, linenum, 'readability/casting', 4,
				3806	'Using deprecated casting style. '
				3807	'Use static_cast<%s>(...) instead' %
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3808	matched_type)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3809
				3810	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3811	'static_cast',
				3812	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$', error)
				3813
				3814	# This doesn't catch all cases. Consider (const char * const)"hello".
				3815	#
				3816	# (char *) "foo" should always be a const_cast (reinterpret_cast won't
				3817	# compile).
				3818	if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3819	'const_cast', r'$(char\s?\+\s?)$\s"', error):
				3820	pass
				3821	else:
				3822	# Check pointer casts for other than string constants
				3823	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3824	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
				3825
				3826	# In addition, we look for people taking the address of a cast. This
				3827	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				3828	# point where you think.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3829	match = Search(
				3830	r'(?:&$([^)]+)$[\w(])\|'
				3831	r'(?:&(static\|dynamic\|down\|reinterpret)_cast\b)', line)
				3832	if match and match.group(1) != '*':
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3833	error(filename, linenum, 'runtime/casting', 4,
				3834	('Are you taking an address of a cast? '
				3835	'This is dangerous: could be a temp var. '
				3836	'Take the address before doing the cast, rather than after'))
				3837
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3838	# Create an extended_line, which is the concatenation of the current and
				3839	# next lines, for more effective checking of code that may span more than one
				3840	# line.
				3841	if linenum + 1 < clean_lines.NumLines():
				3842	extended_line = line + clean_lines.elided[linenum + 1]
				3843	else:
				3844	extended_line = line
				3845
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3846	# Check for people declaring static/global STL strings at the top level.
				3847	# This is dangerous because the C++ language does not guarantee that
				3848	# globals with constructors are initialized before the first access.
				3849	match = Match(
				3850	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				3851	line)
				3852	# Make sure it's not a function.
				3853	# Function template specialization looks like: "string foo<Type>(...".
				3854	# Class template definitions look like: "string Foo<Type>::Method(...".
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	3855	#
				3856	# Also ignore things that look like operators. These are matched separately
				3857	# because operator names cross non-word boundaries. If we change the pattern
				3858	# above, we would decrease the accuracy of matching identifiers.
				3859	if (match and
				3860	not Search(r'\boperator\W', line) and
				3861	not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)', match.group(3))):
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3862	error(filename, linenum, 'runtime/string', 4,
				3863	'For a static/global string constant, use a C style string instead: '
				3864	'"%schar %s[]".' %
				3865	(match.group(1), match.group(2)))
				3866
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3867	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				3868	error(filename, linenum, 'runtime/init', 4,
				3869	'You seem to be initializing a member variable with itself.')
				3870
				3871	if file_extension == 'h':
				3872	# TODO(unknown): check that 1-arg constructors are explicit.
				3873	# How to tell it's a constructor?
				3874	# (handled in CheckForNonStandardConstructs for now)
				3875	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				3876	# (level 1 error)
				3877	pass
				3878
				3879	# Check if people are using the verboten C basic types. The only exception
				3880	# we regularly allow is "unsigned short port" for port.
				3881	if Search(r'\bshort port\b', line):
				3882	if not Search(r'\bunsigned short port\b', line):
				3883	error(filename, linenum, 'runtime/int', 4,
				3884	'Use "unsigned short" for ports, not "short"')
				3885	else:
				3886	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				3887	if match:
				3888	error(filename, linenum, 'runtime/int', 4,
				3889	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				3890
				3891	# When snprintf is used, the second argument shouldn't be a literal.
				3892	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
				3893	if match and match.group(2) != '0':
				3894	# If 2nd arg is zero, snprintf is used to calculate size.
				3895	error(filename, linenum, 'runtime/printf', 3,
				3896	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				3897	'to snprintf.' % (match.group(1), match.group(2)))
				3898
				3899	# Check if some verboten C functions are being used.
				3900	if Search(r'\bsprintf\b', line):
				3901	error(filename, linenum, 'runtime/printf', 5,
				3902	'Never use sprintf. Use snprintf instead.')
				3903	match = Search(r'\b(strcpy\|strcat)\b', line)
				3904	if match:
				3905	error(filename, linenum, 'runtime/printf', 4,
				3906	'Almost always, snprintf is better than %s' % match.group(1))
				3907
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3908	# Check if some verboten operator overloading is going on
				3909	# TODO(unknown): catch out-of-line unary operator&:
				3910	# class X {};
				3911	# int operator&(const X& x) { return 42; } // unary operator&
				3912	# The trick is it's hard to tell apart from binary operator&:
				3913	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				3914	if Search(r'\boperator\s&\s$\s*$', line):
				3915	error(filename, linenum, 'runtime/operator', 4,
				3916	'Unary operator& is dangerous. Do not use it.')
				3917
				3918	# Check for suspicious usage of "if" like
				3919	# } if (a == b) {
				3920	if Search(r'\}\sif\s\(', line):
				3921	error(filename, linenum, 'readability/braces', 4,
				3922	'Did you mean "else if"? If not, start a new line for "if".')
				3923
				3924	# Check for potential format string bugs like printf(foo).
				3925	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				3926	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
				3927	# TODO(sugawarayu): Catch the following case. Need to change the calling
				3928	# convention of the whole function to process multiple line to handle it.
				3929	# printf(
				3930	# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
				3931	printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
				3932	if printf_args:
				3933	match = Match(r'([\w.\->()]+)$', printf_args)
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	3934	if match and match.group(1) != '__VA_ARGS__':
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	3935	function_name = re.search(r'\b((?:string)?printf)\s*\(',
				3936	line, re.I).group(1)
				3937	error(filename, linenum, 'runtime/printf', 4,
				3938	'Potential format string bug. Do %s("%%s", %s) instead.'
				3939	% (function_name, match.group(1)))
				3940
				3941	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				3942	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				3943	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				3944	error(filename, linenum, 'runtime/memset', 4,
				3945	'Did you mean "memset(%s, 0, %s)"?'
				3946	% (match.group(1), match.group(2)))
				3947
				3948	if Search(r'\busing namespace\b', line):
				3949	error(filename, linenum, 'build/namespaces', 5,
				3950	'Do not use namespace using-directives. '
				3951	'Use using-declarations instead.')
				3952
				3953	# Detect variable-length arrays.
				3954	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				3955	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				3956	match.group(3).find(']') == -1):
				3957	# Split the size using space and arithmetic operators as delimiters.
				3958	# If any of the resulting tokens are not compile time constants then
				3959	# report the error.
				3960	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				3961	is_const = True
				3962	skip_next = False
				3963	for tok in tokens:
				3964	if skip_next:
				3965	skip_next = False
				3966	continue
				3967
				3968	if Search(r'sizeof$.+$', tok): continue
				3969	if Search(r'arraysize$\w+$', tok): continue
				3970
				3971	tok = tok.lstrip('(')
				3972	tok = tok.rstrip(')')
				3973	if not tok: continue
				3974	if Match(r'\d+', tok): continue
				3975	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				3976	if Match(r'k[A-Z0-9]\w*', tok): continue
				3977	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				3978	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				3979	# A catch all for tricky sizeof cases, including 'sizeof expression',
				3980	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
				3981	# requires skipping the next token because we split on ' ' and '*'.
				3982	if tok.startswith('sizeof'):
				3983	skip_next = True
				3984	continue
				3985	is_const = False
				3986	break
				3987	if not is_const:
				3988	error(filename, linenum, 'runtime/arrays', 1,
				3989	'Do not use variable-length arrays. Use an appropriately named '
				3990	"('k' followed by CamelCase) compile-time constant for the size.")
				3991
				3992	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				3993	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				3994	# in the class declaration.
				3995	match = Match(
				3996	(r'\s*'
				3997	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				3998	r'$.*$;$'),
				3999	line)
				4000	if match and linenum + 1 < clean_lines.NumLines():
				4001	next_line = clean_lines.elided[linenum + 1]
				4002	# We allow some, but not all, declarations of variables to be present
				4003	# in the statement that defines the class. The [\w\,\s] fragment of
				4004	# the regular expression below allows users to declare instances of
				4005	# the class or pointers to instances, but not less common types such
				4006	# as function pointers or arrays. It's a tradeoff between allowing
				4007	# reasonable code and avoiding trying to parse more C++ using regexps.
				4008	if not Search(r'^\s}[\w\,\s]*;', next_line):
				4009	error(filename, linenum, 'readability/constructors', 3,
				4010	match.group(1) + ' should be the last thing in the class')
				4011
				4012	# Check for use of unnamed namespaces in header files. Registration
				4013	# macros are typically OK, so we allow use of "namespace {" on lines
				4014	# that end with backslashes.
				4015	if (file_extension == 'h'
				4016	and Search(r'\bnamespace\s*{', line)
				4017	and line[-1] != '\\'):
				4018	error(filename, linenum, 'build/namespaces', 4,
				4019	'Do not use unnamed namespaces in header files. See '
				4020	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				4021	' for more information.')
				4022
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4023	def CheckForNonConstReference(filename, clean_lines, linenum,
				4024	nesting_state, error):
				4025	"""Check for non-const references.
				4026
				4027	Separate from CheckLanguage since it scans backwards from current
				4028	line, instead of scanning forward.
				4029
				4030	Args:
				4031	filename: The name of the current file.
				4032	clean_lines: A CleansedLines instance containing the file.
				4033	linenum: The number of the line to check.
				4034	nesting_state: A _NestingState instance which maintains information about
				4035	the current stack of nested blocks being parsed.
				4036	error: The function to call with any errors found.
				4037	"""
				4038	# Do nothing if there is no '&' on current line.
				4039	line = clean_lines.elided[linenum]
				4040	if '&' not in line:
				4041	return
				4042
				4043	# Long type names may be broken across multiple lines, usually in one
				4044	# of these forms:
				4045	# LongType
				4046	# ::LongTypeContinued &identifier
				4047	# LongType::
				4048	# LongTypeContinued &identifier
				4049	# LongType<
				4050	# ...>::LongTypeContinued &identifier
				4051	#
				4052	# If we detected a type split across two lines, join the previous
				4053	# line to current line so that we can match const references
				4054	# accordingly.
				4055	#
				4056	# Note that this only scans back one line, since scanning back
				4057	# arbitrary number of lines would be expensive. If you have a type
				4058	# that spans more than 2 lines, please use a typedef.
				4059	if linenum > 1:
				4060	previous = None
				4061	if Match(r'\s::(?:[\w<>]\|::)+\s&\s*\S', line):
				4062	# previous_line\n + ::current_line
				4063	previous = Search(r'\b((?:const\s)?(?:[\w<>]\|::)+[\w<>])\s$',
				4064	clean_lines.elided[linenum - 1])
				4065	elif Match(r'\s[a-zA-Z_]([\w<>]\|::)+\s&\s*\S', line):
				4066	# previous_line::\n + current_line
				4067	previous = Search(r'\b((?:const\s)?(?:[\w<>]\|::)+::)\s$',
				4068	clean_lines.elided[linenum - 1])
				4069	if previous:
				4070	line = previous.group(1) + line.lstrip()
				4071	else:
				4072	# Check for templated parameter that is split across multiple lines
				4073	endpos = line.rfind('>')
				4074	if endpos > -1:
				4075	(_, startline, startpos) = ReverseCloseExpression(
				4076	clean_lines, linenum, endpos)
				4077	if startpos > -1 and startline < linenum:
				4078	# Found the matching < on an earlier line, collect all
				4079	# pieces up to current line.
				4080	line = ''
				4081	for i in xrange(startline, linenum + 1):
				4082	line += clean_lines.elided[i].strip()
				4083
				4084	# Check for non-const references in function parameters. A single '&' may
				4085	# be found in the following places:
				4086	# inside expression: binary & for bitwise AND
				4087	# inside expression: unary & for taking the address of something
				4088	# inside declarators: reference parameter
				4089	# We will exclude the first two cases by checking that we are not inside a
				4090	# function body, including one that was just introduced by a trailing '{'.
				4091	# TODO(unknown): Doesn't account for preprocessor directives.
				4092	# TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
				4093	check_params = False
				4094	if not nesting_state.stack:
				4095	check_params = True # top level
				4096	elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
				4097	isinstance(nesting_state.stack[-1], _NamespaceInfo)):
				4098	check_params = True # within class or namespace
				4099	elif Match(r'.{\s$', line):
				4100	if (len(nesting_state.stack) == 1 or
				4101	isinstance(nesting_state.stack[-2], _ClassInfo) or
				4102	isinstance(nesting_state.stack[-2], _NamespaceInfo)):
				4103	check_params = True # just opened global/class/namespace block
				4104	# We allow non-const references in a few standard places, like functions
				4105	# called "swap()" or iostream operators like "<<" or ">>". Do not check
				4106	# those function parameters.
				4107	#
				4108	# We also accept & in static_assert, which looks like a function but
				4109	# it's actually a declaration expression.
				4110	whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?\|'
				4111	r'operator\s*[<>][<>]\|'
				4112	r'static_assert\|COMPILE_ASSERT'
				4113	r')\s*\(')
				4114	if Search(whitelisted_functions, line):
				4115	check_params = False
				4116	elif not Search(r'\S+\([^)]*$', line):
				4117	# Don't see a whitelisted function on this line. Actually we
				4118	# didn't see any function name on this line, so this is likely a
				4119	# multi-line parameter list. Try a bit harder to catch this case.
				4120	for i in xrange(2):
				4121	if (linenum > i and
				4122	Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
				4123	check_params = False
				4124	break
				4125
				4126	if check_params:
				4127	decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body
				4128	for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
				4129	if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
				4130	error(filename, linenum, 'runtime/references', 2,
				4131	'Is this a non-const reference? '
				4132	'If so, make const or use a pointer: ' +
				4133	ReplaceAll(' *<', '<', parameter))
				4134
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4135
				4136	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				4137	error):
				4138	"""Checks for a C-style cast by looking for the pattern.
				4139
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4140	Args:
				4141	filename: The name of the current file.
				4142	linenum: The number of the line to check.
				4143	line: The line of code to check.
				4144	raw_line: The raw line of code to check, with comments.
				4145	cast_type: The string for the C++ cast to recommend. This is either
				4146	reinterpret_cast, static_cast, or const_cast, depending.
				4147	pattern: The regular expression used to find C-style casts.
				4148	error: The function to call with any errors found.
				4149
				4150	Returns:
				4151	True if an error was emitted.
				4152	False otherwise.
				4153	"""
				4154	match = Search(pattern, line)
				4155	if not match:
				4156	return False
				4157
James Zern	3fcaf97	2014-01-21 17:56:04 -0800	[diff] [blame]	4158	# e.g., sizeof(int)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4159	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				4160	if sizeof_match:
James Zern	3fcaf97	2014-01-21 17:56:04 -0800	[diff] [blame]	4161	error(filename, linenum, 'runtime/sizeof', 1,
				4162	'Using sizeof(type). Use sizeof(varname) instead if possible')
				4163	return True
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4164
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4165	# operator++(int) and operator--(int)
				4166	if (line[0:match.start(1) - 1].endswith(' operator++') or
				4167	line[0:match.start(1) - 1].endswith(' operator--')):
				4168	return False
				4169
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4170	# A single unnamed argument for a function tends to look like old
				4171	# style cast. If we see those, don't issue warnings for deprecated
				4172	# casts, instead issue warnings for unnamed arguments where
				4173	# appropriate.
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4174	#
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4175	# These are things that we want warnings for, since the style guide
				4176	# explicitly require all parameters to be named:
				4177	# Function(int);
				4178	# Function(int) {
				4179	# ConstMember(int) const;
				4180	# ConstMember(int) const {
				4181	# ExceptionMember(int) throw (...);
				4182	# ExceptionMember(int) throw (...) {
				4183	# PureVirtual(int) = 0;
				4184	#
				4185	# These are functions of some sort, where the compiler would be fine
				4186	# if they had named parameters, but people often omit those
				4187	# identifiers to reduce clutter:
				4188	# (FunctionPointer)(int);
				4189	# (FunctionPointer)(int) = value;
				4190	# Function((function_pointer_arg)(int))
				4191	# <TemplateArgument(int)>;
				4192	# <(FunctionPointerTemplateArgument)(int)>;
				4193	remainder = line[match.end(0):]
				4194	if Match(r'^\s*(?:;\|const\b\|throw\b\|=\|>\|\{\|\))', remainder):
				4195	# Looks like an unnamed parameter.
				4196
				4197	# Don't warn on any kind of template arguments.
				4198	if Match(r'^\s*>', remainder):
				4199	return False
				4200
				4201	# Don't warn on assignments to function pointers, but keep warnings for
				4202	# unnamed parameters to pure virtual functions. Note that this pattern
				4203	# will also pass on assignments of "0" to function pointers, but the
				4204	# preferred values for those would be "nullptr" or "NULL".
				4205	matched_zero = Match(r'^\s=\s(\S+)\s;', remainder)
				4206	if matched_zero and matched_zero.group(1) != '0':
				4207	return False
				4208
				4209	# Don't warn on function pointer declarations. For this we need
				4210	# to check what came before the "(type)" string.
				4211	if Match(r'.\)\s$', line[0:match.start(0)]):
				4212	return False
				4213
				4214	# Don't warn if the parameter is named with block comments, e.g.:
				4215	# Function(int /unused_param/);
				4216	if '/*' in raw_line:
				4217	return False
				4218
				4219	# Passed all filters, issue warning here.
				4220	error(filename, linenum, 'readability/function', 3,
				4221	'All parameters should be named in a function')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4222	return True
				4223
				4224	# At this point, all that should be left is actual casts.
				4225	error(filename, linenum, 'readability/casting', 4,
				4226	'Using C-style cast. Use %s<%s>(...) instead' %
				4227	(cast_type, match.group(1)))
				4228
				4229	return True
				4230
				4231
				4232	_HEADERS_CONTAINING_TEMPLATES = (
				4233	('<deque>', ('deque',)),
				4234	('<functional>', ('unary_function', 'binary_function',
				4235	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				4236	'negate',
				4237	'equal_to', 'not_equal_to', 'greater', 'less',
				4238	'greater_equal', 'less_equal',
				4239	'logical_and', 'logical_or', 'logical_not',
				4240	'unary_negate', 'not1', 'binary_negate', 'not2',
				4241	'bind1st', 'bind2nd',
				4242	'pointer_to_unary_function',
				4243	'pointer_to_binary_function',
				4244	'ptr_fun',
				4245	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				4246	'mem_fun_ref_t',
				4247	'const_mem_fun_t', 'const_mem_fun1_t',
				4248	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				4249	'mem_fun_ref',
				4250	)),
				4251	('<limits>', ('numeric_limits',)),
				4252	('<list>', ('list',)),
				4253	('<map>', ('map', 'multimap',)),
				4254	('<memory>', ('allocator',)),
				4255	('<queue>', ('queue', 'priority_queue',)),
				4256	('<set>', ('set', 'multiset',)),
				4257	('<stack>', ('stack',)),
				4258	('<string>', ('char_traits', 'basic_string',)),
				4259	('<utility>', ('pair',)),
				4260	('<vector>', ('vector',)),
				4261
				4262	# gcc extensions.
				4263	# Note: std::hash is their hash, ::hash is our hash
				4264	('<hash_map>', ('hash_map', 'hash_multimap',)),
				4265	('<hash_set>', ('hash_set', 'hash_multiset',)),
				4266	('<slist>', ('slist',)),
				4267	)
				4268
				4269	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				4270
				4271	_re_pattern_algorithm_header = []
				4272	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				4273	'transform'):
				4274	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				4275	# type::max().
				4276	_re_pattern_algorithm_header.append(
				4277	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				4278	_template,
				4279	'<algorithm>'))
				4280
				4281	_re_pattern_templates = []
				4282	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				4283	for _template in _templates:
				4284	_re_pattern_templates.append(
				4285	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				4286	_template + '<>',
				4287	_header))
				4288
				4289
				4290	def FilesBelongToSameModule(filename_cc, filename_h):
				4291	"""Check if these two filenames belong to the same module.
				4292
				4293	The concept of a 'module' here is a as follows:
				4294	foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
				4295	same 'module' if they are in the same directory.
				4296	some/path/public/xyzzy and some/path/internal/xyzzy are also considered
				4297	to belong to the same module here.
				4298
				4299	If the filename_cc contains a longer path than the filename_h, for example,
				4300	'/absolute/path/to/base/sysinfo.cc', and this file would include
				4301	'base/sysinfo.h', this function also produces the prefix needed to open the
				4302	header. This is used by the caller of this function to more robustly open the
				4303	header file. We don't have access to the real include paths in this context,
				4304	so we need this guesswork here.
				4305
				4306	Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
				4307	according to this implementation. Because of this, this function gives
				4308	some false positives. This should be sufficiently rare in practice.
				4309
				4310	Args:
				4311	filename_cc: is the path for the .cc file
				4312	filename_h: is the path for the header path
				4313
				4314	Returns:
				4315	Tuple with a bool and a string:
				4316	bool: True if filename_cc and filename_h belong to the same module.
				4317	string: the additional prefix needed to open the header file.
				4318	"""
				4319
				4320	if not filename_cc.endswith('.cc'):
				4321	return (False, '')
				4322	filename_cc = filename_cc[:-len('.cc')]
				4323	if filename_cc.endswith('_unittest'):
				4324	filename_cc = filename_cc[:-len('_unittest')]
				4325	elif filename_cc.endswith('_test'):
				4326	filename_cc = filename_cc[:-len('_test')]
				4327	filename_cc = filename_cc.replace('/public/', '/')
				4328	filename_cc = filename_cc.replace('/internal/', '/')
				4329
				4330	if not filename_h.endswith('.h'):
				4331	return (False, '')
				4332	filename_h = filename_h[:-len('.h')]
				4333	if filename_h.endswith('-inl'):
				4334	filename_h = filename_h[:-len('-inl')]
				4335	filename_h = filename_h.replace('/public/', '/')
				4336	filename_h = filename_h.replace('/internal/', '/')
				4337
				4338	files_belong_to_same_module = filename_cc.endswith(filename_h)
				4339	common_path = ''
				4340	if files_belong_to_same_module:
				4341	common_path = filename_cc[:-len(filename_h)]
				4342	return files_belong_to_same_module, common_path
				4343
				4344
				4345	def UpdateIncludeState(filename, include_state, io=codecs):
				4346	"""Fill up the include_state with new includes found from the file.
				4347
				4348	Args:
				4349	filename: the name of the header to read.
				4350	include_state: an _IncludeState instance in which the headers are inserted.
				4351	io: The io factory to use to read the file. Provided for testability.
				4352
				4353	Returns:
				4354	True if a header was succesfully added. False otherwise.
				4355	"""
				4356	headerfile = None
				4357	try:
				4358	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				4359	except IOError:
				4360	return False
				4361	linenum = 0
				4362	for line in headerfile:
				4363	linenum += 1
				4364	clean_line = CleanseComments(line)
				4365	match = _RE_PATTERN_INCLUDE.search(clean_line)
				4366	if match:
				4367	include = match.group(2)
				4368	# The value formatting is cute, but not really used right now.
				4369	# What matters here is that the key is in include_state.
				4370	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				4371	return True
				4372
				4373
				4374	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				4375	io=codecs):
				4376	"""Reports for missing stl includes.
				4377
				4378	This function will output warnings to make sure you are including the headers
				4379	necessary for the stl containers and functions that you use. We only give one
				4380	reason to include a header. For example, if you use both equal_to<> and
				4381	less<> in a .h file, only one (the latter in the file) of these will be
				4382	reported as a reason to include the <functional>.
				4383
				4384	Args:
				4385	filename: The name of the current file.
				4386	clean_lines: A CleansedLines instance containing the file.
				4387	include_state: An _IncludeState instance.
				4388	error: The function to call with any errors found.
				4389	io: The IO factory to use to read the header file. Provided for unittest
				4390	injection.
				4391	"""
				4392	required = {} # A map of header name to linenumber and the template entity.
				4393	# Example of required: { '<functional>': (1219, 'less<>') }
				4394
				4395	for linenum in xrange(clean_lines.NumLines()):
				4396	line = clean_lines.elided[linenum]
				4397	if not line or line[0] == '#':
				4398	continue
				4399
				4400	# String is special -- it is a non-templatized type in STL.
				4401	matched = _RE_PATTERN_STRING.search(line)
				4402	if matched:
				4403	# Don't warn about strings in non-STL namespaces:
				4404	# (We check only the first match per line; good enough.)
				4405	prefix = line[:matched.start()]
				4406	if prefix.endswith('std::') or not prefix.endswith('::'):
				4407	required['<string>'] = (linenum, 'string')
				4408
				4409	for pattern, template, header in _re_pattern_algorithm_header:
				4410	if pattern.search(line):
				4411	required[header] = (linenum, template)
				4412
				4413	# The following function is just a speed up, no semantics are changed.
				4414	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				4415	continue
				4416
				4417	for pattern, template, header in _re_pattern_templates:
				4418	if pattern.search(line):
				4419	required[header] = (linenum, template)
				4420
				4421	# The policy is that if you #include something in foo.h you don't need to
				4422	# include it again in foo.cc. Here, we will look at possible includes.
				4423	# Let's copy the include_state so it is only messed up within this function.
				4424	include_state = include_state.copy()
				4425
				4426	# Did we find the header for this file (if any) and succesfully load it?
				4427	header_found = False
				4428
				4429	# Use the absolute path so that matching works properly.
				4430	abs_filename = FileInfo(filename).FullName()
				4431
				4432	# For Emacs's flymake.
				4433	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				4434	# by flymake and that file name might end with '_flymake.cc'. In that case,
				4435	# restore original file name here so that the corresponding header file can be
				4436	# found.
				4437	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				4438	# instead of 'foo_flymake.h'
				4439	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
				4440
				4441	# include_state is modified during iteration, so we iterate over a copy of
				4442	# the keys.
				4443	header_keys = include_state.keys()
				4444	for header in header_keys:
				4445	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				4446	fullpath = common_path + header
				4447	if same_module and UpdateIncludeState(fullpath, include_state, io):
				4448	header_found = True
				4449
				4450	# If we can't find the header file for a .cc, assume it's because we don't
				4451	# know where to look. In that case we'll give up as we're not sure they
				4452	# didn't include it in the .h file.
				4453	# TODO(unknown): Do a better job of finding .h files so we are confident that
				4454	# not having the .h file means there isn't one.
				4455	if filename.endswith('.cc') and not header_found:
				4456	return
				4457
				4458	# All the lines have been processed, report the errors found.
				4459	for required_header_unstripped in required:
				4460	template = required[required_header_unstripped][1]
				4461	if required_header_unstripped.strip('<>"') not in include_state:
				4462	error(filename, required[required_header_unstripped][0],
				4463	'build/include_what_you_use', 4,
				4464	'Add #include ' + required_header_unstripped + ' for ' + template)
				4465
				4466
				4467	_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
				4468
				4469
				4470	def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
				4471	"""Check that make_pair's template arguments are deduced.
				4472
				4473	G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
				4474	specified explicitly, and such use isn't intended in any case.
				4475
				4476	Args:
				4477	filename: The name of the current file.
				4478	clean_lines: A CleansedLines instance containing the file.
				4479	linenum: The number of the line to check.
				4480	error: The function to call with any errors found.
				4481	"""
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4482	line = clean_lines.elided[linenum]
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4483	match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
				4484	if match:
				4485	error(filename, linenum, 'build/explicit_make_pair',
				4486	4, # 4 = high confidence
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4487	'For C++11-compatibility, omit template arguments from make_pair'
				4488	' OR use pair directly OR if appropriate, construct a pair directly')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4489
				4490
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
  """Runs every per-line check on one line of the file.

  NOLINT suppressions are parsed and the nesting state is advanced first.
  If the innermost open block is then marked as inline assembly, the rest
  of the checks are skipped for this line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: An array of strings, each representing a line of the file,
                 with comments stripped.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported; it is handed on to every
           check that is run.
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)

  # The stack has to be inspected after Update() so that it reflects this
  # line.
  open_blocks = nesting_state.stack
  if open_blocks:
    if open_blocks[-1].inline_asm != _NO_ASM:
      return

  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)

  # Caller-supplied checks run last, in the order given.
  for extra_check in extra_check_functions:
    extra_check(filename, clean_lines, line, error)
				4531
				4532	def ProcessFileData(filename, file_extension, lines, error,
				4533	extra_check_functions=[]):
				4534	"""Performs lint checks and reports any errors to the given error function.
				4535
				4536	Args:
				4537	filename: Filename of the file that is being processed.
				4538	file_extension: The extension (dot not included) of the file.
				4539	lines: An array of strings, each representing a line of the file, with the
				4540	last element being empty if the file is terminated with a newline.
				4541	error: A callable to which errors are reported, which takes 4 arguments:
				4542	filename, line number, error level, and message
				4543	extra_check_functions: An array of additional check functions that will be
				4544	run on each source line. Each function takes 4
				4545	arguments: filename, clean_lines, line, error
				4546	"""
				4547	lines = (['// marker so line numbers and indices both start at 1'] + lines +
				4548	['// marker so line numbers end in a known way'])
				4549
				4550	include_state = _IncludeState()
				4551	function_state = _FunctionState()
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4552	nesting_state = _NestingState()
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4553
				4554	ResetNolintSuppressions()
				4555
				4556	CheckForCopyright(filename, lines, error)
				4557
				4558	if file_extension == 'h':
				4559	CheckForHeaderGuard(filename, lines, error)
				4560
				4561	RemoveMultiLineComments(filename, lines, error)
				4562	clean_lines = CleansedLines(lines)
				4563	for line in xrange(clean_lines.NumLines()):
				4564	ProcessLine(filename, file_extension, clean_lines, line,
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4565	include_state, function_state, nesting_state, error,
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4566	extra_check_functions)
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4567	nesting_state.CheckCompletedBlocks(filename, error)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4568
				4569	CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
				4570
				4571	# We check here rather than inside ProcessLine so that we see raw
				4572	# lines rather than "cleaned" lines.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4573	CheckForBadCharacters(filename, lines, error)
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4574
				4575	CheckForNewlineAtEOF(filename, lines, error)
				4576
				4577	def ProcessFile(filename, vlevel, extra_check_functions=[]):
				4578	"""Does google-lint on a single file.
				4579
				4580	Args:
				4581	filename: The name of the file to parse.
				4582
				4583	vlevel: The level of errors to report. Every error of confidence
				4584	>= verbose_level will be reported. 0 is a good default.
				4585
				4586	extra_check_functions: An array of additional check functions that will be
				4587	run on each source line. Each function takes 4
				4588	arguments: filename, clean_lines, line, error
				4589	"""
				4590
				4591	_SetVerboseLevel(vlevel)
				4592
				4593	try:
				4594	# Support the UNIX convention of using "-" for stdin. Note that
				4595	# we are not opening the file with universal newline support
				4596	# (which codecs doesn't support anyway), so the resulting lines do
				4597	# contain trailing '\r' characters if we are reading a file that
				4598	# has CRLF endings.
				4599	# If after the split a trailing '\r' is present, it is removed
				4600	# below. If it is not expected to be present (i.e. os.linesep !=
				4601	# '\r\n' as in Windows), a warning is issued below if this file
				4602	# is processed.
				4603
				4604	if filename == '-':
				4605	lines = codecs.StreamReaderWriter(sys.stdin,
				4606	codecs.getreader('utf8'),
				4607	codecs.getwriter('utf8'),
				4608	'replace').read().split('\n')
				4609	else:
				4610	lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
				4611
				4612	carriage_return_found = False
				4613	# Remove trailing '\r'.
				4614	for linenum in range(len(lines)):
				4615	if lines[linenum].endswith('\r'):
				4616	lines[linenum] = lines[linenum].rstrip('\r')
				4617	carriage_return_found = True
				4618
				4619	except IOError:
				4620	sys.stderr.write(
				4621	"Skipping input '%s': Can't open for reading\n" % filename)
				4622	return
				4623
				4624	# Note, if no dot is found, this will give the entire filename as the ext.
				4625	file_extension = filename[filename.rfind('.') + 1:]
				4626
				4627	# When reading from stdin, the extension is unknown, so no cpplint tests
				4628	# should rely on the extension.
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4629	if filename != '-' and file_extension not in _valid_extensions:
				4630	sys.stderr.write('Ignoring %s; not a valid file name '
				4631	'(%s)\n' % (filename, ', '.join(_valid_extensions)))
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4632	else:
				4633	ProcessFileData(filename, file_extension, lines, Error,
				4634	extra_check_functions)
				4635	if carriage_return_found and os.linesep != '\r\n':
				4636	# Use 0 for linenum since outputting only one error for potentially
				4637	# several lines.
				4638	Error(filename, 0, 'whitespace/newline', 1,
				4639	'One or more unexpected \\r (^M) found;'
				4640	'better to use only a \\n')
				4641
				4642	sys.stderr.write('Done processing %s\n' % filename)
				4643
				4644
				4645	def PrintUsage(message):
				4646	"""Prints a brief usage string and exits, optionally with an error message.
				4647
				4648	Args:
				4649	message: The optional error message.
				4650	"""
				4651	sys.stderr.write(_USAGE)
				4652	if message:
				4653	sys.exit('\nFATAL ERROR: ' + message)
				4654	else:
				4655	sys.exit(1)
				4656
				4657
				4658	def PrintCategories():
				4659	"""Prints a list of all the error-categories used by error messages.
				4660
				4661	These are the categories used to filter messages via --filter.
				4662	"""
				4663	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
				4664	sys.exit(0)
				4665
				4666
				4667	def ParseArguments(args):
				4668	"""Parses the command line arguments.
				4669
				4670	This may set the output format and verbosity level as side-effects.
				4671
				4672	Args:
				4673	args: The command line arguments:
				4674
				4675	Returns:
				4676	The list of filenames to lint.
				4677	"""
				4678	try:
				4679	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
				4680	'counting=',
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4681	'filter=',
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4682	'root=',
				4683	'linelength=',
				4684	'extensions='])
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4685	except getopt.GetoptError:
				4686	PrintUsage('Invalid arguments.')
				4687
				4688	verbosity = _VerboseLevel()
				4689	output_format = _OutputFormat()
				4690	filters = ''
				4691	counting_style = ''
				4692
				4693	for (opt, val) in opts:
				4694	if opt == '--help':
				4695	PrintUsage(None)
				4696	elif opt == '--output':
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4697	if val not in ('emacs', 'vs7', 'eclipse'):
				4698	PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4699	output_format = val
				4700	elif opt == '--verbose':
				4701	verbosity = int(val)
				4702	elif opt == '--filter':
				4703	filters = val
				4704	if not filters:
				4705	PrintCategories()
				4706	elif opt == '--counting':
				4707	if val not in ('total', 'toplevel', 'detailed'):
				4708	PrintUsage('Valid counting options are total, toplevel, and detailed')
				4709	counting_style = val
James Zern	e125286	2013-05-03 12:54:50 -0700	[diff] [blame]	4710	elif opt == '--root':
				4711	global _root
				4712	_root = val
James Zern	1edc498	2014-01-17 20:18:48 -0800	[diff] [blame]	4713	elif opt == '--linelength':
				4714	global _line_length
				4715	try:
				4716	_line_length = int(val)
				4717	except ValueError:
				4718	PrintUsage('Line length must be digits.')
				4719	elif opt == '--extensions':
				4720	global _valid_extensions
				4721	try:
				4722	_valid_extensions = set(val.split(','))
				4723	except ValueError:
				4724	PrintUsage('Extensions must be comma seperated list.')
John Koleszar	6c776b2	2012-07-13 15:14:17 -0700	[diff] [blame]	4725
				4726	if not filenames:
				4727	PrintUsage('No files were specified.')
				4728
				4729	_SetOutputFormat(output_format)
				4730	_SetVerboseLevel(verbosity)
				4731	_SetFilters(filters)
				4732	_SetCountingStyle(counting_style)
				4733
				4734	return filenames
				4735
				4736
				4737	def main():
				4738	filenames = ParseArguments(sys.argv[1:])
				4739
				4740	# Change stderr to write with replacement characters so we don't die
				4741	# if we try to print something containing non-ASCII characters.
				4742	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				4743	codecs.getreader('utf8'),
				4744	codecs.getwriter('utf8'),
				4745	'replace')
				4746
				4747	_cpplint_state.ResetErrorCounts()
				4748	for filename in filenames:
				4749	ProcessFile(filename, _cpplint_state.verbose_level)
				4750	_cpplint_state.PrintErrorCounts()
				4751
				4752	sys.exit(_cpplint_state.error_count > 0)
				4753
				4754
				4755	if __name__ == '__main__':
				4756	main()