Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ast.py
Go to the documentation of this file, or view the newest version of ast.py in the sPHENIX GitHub repository.
1 #!/usr/bin/env python
2 #
3 # Copyright 2007 Neal Norwitz
4 # Portions Copyright 2007 Google Inc.
5 #
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
17 
18 """Generate an Abstract Syntax Tree (AST) for C++."""
19 
20 __author__ = 'nnorwitz@google.com (Neal Norwitz)'
21 
22 
23 # TODO:
24 # * Tokens should never be exported, need to convert to Nodes
25 # (return types, parameters, etc.)
26 # * Handle static class data for templatized classes
27 # * Handle casts (both C++ and C-style)
28 # * Handle conditions and loops (if/else, switch, for, while/do)
29 #
30 # TODO much, much later:
31 # * Handle #define
32 # * exceptions
33 
34 
35 try:
36  # Python 3.x
37  import builtins
38 except ImportError:
39  # Python 2.x
40  import __builtin__ as builtins
41 
42 import sys
43 import traceback
44 
45 from cpp import keywords
46 from cpp import tokenize
47 from cpp import utils
48 
49 
if not hasattr(builtins, 'reversed'):
    # Fallback for Python 2.3 and earlier, which lack the builtin.
    def reversed(seq):
        """Yield the elements of seq from last to first."""
        index = len(seq)
        while index > 0:
            index -= 1
            yield seq[index]
55 
if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier: emulate the next() builtin by
    # calling the iterator's (old-style) .next() method.
    def next(obj):
        return obj.next()
60 
61 
# C++ member access specifiers, in declaration order.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags OR'd together into the `modifiers` field of Function/Method
# nodes to describe properties of the declaration.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01                # trailing const qualifier
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08                 # constructor
FUNCTION_DTOR = 0x10                 # destructor
FUNCTION_ATTRIBUTE = 0x20            # has an __attribute__(...) annotation
FUNCTION_UNKNOWN_ANNOTATION = 0x40   # unrecognized ALL_CAPS macro annotation
FUNCTION_THROW = 0x80                # has a throw(...) specification
FUNCTION_OVERRIDE = 0x100            # trailing override keyword

"""
These are currently unused. Should really handle these properly at some point.

TYPE_MODIFIER_INLINE = 0x010000
TYPE_MODIFIER_EXTERN = 0x020000
TYPE_MODIFIER_STATIC = 0x040000
TYPE_MODIFIER_CONST = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name pair used internally by the parser to signal
# leaving a namespace scope (see AstBuilder.Generate).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
99 
100 
101 # TODO(nnorwitz): use this as a singleton for templated_types, etc
102 # where we don't want to create a new empty dict each time. It is also const.
103 class _NullDict(object):
104  __contains__ = lambda self: False
105  keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
106 
107 
108 # TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node: a [start, end) token-offset span plus query methods."""

    def __init__(self, start, end):
        # Offsets delimiting this node in the original source text.
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        # Deliberately renamed from __str__ so it is inert; kept around
        # for debugging.
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # In debug mode include the start/end offsets in the rendering.
        if utils.DEBUG:
            return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
        return '%s(%s)' % (name, suffix)

    def __repr__(self):
        return str(self)
142 
143 
class Define(Node):
    """A #define directive: macro name plus its replacement text."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))
153 
154 
class Include(Node):
    """A #include directive; system is True for <...> style includes."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        # System headers render as <name>, local headers as "name".
        if self.system:
            fmt = '<%s>'
        else:
            fmt = '"%s"'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)
166 
167 
class Goto(Node):
    """A goto statement; label is the jump target's name."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        label_text = str(self.label)
        return self._StringHelper(self.__class__.__name__, label_text)
175 
176 
class Expr(Node):
    """A mostly-unparsed expression, stored as its raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        expr_text = str(self.expr)
        return self._StringHelper(self.__class__.__name__, expr_text)
188 
189 
class Return(Expr):
    """A return statement; self.expr holds the returned expression tokens."""
    pass
192 
193 
class Delete(Expr):
    """A delete statement; self.expr holds the deleted expression tokens."""
    pass
196 
197 
class Friend(Expr):
    """A friend declaration inside a class, with its namespace context."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Snapshot the namespace stack; the caller keeps mutating its list.
        self.namespace = list(namespace)
202 
203 
class Using(Node):
    """A using declaration/directive; names holds its token sequence."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        names_text = str(self.names)
        return self._StringHelper(self.__class__.__name__, names_text)
211 
212 
    def __init__(self, start, end, name, parameter_type, default):
        """name: str; parameter_type: the type node; default: default-value
        tokens, or empty if the parameter has no default."""
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default
219 
    def Requires(self, node):
        """True if this parameter's type name matches node's name."""
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name
223 
    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            # Render the default-value tokens, e.g. " = 42".
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)
230 
231 
    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy the namespace stack; the caller keeps mutating its own list.
        self.namespace = namespace[:]
237 
    def FullName(self):
        """Return the name qualified by its namespace, e.g. 'ns::Name'."""
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name
243 
    def _TypeStringHelper(self, suffix):
        # Like _StringHelper, but appends the namespace (if any) for context.
        # A falsy namespace entry denotes an anonymous namespace.
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
249 
250 
251 # TODO(nnorwitz): merge with Parameter in some way?
    def __init__(self, start, end, name, var_type, initial_value, namespace):
        """var_type: the variable's type node; initial_value: initializer
        (a string — see AstBuilder._CreateVariable) or None."""
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value
257 
    def Requires(self, node):
        """True if this variable's type name matches node's name."""
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name
261 
    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            # initial_value is already a string here.
            suffix += ' = ' + self.initial_value
        return suffix
268 
    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
271 
272 
    def __init__(self, start, end, name, alias, namespace):
        """alias: sequence of tokens/nodes naming the aliased type."""
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias
277 
    def IsDefinition(self):
        # A typedef is always a complete definition.
        return True
280 
    def IsExportable(self):
        # Typedefs can always be exported from a header.
        return True
283 
    def Requires(self, node):
        """True if any token in the alias matches node's name."""
        # TODO(nnorwitz): handle namespaces, etc.
        name = node.name
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False
291 
    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)
295 
296 
    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # fields: the contents between the braces — presumably the
        # member/enumerator tokens; rendered verbatim by __str__.
        self.fields = fields
301 
    def IsDefinition(self):
        # Nested types are parsed with their bodies, so always definitions.
        return True
304 
    def IsExportable(self):
        return True
307 
    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)
311 
312 
314  pass
315 
316 
class Enum(_NestedType):
    """An enum declaration; inherits name/fields from _NestedType."""
    pass
319 
320 
    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        """bases: list of base-class token lists, or None; body: parsed
        members, or None for a forward declaration."""
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types
327 
    def IsDeclaration(self):
        # A forward declaration has neither bases nor a body.
        return self.bases is None and self.body is None
330 
    def IsDefinition(self):
        # Anything that isn't a pure forward declaration is a definition.
        return not self.IsDeclaration()
333 
    def IsExportable(self):
        return not self.IsDeclaration()
336 
    def Requires(self, node):
        """True if node is named among this class's base-class tokens."""
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparision.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False
347 
    def __str__(self):
        name = self.name
        if self.templated_types:
            # Show template parameters, e.g. Foo<T>.
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)
354 
355 
class Struct(Class):
    """A struct declaration; behaves exactly like Class."""
    pass
358 
359 
class Function(_GenericDeclaration):
    """A free function declaration or definition.

    body is None for pure declarations; return_type and parameters are
    converted from raw tokens into Type/Parameter nodes at construction.
    """

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Static functions and anything inside an anonymous namespace
        # (a None entry on the namespace stack) are not exportable.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        # TODO(nnorwitz): parameters are tokens, do name comparision.
        for parameter in self.parameters or ():
            if parameter.name == node.name:
                return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
397 
398 
    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        """Like Function, but also records the enclosing class name."""
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
407 
408 
410  """Type used for any variable (eg class, primitive, struct, etc)."""
411 
    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        # Types carry no namespace of their own, hence the empty list.
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        if not name and modifiers:
            # No bare name was found (e.g. just 'unsigned'): promote the
            # last modifier to be the type name.
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array
429 
    def __str__(self):
        # Reconstruct a C++-like rendering: "<modifiers> name<T>&*[]".
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)
445 
    # By definition, Is* are always False. A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False
450 
    def IsDefinition(self):
        # See the note above IsDeclaration: always False for a Type.
        return False
453 
    def IsExportable(self):
        # See the note above IsDeclaration: always False for a Type.
        return False
456 
457 
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes."""

    def __init__(self, namespace_stack):
        # Namespace context in effect where the tokens were read.
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        # Given `start` pointing just past an opening '<', return
        # (tokens inside the <...> pair, index just past the closing '>').
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recurse into the template argument list.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                # NOTE(review): '[' sets pointer (not array) here — looks
                # deliberate for decayed array types, but worth confirming.
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their components.

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        # Array declarator: the real name precedes '[]'.
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the trailing NAME token is the declared name.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # Flush the accumulated state into a Parameter node.
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside a template argument list: commas there do not
                # separate parameters, so just accumulate tokens.
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter(s.start)
                # Reset all per-parameter state for the next one.
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value. Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from return-type tokens; None if the seq is empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        # '&', '*', '[' land in other_tokens (see DeclarationToParts).
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        # names is a list of strings.
        # Return (index of the first '<', index just past the last '>').
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
675 
676 class AstBuilder(object):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        # NOTE(review): the mutable default for namespace_stack is harmless
        # because it is copied below, but a None sentinel would be safer.
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            # Strip namespace/class qualification for ctor/dtor matching.
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False
696 
698 
    def HandleError(self, msg, token):
        """Write a parse diagnostic (with recent queued tokens) to stderr."""
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))
703 
    def Generate(self):
        """Yield AST nodes parsed from the token stream until it is exhausted."""
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    # Synthetic marker: leave the current namespace scope.
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Report context (file, token, queue tail), then re-raise.
                self.HandleError('exception', token)
                raise
726 
    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration node.

        ref_pointer_name_seq is scanned for '&', '*', and '[' to derive
        the reference/pointer/array flags of the variable's type.
        """
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)
737 
    def _GenerateOne(self, token):
        """Parse one construct starting at `token`.

        Returns an AST node, or None when the tokens are consumed without
        producing a node (unhandled syntax, skipped #if 0 block, etc.).
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Keyword dispatch: calls self.handle_<keyword>().
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren. Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    # Pretend we saw a ';' so the data branch below runs.
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Drop the <...> tokens from the flag-derivation list.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Re-queue everything but the first token and re-dispatch on
                # a handle_<name> method if one exists.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name. Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include <newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split "#define NAME VALUE" at the first whitespace.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    # Skip over #if 0 / #if (0) blocks entirely.
                    self._SkipIf0Blocks()
        return None
848 
849  def _GetTokensUpTo(self, expected_token_type, expected_token):
850  return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
851 
    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        """Collect tokens until one matches (type, any expected name).

        Returns (tokens before the match, the matching token).
        """
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token
860 
    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
    def _IgnoreUpTo(self, token_type, token):
        # Consume and discard tokens up to (token_type, token).
        unused_tokens = self._GetTokensUpTo(token_type, token)
864 
    def _SkipIf0Blocks(self):
        """Consume tokens through the #endif matching the current #if 0."""
        count = 1
        while 1:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                # Nested conditional: it needs its own matching #endif.
                count += 1
879 
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the balanced close_paren.

        Assumes the current token is open_paren and we will consume
        and return up to the close_paren.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        count = 1
        token = GetNextToken()
        while 1:
            # Only SYNTAX tokens can adjust the nesting depth.
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        # Also yield the final close_paren token itself.
        yield token
898 
    def _GetParameters(self):
        # Yield the tokens of a parameter list, including the trailing ')'.
        return self._GetMatchingChar('(', ')')
901 
    def GetScope(self):
        # Yield the tokens of a braced scope, including the trailing '}'.
        return self._GetMatchingChar('{', '}')
904 
    def _GetNextToken(self):
        # Pushed-back tokens (tail of token_queue) take priority over
        # the raw token stream.
        if self.token_queue:
            return self.token_queue.pop()
        return next(self.tokens)
909 
    def _AddBackToken(self, token):
        """Push one token back onto the queue for _GetNextToken()."""
        if token.whence == tokenize.WHENCE_STREAM:
            # Mark it as queued and place it at the front (queue is
            # consumed from the tail).
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
        else:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)
917 
    def _AddBackTokens(self, tokens):
        """Push a sequence of tokens back, preserving their original order."""
        if tokens:
            if tokens[-1].whence == tokenize.WHENCE_STREAM:
                for token in tokens:
                    token.whence = tokenize.WHENCE_QUEUE
                # Reversed because the queue is consumed from the tail.
                self.token_queue[:0] = reversed(tokens)
            else:
                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
                self.token_queue.extend(reversed(tokens))
927 
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info)."""
        GetNextToken = self._GetNextToken
        if seq is not None:
            # Read from the supplied sequence instead of the live stream.
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        # A name is NAME tokens possibly joined by '::' or templated via '<'.
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
952 
    def GetMethod(self, modifiers, templated_types):
        """Parse a method whose return-type/name tokens run up to '('."""
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(return_type_and_name) >= 1
        return self._GetMethod(return_type_and_name, modifiers, templated_types,
                               False)
958 
959  def _GetMethod(self, return_type_and_name, modifiers, templated_types,
960  get_paren):
961  template_portion = None
962  if get_paren:
963  token = self._GetNextToken()
964  assert token.token_type == tokenize.SYNTAX, token
965  if token.name == '<':
966  # Handle templatized dtors.
967  template_portion = [token]
968  template_portion.extend(self._GetMatchingChar('<', '>'))
969  token = self._GetNextToken()
970  assert token.token_type == tokenize.SYNTAX, token
971  assert token.name == '(', token
972 
973  name = return_type_and_name.pop()
974  # Handle templatized ctors.
975  if name.name == '>':
976  index = 1
977  while return_type_and_name[index].name != '<':
978  index += 1
979  template_portion = return_type_and_name[index:] + [name]
980  del return_type_and_name[index:]
981  name = return_type_and_name.pop()
982  elif name.name == ']':
983  rt = return_type_and_name
984  assert rt[-1].name == '[', return_type_and_name
985  assert rt[-2].name == 'operator', return_type_and_name
986  name_seq = return_type_and_name[-2:]
987  del return_type_and_name[-2:]
988  name = tokenize.Token(tokenize.NAME, 'operator[]',
989  name_seq[0].start, name.end)
990  # Get the open paren so _GetParameters() below works.
991  unused_open_paren = self._GetNextToken()
992 
993  # TODO(nnorwitz): store template_portion.
994  return_type = return_type_and_name
995  indices = name
996  if return_type:
997  indices = return_type[0]
998 
999  # Force ctor for templatized ctors.
1000  if name.name == self.in_class and not modifiers:
1001  modifiers |= FUNCTION_CTOR
1002  parameters = list(self._GetParameters())
1003  del parameters[-1] # Remove trailing ')'.
1004 
1005  # Handling operator() is especially weird.
1006  if name.name == 'operator' and not parameters:
1007  token = self._GetNextToken()
1008  assert token.name == '(', token
1009  parameters = list(self._GetParameters())
1010  del parameters[-1] # Remove trailing ')'.
1011 
1012  token = self._GetNextToken()
1013  while token.token_type == tokenize.NAME:
1014  modifier_token = token
1015  token = self._GetNextToken()
1016  if modifier_token.name == 'const':
1017  modifiers |= FUNCTION_CONST
1018  elif modifier_token.name == '__attribute__':
1019  # TODO(nnorwitz): handle more __attribute__ details.
1020  modifiers |= FUNCTION_ATTRIBUTE
1021  assert token.name == '(', token
1022  # Consume everything between the (parens).
1023  unused_tokens = list(self._GetMatchingChar('(', ')'))
1024  token = self._GetNextToken()
1025  elif modifier_token.name == 'throw':
1026  modifiers |= FUNCTION_THROW
1027  assert token.name == '(', token
1028  # Consume everything between the (parens).
1029  unused_tokens = list(self._GetMatchingChar('(', ')'))
1030  token = self._GetNextToken()
1031  elif modifier_token.name == 'override':
1032  modifiers |= FUNCTION_OVERRIDE
1033  elif modifier_token.name == modifier_token.name.upper():
1034  # HACK(nnorwitz): assume that all upper-case names
1035  # are some macro we aren't expanding.
1036  modifiers |= FUNCTION_UNKNOWN_ANNOTATION
1037  else:
1038  self.HandleError('unexpected token', modifier_token)
1039 
1040  assert token.token_type == tokenize.SYNTAX, token
1041  # Handle ctor initializers.
1042  if token.name == ':':
1043  # TODO(nnorwitz): anything else to handle for initializer list?
1044  while token.name != ';' and token.name != '{':
1045  token = self._GetNextToken()
1046 
1047  # Handle pointer to functions that are really data but look
1048  # like method declarations.
1049  if token.name == '(':
1050  if parameters[0].name == '*':
1051  # name contains the return type.
1052  name = parameters.pop()
1053  # parameters contains the name of the data.
1054  modifiers = [p.name for p in parameters]
1055  # Already at the ( to open the parameter list.
1056  function_parameters = list(self._GetMatchingChar('(', ')'))
1057  del function_parameters[-1] # Remove trailing ')'.
1058  # TODO(nnorwitz): store the function_parameters.
1059  token = self._GetNextToken()
1060  assert token.token_type == tokenize.SYNTAX, token
1061  assert token.name == ';', token
1062  return self._CreateVariable(indices, name.name, indices.name,
1063  modifiers, '', None)
1064  # At this point, we got something like:
1065  # return_type (type::*name_)(params);
1066  # This is a data member called name_ that is a function pointer.
1067  # With this code: void (sq_type::*field_)(string&);
1068  # We get: name=void return_type=[] parameters=sq_type ... field_
1069  # TODO(nnorwitz): is return_type always empty?
1070  # TODO(nnorwitz): this isn't even close to being correct.
1071  # Just put in something so we don't crash and can move on.
1072  real_name = parameters[-1]
1073  modifiers = [p.name for p in self._GetParameters()]
1074  del modifiers[-1] # Remove trailing ')'.
1075  return self._CreateVariable(indices, real_name.name, indices.name,
1076  modifiers, '', None)
1077 
1078  if token.name == '{':
1079  body = list(self.GetScope())
1080  del body[-1] # Remove trailing '}'.
1081  else:
1082  body = None
1083  if token.name == '=':
1084  token = self._GetNextToken()
1085 
1086  if token.name == 'default' or token.name == 'delete':
1087  # Ignore explicitly defaulted and deleted special members
1088  # in C++11.
1089  token = self._GetNextToken()
1090  else:
1091  # Handle pure-virtual declarations.
1092  assert token.token_type == tokenize.CONSTANT, token
1093  assert token.name == '0', token
1094  modifiers |= FUNCTION_PURE_VIRTUAL
1095  token = self._GetNextToken()
1096 
1097  if token.name == '[':
1098  # TODO(nnorwitz): store tokens and improve parsing.
1099  # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1100  tokens = list(self._GetMatchingChar('[', ']'))
1101  token = self._GetNextToken()
1102 
1103  assert token.name == ';', (token, return_type_and_name, parameters)
1104 
1105  # Looks like we got a method, not a function.
1106  if len(return_type) > 2 and return_type[-1].name == '::':
1107  return_type, in_class = \
1108  self._GetReturnTypeAndClassName(return_type)
1109  return Method(indices.start, indices.end, name.name, in_class,
1110  return_type, parameters, modifiers, templated_types,
1111  body, self.namespace_stack)
1112  return Function(indices.start, indices.end, name.name, return_type,
1113  parameters, modifiers, templated_types, body,
1114  self.namespace_stack)
1115 
1116  def _GetReturnTypeAndClassName(self, token_seq):
1117  # Splitting the return type from the class name in a method
1118  # can be tricky. For example, Return::Type::Is::Hard::To::Find().
1119  # Where is the return type and where is the class name?
1120  # The heuristic used is to pull the last name as the class name.
1121  # This includes all the templated type info.
1122  # TODO(nnorwitz): if there is only One name like in the
1123  # example above, punt and assume the last bit is the class name.
1124 
1125  # Ignore a :: prefix, if exists so we can find the first real name.
1126  i = 0
1127  if token_seq[0].name == '::':
1128  i = 1
1129  # Ignore a :: suffix, if exists.
1130  end = len(token_seq) - 1
1131  if token_seq[end-1].name == '::':
1132  end -= 1
1133 
1134  # Make a copy of the sequence so we can append a sentinel
1135  # value. This is required for GetName will has to have some
1136  # terminating condition beyond the last name.
1137  seq_copy = token_seq[i:end]
1138  seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1139  names = []
1140  while i < end:
1141  # Iterate through the sequence parsing out each name.
1142  new_name, next = self.GetName(seq_copy[i:])
1143  assert new_name, 'Got empty new_name, next=%s' % next
1144  # We got a pointer or ref. Add it to the name.
1145  if next and next.token_type == tokenize.SYNTAX:
1146  new_name.append(next)
1147  names.append(new_name)
1148  i += len(new_name)
1149 
1150  # Now that we have the names, it's time to undo what we did.
1151 
1152  # Remove the sentinel value.
1153  names[-1].pop()
1154  # Flatten the token sequence for the return type.
1155  return_type = [e for seq in names[:-1] for e in seq]
1156  # The class name is the last name.
1157  class_name = names[-1]
1158  return return_type, class_name
1159 
1160  def handle_bool(self):
1161  pass
1162 
1163  def handle_char(self):
1164  pass
1165 
1166  def handle_int(self):
1167  pass
1168 
1169  def handle_long(self):
1170  pass
1171 
1172  def handle_short(self):
1173  pass
1174 
1175  def handle_double(self):
1176  pass
1177 
1178  def handle_float(self):
1179  pass
1180 
1181  def handle_void(self):
1182  pass
1183 
1184  def handle_wchar_t(self):
1185  pass
1186 
1187  def handle_unsigned(self):
1188  pass
1189 
1190  def handle_signed(self):
1191  pass
1192 
1193  def _GetNestedType(self, ctor):
1194  name = None
1195  name_tokens, token = self.GetName()
1196  if name_tokens:
1197  name = ''.join([t.name for t in name_tokens])
1198 
1199  # Handle forward declarations.
1200  if token.token_type == tokenize.SYNTAX and token.name == ';':
1201  return ctor(token.start, token.end, name, None,
1202  self.namespace_stack)
1203 
1204  if token.token_type == tokenize.NAME and self._handling_typedef:
1205  self._AddBackToken(token)
1206  return ctor(token.start, token.end, name, None,
1207  self.namespace_stack)
1208 
1209  # Must be the type declaration.
1210  fields = list(self._GetMatchingChar('{', '}'))
1211  del fields[-1] # Remove trailing '}'.
1212  if token.token_type == tokenize.SYNTAX and token.name == '{':
1213  next = self._GetNextToken()
1214  new_type = ctor(token.start, token.end, name, fields,
1215  self.namespace_stack)
1216  # A name means this is an anonymous type and the name
1217  # is the variable declaration.
1218  if next.token_type != tokenize.NAME:
1219  return new_type
1220  name = new_type
1221  token = next
1222 
1223  # Must be variable declaration using the type prefixed with keyword.
1224  assert token.token_type == tokenize.NAME, token
1225  return self._CreateVariable(token, token.name, name, [], '', None)
1226 
    def handle_struct(self):
        """Handle the 'struct' keyword: declaration, variable, or method.

        Distinguishes between a struct definition/declaration, a variable
        of struct type, and a method declared to return a struct.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # '*' or '&' right after the name: pointer/reference declarator.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # NAME followed by ';': plain variable of this struct type.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize a 'struct' token just before the name so the
                    # method parser sees a complete return type.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable after all: push everything back and fall
            # through to normal class-style parsing below.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1262 
1263  def handle_union(self):
1264  return self._GetNestedType(Union)
1265 
1266  def handle_enum(self):
1267  return self._GetNestedType(Enum)
1268 
1269  def handle_auto(self):
1270  # TODO(nnorwitz): warn about using auto? Probably not since it
1271  # will be reclaimed and useful for C++0x.
1272  pass
1273 
1274  def handle_register(self):
1275  pass
1276 
1277  def handle_const(self):
1278  pass
1279 
1280  def handle_inline(self):
1281  pass
1282 
1283  def handle_extern(self):
1284  pass
1285 
1286  def handle_static(self):
1287  pass
1288 
1289  def handle_virtual(self):
1290  # What follows must be a method.
1291  token = token2 = self._GetNextToken()
1292  if token.name == 'inline':
1293  # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
1294  token2 = self._GetNextToken()
1295  if token2.token_type == tokenize.SYNTAX and token2.name == '~':
1296  return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
1297  assert token.token_type == tokenize.NAME or token.name == '::', token
1298  return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') # )
1299  return_type_and_name.insert(0, token)
1300  if token2 is not token:
1301  return_type_and_name.insert(1, token2)
1302  return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
1303  None, False)
1304 
1305  def handle_volatile(self):
1306  pass
1307 
1308  def handle_mutable(self):
1309  pass
1310 
    def handle_public(self):
        """Switch current member access to public (only valid in a class)."""
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC
1314 
    def handle_protected(self):
        """Switch current member access to protected (only valid in a class)."""
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED
1318 
    def handle_private(self):
        """Switch current member access to private (only valid in a class)."""
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1322 
1323  def handle_friend(self):
1324  tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1325  assert tokens
1326  t0 = tokens[0]
1327  return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1328 
1330  pass
1331 
1333  pass
1334 
1336  pass
1337 
1339  pass
1340 
1341  def handle_new(self):
1342  pass
1343 
1344  def handle_delete(self):
1345  tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1346  assert tokens
1347  return Delete(tokens[0].start, tokens[0].end, tokens)
1348 
    def handle_typedef(self):
        """Parse a typedef and return a Typedef node.

        Handles typedef'd structs/enums/unions/classes, and contains
        positional hacks for function-pointer and array typedefs.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        # The last token is normally the new type's name; 'indices' is only
        # used for its start/end source position.
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
1388 
1389  def handle_typeid(self):
1390  pass # Not needed yet.
1391 
1392  def handle_typename(self):
1393  pass # Not needed yet.
1394 
1396  result = {}
1397  tokens = list(self._GetMatchingChar('<', '>'))
1398  len_tokens = len(tokens) - 1 # Ignore trailing '>'.
1399  i = 0
1400  while i < len_tokens:
1401  key = tokens[i].name
1402  i += 1
1403  if keywords.IsKeyword(key) or key == ',':
1404  continue
1405  type_name = default = None
1406  if i < len_tokens:
1407  i += 1
1408  if tokens[i-1].name == '=':
1409  assert i < len_tokens, '%s %s' % (i, tokens)
1410  default, unused_next_token = self.GetName(tokens[i:])
1411  i += len(default)
1412  else:
1413  if tokens[i-1].name != ',':
1414  # We got something like: Type variable.
1415  # Re-adjust the key (variable) and type_name (Type).
1416  key = tokens[i-1].name
1417  type_name = tokens[i-2]
1418 
1419  result[key] = (type_name, default)
1420  return result
1421 
1422  def handle_template(self):
1423  token = self._GetNextToken()
1424  assert token.token_type == tokenize.SYNTAX, token
1425  assert token.name == '<', token
1426  templated_types = self._GetTemplatedTypes()
1427  # TODO(nnorwitz): for now, just ignore the template params.
1428  token = self._GetNextToken()
1429  if token.token_type == tokenize.NAME:
1430  if token.name == 'class':
1431  return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
1432  elif token.name == 'struct':
1433  return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
1434  elif token.name == 'friend':
1435  return self.handle_friend()
1436  self._AddBackToken(token)
1437  tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
1438  tokens.append(last)
1439  self._AddBackTokens(tokens)
1440  if last.name == '(':
1441  return self.GetMethod(FUNCTION_NONE, templated_types)
1442  # Must be a variable definition.
1443  return None
1444 
1445  def handle_true(self):
1446  pass # Nothing to do.
1447 
1448  def handle_false(self):
1449  pass # Nothing to do.
1450 
1451  def handle_asm(self):
1452  pass # Not needed yet.
1453 
1454  def handle_class(self):
1455  return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1456 
    def _GetBases(self):
        """Parse the base-class list of a class declaration.

        Returns:
          (bases, token) where bases is a list of type nodes and token is
          the '{' token that opens the class body.
        """
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token
1488 
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Args:
          class_type: node factory, e.g. Class or Struct.
          visibility: default member visibility (VISIBILITY_* constant).
          templated_types: template parameter dict, or None.

        Returns:
          A class_type node, or a variable/method node when the keyword
          actually introduces a variable or method declaration.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class: next token is syntax (e.g. '{').
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration. Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested builder so members get
            # the right class context and visibility.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class Foo { ... } var;' -- the trailing NAME declares
                    # a variable of the just-defined class type.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)
1560 
    def handle_namespace(self):
        """Parse a namespace: named, anonymous, or an alias ('namespace x = y')."""
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
1587 
1588  def handle_using(self):
1589  tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1590  assert tokens
1591  return Using(tokens[0].start, tokens[0].end, tokens)
1592 
1593  def handle_explicit(self):
1594  assert self.in_class
1595  # Nothing much to do.
1596  # TODO(nnorwitz): maybe verify the method name == class name.
1597  # This must be a ctor.
1598  return self.GetMethod(FUNCTION_CTOR, None)
1599 
1600  def handle_this(self):
1601  pass # Nothing to do.
1602 
1603  def handle_operator(self):
1604  # Pull off the next token(s?) and make that part of the method name.
1605  pass
1606 
1607  def handle_sizeof(self):
1608  pass
1609 
1610  def handle_case(self):
1611  pass
1612 
1613  def handle_switch(self):
1614  pass
1615 
1616  def handle_default(self):
1617  token = self._GetNextToken()
1618  assert token.token_type == tokenize.SYNTAX
1619  assert token.name == ':'
1620 
1621  def handle_if(self):
1622  pass
1623 
1624  def handle_else(self):
1625  pass
1626 
1627  def handle_return(self):
1628  tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1629  if not tokens:
1630  return Return(self.current_token.start, self.current_token.end, None)
1631  return Return(tokens[0].start, tokens[0].end, tokens)
1632 
1633  def handle_goto(self):
1634  tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1635  assert len(tokens) == 1, str(tokens)
1636  return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1637 
1638  def handle_try(self):
1639  pass # Not needed yet.
1640 
1641  def handle_catch(self):
1642  pass # Not needed yet.
1643 
1644  def handle_throw(self):
1645  pass # Not needed yet.
1646 
1647  def handle_while(self):
1648  pass
1649 
1650  def handle_do(self):
1651  pass
1652 
1653  def handle_for(self):
1654  pass
1655 
1656  def handle_break(self):
1657  self._IgnoreUpTo(tokenize.SYNTAX, ';')
1658 
1659  def handle_continue(self):
1660  self._IgnoreUpTo(tokenize.SYNTAX, ';')
1661 
1662 
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1674 
1675 
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Parsing is best-effort: ignore files we cannot fully parse.
        # Fix: narrowed from a bare 'except:', which would also swallow
        # SystemExit and other non-Exception interrupts.
        pass
1698 
1699 
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
1709 
1710 
def main(argv):
    """Parse each C++ file on the command line; dump the AST when debugging.

    Args:
      argv: argument list; argv[1:] are the filenames to process.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            entire_ast = filter(None, builder.Generate())
        except KeyboardInterrupt:
            return
        except Exception:
            # Fix: narrowed from a bare 'except:', which would also swallow
            # SystemExit.  Already printed a warning, print the traceback
            # and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1730 
1731 
# Script entry point: parse each C++ file named on the command line.
if __name__ == '__main__':
    main(sys.argv)