# NOTE: extraction artifact removed here — a web-viewer header ("ast.py 56 KB")
# and a copied line-number gutter that were not part of the original source.
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2007 Neal Norwitz
  4. # Portions Copyright 2007 Google Inc.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. """Generate an Abstract Syntax Tree (AST) for C++."""
  18. # FIXME:
  19. # * Tokens should never be exported, need to convert to Nodes
  20. # (return types, parameters, etc.)
  21. # * Handle static class data for templatized classes
  22. # * Handle casts (both C++ and C-style)
  23. # * Handle conditions and loops (if/else, switch, for, while/do)
  24. #
  25. # TODO much, much later:
  26. # * Handle #define
  27. # * exceptions
  28. try:
  29. # Python 3.x
  30. import builtins
  31. except ImportError:
  32. # Python 2.x
  33. import __builtin__ as builtins
  34. import collections
  35. import sys
  36. import traceback
  37. from cpp import keywords
  38. from cpp import tokenize
  39. from cpp import utils
  40. if not hasattr(builtins, 'reversed'):
  41. # Support Python 2.3 and earlier.
  42. def reversed(seq):
  43. for i in range(len(seq)-1, -1, -1):
  44. yield seq[i]
  45. if not hasattr(builtins, 'next'):
  46. # Support Python 2.5 and earlier.
  47. def next(obj):
  48. return obj.next()
# Access-control levels encountered while parsing a class body.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags OR'd together into Function/Method `modifiers` to describe
# properties of a parsed function (const, virtual, ctor/dtor, etc.).
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100

# NOTE: the following bare string literal is intentionally dead code kept
# for reference; it documents type-modifier flags that are not yet handled.
"""
These are currently unused. Should really handle these properly at some point.
TYPE_MODIFIER_INLINE = 0x010000
TYPE_MODIFIER_EXTERN = 0x020000
TYPE_MODIFIER_STATIC = 0x040000
TYPE_MODIFIER_CONST = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE = 0x400000
TYPE_MODIFIER_MAP = {
'inline': TYPE_MODIFIER_INLINE,
'extern': TYPE_MODIFIER_EXTERN,
'static': TYPE_MODIFIER_STATIC,
'const': TYPE_MODIFIER_CONST,
'register': TYPE_MODIFIER_REGISTER,
'volatile': TYPE_MODIFIER_VOLATILE,
'mutable': TYPE_MODIFIER_MUTABLE,
}
"""

# Sentinel token type/name used internally by AstBuilder to track when a
# namespace scope closes (see Generate()).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
  81. # TODO(nnorwitz): use this as a singleton for templated_types, etc
  82. # where we don't want to create a new empty dict each time. It is also const.
  83. class _NullDict(object):
  84. __contains__ = lambda self: False
  85. keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
  86. # TODO(nnorwitz): move AST nodes into a separate module.
  87. class Node(object):
  88. """Base AST node."""
  89. def __init__(self, start, end):
  90. self.start = start
  91. self.end = end
  92. def IsDeclaration(self):
  93. """Returns bool if this node is a declaration."""
  94. return False
  95. def IsDefinition(self):
  96. """Returns bool if this node is a definition."""
  97. return False
  98. def IsExportable(self):
  99. """Returns bool if this node exportable from a header file."""
  100. return False
  101. def Requires(self, node):
  102. """Does this AST node require the definition of the node passed in?"""
  103. return False
  104. def XXX__str__(self):
  105. return self._StringHelper(self.__class__.__name__, '')
  106. def _StringHelper(self, name, suffix):
  107. if not utils.DEBUG:
  108. return '%s(%s)' % (name, suffix)
  109. return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
  110. def __repr__(self):
  111. return str(self)
  112. class Define(Node):
  113. def __init__(self, start, end, name, definition):
  114. Node.__init__(self, start, end)
  115. self.name = name
  116. self.definition = definition
  117. def __str__(self):
  118. value = '%s %s' % (self.name, self.definition)
  119. return self._StringHelper(self.__class__.__name__, value)
  120. class Include(Node):
  121. def __init__(self, start, end, filename, system):
  122. Node.__init__(self, start, end)
  123. self.filename = filename
  124. self.system = system
  125. def __str__(self):
  126. fmt = '"%s"'
  127. if self.system:
  128. fmt = '<%s>'
  129. return self._StringHelper(self.__class__.__name__, fmt % self.filename)
class Goto(Node):
    """A `goto label;` statement."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))
class Expr(Node):
    """A generic expression statement, stored as its raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        # expr: the tokens making up the expression, verbatim.
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))
class Return(Expr):
    """A `return <expr>;` statement."""
    pass
class Delete(Expr):
    """A `delete <expr>;` statement."""
    pass
class Friend(Expr):
    """A `friend` declaration inside a class."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Snapshot the namespace stack as it exists at parse time.
        self.namespace = namespace[:]
class Using(Node):
    """A `using` declaration/directive; `names` holds its tokens."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))
  159. class Parameter(Node):
  160. def __init__(self, start, end, name, parameter_type, default):
  161. Node.__init__(self, start, end)
  162. self.name = name
  163. self.type = parameter_type
  164. self.default = default
  165. def Requires(self, node):
  166. # TODO(nnorwitz): handle namespaces, etc.
  167. return self.type.name == node.name
  168. def __str__(self):
  169. name = str(self.type)
  170. suffix = '%s %s' % (name, self.name)
  171. if self.default:
  172. suffix += ' = ' + ''.join([d.name for d in self.default])
  173. return self._StringHelper(self.__class__.__name__, suffix)
  174. class _GenericDeclaration(Node):
  175. def __init__(self, start, end, name, namespace):
  176. Node.__init__(self, start, end)
  177. self.name = name
  178. self.namespace = namespace[:]
  179. def FullName(self):
  180. prefix = ''
  181. if self.namespace and self.namespace[-1]:
  182. prefix = '::'.join(self.namespace) + '::'
  183. return prefix + self.name
  184. def _TypeStringHelper(self, suffix):
  185. if self.namespace:
  186. names = [n or '<anonymous>' for n in self.namespace]
  187. suffix += ' in ' + '::'.join(names)
  188. return self._StringHelper(self.__class__.__name__, suffix)
  189. # TODO(nnorwitz): merge with Parameter in some way?
  190. class VariableDeclaration(_GenericDeclaration):
  191. def __init__(self, start, end, name, var_type, initial_value, namespace):
  192. _GenericDeclaration.__init__(self, start, end, name, namespace)
  193. self.type = var_type
  194. self.initial_value = initial_value
  195. def Requires(self, node):
  196. # TODO(nnorwitz): handle namespaces, etc.
  197. return self.type.name == node.name
  198. def ToString(self):
  199. """Return a string that tries to reconstitute the variable decl."""
  200. suffix = '%s %s' % (self.type, self.name)
  201. if self.initial_value:
  202. suffix += ' = ' + self.initial_value
  203. return suffix
  204. def __str__(self):
  205. return self._StringHelper(self.__class__.__name__, self.ToString())
  206. class Typedef(_GenericDeclaration):
  207. def __init__(self, start, end, name, alias, namespace):
  208. _GenericDeclaration.__init__(self, start, end, name, namespace)
  209. self.alias = alias
  210. def IsDefinition(self):
  211. return True
  212. def IsExportable(self):
  213. return True
  214. def Requires(self, node):
  215. # TODO(nnorwitz): handle namespaces, etc.
  216. name = node.name
  217. for token in self.alias:
  218. if token is not None and name == token.name:
  219. return True
  220. return False
  221. def __str__(self):
  222. suffix = '%s, %s' % (self.name, self.alias)
  223. return self._TypeStringHelper(suffix)
  224. class _NestedType(_GenericDeclaration):
  225. def __init__(self, start, end, name, fields, namespace):
  226. _GenericDeclaration.__init__(self, start, end, name, namespace)
  227. self.fields = fields
  228. def IsDefinition(self):
  229. return True
  230. def IsExportable(self):
  231. return True
  232. def __str__(self):
  233. suffix = '%s, {%s}' % (self.name, self.fields)
  234. return self._TypeStringHelper(suffix)
class Union(_NestedType):
    """A union definition."""
    pass
class Enum(_NestedType):
    """An enum definition."""
    pass
  239. class Class(_GenericDeclaration):
  240. def __init__(self, start, end, name, bases, templated_types, body, namespace):
  241. _GenericDeclaration.__init__(self, start, end, name, namespace)
  242. self.bases = bases
  243. self.body = body
  244. self.templated_types = templated_types
  245. def IsDeclaration(self):
  246. return self.bases is None and self.body is None
  247. def IsDefinition(self):
  248. return not self.IsDeclaration()
  249. def IsExportable(self):
  250. return not self.IsDeclaration()
  251. def Requires(self, node):
  252. # TODO(nnorwitz): handle namespaces, etc.
  253. if self.bases:
  254. for token_list in self.bases:
  255. # TODO(nnorwitz): bases are tokens, do name comparison.
  256. for token in token_list:
  257. if token.name == node.name:
  258. return True
  259. # TODO(nnorwitz): search in body too.
  260. return False
  261. def __str__(self):
  262. name = self.name
  263. if self.templated_types:
  264. name += '<%s>' % self.templated_types
  265. suffix = '%s, %s, %s' % (name, self.bases, self.body)
  266. return self._TypeStringHelper(suffix)
class Struct(Class):
    """A struct; treated identically to Class apart from the keyword."""
    pass
  269. class Function(_GenericDeclaration):
  270. def __init__(self, start, end, name, return_type, parameters,
  271. modifiers, templated_types, body, namespace):
  272. _GenericDeclaration.__init__(self, start, end, name, namespace)
  273. converter = TypeConverter(namespace)
  274. self.return_type = converter.CreateReturnType(return_type)
  275. self.parameters = converter.ToParameters(parameters)
  276. self.modifiers = modifiers
  277. self.body = body
  278. self.templated_types = templated_types
  279. def IsDeclaration(self):
  280. return self.body is None
  281. def IsDefinition(self):
  282. return self.body is not None
  283. def IsExportable(self):
  284. if self.return_type and 'static' in self.return_type.modifiers:
  285. return False
  286. return None not in self.namespace
  287. def Requires(self, node):
  288. if self.parameters:
  289. # TODO(nnorwitz): parameters are tokens, do name comparison.
  290. for p in self.parameters:
  291. if p.name == node.name:
  292. return True
  293. # TODO(nnorwitz): search in body too.
  294. return False
  295. def __str__(self):
  296. # TODO(nnorwitz): add templated_types.
  297. suffix = ('%s %s(%s), 0x%02x, %s' %
  298. (self.return_type, self.name, self.parameters,
  299. self.modifiers, self.body))
  300. return self._TypeStringHelper(suffix)
class Method(Function):
    """A method; like Function but also records the enclosing class name."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
  309. class Type(_GenericDeclaration):
  310. """Type used for any variable (eg class, primitive, struct, etc)."""
  311. def __init__(self, start, end, name, templated_types, modifiers,
  312. reference, pointer, array):
  313. """
  314. Args:
  315. name: str name of main type
  316. templated_types: [Class (Type?)] template type info between <>
  317. modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
  318. reference, pointer, array: bools
  319. """
  320. _GenericDeclaration.__init__(self, start, end, name, [])
  321. self.templated_types = templated_types
  322. if not name and modifiers:
  323. self.name = modifiers.pop()
  324. self.modifiers = modifiers
  325. self.reference = reference
  326. self.pointer = pointer
  327. self.array = array
  328. def __str__(self):
  329. prefix = ''
  330. if self.modifiers:
  331. prefix = ' '.join(self.modifiers) + ' '
  332. name = str(self.name)
  333. if self.templated_types:
  334. name += '<%s>' % self.templated_types
  335. suffix = prefix + name
  336. if self.reference:
  337. suffix += '&'
  338. if self.pointer:
  339. suffix += '*'
  340. if self.array:
  341. suffix += '[]'
  342. return self._TypeStringHelper(suffix)
  343. # By definition, Is* are always False. A Type can only exist in
  344. # some sort of variable declaration, parameter, or return value.
  345. def IsDeclaration(self):
  346. return False
  347. def IsDefinition(self):
  348. return False
  349. def IsExportable(self):
  350. return False
class TypeConverter(object):
    """Converts raw token sequences into AST Type/Parameter nodes."""

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        # Scan from `start` (just past an opening '<') to its matching '>',
        # tracking nesting depth.  Returns (tokens inside the template
        # arguments, index just past the closing '>').
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recurse to convert the template arguments themselves.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1
        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their components.

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # Step back over an array suffix `name[...]` if present.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                        p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert the tokens of a parameter list into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # Flush the accumulated tokens into one Parameter node.
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        brace_count = 0
        for s in tokens:
            if not first_token:
                first_token = s

            # Check for braces before templates, as we can have unmatched '<>'
            # inside default arguments.
            if s.name == '{':
                brace_count += 1
            elif s.name == '}':
                brace_count -= 1
            if brace_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                # Parameter boundary: emit the current one and reset state.
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type from a return-type token sequence, or None if empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice indices spanning '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
  563. class AstBuilder(object):
  564. def __init__(self, token_stream, filename, in_class='', visibility=None,
  565. namespace_stack=[]):
  566. self.tokens = token_stream
  567. self.filename = filename
  568. # TODO(nnorwitz): use a better data structure (deque) for the queue.
  569. # Switching directions of the "queue" improved perf by about 25%.
  570. # Using a deque should be even better since we access from both sides.
  571. self.token_queue = []
  572. self.namespace_stack = namespace_stack[:]
  573. self.in_class = in_class
  574. if in_class is None:
  575. self.in_class_name_only = None
  576. else:
  577. self.in_class_name_only = in_class.split('::')[-1]
  578. self.visibility = visibility
  579. self.in_function = False
  580. self.current_token = None
  581. # Keep the state whether we are currently handling a typedef or not.
  582. self._handling_typedef = False
  583. self.converter = TypeConverter(self.namespace_stack)
  584. def HandleError(self, msg, token):
  585. printable_queue = list(reversed(self.token_queue[-20:]))
  586. sys.stderr.write('Got %s in %s @ %s %s\n' %
  587. (msg, self.filename, token, printable_queue))
    def Generate(self):
        """Yield top-level AST nodes parsed from the token stream."""
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                # Internal bookkeeping token: a namespace scope closed.
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Deliberately broad: log context for any failure, then
                # re-raise so the caller still sees the original exception.
                self.HandleError('exception', token)
                raise
  607. def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
  608. ref_pointer_name_seq, templated_types, value=None):
  609. reference = '&' in ref_pointer_name_seq
  610. pointer = '*' in ref_pointer_name_seq
  611. array = '[' in ref_pointer_name_seq
  612. var_type = Type(pos_token.start, pos_token.end, type_name,
  613. templated_types, type_modifiers,
  614. reference, pointer, array)
  615. return VariableDeclaration(pos_token.start, pos_token.end,
  616. name, var_type, value, self.namespace_stack)
    def _GenerateOne(self, token):
        """Parse one construct starting at `token`; return a node or None."""
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                if token.name == 'enum':
                    # Pop the next token and only put it back if it's not
                    # 'class'.  This allows us to support the two-token
                    # 'enum class' keyword as if it were simply 'enum'.
                    next = self._GetNextToken()
                    if next.name != 'class':
                        self._AddBackToken(next)

                # Keywords dispatch to handle_<keyword> methods.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpToIgnoringTemplates(syntax,
                                                        '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Drop the '<...>' span so the decoration scan below
                    # only sees the declaration's own tokens.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push everything back and let a handle_* method (if any)
                # take over from the name token.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split "NAME value" at the first whitespace run.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    # `#if 0` block: skip it entirely.
                    self._SkipIf0Blocks()
        return None
  731. def _GetTokensUpTo(self, expected_token_type, expected_token):
  732. return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
  733. def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
  734. last_token = self._GetNextToken()
  735. tokens = []
  736. while (last_token.token_type != expected_token_type or
  737. last_token.name not in expected_tokens):
  738. tokens.append(last_token)
  739. last_token = self._GetNextToken()
  740. return tokens, last_token
# Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
# expected token.
def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type,
                                       *expected_tokens):
    """Like _GetVarTokensUpTo, but a terminator inside '<...>' is skipped.

    Returns (collected_tokens, terminating_token).
    """
    last_token = self._GetNextToken()
    tokens = []
    # Depth of '<...>' nesting; while > 0 the terminator test is disabled.
    nesting = 0
    while (nesting > 0 or
           last_token.token_type != expected_token_type or
           last_token.name not in expected_tokens):
        tokens.append(last_token)
        last_token = self._GetNextToken()
        # NOTE(review): nesting is updated from the token fetched *after*
        # the append, so a '<' appearing as the very first token is never
        # counted; '>' from tokens like '->' would also decrement if the
        # tokenizer emitted them as a bare '>' — presumably it does not.
        # TODO confirm against the tokenize module.
        if last_token.name == '<':
            nesting += 1
        elif last_token.name == '>':
            nesting -= 1
    return tokens, last_token
  758. # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
  759. def _IgnoreUpTo(self, token_type, token):
  760. unused_tokens = self._GetTokensUpTo(token_type, token)
  761. def _SkipIf0Blocks(self):
  762. count = 1
  763. while 1:
  764. token = self._GetNextToken()
  765. if token.token_type != tokenize.PREPROCESSOR:
  766. continue
  767. name = token.name[1:].lstrip()
  768. if name.startswith('endif'):
  769. count -= 1
  770. if count == 0:
  771. break
  772. elif name.startswith('if'):
  773. count += 1
  774. def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
  775. if GetNextToken is None:
  776. GetNextToken = self._GetNextToken
  777. # Assumes the current token is open_paren and we will consume
  778. # and return up to the close_paren.
  779. count = 1
  780. token = GetNextToken()
  781. while 1:
  782. if token.token_type == tokenize.SYNTAX:
  783. if token.name == open_paren:
  784. count += 1
  785. elif token.name == close_paren:
  786. count -= 1
  787. if count == 0:
  788. break
  789. yield token
  790. token = GetNextToken()
  791. yield token
def _GetParameters(self):
    """Yield all tokens of a '(...)' group, including the closing ')'."""
    return self._GetMatchingChar('(', ')')

def GetScope(self):
    """Yield all tokens of a '{...}' scope, including the closing '}'."""
    return self._GetMatchingChar('{', '}')
  796. def _GetNextToken(self):
  797. if self.token_queue:
  798. return self.token_queue.pop()
  799. try:
  800. return next(self.tokens)
  801. except StopIteration:
  802. return
  803. def _AddBackToken(self, token):
  804. if token.whence == tokenize.WHENCE_STREAM:
  805. token.whence = tokenize.WHENCE_QUEUE
  806. self.token_queue.insert(0, token)
  807. else:
  808. assert token.whence == tokenize.WHENCE_QUEUE, token
  809. self.token_queue.append(token)
  810. def _AddBackTokens(self, tokens):
  811. if tokens:
  812. if tokens[-1].whence == tokenize.WHENCE_STREAM:
  813. for token in tokens:
  814. token.whence = tokenize.WHENCE_QUEUE
  815. self.token_queue[:0] = reversed(tokens)
  816. else:
  817. assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
  818. self.token_queue.extend(reversed(tokens))
def GetName(self, seq=None):
    """Returns ([tokens], next_token_info).

    Reads a (possibly qualified, possibly templated) name: a run of
    NAME tokens joined by '::', with any '<...>' template argument
    group pulled in whole.  When seq is given, tokens come from that
    sequence instead of the parser's stream.  The first token that is
    not part of the name is returned as next_token_info.
    """
    GetNextToken = self._GetNextToken
    if seq is not None:
        it = iter(seq)
        GetNextToken = lambda: next(it)
    next_token = GetNextToken()
    tokens = []
    last_token_was_name = False
    while (next_token.token_type == tokenize.NAME or
           (next_token.token_type == tokenize.SYNTAX and
            next_token.name in ('::', '<'))):
        # Two NAMEs in a row means the identifier should terminate.
        # It's probably some sort of variable declaration.
        if last_token_was_name and next_token.token_type == tokenize.NAME:
            break
        last_token_was_name = next_token.token_type == tokenize.NAME
        tokens.append(next_token)
        # Handle templated names: consume the whole balanced '<...>'.
        if next_token.name == '<':
            tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
            last_token_was_name = True
        next_token = GetNextToken()
    return tokens, next_token
  843. def GetMethod(self, modifiers, templated_types):
  844. return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
  845. assert len(return_type_and_name) >= 1
  846. return self._GetMethod(return_type_and_name, modifiers, templated_types,
  847. False)
def _GetMethod(self, return_type_and_name, modifiers, templated_types,
               get_paren):
    """Parse the rest of a function/method declaration or definition.

    Args:
      return_type_and_name: tokens read so far (return type + name).
      modifiers: FUNCTION_* bit flags accumulated by the caller.
      templated_types: template parameter dict or None.
      get_paren: when True, the '(' (and a possible '<...>' for a
          templatized dtor) has not been consumed yet.

    Returns a Method, Function, or (for function-pointer data members)
    a variable node from _CreateVariable.
    """
    template_portion = None
    if get_paren:
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        if token.name == '<':
            # Handle templatized dtors.
            template_portion = [token]
            template_portion.extend(self._GetMatchingChar('<', '>'))
            token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '(', token

    name = return_type_and_name.pop()
    # Handle templatized ctors: the popped token is the closing '>' of
    # the template argument list, so peel the whole '<...>' group off.
    if name.name == '>':
        index = 1
        while return_type_and_name[index].name != '<':
            index += 1
        template_portion = return_type_and_name[index:] + [name]
        del return_type_and_name[index:]
        name = return_type_and_name.pop()
    elif name.name == ']':
        # operator[]: fuse 'operator' '[' ']' into a single NAME token.
        rt = return_type_and_name
        assert rt[-1].name == '[', return_type_and_name
        assert rt[-2].name == 'operator', return_type_and_name
        name_seq = return_type_and_name[-2:]
        del return_type_and_name[-2:]
        name = tokenize.Token(tokenize.NAME, 'operator[]',
                              name_seq[0].start, name.end)
        # Get the open paren so _GetParameters() below works.
        unused_open_paren = self._GetNextToken()

    # TODO(nnorwitz): store template_portion.
    return_type = return_type_and_name
    indices = name
    if return_type:
        indices = return_type[0]

    # Force ctor for templatized ctors.
    if name.name == self.in_class and not modifiers:
        modifiers |= FUNCTION_CTOR
    parameters = list(self._GetParameters())
    del parameters[-1]  # Remove trailing ')'.

    # Handling operator() is especially weird: the first '()' pair is
    # part of the name, the second holds the parameters.
    if name.name == 'operator' and not parameters:
        token = self._GetNextToken()
        assert token.name == '(', token
        parameters = list(self._GetParameters())
        del parameters[-1]  # Remove trailing ')'.

    token = self._GetNextToken()
    # Consume trailing modifiers: const, throw(...), __attribute__((...)),
    # override, and all-caps macro annotations.
    while token.token_type == tokenize.NAME:
        modifier_token = token
        token = self._GetNextToken()
        if modifier_token.name == 'const':
            modifiers |= FUNCTION_CONST
        elif modifier_token.name == '__attribute__':
            # TODO(nnorwitz): handle more __attribute__ details.
            modifiers |= FUNCTION_ATTRIBUTE
            assert token.name == '(', token
            # Consume everything between the (parens).
            unused_tokens = list(self._GetMatchingChar('(', ')'))
            token = self._GetNextToken()
        elif modifier_token.name == 'throw':
            modifiers |= FUNCTION_THROW
            assert token.name == '(', token
            # Consume everything between the (parens).
            unused_tokens = list(self._GetMatchingChar('(', ')'))
            token = self._GetNextToken()
        elif modifier_token.name == 'override':
            modifiers |= FUNCTION_OVERRIDE
        elif modifier_token.name == modifier_token.name.upper():
            # HACK(nnorwitz): assume that all upper-case names
            # are some macro we aren't expanding.
            modifiers |= FUNCTION_UNKNOWN_ANNOTATION
        else:
            self.HandleError('unexpected token', modifier_token)

    assert token.token_type == tokenize.SYNTAX, token

    # Handle ctor initializers: skip to the body or the ';'.
    if token.name == ':':
        # TODO(nnorwitz): anything else to handle for initializer list?
        while token.name != ';' and token.name != '{':
            token = self._GetNextToken()

    # Handle pointer to functions that are really data but look
    # like method declarations.
    if token.name == '(':
        if parameters[0].name == '*':
            # name contains the return type.
            name = parameters.pop()
            # parameters contains the name of the data.
            modifiers = [p.name for p in parameters]
            # Already at the ( to open the parameter list.
            function_parameters = list(self._GetMatchingChar('(', ')'))
            del function_parameters[-1]  # Remove trailing ')'.
            # TODO(nnorwitz): store the function_parameters.
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == ';', token
            return self._CreateVariable(indices, name.name, indices.name,
                                        modifiers, '', None)
        # At this point, we got something like:
        # return_type (type::*name_)(params);
        # This is a data member called name_ that is a function pointer.
        # With this code: void (sq_type::*field_)(string&);
        # We get: name=void return_type=[] parameters=sq_type ... field_
        # TODO(nnorwitz): is return_type always empty?
        # TODO(nnorwitz): this isn't even close to being correct.
        # Just put in something so we don't crash and can move on.
        real_name = parameters[-1]
        modifiers = [p.name for p in self._GetParameters()]
        del modifiers[-1]  # Remove trailing ')'.
        return self._CreateVariable(indices, real_name.name, indices.name,
                                    modifiers, '', None)

    if token.name == '{':
        body = list(self.GetScope())
        del body[-1]  # Remove trailing '}'.
    else:
        body = None
        if token.name == '=':
            token = self._GetNextToken()

            if token.name == 'default' or token.name == 'delete':
                # Ignore explicitly defaulted and deleted special members
                # in C++11.
                token = self._GetNextToken()
            else:
                # Handle pure-virtual declarations.
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

        if token.name == '[':
            # TODO(nnorwitz): store tokens and improve parsing.
            # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
            tokens = list(self._GetMatchingChar('[', ']'))
            token = self._GetNextToken()

        assert token.name == ';', (token, return_type_and_name, parameters)

    # Looks like we got a method, not a function.
    if len(return_type) > 2 and return_type[-1].name == '::':
        return_type, in_class = \
            self._GetReturnTypeAndClassName(return_type)
        return Method(indices.start, indices.end, name.name, in_class,
                      return_type, parameters, modifiers, templated_types,
                      body, self.namespace_stack)
    return Function(indices.start, indices.end, name.name, return_type,
                    parameters, modifiers, templated_types, body,
                    self.namespace_stack)
def _GetReturnTypeAndClassName(self, token_seq):
    """Split tokens like 'Ret Type::Class::' into (return_type, class_name).

    Returns (return_type_tokens, class_name_tokens).
    """
    # Splitting the return type from the class name in a method
    # can be tricky. For example, Return::Type::Is::Hard::To::Find().
    # Where is the return type and where is the class name?
    # The heuristic used is to pull the last name as the class name.
    # This includes all the templated type info.
    # TODO(nnorwitz): if there is only One name like in the
    # example above, punt and assume the last bit is the class name.

    # Ignore a :: prefix, if exists so we can find the first real name.
    i = 0
    if token_seq[0].name == '::':
        i = 1
    # Ignore a :: suffix, if exists.
    # NOTE(review): this tests the second-to-last token even though the
    # caller guarantees the last token is '::' — verify intent.
    end = len(token_seq) - 1
    if token_seq[end-1].name == '::':
        end -= 1

    # Make a copy of the sequence so we can append a sentinel
    # value. This is required for GetName will has to have some
    # terminating condition beyond the last name.
    seq_copy = token_seq[i:end]
    seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
    names = []
    while i < end:
        # Iterate through the sequence parsing out each name.
        # NOTE(review): seq_copy was sliced starting at i, yet is indexed
        # with the absolute i here — presumably harmless because i starts
        # at 0 or 1; confirm for the '::'-prefixed case.
        new_name, next = self.GetName(seq_copy[i:])
        assert new_name, 'Got empty new_name, next=%s' % next
        # We got a pointer or ref. Add it to the name.
        if next and next.token_type == tokenize.SYNTAX:
            new_name.append(next)
        names.append(new_name)
        i += len(new_name)

    # Now that we have the names, it's time to undo what we did.

    # Remove the sentinel value.
    names[-1].pop()
    # Flatten the token sequence for the return type.
    return_type = [e for seq in names[:-1] for e in seq]
    # The class name is the last name.
    class_name = names[-1]
    return return_type, class_name
# Primitive-type keywords carry no standalone declaration info of their
# own; the surrounding declaration parser consumes them, so each handler
# is a no-op.
def handle_bool(self):
    pass

def handle_char(self):
    pass

def handle_int(self):
    pass

def handle_long(self):
    pass

def handle_short(self):
    pass

def handle_double(self):
    pass

def handle_float(self):
    pass

def handle_void(self):
    pass

def handle_wchar_t(self):
    pass

def handle_unsigned(self):
    pass

def handle_signed(self):
    pass
def _GetNestedType(self, ctor):
    """Parse a nested type (union/enum): forward decl, definition, or
    a definition immediately followed by a variable declaration.

    Args:
      ctor: AST node constructor (e.g. Union or Enum).
    """
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])

    # Handle forward declarations.
    if token.token_type == tokenize.SYNTAX and token.name == ';':
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Inside a typedef, a NAME here belongs to the typedef, not to us.
    if token.token_type == tokenize.NAME and self._handling_typedef:
        self._AddBackToken(token)
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Must be the type declaration.
    fields = list(self._GetMatchingChar('{', '}'))
    del fields[-1]  # Remove trailing '}'.
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        # NOTE: 'next' shadows the builtin; kept as-is here.
        next = self._GetNextToken()
        new_type = ctor(token.start, token.end, name, fields,
                        self.namespace_stack)
        # A name means this is an anonymous type and the name
        # is the variable declaration.
        if next.token_type != tokenize.NAME:
            return new_type
        name = new_type
        token = next

    # Must be variable declaration using the type prefixed with keyword.
    assert token.token_type == tokenize.NAME, token
    return self._CreateVariable(token, token.name, name, [], '', None)
def handle_struct(self):
    """Parse a 'struct' use: variable decl, method returning a struct,
    or fall through to full class parsing via _GetClass.
    """
    # Special case the handling typedef/aliasing of structs here.
    # It would be a pain to handle in the class code.
    name_tokens, var_token = self.GetName()
    if name_tokens:
        next_token = self._GetNextToken()
        is_syntax = (var_token.token_type == tokenize.SYNTAX and
                     var_token.name[0] in '*&')
        is_variable = (var_token.token_type == tokenize.NAME and
                       next_token.name == ';')
        variable = var_token
        if is_syntax and not is_variable:
            variable = next_token
            temp = self._GetNextToken()
            if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                # Handle methods declared to return a struct.
                t0 = name_tokens[0]
                # Synthesize a 'struct' token positioned just before the
                # struct name so the method parser sees a full type.
                struct = tokenize.Token(tokenize.NAME, 'struct',
                                        t0.start-7, t0.start-2)
                type_and_name = [struct]
                type_and_name.extend(name_tokens)
                type_and_name.extend((var_token, next_token))
                return self._GetMethod(type_and_name, 0, None, False)
            assert temp.name == ';', (temp, name_tokens, var_token)
        if is_syntax or (is_variable and not self._handling_typedef):
            modifiers = ['struct']
            type_name = ''.join([t.name for t in name_tokens])
            position = name_tokens[0]
            return self._CreateVariable(position, variable.name, type_name,
                                        modifiers, var_token.name, None)
        name_tokens.extend((var_token, next_token))
        self._AddBackTokens(name_tokens)
    else:
        self._AddBackToken(var_token)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
def handle_union(self):
    """Parse a union as a nested type."""
    return self._GetNestedType(Union)

def handle_enum(self):
    """Parse an enum as a nested type."""
    return self._GetNestedType(Enum)

def handle_auto(self):
    # TODO(nnorwitz): warn about using auto? Probably not since it
    # will be reclaimed and useful for C++0x.
    pass

# Storage-class and qualifier keywords are consumed by the surrounding
# declaration parsing; nothing to do for them here.
def handle_register(self):
    pass

def handle_const(self):
    pass

def handle_inline(self):
    pass

def handle_extern(self):
    pass

def handle_static(self):
    pass
def handle_virtual(self):
    """Parse a 'virtual' declaration; what follows must be a method."""
    # What follows must be a method.
    token = token2 = self._GetNextToken()
    if token.name == 'inline':
        # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
        token2 = self._GetNextToken()
    if token2.token_type == tokenize.SYNTAX and token2.name == '~':
        return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
    assert token.token_type == tokenize.NAME or token.name == '::', token
    # Read up to the opening '(' while skipping template argument lists.
    return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
        tokenize.SYNTAX, '(')  # )
    return_type_and_name.insert(0, token)
    if token2 is not token:
        return_type_and_name.insert(1, token2)
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
def handle_volatile(self):
    pass

def handle_mutable(self):
    pass

# Access specifiers update the builder's current visibility; they are
# only legal inside a class body.
def handle_public(self):
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC

def handle_protected(self):
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED

def handle_private(self):
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
  1164. def handle_friend(self):
  1165. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1166. assert tokens
  1167. t0 = tokens[0]
  1168. return Friend(t0.start, t0.end, tokens, self.namespace_stack)
# Cast keywords and 'new' appear only inside expressions, which this
# parser does not model; nothing to do.
def handle_static_cast(self):
    pass

def handle_const_cast(self):
    pass

def handle_dynamic_cast(self):
    pass

def handle_reinterpret_cast(self):
    pass

def handle_new(self):
    pass
  1179. def handle_delete(self):
  1180. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1181. assert tokens
  1182. return Delete(tokens[0].start, tokens[0].end, tokens)
def handle_typedef(self):
    """Parse a typedef declaration into a Typedef node."""
    token = self._GetNextToken()
    if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
        # Token must be struct/enum/union/class.
        method = getattr(self, 'handle_' + token.name)
        self._handling_typedef = True
        tokens = [method()]
        self._handling_typedef = False
    else:
        tokens = [token]

    # Get the remainder of the typedef up to the semi-colon.
    tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

    # TODO(nnorwitz): clean all this up.
    assert tokens
    name = tokens.pop()
    indices = name
    if tokens:
        indices = tokens[0]
    if not indices:
        indices = token
    if name.name == ')':
        # HACK(nnorwitz): Handle pointers to functions "properly".
        if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
            tokens.append(name)
            name = tokens[3]
    elif name.name == ']':
        # HACK(nnorwitz): Handle arrays properly.
        if len(tokens) >= 2:
            tokens.append(name)
            name = tokens[1]

    new_type = tokens
    # tokens[0] may be an AST node (from the handler above) rather than
    # a raw token; only convert raw token sequences.
    if tokens and isinstance(tokens[0], tokenize.Token):
        new_type = self.converter.ToType(tokens)[0]
    return Typedef(indices.start, indices.end, name.name,
                   new_type, self.namespace_stack)
def handle_typeid(self):
    pass  # Not needed yet.

def handle_typename(self):
    pass  # Not needed yet.
def _GetTemplatedTypes(self):
    """Parse a '<...>' template parameter list.

    Returns an OrderedDict mapping parameter name to a
    (type_name_token_or_None, default_tokens_or_None) pair.
    """
    result = collections.OrderedDict()
    tokens = list(self._GetMatchingChar('<', '>'))
    len_tokens = len(tokens) - 1  # Ignore trailing '>'.
    i = 0
    while i < len_tokens:
        key = tokens[i].name
        i += 1
        # Skip 'typename'/'class' keywords and separators.
        if keywords.IsKeyword(key) or key == ',':
            continue
        type_name = default = None
        if i < len_tokens:
            i += 1
            if tokens[i-1].name == '=':
                # Parameter with a default value.
                assert i < len_tokens, '%s %s' % (i, tokens)
                default, unused_next_token = self.GetName(tokens[i:])
                i += len(default)
            else:
                if tokens[i-1].name != ',':
                    # We got something like: Type variable.
                    # Re-adjust the key (variable) and type_name (Type).
                    key = tokens[i-1].name
                    type_name = tokens[i-2]
        result[key] = (type_name, default)
    return result
def handle_template(self):
    """Parse a template declaration: class, struct, friend, or method."""
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX, token
    assert token.name == '<', token
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
        if token.name == 'class':
            return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
        elif token.name == 'struct':
            return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
        elif token.name == 'friend':
            return self.handle_friend()
    self._AddBackToken(token)

    # Lookahead to decide between a templated method and a variable:
    # read up to '(' or ';', then push everything back.
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name == '(':
        return self.GetMethod(FUNCTION_NONE, templated_types)
    # Must be a variable definition.
    return None
def handle_true(self):
    pass  # Nothing to do.

def handle_false(self):
    pass  # Nothing to do.

def handle_asm(self):
    pass  # Not needed yet.

def handle_class(self):
    """Parse a class; default member visibility is private."""
    return self._GetClass(Class, VISIBILITY_PRIVATE, None)
def _GetBases(self):
    """Parse a base-class list after ':'.

    Returns (bases, token) where bases is a list of AST type nodes and
    token is the '{' that opens the class body.
    """
    # Get base classes.
    bases = []
    while 1:
        token = self._GetNextToken()
        assert token.token_type == tokenize.NAME, token
        # TODO(nnorwitz): store kind of inheritance...maybe.
        if token.name not in ('public', 'protected', 'private'):
            # If inheritance type is not specified, it is private.
            # Just put the token back so we can form a name.
            # TODO(nnorwitz): it would be good to warn about this.
            self._AddBackToken(token)
        else:
            # Check for virtual inheritance.
            token = self._GetNextToken()
            if token.name != 'virtual':
                self._AddBackToken(token)
            else:
                # TODO(nnorwitz): store that we got virtual for this base.
                pass
        base, next_token = self.GetName()
        bases_ast = self.converter.ToType(base)
        assert len(bases_ast) == 1, bases_ast
        bases.append(bases_ast[0])
        assert next_token.token_type == tokenize.SYNTAX, next_token
        if next_token.name == '{':
            token = next_token
            break
        # Support multiple inheritance.
        assert next_token.name == ',', next_token
    return bases, token
def _GetClass(self, class_type, visibility, templated_types):
    """Parse a class/struct: forward decl, inline decl, or definition.

    Args:
      class_type: AST node constructor (Class or Struct).
      visibility: default member visibility for the body.
      templated_types: template parameter dict or None.
    """
    class_name = None
    class_token = self._GetNextToken()
    if class_token.token_type != tokenize.NAME:
        assert class_token.token_type == tokenize.SYNTAX, class_token
        token = class_token
    else:
        # Skip any macro (e.g. storage class specifiers) after the
        # 'class' keyword.
        next_token = self._GetNextToken()
        if next_token.token_type == tokenize.NAME:
            self._AddBackToken(next_token)
        else:
            self._AddBackTokens([class_token, next_token])
        name_tokens, token = self.GetName()
        class_name = ''.join([t.name for t in name_tokens])
    bases = None
    if token.token_type == tokenize.SYNTAX:
        if token.name == ';':
            # Forward declaration.
            return class_type(class_token.start, class_token.end,
                              class_name, None, templated_types, None,
                              self.namespace_stack)
        if token.name in '*&':
            # Inline forward declaration. Could be method or data.
            name_token = self._GetNextToken()
            next_token = self._GetNextToken()
            if next_token.name == ';':
                # Handle data
                modifiers = ['class']
                return self._CreateVariable(class_token, name_token.name,
                                            class_name,
                                            modifiers, token.name, None)
            else:
                # Assume this is a method.
                tokens = (class_token, token, name_token, next_token)
                self._AddBackTokens(tokens)
                return self.GetMethod(FUNCTION_NONE, None)
        if token.name == ':':
            bases, token = self._GetBases()

    body = None
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '{', token

        # Recursively parse the class body with a nested builder.
        ast = AstBuilder(self.GetScope(), self.filename, class_name,
                         visibility, self.namespace_stack)
        body = list(ast.Generate())

        if not self._handling_typedef:
            token = self._GetNextToken()
            if token.token_type != tokenize.NAME:
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
            else:
                # 'class X { ... } var;' — the definition doubles as the
                # type of a variable declaration.
                new_class = class_type(class_token.start, class_token.end,
                                       class_name, bases, None,
                                       body, self.namespace_stack)

                modifiers = []
                return self._CreateVariable(class_token,
                                            token.name, new_class,
                                            modifiers, token.name, None)
    else:
        if not self._handling_typedef:
            self.HandleError('non-typedef token', token)
        self._AddBackToken(token)

    return class_type(class_token.start, class_token.end, class_name,
                      bases, templated_types, body, self.namespace_stack)
def handle_namespace(self):
    """Parse a namespace (named, anonymous, or alias).

    Pushes the namespace name onto namespace_stack and arranges for an
    internal pop token to be seen when the namespace ends.
    """
    # Support anonymous namespaces.
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
    internal_token.whence = token.whence
    if token.name == '=':
        # TODO(nnorwitz): handle aliasing namespaces.
        name, next_token = self.GetName()
        assert next_token.name == ';', next_token
        self._AddBackToken(internal_token)
    else:
        assert token.name == '{', token
        tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        tokens[-1] = internal_token
        # Handle namespace with nothing in it.
        self._AddBackTokens(tokens)
    return None
  1401. def handle_using(self):
  1402. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1403. assert tokens
  1404. return Using(tokens[0].start, tokens[0].end, tokens)
def handle_explicit(self):
    """Parse an 'explicit' declaration, which must be a constructor."""
    assert self.in_class
    # Nothing much to do.
    # TODO(nnorwitz): maybe verify the method name == class name.
    # This must be a ctor.
    return self.GetMethod(FUNCTION_CTOR, None)
def handle_this(self):
    pass  # Nothing to do.

def handle_operator(self):
    # Pull off the next token(s?) and make that part of the method name.
    pass

def handle_sizeof(self):
    pass

def handle_case(self):
    pass

def handle_switch(self):
    pass

def handle_default(self):
    # Consume the ':' that must follow 'default' in a switch.
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX
    assert token.name == ':'

def handle_if(self):
    pass

def handle_else(self):
    pass
  1430. def handle_return(self):
  1431. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1432. if not tokens:
  1433. return Return(self.current_token.start, self.current_token.end, None)
  1434. return Return(tokens[0].start, tokens[0].end, tokens)
  1435. def handle_goto(self):
  1436. tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1437. assert len(tokens) == 1, str(tokens)
  1438. return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
def handle_try(self):
    pass  # Not needed yet.

def handle_catch(self):
    pass  # Not needed yet.

def handle_throw(self):
    pass  # Not needed yet.

def handle_while(self):
    pass

def handle_do(self):
    pass

def handle_for(self):
    pass

# break/continue carry no information; just consume through the ';'.
def handle_break(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')

def handle_continue(self):
    self._IgnoreUpTo(tokenize.SYNTAX, ';')
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
  1464. def PrintIndentifiers(filename, should_print):
  1465. """Prints all identifiers for a C++ source file.
  1466. Args:
  1467. filename: 'file1'
  1468. should_print: predicate with signature: bool Function(token)
  1469. """
  1470. source = utils.ReadFile(filename, False)
  1471. if source is None:
  1472. sys.stderr.write('Unable to find: %s\n' % filename)
  1473. return
  1474. #print('Processing %s' % actual_filename)
  1475. builder = BuilderFromSource(source, filename)
  1476. try:
  1477. for node in builder.Generate():
  1478. if should_print(node):
  1479. print(node.name)
  1480. except KeyboardInterrupt:
  1481. return
  1482. except:
  1483. pass
  1484. def PrintAllIndentifiers(filenames, should_print):
  1485. """Prints all identifiers for each C++ source file in filenames.
  1486. Args:
  1487. filenames: ['file1', 'file2', ...]
  1488. should_print: predicate with signature: bool Function(token)
  1489. """
  1490. for path in filenames:
  1491. PrintIndentifiers(path, should_print)
  1492. def main(argv):
  1493. for filename in argv[1:]:
  1494. source = utils.ReadFile(filename)
  1495. if source is None:
  1496. continue
  1497. print('Processing %s' % filename)
  1498. builder = BuilderFromSource(source, filename)
  1499. try:
  1500. entire_ast = filter(None, builder.Generate())
  1501. except KeyboardInterrupt:
  1502. return
  1503. except:
  1504. # Already printed a warning, print the traceback and continue.
  1505. traceback.print_exc()
  1506. else:
  1507. if utils.DEBUG:
  1508. for ast in entire_ast:
  1509. print(ast)
# Script entry point: parse the files named on the command line.
if __name__ == '__main__':
    main(sys.argv)