#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate an Abstract Syntax Tree (AST) for C++."""

__author__ = 'nnorwitz@google.com (Neal Norwitz)'


# TODO:
#  * Tokens should never be exported, need to convert to Nodes
#    (return types, parameters, etc.)
#  * Handle static class data for templatized classes
#  * Handle casts (both C++ and C-style)
#  * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
#  * Handle #define
#  * exceptions

try:
    # Python 3.x
    import builtins
except ImportError:
    # Python 2.x
    import __builtin__ as builtins

import sys
import traceback

from cpp import keywords
from cpp import tokenize
from cpp import utils

if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        return obj.next()

VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100
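
# A minimal illustration (not part of the original code) of how the FUNCTION_*
# bit flags are meant to combine on a Function/Method node's `modifiers` field;
# the variable name below is hypothetical:
#
#   example_modifiers = FUNCTION_VIRTUAL | FUNCTION_CONST
#   if example_modifiers & FUNCTION_VIRTUAL:
#       pass  # declared virtual
#   if not example_modifiers & FUNCTION_PURE_VIRTUAL:
#       pass  # virtual, but not pure virtual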
  60. """
  61. These are currently unused. Should really handle these properly at some point.
  62. TYPE_MODIFIER_INLINE = 0x010000
  63. TYPE_MODIFIER_EXTERN = 0x020000
  64. TYPE_MODIFIER_STATIC = 0x040000
  65. TYPE_MODIFIER_CONST = 0x080000
  66. TYPE_MODIFIER_REGISTER = 0x100000
  67. TYPE_MODIFIER_VOLATILE = 0x200000
  68. TYPE_MODIFIER_MUTABLE = 0x400000
  69. TYPE_MODIFIER_MAP = {
  70. 'inline': TYPE_MODIFIER_INLINE,
  71. 'extern': TYPE_MODIFIER_EXTERN,
  72. 'static': TYPE_MODIFIER_STATIC,
  73. 'const': TYPE_MODIFIER_CONST,
  74. 'register': TYPE_MODIFIER_REGISTER,
  75. 'volatile': TYPE_MODIFIER_VOLATILE,
  76. 'mutable': TYPE_MODIFIER_MUTABLE,
  77. }
  78. """

_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'


# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time.  It is also const.
class _NullDict(object):
    __contains__ = lambda self: False
    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()

# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node is exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)

class Define(Node):
    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        value = '%s %s' % (self.name, self.definition)
        return self._StringHelper(self.__class__.__name__, value)


class Include(Node):
    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        fmt = '"%s"'
        if self.system:
            fmt = '<%s>'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)


class Goto(Node):
    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))


class Expr(Node):
    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))


class Return(Expr):
    pass


class Delete(Expr):
    pass


class Friend(Expr):
    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        self.namespace = namespace[:]


class Using(Node):
    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))


class Parameter(Node):
    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)

class _GenericDeclaration(Node):
    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        self.namespace = namespace[:]

    def FullName(self):
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)


# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())


class Typedef(_GenericDeclaration):
    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        name = node.name
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)


class _NestedType(_GenericDeclaration):
    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)


class Union(_NestedType):
    pass


class Enum(_NestedType):
    pass


class Class(_GenericDeclaration):
    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparison.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)


class Struct(Class):
    pass

class Function(_GenericDeclaration):
    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for p in self.parameters:
                if p.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)


class Method(Function):
    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class


class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False

class TypeConverter(object):

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                        p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
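
# A rough usage sketch (illustration only, not part of the original module):
# TypeConverter.ToType() turns base-class tokens such as "Bar<x, y>" into Type
# nodes.  This assumes tokenize.GetTokens() yields Token objects for a plain
# source string:
#
#   converter = TypeConverter([])
#   tokens = list(tokenize.GetTokens('Bar<x, y>\n'))
#   [base] = converter.ToType(tokens)  # Type named 'Bar' with templated_types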

class AstBuilder(object):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)

    def HandleError(self, msg, token):
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))

    def Generate(self):
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                self.HandleError('exception', token)
                raise

    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)

    def _GenerateOne(self, token):
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                    not keywords.IsBuiltinType(token.name)):
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                        token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None

    def _GetTokensUpTo(self, expected_token_type, expected_token):
        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]

    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token

    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
    def _IgnoreUpTo(self, token_type, token):
        unused_tokens = self._GetTokensUpTo(token_type, token)

    def _SkipIf0Blocks(self):
        count = 1
        while 1:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                count += 1

    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        # Assumes the current token is open_paren and we will consume
        # and return up to the close_paren.
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        yield token

    def _GetParameters(self):
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        return self._GetMatchingChar('{', '}')

    def _GetNextToken(self):
        if self.token_queue:
            return self.token_queue.pop()
        return next(self.tokens)

    def _AddBackToken(self, token):
        if token.whence == tokenize.WHENCE_STREAM:
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
        else:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)

    def _AddBackTokens(self, tokens):
        if tokens:
            if tokens[-1].whence == tokenize.WHENCE_STREAM:
                for token in tokens:
                    token.whence = tokenize.WHENCE_QUEUE
                self.token_queue[:0] = reversed(tokens)
            else:
                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
                self.token_queue.extend(reversed(tokens))

    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info)."""
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token

    def GetMethod(self, modifiers, templated_types):
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(return_type_and_name) >= 1
        return self._GetMethod(return_type_and_name, modifiers, templated_types,
                               False)

    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]  # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]  # Remove trailing ')'.

        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz): assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]  # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]  # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)

    def _GetReturnTypeAndClassName(self, token_seq):
        # Splitting the return type from the class name in a method
        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
        # TODO(nnorwitz): if there is only one name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if it exists, so we can find the first real name.
        i = 0
        if token_seq[0].name == '::':
            i = 1
        # Ignore a :: suffix, if it exists.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel value.
        # This is required because GetName has to have some terminating
        # condition beyond the last name.
        seq_copy = token_seq[i:end]
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))

        names = []
        while i < end:
            # Iterate through the sequence parsing out each name.
            new_name, next = self.GetName(seq_copy[i:])
            assert new_name, 'Got empty new_name, next=%s' % next
            # We got a pointer or ref.  Add it to the name.
            if next and next.token_type == tokenize.SYNTAX:
                new_name.append(next)
            names.append(new_name)
            i += len(new_name)

        # Now that we have the names, it's time to undo what we did.
        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name

    def handle_bool(self):
        pass

    def handle_char(self):
        pass

    def handle_int(self):
        pass

    def handle_long(self):
        pass

    def handle_short(self):
        pass

    def handle_double(self):
        pass

    def handle_float(self):
        pass

    def handle_void(self):
        pass

    def handle_wchar_t(self):
        pass

    def handle_unsigned(self):
        pass

    def handle_signed(self):
        pass

    def _GetNestedType(self, ctor):
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        if token.token_type == tokenize.NAME and self._handling_typedef:
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]  # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)

    def handle_struct(self):
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    def handle_union(self):
        return self._GetNestedType(Union)

    def handle_enum(self):
        token = self._GetNextToken()
        if not (token.token_type == tokenize.NAME and token.name == 'class'):
            self._AddBackToken(token)
        return self._GetNestedType(Enum)

    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass

    def handle_virtual(self):
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)

    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass

    def handle_public(self):
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE

    def handle_friend(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        t0 = tokens[0]
        return Friend(t0.start, t0.end, tokens, self.namespace_stack)

    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass

    def handle_delete(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Delete(tokens[0].start, tokens[0].end, tokens)

    def handle_typedef(self):
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
                keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                    tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)

    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.

    def _GetTemplatedTypes(self):
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1  # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]
            result[key] = (type_name, default)
        return result

    def handle_template(self):
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None

    def handle_true(self):
        pass  # Nothing to do.

    def handle_false(self):
        pass  # Nothing to do.

    def handle_asm(self):
        pass  # Not needed yet.

    def handle_class(self):
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)

    def _GetBases(self):
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token

    def _GetClass(self, class_type, visibility, templated_types):
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data.
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)

    def handle_namespace(self):
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None

    def handle_using(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Using(tokens[0].start, tokens[0].end, tokens)

    def handle_explicit(self):
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)

    def handle_this(self):
        pass  # Nothing to do.

    def handle_operator(self):
        # Pull off the next token(s?) and make that part of the method name.
        pass

    def handle_sizeof(self):
        pass

    def handle_case(self):
        pass

    def handle_switch(self):
        pass

    def handle_default(self):
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX
        assert token.name == ':'

    def handle_if(self):
        pass

    def handle_else(self):
        pass

    def handle_return(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        if not tokens:
            return Return(self.current_token.start, self.current_token.end,
                          None)
        return Return(tokens[0].start, tokens[0].end, tokens)

    def handle_goto(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert len(tokens) == 1, str(tokens)
        return Goto(tokens[0].start, tokens[0].end, tokens[0].name)

    def handle_try(self):
        pass  # Not needed yet.

    def handle_catch(self):
        pass  # Not needed yet.

    def handle_throw(self):
        pass  # Not needed yet.

    def handle_while(self):
        pass

    def handle_do(self):
        pass

    def handle_for(self):
        pass

    def handle_break(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')

    def handle_continue(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')

def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
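
# Example usage (a sketch; the header source and file name below are made up):
#
#   source = 'class Foo : public Bar { public: void Baz(); };\n'
#   builder = BuilderFromSource(source, 'foo.h')
#   for node in builder.Generate():
#       print(node)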

def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except:
        pass

def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for path in filenames:
        PrintIndentifiers(path, should_print)

def main(argv):
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            entire_ast = filter(None, builder.Generate())
        except KeyboardInterrupt:
            return
        except:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)


if __name__ == '__main__':
    main(sys.argv)