# from py2.6 distr # -*- coding: utf-8 -*- """ ast ~~~ The `ast` module helps Python applications to process trees of the Python abstract syntax grammar. The abstract syntax itself might change with each Python release; this module helps to find out programmatically what the current grammar looks like and allows modifications of it. An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as a flag to the `compile()` builtin function or by using the `parse()` function from this module. The result will be a tree of objects whose classes all inherit from `ast.AST`. A modified abstract syntax tree can be compiled into a Python code object using the built-in `compile()` function. Additionally various helper functions are provided that make working with the trees simpler. The main intention of the helper functions and this module in general is to provide an easy to use interface for libraries that work tightly with the python syntax (template engines for example). :copyright: Copyright 2008 by Armin Ronacher. :license: Python License. """ from _ast import * BOOLOP_SYMBOLS = { And: 'and', Or: 'or' } BINOP_SYMBOLS = { Add: '+', Sub: '-', Mult: '*', Div: '/', FloorDiv: '//', Mod: '%', LShift: '<<', RShift: '>>', BitOr: '|', BitAnd: '&', BitXor: '^' } CMPOP_SYMBOLS = { Eq: '==', Gt: '>', GtE: '>=', In: 'in', Is: 'is', IsNot: 'is not', Lt: '<', LtE: '<=', NotEq: '!=', NotIn: 'not in' } UNARYOP_SYMBOLS = { Invert: '~', Not: 'not', UAdd: '+', USub: '-' } ALL_SYMBOLS = {} ALL_SYMBOLS.update(BOOLOP_SYMBOLS) ALL_SYMBOLS.update(BINOP_SYMBOLS) ALL_SYMBOLS.update(CMPOP_SYMBOLS) ALL_SYMBOLS.update(UNARYOP_SYMBOLS) def parse(expr, filename='', mode='exec'): """Parse an expression into an AST node.""" return compile(expr, filename, mode, PyCF_ONLY_AST) def literal_eval(node_or_string): """Safe evaluate a literal. The string or node provided may include any of the following python structures: strings, numbers, tuples, lists, dicts, booleans or None. """ _safe_names = {'None': None, 'True': True, 'False': False} if isinstance(node_or_string, basestring): node_or_string = parse(node_or_string, mode='eval') if isinstance(node_or_string, Expression): node_or_string = node_or_string.body def _convert(node): if isinstance(node, Str): return node.s elif isinstance(node, Num): return node.n elif isinstance(node, Tuple): return tuple(map(_convert, node.elts)) elif isinstance(node, List): return list(map(_convert, node.elts)) elif isinstance(node, Dict): return dict((_convert(k), _convert(v)) for k, v in zip(node.keys, node.values)) elif isinstance(node, Name): if node.id in _safe_names: return _safe_names[node.id] raise ValueError('malformed string') return _convert(node_or_string) def dump(node, annotate_fields=True, include_attributes=False): """A very verbose representation of the node passed. This is useful for debugging purposes. Per default the returned string will show the names and the values for fields. This makes the code impossible to evaluate, if evaluation is wanted `annotate_fields` must be set to False. Attributes such as line numbers and column offsets are dumped by default. If this is wanted, `include_attributes` can be set to `True`. """ def _format(node): if isinstance(node, AST): fields = [(a, _format(b)) for a, b in iter_fields(node)] rv = '%s(%s' % (node.__class__.__name__, ', '.join( ('%s=%s' % field for field in fields) if annotate_fields else (b for a, b in fields) )) if include_attributes and node._attributes: rv += fields and ', ' or ' ' rv += ', '.join('%s=%s' % (a, _format(getattr(node, a))) for a in node._attributes) return rv + ')' elif isinstance(node, list): return '[%s]' % ', '.join(_format(x) for x in node) return repr(node) if not isinstance(node, AST): raise TypeError('expected AST, got %r' % node.__class__.__name__) return _format(node) def copy_location(new_node, old_node): """Copy the source location hint (`lineno` and `col_offset`) from the old to the new node if possible and return the new one. """ for attr in 'lineno', 'col_offset': if attr in old_node._attributes and attr in new_node._attributes \ and hasattr(old_node, attr): setattr(new_node, attr, getattr(old_node, attr)) return new_node def fix_missing_locations(node): """Some nodes require a line number and the column offset. Without that information the compiler will abort the compilation. Because it can be a dull task to add appropriate line numbers and column offsets when adding new nodes this function can help. It copies the line number and column offset of the parent node to the child nodes without this information. Unlike `copy_location` this works recursive and won't touch nodes that already have a location information. """ def _fix(node, lineno, col_offset): if 'lineno' in node._attributes: if not hasattr(node, 'lineno'): node.lineno = lineno else: lineno = node.lineno if 'col_offset' in node._attributes: if not hasattr(node, 'col_offset'): node.col_offset = col_offset else: col_offset = node.col_offset for child in iter_child_nodes(node): _fix(child, lineno, col_offset) _fix(node, 1, 0) return node def increment_lineno(node, n=1): """Increment the line numbers of all nodes by `n` if they have line number attributes. This is useful to "move code" to a different location in a file. """ if 'lineno' in node._attributes: node.lineno = getattr(node, 'lineno', 0) + n for child in walk(node): if 'lineno' in child._attributes: child.lineno = getattr(child, 'lineno', 0) + n return node def iter_fields(node): """Iterate over all fields of a node, only yielding existing fields.""" if not node: return if not node._fields: return for field in node._fields: try: yield field, getattr(node, field) except AttributeError: pass def get_fields(node): """Like `iter_fiels` but returns a dict.""" return dict(iter_fields(node)) def iter_child_nodes(node): """Iterate over all child nodes or a node.""" for name, field in iter_fields(node): if isinstance(field, AST): yield field elif isinstance(field, list): for item in field: if isinstance(item, AST): yield item def get_child_nodes(node): """Like `iter_child_nodes` but returns a list.""" return list(iter_child_nodes(node)) def get_docstring(node, trim=True): """Return the docstring for the given node or `None` if no docstring can be found. If the node provided does not accept docstrings a `TypeError` will be raised. """ if not isinstance(node, (FunctionDef, ClassDef, Module)): raise TypeError("%r can't have docstrings" % node.__class__.__name__) if node.body and isinstance(node.body[0], Expr) and \ isinstance(node.body[0].value, Str): doc = node.body[0].value.s if trim: doc = trim_docstring(doc) return doc def trim_docstring(docstring): """Trim a docstring. This should probably go into the inspect module.""" lines = docstring.expandtabs().splitlines() # Find minimum indentation of any non-blank lines after first line. from sys import maxint margin = maxint for line in lines[1:]: content = len(line.lstrip()) if content: indent = len(line) - content margin = min(margin, indent) # Remove indentation. if lines: lines[0] = lines[0].lstrip() if margin < maxint: for i in range(1, len(lines)): lines[i] = lines[i][margin:] # Remove any trailing or leading blank lines. while lines and not lines[-1]: lines.pop() while lines and not lines[0]: lines.pop(0) return '\n'.join(lines) def get_symbol(operator): """Return the symbol of the given operator node or node type.""" if isinstance(operator, AST): operator = type(operator) try: return ALL_SYMBOLS[operator] except KeyError: raise LookupError('no known symbol for %r' % operator) def walk(node): """Iterate over all nodes. This is useful if you only want to modify nodes in place and don't care about the context or the order the nodes are returned. """ from collections import deque todo = deque([node]) while todo: node = todo.popleft() todo.extend(iter_child_nodes(node)) yield node class NodeVisitor(object): """Walks the abstract syntax tree and call visitor functions for every node found. The visitor functions may return values which will be forwarded by the `visit` method. Per default the visitor functions for the nodes are ``'visit_'`` + class name of the node. So a `TryFinally` node visit function would be `visit_TryFinally`. This behavior can be changed by overriding the `get_visitor` function. If no visitor function exists for a node (return value `None`) the `generic_visit` visitor is used instead. Don't use the `NodeVisitor` if you want to apply changes to nodes during traversing. For this a special visitor exists (`NodeTransformer`) that allows modifications. """ def get_visitor(self, node): """Return the visitor function for this node or `None` if no visitor exists for this node. In that case the generic visit function is used instead. """ method = 'visit_' + node.__class__.__name__ return getattr(self, method, None) def visit(self, node): """Visit a node.""" f = self.get_visitor(node) if f is not None: return f(node) return self.generic_visit(node) def generic_visit(self, node): """Called if no explicit visitor function exists for a node.""" for field, value in iter_fields(node): if isinstance(value, list): for item in value: if isinstance(item, AST): self.visit(item) elif isinstance(value, AST): self.visit(value) class NodeTransformer(NodeVisitor): """Walks the abstract syntax tree and allows modifications of nodes. The `NodeTransformer` will walk the AST and use the return value of the visitor functions to replace or remove the old node. If the return value of the visitor function is `None` the node will be removed from the previous location otherwise it's replaced with the return value. The return value may be the original node in which case no replacement takes place. Here an example transformer that rewrites all `foo` to `data['foo']`:: class RewriteName(NodeTransformer): def visit_Name(self, node): return copy_location(Subscript( value=Name(id='data', ctx=Load()), slice=Index(value=Str(s=node.id)), ctx=node.ctx ), node) Keep in mind that if the node you're operating on has child nodes you must either transform the child nodes yourself or call the generic visit function for the node first. Nodes that were part of a collection of statements (that applies to all statement nodes) may also return a list of nodes rather than just a single node. Usually you use the transformer like this:: node = YourTransformer().visit(node) """ def generic_visit(self, node): for field, old_value in iter_fields(node): old_value = getattr(node, field, None) if isinstance(old_value, list): new_values = [] for value in old_value: if isinstance(value, AST): value = self.visit(value) if value is None: continue elif not isinstance(value, AST): new_values.extend(value) continue new_values.append(value) old_value[:] = new_values elif isinstance(old_value, AST): new_node = self.visit(old_value) if new_node is None: delattr(node, field) else: setattr(node, field, new_node) return node