«
Using the compiler module to help internationalize your software
»
While working on I18N for the Talking Panda iLingo installers, I needed a reliable way to extract strings from Python code. I decided to use the _(u'string') pattern, even though I'm not using gettext. This simple class uses Python's AST facilities (the compiler module) to extract such strings, as long as they are literal constants in the code.
# NOTE: Python 2 only -- the ``compiler`` package and the ``sets`` module
# were removed in Python 3.
from compiler import parseFile
from compiler.visitor import ASTVisitor
from compiler.ast import Name, Const
from sets import Set


class StringVisitor(object):
    """Collect the string literals wrapped in ``_(...)`` calls.

    Instances serve as the visitor object for
    ``compiler.visitor.ASTVisitor``: the walker dispatches every
    ``CallFunc`` node to ``visitCallFunc``.  Matching strings accumulate
    in ``self.strings`` across all files passed to ``findStrings``.
    """

    def __init__(self):
        # Unique marked strings found so far.
        self.strings = Set()
        # Walker that drives the traversal.  Its preorder() call attaches
        # a ``visit`` method to this object, which visitCallFunc relies on
        # to descend into child nodes.
        self.visitor = ASTVisitor()

    def findStrings(self, fn):
        """Parse the Python source file *fn* and record its marked strings."""
        self.visitor.preorder(parseFile(fn), self)

    def visitCallFunc(self, node):
        """Record the argument of ``_(<string constant>)``; otherwise descend.

        A call is recorded only when it targets the bare name ``_`` with
        exactly one argument and that argument is a string constant.  Any
        other call has its child nodes visited so that marked strings
        nested inside it are still found.
        """
        fn = node.node
        args = node.args
        if (isinstance(fn, Name) and fn.name == '_' and len(args) == 1
                and isinstance(args[0], Const)
                # Const also wraps numeric literals; collecting one would
                # make the ``.encode`` call in the report loop below fail.
                and isinstance(args[0].value, basestring)):
            self.strings.add(args[0].value)
            return
        for child in node.getChildNodes():
            self.visit(child)


if __name__ == '__main__':
    import sys
    sv = StringVisitor()
    for fn in sys.argv[1:]:
        sv.findStrings(fn)
    # Emit each unique string once, in sorted order, escaped so that every
    # string occupies exactly one output line.
    for s in sorted(sv.strings):
        # With a single parenthesised expression this behaves exactly like
        # the Python 2 print statement.
        print(s.encode('unicode_escape'))