Bob Ippolito (@etrepum) on Haskell, Python, Erlang, JavaScript, etc.
«

Using the compiler module to help internationalize your software

»

While working on I18N for the Talking Panda iLingo installers, I needed a reliable way to extract strings from Python code. I decided to use the _(u'string') pattern, even though I'm not using gettext. This simple class uses the AST facilities of Python's compiler module to extract such strings, as long as they appear as constants in the code (that is, a call to _ with exactly one literal string argument).

from compiler import parseFile
from compiler.visitor import ASTVisitor
from compiler.ast import Name, Const
from sets import Set

class StringVisitor(object):
    def __init__(self):
        self.strings = Set()
        self.visitor = ASTVisitor()

    def findStrings(self, fn):
        self.visitor.preorder(parseFile(fn), self)

    def visitCallFunc(self, node):
        fn = node.node
        if not (isinstance(fn, Name) and
                fn.name == '_' and
                len(node.args) == 1 and
                isinstance(node.args[0], Const)):
            for child in node.getChildNodes():
                self.visit(child)
            return
        self.strings.add(node.args[0].value)

if __name__ == '__main__':
    import sys
    sv = StringVisitor()
    for fn in sys.argv[1:]:
        sv.findStrings(fn)
    lst = list(sv.strings)
    lst.sort()
    for s in lst:
        print s.encode('unicode_escape')