#!/usr/bin/env python

def uniq(list):
    m={}
    for i in list:
        m[i]=1
    return m.keys()

def getTokenList(rules):
    "returns a list of predefined tokens from a grammar"    
    res=[]
    tok=filter(lambda x:type(x)==type("") and len(x)>0,
               reduce(lambda x,y:x+y,map(lambda x:x[1],rules)))
    
    return uniq(tok)

def getToken(tokenlist,value):
    "returns a list of tokens from some tokenlist and a input value"
    res=[]	    
    while value:
	# get next (and longest) token
        filt=filter(lambda x,value=value:x[0]>=0,
                    map(lambda x,value=value:(value.find(x),x),tokenlist))
	if filt==[]:
            res.append(value)
    	    break
        else:
	    pos=reduce(lambda x,y:(x[0]<y[0]) and x or y,filt)
    	    if pos[0]>0:
                res.append(value[:pos[0]])
            res.append(pos[1])
	    value=value[pos[0]+len(pos[1]):]
    return res


def sub_parse(level,rules,tokenlist,exclude,todo,token,tree):
    """
    TD parser
    assume:
        no left recursive rules
        stack machine
        no empty rules
    """
    res=0    
    for j in range(len(todo)):
        item=todo[j]
        
        if type(item)==type(0):
            # var -> rule
            for i in range(len(rules)):
                rule=rules[i]
                if rule[0]==item:                    
                    stoken=token[:]                                 
                    subtree=[]                    
                    res=sub_parse(level+1,rules,tokenlist,exclude,
                                  rule[1][:],token,subtree)                    
                    if res:
                        # at the end of tokens?
                        if token==[] and j<len(todo)-1:
                            res=0
                        else:
                            tree.append(subtree)
                        break
                    else:
                        # backtrack
                        token[:]=stoken

        elif type(item)==type("") and len(token)>0:
            # token
            s=token.pop()                        
            res=(item==s or (item=="" and s not in tokenlist))
            if s not in tokenlist and (item==s or item==""):
                tree.append(s)
            elif item==s:
                if s not in exclude:
                    tree[:0]=[s]
                    
        if not res:
            break
    return res

def post_process(value):
    "post process the parse tree"
        
    if type(value)==type([]):
        if len(value)==1:
            return post_process(value[0])
        children=map(post_process,value[1:])            
        res=value[:1]
        if len(children)==1 and res[0] in ["+","-"]:
            children[:0]=[0.0]

            
        for i in range(len(children)):
            child=children[i]
            if type(child)==type((0,)):
                return child
            res.append(child)

            # ["+","a",["+","b","c"]] -> ["+","a","b","c"]
            if type(child)==type([]) and len(child)>2 and \
                   len(children)>1 and (res[0]==child[0]) and res[0] in ["+","*"]:
                res[-1:]=child[1:]
 
    elif type(value)==type(""):
        if  value[0] in "0123456789.":
            try:
                res=float(value)
            except:
                res=("%s is not a float literal"%value,)
        else:
            if not is_identifier(value):
                 res=("%s is not a identifier"%value,)
            else:
                res=value
    else:
        assert 0,repr(value)
    return res

def parse(rules,start,exclude,value):
    """Parse the value with the grammar specified by the rules.
    
    Returns a tree (list of strings) or an errorstring  if something went wrong
    """
    res=[]
    
    # get tokens
    tokenlist=getTokenList(rules)
    token=getToken(tokenlist,value)
    token.reverse()
    
    sub_parse(0,rules,tokenlist,exclude,[start],token,res)
    
    errors=[lambda x:x[0]=="(" and "operation missing",
            lambda x:x[0]==")" and "open parenthesis missing",
            lambda x:len(x)>1 and x[1]=="(" and "close parenthesis missing",
            lambda x,tokenlist=tokenlist:x[0] in tokenlist and "value missing",
            lambda x:"unknown error"]
    
    token.reverse()
    if token!=[]:
        list=filter(lambda x,token=token,errors=errors:
                    x!=0,map(lambda x,token=token,errors=errors:
                             x(token),errors[:]))
        res=(list[0]+": "+"".join(token),)
    else:
        res=post_process(res)
    return res


def parse_eq(value):
    RULE_T=1
    RULE_F=2
    RULE_P=3
    RULE_C=4

    RULES=[(RULE_T,[RULE_F,"+",RULE_T]), (RULE_T,[RULE_F,"-",RULE_T]),
           (RULE_T,[RULE_F]), (RULE_F,[RULE_P,"*",RULE_F]),
           (RULE_F,[RULE_P,"/",RULE_F]), (RULE_F,[RULE_P]),
           (RULE_P,["+",RULE_C]), (RULE_P,["-",RULE_C]), (RULE_P,[RULE_C]),
           (RULE_C,["(",RULE_T,")"]), (RULE_C,[""])]

    exclude=['(',')']
    return parse(RULES,RULE_T,exclude,value)

def is_identifier(id):
    if not id:
        return 0
    if not(id[0].isalpha() or id[0]=="_"):
        return 0
        
    for ch in id[1:]:
        if not (ch.isalnum() or ch=="_"):
            return 0
    return 1

if __name__=="__main__":
    for s in ["5*6-7/lkjf", "024utgoirflgkjq","(&/%&$","8**4"]:
        print "zu parsen:",s
        print "geparst:",parse_eq(s)
        print

