/** The grammar for a mechanism description
  * The format is BNF-like, as used by JavaCC
  * 
  */

// JavaCC generator options: STATIC = false requests a parser whose
// generated members are instance members rather than static ones.
options
{
    STATIC = false;
}

PARSER_BEGIN(MechanismParser)

/** Parser class shell; it declares no members beyond those JavaCC generates. */
public class MechanismParser
{
}

PARSER_END(MechanismParser)

////////////////// whitespace and line separators
SKIP :
{
    /* white space */
      " "
    | "\n"
    | "\r"
    | "\t"
    | "\f"
    /* comment openers: each one switches to a state that discards the comment body */
    | "//" : IN_SINGLE_LINE_COMMENT
    | "/*" : IN_MULTILINE_COMMENT
}

// Body of a "//" comment: a line terminator returns to DEFAULT; any other
// character is discarded. The terminator alternative must stay first so that
// it wins over the catch-all.
<IN_SINGLE_LINE_COMMENT>
SKIP :
{
      < "\n" | "\r" > : DEFAULT
    | < ~[] >
}

// Body of a "/*" comment: "*/" returns to DEFAULT; any other character is
// discarded.
<IN_MULTILINE_COMMENT>
SKIP :
{
      "*/" : DEFAULT
    | < ~[] >
}

// Separator placed between the sides of a reaction.
TOKEN :
{
    < REACTION_MARKER: "<->" >
}

// Binary operator tokens. The declaration order is kept as is so that the
// generated token kinds do not change.
TOKEN :
{
    < IF  : "?"  >
  | < OR  : "|"  >
  | < AND : "&"  >
  | < EQ  : "==" >
  | < LE  : "<=" >
  | < GE  : ">=" >
  | < NE  : "!=" >
  | < LT  : "<"  >
  | < GT  : ">"  >
  | < ADD : "+"  >
  | < SUB : "-"  >
  | < MUL : "*"  >
  | < DIV : "/"  >
  | < MOD : "%"  >
  | < POW : "^"  >
}

TOKEN: { /* reserved words */
      // These must be declared before <ID>: when two tokens match the same
      // longest prefix, the one declared earliest in the file wins.
      < RATE: "rate(" > // Note: no space before the '('
    | < RATECONSTANT: "k(" >
    | < FINAL: "final" >
      // "exp" and "ln" are used as string literals by unary(). Without a
      // declaration ahead of <ID> they would always be lexed as <ID> and the
      // no-parens function form (exp x) could never be parsed.
    | < EXP: "exp" >
    | < LN: "ln" >
    | < OUTPUT_MARKER: "*output" >
      // the script marker also switches the lexer to the IN_SCRIPT state
    | < SCRIPT_MARKER: "*script" > : IN_SCRIPT
}

// Integers: a non-zero digit followed by any number of digits.
// Recognized in both the DEFAULT and IN_SCRIPT lexical states.
<DEFAULT, IN_SCRIPT>
TOKEN :
{
    < INT: ["1"-"9"] ( ["0"-"9"] )* >
}

// Numbers, following the Java grammar. Three forms are accepted:
//   digits, optionally followed by "." digits and an exponent
//   "." digits, optionally followed by an exponent
//   digits followed by an exponent
TOKEN :
{
    < NUMBER:
          ( <DIGIT> )+ ( "." ( <DIGIT> )* ( <EXPONENT> )? )?
        | "." ( <DIGIT> )+ ( <EXPONENT> )?
        | ( <DIGIT> )+ <EXPONENT>
    >
  | // private helper: e or E, an optional sign, then at least one digit
    < #EXPONENT: ["e","E"] ( ["+","-"] )? ( ["0"-"9"] )+ >
}

// Identifiers: a letter followed by any mix of letters, digits and
// apostrophes ('). Recognized in both the DEFAULT and IN_SCRIPT states.
<DEFAULT, IN_SCRIPT>
TOKEN :
{
    < ID: <LETTER> ( <LETTER> | <DIGIT> | "'" )* >
  | // private helpers
    < #LETTER: ["A"-"Z","a"-"z","$","_"] >
  | < #DIGIT: ["0"-"9"] >
}

// Java identifier: starts with '@', then a letter, then letters, digits or
// dots (the dots separate the parts of a qualified name).
TOKEN :
{
    < JAVAID: "@" <LETTER> ( <LETTER> | <DIGIT> | "." )* >
}

// Double-quoted string literal, following the Java grammar. The body is any
// mix of ordinary characters (anything except '"', '\', LF and CR) and
// backslash escapes (a single escape character or an octal code).
<DEFAULT, IN_SCRIPT>
TOKEN :
{
    < STRING:
        "\""
        (
            ( ~["\"","\\","\n","\r"] )
          | "\\"
            (
                ["n","t","b","r","f","\\","'","\""]
              | ["0"-"7"] ( ["0"-"7"] )?
              | ["0"-"3"] ["0"-"7"] ["0"-"7"]
            )
        )*
        "\""
    >
}

////////////////// Mechanism grammar

// Start production: an optional reaction block, an optional output block and
// an optional script block, in that order, followed by end of input.
void mechanism() :
{}
{
    ( reactionBlock() )?
    ( outputBlock() )?
    ( scriptBlock() )?
    <EOF>
}

// One or more statements, each ended by eol(). A statement is a reaction, an
// expression, or empty. Syntactic lookahead decides whether a statement is a
// reaction.
void reactionBlock() :
{}
{
    (
        (
            LOOKAHEAD( reactionLookahead() ) reaction()
          | expression()
        )?
        eol()
    )+
}

// The output marker followed by zero or more statements, each an optional
// expression ended by eol().
void outputBlock() :
{}
{
    <OUTPUT_MARKER>
    (
        ( expression() )?
        eol()
    )*
}

// The script marker followed by the script text.
void scriptBlock() :
{}
{
    <SCRIPT_MARKER>
    script()
}

// Statement terminator.
void eol() :
{}
{
    ";"
}

////////////////// Reaction grammar

// Lookahead-only production that decides whether a statement is a reaction.
// It stops at the first <REACTION_MARKER>; there is no need to scan the rest
// of the reaction.
void reactionLookahead() :
{}
{
    side()
    <REACTION_MARKER>
}

// Two or more sides, separated by reaction markers.
void reaction() :
{}
{
    side()
    (
        <REACTION_MARKER> side()
    )+
}

// One side of a reaction: reactants joined by "+".
void side() :
{}
{
    reactant()
    (
        <ADD> reactant()
    )*
}

// An identifier, optionally preceded by an integer.
void reactant() :
{}
{
    ( <INT> )?
    <ID>
}

////////////////// Expression grammar

// An expression is either an assignment expression or a "final" declaration
// of the form: final <ID> : expression.
// The matched <ID> is not captured: this grammar only checks that the input is
// well formed, and no variable is declared to hold the token.
void expression(): {}{
      assignExpression()
    | <FINAL> <ID> ":" expression()
}

// A binary expression, optionally followed by "=" or ":" and another
// assignment expression. The recursion is on the right-hand side.
void assignExpression() :
{}
{
    binaryExpression()
    (
        ( "=" | ":" ) assignExpression()
    )?
}

// This part of the grammar will determine whether a mechanism is well-formed or not,
// but does not parse the precedence of operators correctly. The actual grammar uses
// the precedence-climbing algorithm described by
// Theodore Norvell
//
// The production is a flat list: unary (op unary)*. That accepts the same
// inputs as recursing into binaryExpression() after each operator, without
// the choice conflict that a loop combined with right recursion creates.
// The operator is not captured, because binary() returns nothing.
void binaryExpression (): {}{
    unaryExpression() ( binary() unaryExpression() )*
}

// Any one of the binary operator tokens.
void binary() :
{}
{
      <IF>
    | <OR>
    | <AND>
    | <EQ>
    | <LE>
    | <GE>
    | <NE>
    | <LT>
    | <GT>
    | <ADD>
    | <SUB>
    | <MUL>
    | <DIV>
    | <MOD>
    | <POW>
}

// Unary operators bind tighter than every binary operator. This is believed
// to match Java but differs from ordinary algebraic notation: -x^2 is read
// as (-x)^2, not -(x^2).
void unaryExpression() :
{}
{
    // leading "+"
      <ADD> unaryExpression()
    // prefix operator or function applied without parentheses
    // (exp x rather than exp(x))
    | unary() unaryExpression()
    | primaryExpression()
}

/* The predefined unary operators; any other method is written as a <JAVAID>. */
void unary() :
{}
{
      <SUB>
    | "!"
    | "exp"
    | "ln"
}

// The atoms of an expression: a number, an identifier, a rate, a rate
// constant, a Java identifier (optionally called), or a parenthesized
// expression.
void primaryExpression() :
{}
{
      number()
    | identifier()
    | rate()
    | rateConstant()
    | javaID()
    | "(" expression() ")"
}

// A numeric literal: either token form.
void number() :
{}
{
      <INT>
    | <NUMBER>
}

// A plain identifier.
void identifier() :
{}
{
    <ID>
}

// "rate(" followed by a reaction number or an identifier, then ")".
void rate() :
{}
{
    <RATE>
    ( reactionNumber() | <ID> )
    ")"
}

// "k(" followed by a reaction number, then ")".
void rateConstant() :
{}
{
    <RATECONSTANT>
    reactionNumber()
    ")"
}

// A mandatory sign ("+" or "-") followed by an optional integer.
void reactionNumber() :
{}
{
    ( <ADD> | <SUB> )
    ( <INT> )?
}

// A Java identifier, optionally followed by a parenthesized argument list
// (which may be empty).
void javaID() :
{}
{
    <JAVAID>
    (
        "(" ( argumentList() )? ")"
    )?
}

// One or more expressions separated by commas.
void argumentList() :
{}
{
    expression()
    (
        "," expression()
    )*
}

////////////////// TOKENS

////////////////// Script processing
// This is a simplified view of a script: just any sequence of characters from the script marker to
// the end of the string.
// The real grammar scans the remaining text for specified strings (reserved words like go
// or variable names) that need to be modified to work with the mechanism interpreter.
// The rest is passed straight through to the BeanShell
// interpreter for parsing; that interpreter will decide if any errors exist in the script.

// One-character catch-all for the IN_SCRIPT lexical state. Nothing is skipped
// in that state, so without it any white space or punctuation in a script
// would be a lexical error. It is declared after INT, ID and STRING, so those
// tokens still win whenever they match.
<IN_SCRIPT>
TOKEN: {
      < SCRIPT_TEXT: ~[] >
}

// Accepts every token the IN_SCRIPT state can produce, up to end of input.
// (A bare ~[] is not a legal expansion unit in a BNF production; character
// classes have to be declared as tokens.)
void script () : {}{
    ( <INT> | <ID> | <STRING> | <SCRIPT_TEXT> )*
}