/* parser implementation */
#include <string>
#include <strstream>

#include "parser.h"
#include "token.h"
#include "validity_checks.h"
#include "logger.h"

/* Characters treated as token separators by the parser. This has to stay a
   string-literal macro: read_token() relies on adjacent literal
   concatenation (WHITESPACE":={") to build a larger delimiter set. */
#define WHITESPACE " \t\n\r"

/* Constructs a parser over the given configuration stream. The stream is
   handed straight to the _data member, which parse() later reads from. */
parser::parser(strstream& config_data)
     : _data(config_data)
{
}

/* The main entry point. Strips comment lines from the configuration data
   and then parses every top-level rule found in what is left.

   rules   - receives (via push_back) every rule that was built
             successfully.
   returns - false if any top-level token was not a rule, or if any rule
             failed to build; true otherwise.

   NOTE(review): read_token() returns false both at end of input and when
   it hits a token it cannot read, so the loop below stops at the first
   unreadable top-level token without clearing the result flag. */
bool parser::parse(list<rule>& rules)
{
     /*strip comments from the file. the data is read out of the strstream
       line by line, comment lines are dropped, and everything else is
       collected into one string that the tokenizer then walks. */
     string data;
     string line;

     //read into a std::string rather than a fixed size char buffer: the
     //istream::getline(char*, n) form sets failbit on a line that does not
     //fit, which would end the loop and silently drop the rest of the
     //configuration.
     while (std::getline(_data, line)){
	  //a comment line starts with '#' or with "//" in the first column
	  const bool hash_comment = !line.empty() && line[0] == '#';
	  const bool slash_comment = line.compare(0, 2, "//") == 0;
	  if (hash_comment || slash_comment)
	       continue;
	  data += line;
	  //getline discards the '\n'
	  data += '\n';
     }

     //do the actual parsing
     token t;
     string::size_type location = 0;
     //read_token will return a token with the entire rule data contained
     //and a type of RULE
     bool res = true;
     while (read_token(t, data, &location)){
	  if (t.get_type() == RULE){
	       rule r;
	       if (parse_rule(r, t.get_value())){
		    rules.push_back(r);
	       }else{
		    log_error("parser::parse", "failed to build a rule", t);
		    res = false;
	       }
	  }else{ //error in the config file. log it.
	       log_error("parser::parse","failed to find a rule", t);
	       res = false;
	  }
     }
     return res;
}

/* Builds a single rule from the text of one rule block.

   r       - the rule object to fill in.
   rule    - the text of the rule; it is tokenized with read_token().
   returns - true when every nested condition parsed cleanly and the
             finished rule passes validate_rule(); false otherwise.

   A token type that is not expected at rule level is logged and skipped;
   it does not by itself fail the rule. */
bool parser::parse_rule(rule& r, const string& rule)
{
     token t;
     string::size_type pos = 0;
     while(read_token(t, rule , &pos)){
	  //read tokens from the data and act based on their types.
	  switch (t.get_type()){
	  case(SYSCALL_NAME):{
	       r.set_syscall_identifier(t.get_value());
	       break;
	  }
	  case(RULE_NAME):{
	       r.set_rule_name(t.get_value());
	       break;
	  }
	  case(PARAM_CONDITION):{
	       param_conditional* pcond = r.get_param_conditional();
	       if (!pcond){
		    logger::log("parser::parse_rule() out of memory");
		    return false;
	       }
	       //a malformed condition must fail the whole rule, otherwise a
	       //half filled condition could end up in an accepted rule. the
	       //nested parser has already logged the offending token.
	       if (!parse_param_condition(pcond, t.get_value()))
		    return false;
	       break;
	  }
	  case(PROCESS_CONDITION):{
	       process_conditional* pcond = r.get_process_conditional();
	       if (!pcond){
		    logger::log("parser::parse_rule() out of memory");
		    return false;
	       }
	       //same reasoning as for the param condition above.
	       if (!parse_process_condition(pcond, t.get_value()))
		    return false;
	       break;
	  }
	  case(ACTION):{
	       r.set_action(t.get_value());
	       break;
	  }
	  case(CONDITIONS):{
	       r.set_condition_matching(t.get_value());
	       break;
	  }
	  default:
	       log_error("parser::parse_rule", "an unknown token", t);
	       break;
	  }
     }

     //validate the rule, by making sure it has all required fields and
     //no collisions in the instructions.
     if (!validate_rule(r)){
	  //TODO: use logging interface once it supports
	  //user defined data types
	  cerr<<"rule failed validation: "<<r;
	  return false;
     }

     return true;
}

/* Reads the next token out of `rule`, starting the search at offset *pos.

   A token is a keyword followed by a value. The keyword ends at
   whitespace, ':', '=' or '{'. The value is either a single word (a simple
   token) or everything between a '{' and its matching '}' (a complex
   token, which may itself contain nested complex tokens).

   t       - receives the token type and value on success.
   rule    - the text being tokenized.
   pos     - in: where to start looking; out: on success, the offset just
             past the token that was read. Left untouched on failure.
   returns - true if a token was read; false at end of input, on
             mismatched brackets, or on an unknown keyword (the last two
             are logged). */
bool parser::read_token(token& t, const string& rule, string::size_type* pos){
     //a complex token is one which has the begin and end ('{' and '}') chars
     //after the name.
     bool complex = false;

     //find the first non whitespace character.
     string::size_type beg = rule.find_first_not_of(WHITESPACE, *pos);
     if (beg == rule.npos){
	  return false;
     }

     //find the end of this word. this is either whitespace, the ':' sign,
     //the '=' sign or the '{' sign.
     string::size_type end = rule.find_first_of(WHITESPACE":={", beg+1);
     if (end == rule.npos){
	  return false;
     }

     //is it a complex token?
     if (rule[end] == '{')
	  complex = true;

     //this word is the 'type' of the token.
     const string tok = rule.substr(beg, end - beg);

     //now find the beginning of the 'value' part of the token
     beg = rule.find_first_not_of(WHITESPACE, end+1);
     if (beg == rule.npos){
	  return false;
     }

     //again, check if this token is complex.
     if (rule[beg] == '{'){
	  complex = true;
	  //if it is complex, skip forward a few chars until the
	  //actual beginning of the value.
	  beg = rule.find_first_not_of(WHITESPACE,beg+1);
	  if (beg == rule.npos){
	       //nothing but whitespace after the '{': it can never be
	       //closed. bail out here, because npos+1 wraps around to 0
	       //and would send the searches below back to the start.
	       log_error("parser::read_token", "mismatched brackets in input",
			 rule.substr(*pos, string::npos));
	       return false;
	  }
     }

     //if it's not a complex token, the value is only one word, so
     //it ends with whitespace or with the end of the input.
     if (!complex){
	  end = rule.find_first_of(WHITESPACE, beg+1);
	  if (end == rule.npos)
	       end = rule.size();
     }else{
	  //if it is a complex token, it might contain other complex
	  //tokens, so we need to find the '}' sign matching ours.
	  //the search starts at beg itself so that an empty block, where
	  //beg already sits on the closing '}', is matched correctly.
	  int count = 1;
	  end = rule.find_first_of("{}", beg);
	  while (end != rule.npos){
	       if (rule[end] == '{'){
		    ++count;
	       }else{
		    --count;
	       }
	       if (count == 0)
		    break;
	       end = rule.find_first_of("{}", end+1);
	  }
	  if (count != 0){
	       log_error("parser::read_token", "mismatched brackets in input",
			 rule.substr(*pos, string::npos));
	       return false;
	  }
     }

     //ok, copy the value part.
     const string tok_value = rule.substr(beg, end - beg);

     //compare the substring with each of the keywords we know
     //this should be happening via a map.
     if (!tok.compare("rule")){
	  t.set_type(RULE);
     }else if (!tok.compare("syscall_name")){
	  t.set_type(SYSCALL_NAME);
     }else if (!tok.compare("rule_name")){
	  t.set_type(RULE_NAME);
     }else if (!tok.compare("param_location")){
	  t.set_type(PARAM_LOCATION);
     }else if (!tok.compare("process_field")){
	  t.set_type(PROCESS_FIELD);
     }else if (!tok.compare("param_type")){
	  t.set_type(PARAM_TYPE);
     }else if (!tok.compare("action")){
	  t.set_type(ACTION);
     }else if (!tok.compare("operand")){
	  t.set_type(OPERAND);
     }else if (!tok.compare("value")){
	  t.set_type(VALUE);
     }else if (!tok.compare("param_condition")){
	  t.set_type(PARAM_CONDITION);
     }else if (!tok.compare("process_condition")){
	  t.set_type(PROCESS_CONDITION);
     }else if (!tok.compare("conditions")){
	  t.set_type(CONDITIONS);
     }else{
	  log_error("parser::read_token", "unknown token type parsed", tok);
	  return false;
     }
     t.set_value(tok_value);

     //step over the character that terminated the value, but never past
     //the end of the input: with end == npos the old "end+1" wrapped to 0
     //and made every caller re-read the same text forever.
     *pos = (end < rule.size()) ? end + 1 : rule.size();
     return true;
}


/* Reports whether a parsed rule is acceptable, deferring entirely to the
   rule's own is_valid() check. */
bool parser::validate_rule(const rule& r){
     //debugging aid, kept disabled: dump the rule to stderr before checking
/*     cerr<<"------ now validating '"+r.get_rule_name()+"' ---------"<<endl;
       cerr<<r<<endl;*/
     return r.is_valid() ? true : false;
}

/* Fills a param_conditional from the text of a param_condition block.

   pcond   - the conditional to populate; the caller has already checked
             that it is not null.
   data    - the text of the block, tokenized with read_token().
   returns - false as soon as a token of an unexpected type is read (the
             token is logged first); true once the tokens run out. */
bool parser::parse_param_condition(
     param_conditional* pcond, const string& data)
{
     string::size_type offset = 0;
     for (token field; read_token(field, data, &offset); ){
	  switch (field.get_type()){
	  case PARAM_LOCATION:
	       pcond->set_location(field.get_value());
	       continue;
	  case PARAM_TYPE:
	       pcond->set_type(field.get_value());
	       continue;
	  case OPERAND:
	       pcond->set_operand(field.get_value());
	       continue;
	  case VALUE:
	       pcond->set_value(field.get_value());
	       continue;
	  default:
	       //anything else does not belong inside a param condition
	       log_error("parser::parse_param_condition", "got unknown token", field);
	       return false;
	  }
     }
     return true;
}

/* Fills a process_conditional from the text of a process_condition block.

   pcond   - the conditional to populate; the caller has already checked
             that it is not null.
   data    - the text of the block, tokenized with read_token().
   returns - false as soon as a token of an unexpected type is read (the
             token is logged first); true once the tokens run out. */
bool parser::parse_process_condition(
     process_conditional* pcond, const string& data)
{
     string::size_type offset = 0;
     for (token field; read_token(field, data, &offset); ){
	  switch (field.get_type()){
	  case PROCESS_FIELD:
	       pcond->set_process_field(field.get_value());
	       continue;
	  case VALUE:
	       pcond->set_value(field.get_value());
	       continue;
	  default:
	       //anything else does not belong inside a process condition
	       log_error("parser::parse_process_condition", "got unknown token", field);
	       return false;
	  }
     }
     return true;
}

void parser::log_error(const char* func_name, const char* msg, const token& t)
{
     strstream buf; 
     buf<<"*** parse error at function: "<<func_name<<endl;
     buf<<msg<<endl;
     buf<<"token is: "<<t.get_type()<<endl;
     buf<<"token value: "<<t.get_value()<<endl;
     buf<<"end parse error ***"<<endl;
     logger::log(buf.str());
}

void parser::log_error(const char* func_name, const char* msg, const string& data)
{
     strstream buf; 
     buf<<"*** parse error at function: "<<func_name<<endl;
     buf<<msg<<endl;
     buf<<"failed to build token from: "<<endl;
     buf<<data<<endl;
     buf<<"end parse error ***"<<endl;
     logger::log(buf.str());
}
