/*
 YamCha -- Yet Another Multipurpose CHunk Annotator

 $Id: parser.cc,v 1.11 2001/06/27 13:24:39 taku-ku Exp $;

 Copyright (C) 2001  Taku Kudoh <taku-ku.aist-nara.ac.jp>
 All rights reserved.

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Library General Public
 License as published by the Free Software Foundation; either
 version 2 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Library General Public License for more details.

 You should have received a copy of the GNU Library General Public
 License along with this library; if not, write to the
 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.
*/
#include "parser.h"
#include "common.h"

// $Id: parser.cc,v 1.11 2001/06/27 13:24:39 taku-ku Exp $;

namespace YamCha {

// Builds an empty, unconfigured parser: every handle starts out null/zero.
// parse() returns 0 while no parse routine has been installed by set().
Parser::Parser() :
  svm_model (0),
  isReverse (0),
  isParsed  (0),
  isWriteHeader (0),
  isAppendFunc (0),
  columnSize (0),
  modelFeaturesList (0),
  _parse (0),
  selectorFunc (0)
{
  // Assigned in the body so the member-initializer list stays exactly as it
  // was.  Both members were previously left indeterminate.
  _write = 0;                 // only set() installs a writer
  modelFeaturesListSize = 0;  // addFeature() uses this as an index
}

// Builds a parser and immediately configures it from `p` through set().
// Whatever set() throws is passed on to the caller.
Parser::Parser(const Param &p) :
  svm_model (0),
  isReverse (0),
  isParsed  (0),
  isWriteHeader (0),
  isAppendFunc (0),
  columnSize (0),
  modelFeaturesList (0),
  _parse (0),
  selectorFunc (0)
{
  // Assigned in the body so the member-initializer list stays exactly as it
  // was.  Both members were previously left indeterminate.
  _write = 0;
  modelFeaturesListSize = 0;

  try {
    set(p);
  } catch (...) {
    // The destructor does not run for an object whose constructor throws,
    // so release the model set() may already have allocated.
    delete svm_model;
    svm_model = 0;
    throw;
  }
}

int Parser::set(const Param &p)
{
  try {
    if (svm_model) delete svm_model;
    svm_model         = new svmModel;

    // copy param
    param             = p;

    ///////////////////////////////////////////////////
    //  Parser Mode
    if (param.modelFileName != "") {
      // check verbose level
      _parse = &Parser::_parse_parser_mode_normal;
      _write = &Parser::_write_parser_mode;
      if (p.withCandidates) _parse = &Parser::_parse_parser_mode_detail;

      // read model
      svm_model->readModel(p.modelFileName);
      
      // parse features
      featureIndexList.clear();
      if (! featureIndexList.setFeature(svm_model->getParam("features"),
					svm_model->getParam("tag_features")))
	throw string("cannot find features definition in " + param.modelFileName);

      // get column size
      char *tmp;
      if ((tmp = (char *)svm_model->getParam("column_size").c_str()) == 0)
	throw string("cannot obtain column_size parameter from " + param.modelFileName);
      columnSize = atoi(tmp);
      
      // parsing direction
      isReverse = 0;
      if (svm_model->getParam("parsing_direction") == "backward") isReverse = 1;

    ///////////////////////////////////////////////////
    //  Selection Mode
    } else if (param.featureParameter != "") {
      isReverse = p.isReverse;    
      _write = &Parser::_write_selection_mode;
      _parse = &Parser::_parse_selection_mode;

    ///////////////////////////////////////////////////
    //  Other ??
    } else {
      throw string("Unknown Action Mode");
    }

    // allocate memories for features
    if (! modelFeaturesList) {
      modelFeaturesList = new char * [MAX_FEATURE_LEN];
      for (int i = 0; i < MAX_FEATURE_LEN; i++) modelFeaturesList[i] = 0;
    }
  }

  catch (string &e) {
    e = "Parser::Parser() " + e;
    throw e;
    return 0;
  }

  return 1;
}

// Releases every feature-string buffer, the slot table that holds them,
// and the SVM model.
Parser::~Parser()
{
  if (modelFeaturesList != 0) {
    char **const stop = modelFeaturesList + MAX_FEATURE_LEN;
    for (char **slot = modelFeaturesList; slot != stop; ++slot)
      delete [] *slot;            // unused slots are null: no-op
    delete [] modelFeaturesList;
  }

  delete svm_model;               // deleting a null pointer is a no-op
}

// Returns column `j` of row `i` of the current sentence.
//
// Rows outside the sentence yield synthetic labels instead:
//   i < 0                   -> "<i>__BOS__"   (e.g. "-1__BOS__")
//   i >= contextList.size() -> "+<k>__EOS__"  with k = i - size + 1
// These labels are formatted on first use and kept in bosList / eosList;
// `j` is ignored for them.  No range check is made on `j` for in-sentence rows.
string& Parser::getFeature(int i, int j)
{
  const int rowCount = (signed int)contextList.size();

  // ordinary row inside the sentence
  if (i >= 0 && i < rowCount)
    return const_cast<string &>(contextList[i][j]);

  if (i < 0) {
    // bosList[n] holds the label for row -(n+1); grow it down to row i
    for (int row = -(int)bosList.size() - 1; row >= i; row--) {
      sprintf(bos_eos_buf, "%+d__BOS__", row);
      bosList.push_back(string(bos_eos_buf));
    }
    return const_cast<string &>(bosList[-i-1]);
  }

  // past the end: eosList[n] holds the label for the (n+1)-th row after it
  const int beyond = i - rowCount;
  for (int k = 1 + (int)eosList.size(); k <= beyond + 1; k++) {
    sprintf(bos_eos_buf, "%+d__EOS__", k);
    eosList.push_back(string(bos_eos_buf));
  }
  return const_cast<string &>(eosList[beyond]);
}

// Installs a user feature-selector callback.
//   func   - called by select() as func(this, i) for row i
//   append - non-zero: the built-in features are generated after the
//            callback's; zero: select() returns right after the callback
// Always returns 1.
int Parser::setSelector (int (*func)(Parser *, int), int append)
{
  isAppendFunc = append;
  selectorFunc = func;
  return 1;
}

// Lazily allocates the MAX_STR_LEN-byte buffer for slot (i) of
// modelFeaturesList the first time that slot is used.
// Wrapped in do { } while (0) so the expansion is one statement and cannot
// capture an `else` that follows the call site.
#define ALLOC_MODEL_FEATURES_LIST(i) \
  do { \
    if (! modelFeaturesList[(i)]) modelFeaturesList[(i)] = new char [MAX_STR_LEN]; \
  } while (0)

int Parser::addFeature (char *s)
{
  ALLOC_MODEL_FEATURES_LIST(modelFeaturesListSize);
  strncpy (modelFeaturesList[modelFeaturesListSize], s, MAX_STR_LEN);
  modelFeaturesListSize++;
  return modelFeaturesListSize;
}

int Parser::select (int i)
{
  modelFeaturesListSize = 0;
  if (selectorFunc) {
    (*selectorFunc)(this, i);
    if (! isAppendFunc) return modelFeaturesListSize;
  }

  unsigned int l = modelFeaturesListSize;
  for (unsigned int j = 0; j < featureIndexList.features.size(); j++) {
    ALLOC_MODEL_FEATURES_LIST(l);
#ifdef HAVE_SNPRINTF
    snprintf(modelFeaturesList[l], MAX_STR_LEN,
#else
    sprintf(modelFeaturesList[l],
#endif
	    "F:%+d:%d:%s",
	    featureIndexList.features[j].row,
	    featureIndexList.features[j].col, 
	    getFeature(i + featureIndexList.features[j].row,
		       featureIndexList.features[j].col).c_str());
    l++;
  }

  for (unsigned int j = 0; j < featureIndexList.tags.size(); j++) {
    int k = i + featureIndexList.tags[j];
    if (k >= 0) {
      ALLOC_MODEL_FEATURES_LIST(l);
#ifdef HAVE_SNPRINTF
      snprintf(modelFeaturesList[l], MAX_STR_LEN,
#else
      sprintf(modelFeaturesList[l],
#endif
	      "T:%+d:%s", featureIndexList.tags[j], getTag(k).c_str());
      l++;
    }
  }

  return l;
}

void Parser::reverse()
{
   std::reverse(contextList.begin(),     contextList.end());
   std::reverse(tagList.begin(),         tagList.end());
   std::reverse(distScoreList.begin(),   distScoreList.end());
   std::reverse(marginScoreList.begin(), marginScoreList.end());
}

// Appends one already-split token (its columns) to the current sentence.
// Returns the number of tokens stored afterwards.
int Parser::add(vector <string>& s)
{
  contextList.push_back(s);
  const int tokenCount = contextList.size();
  return tokenCount;
}

// Splits one input line on tabs/spaces and appends it as a token.
// While the split yields fewer than columnSize - 1 fields, empty dummy
// fields are appended.  Returns the number of tokens stored afterwards.
int Parser::add(string &s) 
{
  vector <string> fields;
  int fieldCount = (int)split_string(s, "\t ", fields);

  const int wanted = (int)(columnSize-1);
  while (fieldCount < wanted) {   // fill dummy fields
    fields.push_back("");
    fieldCount++;
  }

  return add(fields);
}

// Generates the features of row `i` with select() and hands them to the
// model's classify(); the classifier's result array is returned as is.
ModelResult* Parser::estimateCost(int i)
{
  const int featureCount = select (i);
  ModelResult *scores = svm_model->classify(modelFeaturesList, featureCount);
  return scores;
}

// Reads the next sentence from `is` into the parser.
//
// Previous contents are discarded first.  Lines are added as tokens until a
// terminator line is met: an empty line, the literal "EOS", or a line that
// starts with whitespace.  The stream is then returned in its current state.
//
// If the input ends before a terminator is seen, the stream state is set to
// eofbit|badbit and returned, so the stream tests false for the caller.
// Tokens read before the end stay stored.
//
// NOTE(review): the terminator is the fixed string "EOS", whereas the writer
// emits param.eosString -- confirm that the difference is intended.
istream& Parser::read(istream &is)
{
  clear();
  isParsed = 0;   // results were just discarded, on every exit path

  string line;
  while (getline(is, line)) {
    // Cast before isspace(): passing a negative char (any 8-bit text) is
    // undefined behaviour.
    if (line == "" || line == "EOS" || isspace((unsigned char)line[0]))
      return is;
    add(line);
  }

//   is.setstate(ios::eofbit);
  is.clear(ios::eofbit|ios::badbit);
  return is;
}

// write
// Writes the current sentence to `os`, one token per line, followed by
// param.eosString on a line of its own.
//
// Every column is followed by a tab.  For a parsed sentence in candidate
// mode only the first columnSize columns are echoed; otherwise all of them.
// After the columns come the tag and, depending on param.verbose:
//   TAG_WITH_SCORE     -> "\t<dist>/<margin>"
//   TAG_WITH_ALL_SCORE -> "\t<label>/<score>" for each stored score
// When isReverse is set, reverse() is applied to the lists first.
ostream& Parser::_write_parser_mode(ostream &os)
{
  if (isReverse) reverse();

  for (unsigned int i = 0; i < size(); i++) {
    if (isParsed && param.withCandidates) 
      for (unsigned int j = 0; j < columnSize; j++) os << contextList[i][j] << "\t";
    else 
      for (unsigned int j = 0; j < contextList[i].size(); j++) os << contextList[i][j] << "\t";

    // NOTE(review): an unparsed token gets no line terminator here, so all
    // tokens end up on one line -- confirm whether that is intended.
    if (! isParsed) continue;

    os << tagList[i]; // print tag
    
    if (param.verbose == Param::TAG_WITH_SCORE) {
      os << "\t" << distScoreList[i] << "/"  << marginScoreList[i];
    } else if (param.verbose == Param::TAG_WITH_ALL_SCORE) {
      // A token was scored against every class when candidate mode is off,
      // or when this token itself carries no candidate columns.  The width
      // of row i is what matters here; the old test compared the number of
      // tokens in the sentence with columnSize.
      const bool scoredAllClasses =
        ! param.withCandidates || contextList[i].size() == columnSize;

      if (scoredAllClasses) {
        for (unsigned int j = 0; j < allCandidatesScoreList[i].size(); j++) 
          os << "\t" << svm_model->classList[j] << "/" << allCandidatesScoreList[i][j];
      } else {
        // scores line up with the candidate columns that follow the features
        for (unsigned int j = 0; j < allCandidatesScoreList[i].size(); j++)
          os << "\t" << contextList[i][columnSize + j] << "/" << allCandidatesScoreList[i][j];
      }
    }
    os << endl;
  }

//  os << "EOS" << endl;
  os << param.eosString << endl;
   
  return os;
}

// Writes the feature lines generated for the current sentence to `os`,
// followed by one empty line.
//
// On the first call a header block is written first (version, package,
// parsing direction, feature parameter, tag-feature offsets and feature
// row:col positions), terminated by an empty line.
ostream& Parser::_write_selection_mode(ostream &os)
{
  if (!isWriteHeader) {
    os << "Version: " << VERSION << endl;
    os << "Package:"  << PACKAGE << endl;
    os << "Parsing_Direction: " << (isReverse ? "backward" : "forward") << endl;
    os << "Feature_Parameter: " << param.featureParameter << endl;

    os << "Tag_Features:";
    for (unsigned int i = 0; i < featureIndexList.tags.size(); i++) 
      os << " " << featureIndexList.tags[i];
    os << endl;

    os << "Features:";
    for (unsigned int i = 0; i < featureIndexList.features.size(); i++) 
      os << " " << featureIndexList.features[i].row << ":" << featureIndexList.features[i].col;
    os << endl << endl;

    isWriteHeader = 1;   // header goes out only once per parser
  }

  for (unsigned int i = 0; i < featuresList.size(); i++)
    os << featuresList[i] << endl;

  // The sentence separator belongs to the same stream as the data; it used
  // to be sent to cout regardless of `os`.
  os << endl;
        
  return os;
}

// Empties the sentence and every per-sentence result list.
// Always returns 1.
int Parser::clear()
{
  // input
  contextList.clear();

  // results
  tagList.clear();
  featuresList.clear();
  distScoreList.clear();
  marginScoreList.clear();
  allCandidatesScoreList.clear();

  return 1;
}

// parse
// Runs the mode-specific parse routine on the current sentence.
//
// Returns 0 when there is nothing to parse or no routine is installed.
// Otherwise the routine's result is stored in isParsed and returned.
// A `string` exception is reported on cerr and turned into a 0 return.
int Parser::parse()
{
  try {
    if (size() == 0) return 0;
    if (! _parse)    return 0;

    if (isReverse) reverse();

    // dispatch through the pointer-to-member chosen by set()
    isParsed = (this->*_parse)();
    return isParsed;
  } 
    
  catch (string &e) {
    cerr << e << endl;
    return 0;
  }
}

int Parser::_parse_parser_mode_normal()
{
  for (unsigned int i = 0; i < this->size(); i++) {
    unsigned int max_score = 0;
    unsigned int id    = 0;
    double marginScore = 0;

    if (param.verbose == Param::TAG_WITH_ALL_SCORE) {
      vector <double> tmp;
      allCandidatesScoreList.push_back(tmp);
    }

    ModelResult *result = this->estimateCost(i);
    for (unsigned int n = 0; n < svm_model->classSize; n++) {
      if (max_score < result[n].voteScore) { // get max item only
	max_score = result[n].voteScore; 
	id = n; // save
      }
      marginScore += result[n].distScore;
      if (param.verbose == Param::TAG_WITH_ALL_SCORE) {
	allCandidatesScoreList[i].push_back(result[n].distScore);
      }
    }

    tagList.push_back(result[id].className);
    marginScoreList.push_back(marginScore);
    distScoreList.push_back(result[id].distScore);
  }

  return tagList.size();
}

int Parser::_parse_parser_mode_detail()
{
  for (unsigned int i = 0; i < this->size(); i++) {
    unsigned int max_score = 0;
    unsigned int id = 0;
    int n;
    double marginScore = 0;

    if (param.verbose == Param::TAG_WITH_ALL_SCORE) {
      vector <double> tmp;
      allCandidatesScoreList.push_back(tmp);
    }

    if (contextList[i].size() - columnSize == 0) {
      ModelResult *result = this->estimateCost(i);
      for (unsigned int n = 0; n < svm_model->classSize; n++) {
	if (max_score < result[n].voteScore) { // get max item only
	  max_score = result[n].voteScore; 
	  id = n; // save
	}
	marginScore += result[n].distScore;

	if (param.verbose == Param::TAG_WITH_ALL_SCORE) 
	  allCandidatesScoreList[i].push_back(result[n].distScore);
      }

      tagList.push_back(result[id].className);
      distScoreList.push_back(result[id].distScore);
      marginScoreList.push_back(marginScore);

    } else if (contextList[i].size() - columnSize == 1) { //  already tag is annotaded
      if ((n = svm_model->getClassId(contextList[i][columnSize])) < 0) {
	string tmp = 
	  "Parser::parse: tag [" + contextList[i][columnSize] +  "] is not found in model";
	throw tmp;
      }

      tagList.push_back(contextList[i][columnSize]);
      distScoreList.push_back(0.0);
      marginScoreList.push_back(0.0);

      if (param.verbose == Param::TAG_WITH_ALL_SCORE) 
	allCandidatesScoreList[i].push_back(0.0);

    } else if (contextList[i].size() - columnSize > 1) {
      ModelResult *result = this->estimateCost(i);
      for (unsigned int j = columnSize; j < contextList[i].size(); j++) {
	if ((n = svm_model->getClassId(contextList[i][j])) < 0) {
	  string tmp = "Parser::parse: tag [" + contextList[i][j] + "] is not found in model";
	  throw tmp;
	}
	
	if (max_score < result[n].voteScore) {
	  max_score = result[n].voteScore; 
	  id = n; // save
	}

	marginScore += result[n].distScore;

	if (param.verbose == Param::TAG_WITH_ALL_SCORE) 
	  allCandidatesScoreList[i].push_back(result[n].distScore);
      }

      tagList.push_back(result[id].className);
      distScoreList.push_back(result[id].distScore);
      marginScoreList.push_back(marginScore); 
    } 
  }

  return tagList.size();
}

// Selection mode: turns every token of the sentence into one line
// "<tag> <feature> <feature> ..." stored in featuresList.
//
// While columnSize is still 0 it is set to the width of the first row minus
// one (the last column holds the tag), and the feature positions are
// derived from param.featureParameter.  A failure there is fatal: the
// message goes to cerr and the process exits.
//
// Throws `string` when a row is too short to contain the tag column
// (previously this read past the end of the row).
// Returns the number of feature lines stored.
int Parser::_parse_selection_mode()
{
  if (! columnSize && size()) {
     if (contextList[0].empty())
       throw string("Parser::parse: first input line has no columns");

     columnSize = contextList[0].size() - 1;
     try {
       featureIndexList.setFeature(param.featureParameter, columnSize);
     } catch (string &e) {
       cerr << e << endl;
       exit (EXIT_FAILURE);
     }
  }

  for (unsigned int i = 0; i < size(); i++) {
    // the tag is read from column `columnSize`, so the row must reach it
    if (contextList[i].size() <= columnSize)
      throw string("Parser::parse: too few columns in input line");

    tagList.push_back(contextList[i][columnSize]);

    string buf = getTag(i);
    const int featureCount = select (i);
    for (int j = 0; j < featureCount; j++) {
      buf += " ";
      buf += string(modelFeaturesList[j]);
    }
    featuresList.push_back(buf);
  }

  return featuresList.size();
}
}
