/*
 YamCha -- Yet Another Multipurpose CHunk Annotator

 $Id: feature_index.cc,v 1.4 2001/06/18 06:03:37 taku-ku Exp $;

 Copyright (C) 2001  Taku Kudoh <taku-ku.aist-nara.ac.jp>
 All rights reserved.

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Library General Public
 License as published by the Free Software Foundation; either
 version 2 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Library General Public License for more details.

 You should have received a copy of the GNU Library General Public
 License along with this library; if not, write to the
 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.
*/
#include "feature_index.h"
#include "common.h"

namespace YamCha {

unsigned int split_string(const string &src, const string& key, vector <string>& result)
{
  result.clear();
  int len =  src.size();
  int i = 0; int si = 0;
  while(i < len) {
    while (i < len && key.find(src[i]) != string::npos) { si++; i++; }; // skip beginning spaces
    while (i < len && key.find(src[i]) == string::npos) i++; // skip contents
    result.push_back(src.substr(si,i-si));
    si = i;
  }

  return result.size();
}

int FeatureIndexList::parse_start_end(const string &src, int &start, int &end, int end_default)
{
  char *ptr = (char *)src.c_str();
  char *nptr;

  start = strtol(ptr,&nptr,10);
  if (ptr == nptr) return 0; // error

  unsigned int pos = src.find("..");
  if (pos == string::npos) { // not found ..
    end = start;
    return 1;
  } else if (pos == (src.size()-2)) { // foud but .. style
    end = end_default;
    if (end < start) return 0;
    return 1;
  } else {
    ptr += (pos + 2);
    end = strtol(ptr,&nptr,10);
    if (ptr == nptr) return 0; // error, cannot find digit
    if (end < start) return 0; // error, start > end  }
  }

  return 1;
}

// STYLE for model 1:2 3:1 4:1 ... 
int FeatureIndexList::setFeature(const string &feature, const string &tag)
{
  try {
    vector <string> tmp;
    clear();

    split_string(feature, "\t ", tmp);
    for (unsigned int i = 0; i < tmp.size(); i++) {
      FeatureIndex tmp_feature_index;
      vector <string> tmp2;
      if (split_string(tmp[i],":",tmp2) != 2) throw tmp[i];
      tmp_feature_index.row = atoi(tmp2[0].c_str());
      tmp_feature_index.col = atoi(tmp2[1].c_str());
      features.push_back(tmp_feature_index);
    }

    split_string(tag, "\t ", tmp);
    for (unsigned int i = 0; i < tmp.size(); i++) {
      int row = atoi(tmp[i].c_str());
      tags.push_back(row);
    }

    return 1;
  }

  catch (string &e) {
    e = "FeatureIndexList::setFeature() format error [" + e + "]";
    throw e;
    return 0;
  }
}

// STYLE for User given parameter F:1:2..
int FeatureIndexList::setFeature(const string& str, int max_col)
{
  vector <string> list, rclist, item;
  vector <int>    row, col, tag;
  int start,end;
  clear();

  try {
    if (!split_string(str,"\t ",list)) throw str;
    
    for (unsigned int i = 0; i < list.size(); i++) {
      int size = split_string(list[i],":",rclist);

      if (size == 3 && (rclist[0] == "F" || rclist[0] == "f")) {
	if (! split_string(rclist[1],",",item)) throw list[i];
	for (unsigned int j = 0; j < item.size(); j++) {
	  if (! parse_start_end(item[j],start,end, max_col - 1)) throw list[i];
	  for (int j = start; j <= end; j++) row.push_back(j);
	}

	if (! split_string(rclist[2],",",item)) throw list[i];
	for (unsigned int j = 0; j < item.size(); j++) {
	  if (! parse_start_end(item[j],start,end,max_col - 1) || start < 0) throw list[i];
	  for (int j = start; j <= end; j++) col.push_back(j);
	}

      } else if (size == 2 && (rclist[0] == "T" || rclist[0] == "t")) {
	if (! split_string(rclist[1],",",item)) throw list[i];
	for (unsigned int j = 0; j < item.size(); j++) {
	  if (! parse_start_end(item[j],start,end,0) || start * end <= 0 ) throw list[i];
	  for (int j = start; j <= end; j++) tag.push_back(j);
	}
      } else {
	throw list[i];
      }
    }

    sort(row.begin(),row.end()); 
    vector <int>::iterator rlast = unique(row.begin(),row.end());
    sort(col.begin(),col.end()); 
    vector <int>::iterator clast = unique(col.begin(),col.end());
    
    for (vector<int>::iterator i = row.begin(); i < rlast ; i++) {
      for (vector<int>::iterator j = col.begin(); j < clast ; j++) {
	FeatureIndex tmp_feature_index;
	tmp_feature_index.row = *i;
	tmp_feature_index.col = *j;
	features.push_back(tmp_feature_index);
      }
    }

    sort(tag.begin(),tag.end()); 
    vector <int>::iterator tlast = unique(tag.begin(),tag.end());
    for (vector <int>::iterator i = tag.begin(); i < tlast; i++) {
      tags.push_back(*i);
    }
  }

  catch (string &e) {
    e = "FeatureIndexList::setFeature() format error [" + e + "]";
    throw e;
    return 0;
  }

  return 1;
}

}
