/*
 PrefixSpan: An efficient algorithm for sequential pattern mining

 $Id: prefixspan.cpp,v 1.5 2002/01/18 15:08:30 taku-ku Exp $;

 Copyright (C) 2002 Taku Kudo  All rights reserved.
 This is free software with ABSOLUTELY NO WARRANTY.

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA
*/

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <strstream>
#include <utility>
#include <vector>

using namespace std;

/*
 PrefixSpan<T>: mines frequent sequential patterns from transactions whose
 items are of type T.

 Usage: construct with the mining options, feed transactions with read()
 (one transaction per input line), then call run() to write the patterns
 to a stream.

 A pattern is printed as space-separated "item<delimiter>support" pairs,
 where support is the number of transactions containing the prefix that
 ends at that item.
*/
template <class T = unsigned int> class PrefixSpan {
private:
  // One occurrence of the current pattern: (transaction index, index of the
  // pattern's last item inside that transaction).  The root projection uses
  // position -1, i.e. nothing has been matched yet.
  typedef std::pair <unsigned int, int>  occurrence_type;
  typedef std::vector <occurrence_type>  projected_type;
  typedef std::map <T, projected_type>   counter_type;
  typedef std::map <T, int>              first_position_type;

  std::vector < std::vector <T> >             transaction; // input sequences
  std::vector < std::pair <T, unsigned int> > pattern;     // current prefix: (item, support)
  unsigned int minsup;     // minimum number of supporting transactions
  unsigned int minpat;     // minimum pattern length that is reported
  std::string delimiter;   // printed between an item and its support
  bool all;                // report every frequent prefix, not only the ends of branches
  bool position;           // emit the <pattern>/<what>/<where> form
  bool verbose;            // write item counts to std::cerr while mining
  std::ostream *os;        // output stream; set by run()

  // Writes the current pattern as "item<delimiter>support" pairs separated
  // by single spaces, without a trailing newline.
  void print_pattern () const
  {
    for (unsigned int i = 0; i < pattern.size(); i++)
      *os << (i ? " " : "") << pattern[i].first << delimiter << pattern[i].second;
  }

  // Prints the current pattern, and in position mode also the indices of the
  // transactions in `projected`.  Patterns shorter than minpat are skipped.
  // std::endl is kept (rather than '\n') so the flush points of the output
  // stream stay exactly as before.
  void report (const projected_type &projected) const
  {
    if (minpat > pattern.size()) return;

    if (position) {
      *os << "<pattern>" << std::endl;

      // what: string of pattern
      *os << "<what>";
      print_pattern ();
      *os << "</what>" << std::endl;

      // where: transactions containing the pattern
      *os << "<where>";
      for (unsigned int i = 0; i < projected.size(); i++)
        *os << (i ? " " : "") << projected[i].first;
      *os << "</where>" << std::endl;

      *os << "</pattern>" << std::endl;
    } else {
      // print pattern only
      print_pattern ();
      *os << std::endl;
    }
  }

  // Extends the current pattern by every item that follows it in at least
  // minsup of the projected transactions, recursing once per such item.
  void project (const projected_type &projected)
  {
    if (all) report (projected);

    counter_type counter;

    for (unsigned int i = 0; i < projected.size(); i++) {
      const int pos         = projected[i].second;
      const unsigned int id = projected[i].first;
      const std::vector <T> &items = transaction[id];
      const unsigned int size = static_cast <unsigned int> (items.size());

      // First position of each distinct item after `pos`.  map::insert does
      // not overwrite an existing key, so the earliest occurrence wins.
      first_position_type first_pos;
      for (unsigned int j = static_cast <unsigned int> (pos + 1); j < size; j++)
        first_pos.insert (typename first_position_type::value_type (items[j], static_cast <int> (j)));

      for (typename first_position_type::const_iterator k = first_pos.begin();
           k != first_pos.end(); ++k)
        counter[k->first].push_back (occurrence_type (id, k->second));
    }

    if (verbose) std::cerr << "## " << counter.size() << " items found\n";

    // Drop items supported by fewer than minsup transactions.  The
    // post-increment hands erase() the old iterator after `l` has already
    // moved on, so `l` is never invalidated.
    for (typename counter_type::iterator l = counter.begin(); l != counter.end(); ) {
      if (l->second.size() < minsup)
        counter.erase (l++);
      else
        ++l;
    }

    if (verbose) std::cerr << "## pruned to " << counter.size() << " items\n";

    // Without -a semantics only patterns that cannot be extended are printed.
    if (! all && counter.empty()) {
      report (projected);
      return;
    }

    for (typename counter_type::iterator l = counter.begin(); l != counter.end(); ++l) {
      pattern.push_back (std::make_pair (l->first, static_cast <unsigned int> (l->second.size())));
      project (l->second);
      // pop_back, not erase(end()): end() is not a valid position to erase.
      pattern.pop_back ();
    }
  }

public:
  // _minsup:    minimum number of transactions that must contain a pattern
  // _minpat:    minimum length of a reported pattern
  // _delimiter: text printed between an item and its support
  // _all:       report every frequent prefix instead of only the patterns
  //             that cannot be extended further
  // _position:  also report the transaction indices for each pattern
  // _verbose:   write progress information to std::cerr
  PrefixSpan (unsigned int _minsup = 1,
              unsigned int _minpat = 1,
              const std::string &_delimiter = "/",
              bool _all = false,
              bool _position = false,
              bool _verbose = false):
    minsup (_minsup), minpat (_minpat), delimiter (_delimiter),
    all (_all), position (_position), verbose (_verbose), os (0) {}

  // Appends one transaction per line of `is`.  Items are whitespace-separated
  // tokens extracted with operator>>; extraction for a line stops at the
  // first token that does not convert to T.  An empty line yields an empty
  // transaction.  Returns `is`.
  std::istream& read (std::istream &is)
  {
    std::string line;
    std::vector <T> items;
    T item;
    while (std::getline (is, line)) {
      items.clear ();
      std::istringstream fields (line);
      while (fields >> item) items.push_back (item);
      transaction.push_back (items);
    }
    return is;
  }

  // Mines all transactions read so far and writes the patterns to `_os`.
  // Returns `_os`.
  std::ostream& run (std::ostream &_os)
  {
    os = &_os;
    projected_type root;
    root.reserve (transaction.size());
    for (unsigned int i = 0; i < transaction.size(); i++)
      root.push_back (occurrence_type (i, -1));
    project (root);
    return *os;
  }

  // Forgets all transactions and any partially built pattern.
  void clear ()
  {
    transaction.clear ();
    pattern.clear ();
  }
};

int main (int argc, char **argv)
{
  extern char *optarg;
  bool all = false;
  bool position = false;
  bool verbose = false;
  bool useint = true;
  unsigned int minsup = 1;
  unsigned int minpat = 1;
  string delimiter = "/";

  int opt;
  while ((opt = getopt(argc, argv, "apvsM:m:d:")) != -1) {
    switch(opt) {
    case 'a':
      all = true;
      break;
    case 'p':
      position = true;
      break;
    case 'v':
      verbose = true;
      break;
    case 'm':
      minsup = atoi (optarg);
      break;
    case 'M':
      minpat = atoi (optarg);
      break;
    case 's':
      useint = false;
      break;
    case 'd':
      delimiter = string (optarg);
      break;
    default:
      cout << "Usage: " << argv[0] 
	   << " [-m minsup] [-M minpat] [-a] [-p] [-v] [-s] [-d delimiter] < data .." << endl;
      return -1;
    }
  }

  if (useint) {
    PrefixSpan<unsigned int> prefixspan (minsup, minpat, delimiter, all, position, verbose);
    prefixspan.read (cin);
    prefixspan.run  (cout);
  } else {
    PrefixSpan<string> prefixspan (minsup, minpat, delimiter, all, position, verbose);
    prefixspan.read (cin);
    prefixspan.run  (cout);
  }

  return 0;
}
