#!/usr/bin/perl -w

# $Id: mkmodel.in,v 1.10 2002/10/19 13:21:02 taku-ku Exp $;
my $inModelFileName  = shift(@ARGV);
my $outModelFileName = shift(@ARGV);

die "Usage: $0 text_model_file|gziped_txt_model_file binary_model_file\n"
      if (! $inModelFileName || ! $outModelFileName);

my $tooldir = $0;
$tooldir =~ s#[^/]+$##;

# model specfic parameters
my $version    = "0.3";
my $gzip       = "/bin/gzip";
my $mkdarts    = "$tooldir" . "mkdarts";
my $modelSize = 0;
my $svSize    = 0;
my $dimensionSize  = 0;
my $nonzeroDimensionSize = 0;
my $tableSize = 0;
my $classSize = 0;
my $alphaSize = 0;
my $kernel_type = "";
my @modelList = ();
my @alphaList = ();
my %fi2si = ();
my @fi2pos = ();
my %example2id = ();
my %classHash = ();
my $isonevsrest = 0;

# specfic parameters
my $paramStr = "";

# kernel specfic parameters;
my $param_degree = 0;
my $param_g = 0;
my $param_s = 0;
my $param_r = 0;
my $posLabel = "";
my $negLabel = "";
my $th = 0.0;

####################################################################################
#
# check parameter 
#
if ($inModelFileName =~/\.txtmodel\.gz$/) {
    open(F, "$gzip -dc $inModelFileName |") ||  die "$! $inModelFileName";
} else {
    open(F, $inModelFileName) ||  die "$! $inModelFileName";
}

while(<F>) {
    chomp;
    next if (/^\s*\#/);
    last if (/^$/);
    if (/^([^\s:]+):\s*(.+)$/) {
	$paramStr .= lc($1) . pack("x") . $2 . pack("x");
    }
}

####################################################################################
#
# check dictionary (using suffix array)
#
open(S, "| $mkdarts - $outModelFileName.darts") || die;
while(<F>) {
    chomp;
    last if (/^$/);
    my ($id, $str) = split;
    print S "$id $str\n";
}
close(S);
my $daSize = (stat("$outModelFileName.darts"))[7];

print "Reading TEXT model: $inModelFileName\n";
####################################################################################
#
# Making Model
#
while(<F>) {
    chomp;
    print STDERR "."  if ($i++ % 1000 == 0) ;
    if (/^(\S+) \# kernel type/) {
	if ($1 eq "1") {
	    $kernel_type = 'polynomial';
	} elsif ($1 eq "3") {
	    $kernel_type = 'polynomial' if ($1 eq "3");
	} else {
	    die "FATAL: cannot supnport Kernel Func, type $1\n";
	}
    } elsif (/^(\d+) \# kernel parameter -d/) {
	$param_degree = $1;
    } elsif (/^(\S+) \# kernel parameter -g/) {
	$param_g = $1;
    } elsif (/^(\S+) \# kernel parameter -s/) {
	$param_s = $1;
    } elsif (/^(\S+) \# kernel parameter -r/) {
	$param_r = $1;
    } elsif (/^(\S+) \# threshold b/) {
	$th = $1;
    } elsif (/MULTI_CLASS (\S+) (\S+)/) {
	$posLabel = $1; $negLabel = $2;
    } elsif (/^$/) {
	$modelList[$modelSize] = [$posLabel,$negLabel,$th];
	$modelSize++;
    } elsif (/^SVM-light/ || /^TinySVM/) {
	# do nothing
    } elsif (! /\#/) {
	my ($alpha,$ex) = split(/\s+/,$_,2);
	if (! defined $example2id{$ex}) {
	    $example2id{$ex} = $svSize;
	    my (@tmp) = split(/\s+/,$ex);
	    die "empty example, ok?\n" if (! @tmp);
	    for (@tmp) {
		my ($i,$v) = split(/:/,$_);
		die "FATAL: FLOAT value is found [$i:$v]\n" if ($v != 1);
		$dimensionSize = $dimensionSize>$i?$dimensionSize:$i;
		push(@{$fi2si{$i}}, $svSize);
	    }
	    $svSize++;
	    $nonzeroDimensionSize = $nonzeroDimensionSize>($#tmp+1)?$nonzeroDimensionSize:($#tmp+1);
	} 
	my ($svId) = $example2id{$ex};
	if (! defined $alphaList[$modelSize]->{$svId}) {
	    $alphaSize++;
	}
	$alphaList[$modelSize]->{$svId} += $alpha;
    }
}
close(F);
print "\n";

# free memory
%example2id = (); 

# if binary model, dummy filelds are given;
if (! $modelSize) {
    $modelList[$modelSize] = ["+1","-1",$th];
    $modelSize++;
}

# make class list
my $classId = 0;
for (@modelList) {
    my ($p,$n,$b) = @{$_};

    $classHash{$p} = ($classId++) if (! defined $classHash{$p});
    
    # ignore if dummy class for one vs rest
    if ($n eq "___OTHER___") {
	$isonevsrest = 1;
	next;
    }
    
    $classHash{$n} = ($classId++) if (! defined $classHash{$n});
}

# give largest id to dummy class
$classHash{"___OTHER___"} = $classId++ if ($isonevsrest);

$classSize = scalar (keys %classHash);
die "FATAL: classSize == 1, OK?\n" if ($classSize <= 1);
die "FATAL: Use pair wise when binary classificaion\n" if ($isonevsrest && $classSize == 3);

# examin FIPOS
$old = 0;
$| = 1;
select (STDERR); $| = 1; select(STDOUT);
for $i (0 .. $dimensionSize) {
#    print STDERR "Warning: Feature ID [ID=$i] is empty. ignored\n"  if (! defined $fi2si{$i});
    my $num = 1;
    $num = scalar (@{$fi2si{$i}}) + 1 if (defined $fi2si{$i});
    push(@fi2pos, $old);
    $old += $num;
}
$tableSize = $old;

####################################################################################
#
# Writing Model file
#
# All seems to be OK, write binary model
print "Writing BINARY model: $outModelFileName\n";
open (S,"> $outModelFileName") || die "$! $outModelFileName\n";

# print model header
print S pack("a32a32IdddIIIIIIII", # "a32a32Idddiiiiiiii",
	     $version,$kernel_type,$param_degree,$param_g,$param_s,$param_r, # kernel param
	     $modelSize,  # model size, if binary, modelSize equal to 1.
	     $classSize,  # class size, if binary, classSize equal to 2. 
	     $alphaSize,  # number of alpha, total size of examples which have weight, alpha
	     $svSize,     # number of examples, counted uniqly, duplicated examples are neglected
	     $tableSize,  # size of fi2si lists, including dummy fields
	     $dimensionSize+1, # dimension of model, 
	     $nonzeroDimensionSize, # max non dimension, which is used cache of dot products.
	     $daSize,               # size of double array
	     );

# print specfic header
print S pack("I",  length($paramStr)); # size of parameter
print S pack("a*", $paramStr); # contents of parameter

# print class list
for (sort keys %classHash) {
    print S pack("a32",$_);
}

# print model definition
for (@modelList) {
    my ($p,$n,$b) = @{$_};
    print S pack("IId", $classHash{$p}, $classHash{$n}, $b);
}

# print alpha
for $i (0 .. ($modelSize-1)) {
    for $j (0..($svSize-1)) {
	print "." if ($j % 10000 == 0);
	if (defined $alphaList[$i]->{$j}) {
	    print S pack("id", $j, $alphaList[$i]->{$j});
	} 
    }
    print S pack("id", -1, 0.0); # dummy
}

# print fi2pos
print S pack("i*", @fi2pos);

# print body of fi2si
for $i (0 .. $dimensionSize) {
    print "." if ($i % 1000 == 0);
    print S pack("i*", @{$fi2si{$i}}) if (defined $fi2si{$i});
    print S pack("i", -1); # dummy
}

# close fh
close(S);

system "cat $outModelFileName.darts >> $outModelFileName"; # append
unlink "$outModelFileName.darts";

# ok finish
print " Done!\n";
