/*
Copyright (c) 1998 - 2013
ILK - Tilburg University
CLiPS - University of Antwerp
This file is part of timbl
timbl is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
timbl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
timbl@uvt.nl
*/
#include
#include
#include
#include
#include "timbl/TimblAPI.h"
using namespace Timbl;
using std::ifstream;
using std::ofstream;
using std::ios;
using std::cerr;
using std::cin;
using std::cout;
using std::endl;
using std::istream;
using std::string;
using std::equal;
using std::getline;
// maximum number of experiments that can exist at the same time
// (this is the size of the experiments[] table)
#define MAX_EXP 10
// maximum number of parameters that fill_params() extracts from one
// script line
#define MAX_PARAMS 256
// Case-insensitive equality test for two single characters.
// Both characters are converted to unsigned char before they are handed
// to toupper(): calling toupper() with a negative char value is undefined
// behaviour.
bool nocase_cmp( char c1, char c2 ){
  return toupper( static_cast<unsigned char>(c1) )
    == toupper( static_cast<unsigned char>(c2) );
}

// Compare the first 'n' characters of s1 and s2, ignoring case.
// Returns true when both strings are at least 'n' characters long and
// their first 'n' characters are equal according to nocase_cmp().
// Returns false otherwise; in particular when one of the strings is
// shorter than 'n', so we never read beyond the end of a string.
bool compare_nocase_n( const string& s1, const string& s2, size_t n ){
  if ( s1.length() < n || s2.length() < n ){
    return false;
  }
  return equal( s1.begin(), s1.begin()+n, s2.begin(), nocase_cmp );
}
//
// here are the various scripting actions:
//
// The actions that parse() can recognize in a script line.
// The order of the enumerators is kept as it always was.
enum ActionType {
  UnknownAct,  // the line could not be parsed or names an unknown experiment
  New,         // NEW
  Free,        // FREE
  Set,         // .SET
  Show,        // .SHOW
  Classify,    // .CLASSIFY
  Train,       // .TRAIN
  Test,        // .TEST
  Increment,   // .ADD
  Decrement,   // .REM
  Expand,      // .EXPAND
  Remove,      // .REMOVE
  SaveTree,    // .SAVE
  GetTree,     // .GET
  SaveW,       // .SAVEW
  GetW,        // .GETW
  Quit         // QUIT
};
/*
The following scripting commands are implemented:
QUIT
stop all further actions.
NEW name [options]
create an experiment with name 'name'. The optional remaining words
are passed to the experiment as its option string, e.g. '-a algo' to
select its type. algo can be IB1, IB2, TRIBL or IGTREE. Default: -a IB1
FREE name
delete the experiment with name 'name'
name.SET option value
set option of experiment 'name' to value
name.SHOW OPTIONS
show all possible options with their default and current
values for experiment 'name'
name.SHOW SETTINGS
show all options with current settings of 'name'
name.TRAIN file
build an instancebase from file.
name.TEST file1 [file2]
classify all lines from file1, write results to file2 or
to file1.out if parameter file2 is not present
name.EXPAND file1
increment the database with contents of file1
name.REMOVE file1
decrement the database with contents of file1
name.CLASSIFY line
classify this line
name.ADD line
increment the database with line
name.REM line
decrement the database with line
name.SAVE file
save the instancebase of experiment 'name' to file.
name.GET file
get the instancebase for experiment 'name' from file.
name.SAVEW file
save the current weights of experiment 'name' to file.
name.GETW file
get new weights for experiment 'name' from file.
*/
// table of the currently existing experiments; the entries
// 0 .. exp_cnt-1 are in use
TimblAPI *experiments[MAX_EXP];
// the number of experiments currently stored in experiments[]
int exp_cnt = 0;
// the following functions implement a simple parser to parse the
// script file, recognize experiment names and the actions to perform
// on those experiments
int fill_params( string *params, const string& line ){
  // Split 'line' into separate parameters and store them in params[].
  // Parameters are separated by blanks and/or commas; empty parameters
  // are never stored. A line that starts and ends with a double quote is
  // taken as ONE parameter: the text between those quotes.
  // The first MAX_PARAMS entries of params[] are cleared before use.
  // Returns the number of parameters found, or MAX_PARAMS+1 when the
  // line holds too many parameters.
  for ( int k = 0; k < MAX_PARAMS; ++k ){
    params[k].clear();
  }
  const size_t length = line.length();
  if ( line[0] == '"' && line[length-1] == '"' ){
    // a quoted line: strip the quotes and keep the rest untouched
    params[0] = line.substr( 1, length-2 );
    return 1;
  }
  int count = 0;
  for ( string::const_iterator it = line.begin(); it != line.end(); ++it ){
    const char ch = *it;
    if ( ch != ',' && ch != ' ' ){
      // an ordinary character: extend the parameter under construction
      params[count] += ch;
      continue;
    }
    // a separator finishes the current parameter, but only when
    // something was collected already
    if ( !params[count].empty() ){
      ++count;
    }
    if ( count >= MAX_PARAMS ){
      cerr << "too many parameters!" << endl;
      return MAX_PARAMS+1;
    }
  }
  if ( !params[count].empty() ){
    // the last parameter was terminated by the end of the line
    ++count;
  }
  return count;
}
int lookup( const string& name ){
  // Find the experiment called 'name' in the table of known experiments.
  // Returns the index of the first experiment whose ExpName() equals
  // 'name', or -1 when there is no such experiment.
  int found = -1;
  for ( int idx = 0; idx < exp_cnt && found == -1; ++idx ){
    if ( name == experiments[idx]->ExpName() ){
      found = idx;
    }
  }
  return found;
}
ActionType parse( const string& Buffer, int &pos, string *pars, int &len ){
  // Parse one line of the script file.
  // The trimmed line is first checked for the NEW, FREE and QUIT
  // commands, which are not tied to an existing experiment.
  // Every other line must look like: name.COMMAND parameters
  //
  // Buffer: the script line
  // pos:    receives the result of lookup() for the experiment involved.
  //         It is left untouched for QUIT and for lines without a '.'
  // pars:   receives the parameters of the command, see fill_params()
  // len:    receives the number of parameters (0 when none were parsed)
  // Returns the recognized action. UnknownAct is returned for anything
  // that could not be handled.

  // the commands that act on an existing experiment. Every keyword
  // includes the blank that must follow it. The table is searched from
  // top to bottom.
  struct Keyword {
    const char *text;
    ActionType action;
  };
  static const Keyword exp_keywords[] = {
    { "SET ", Set },
    { "SHOW ", Show },
    { "GET ", GetTree },
    { "GETW ", GetW },
    { "SAVE ", SaveTree },
    { "SAVEW ", SaveW },
    { "TRAIN ", Train },
    { "EXPAND ", Expand },
    { "REMOVE ", Remove },
    { "TEST ", Test },
    { "CLASSIFY ", Classify },
    { "ADD ", Increment },
    { "REM ", Decrement }
  };
  static const size_t num_keywords
    = sizeof(exp_keywords) / sizeof(exp_keywords[0]);

  string Buf = TiCC::trim( Buffer );
  len = 0;
  if ( compare_nocase_n( Buf, "NEW ", 4 ) ){
    len = fill_params( pars, Buf.substr(4) );
    pos = lookup( pars[0] );
    if ( pos != -1 ){
      // the name is already taken
      cerr << "you can't renew an experiment: " << Buf << endl;
      return UnknownAct;
    }
    return New;
  }
  if ( compare_nocase_n( Buf, "FREE ", 5 ) ){
    len = fill_params( pars, Buf.substr(5) );
    pos = lookup( pars[0] );
    if ( pos == -1 ){
      cerr << "you can't free this unknown experiment: " << Buf << endl;
      return UnknownAct;
    }
    return Free;
  }
  if ( compare_nocase_n( Buf, "QUIT", 4 ) ){
    return Quit;
  }
  // everything else must start with the name of an experiment,
  // followed by a dot
  const string::size_type dot = Buf.find( '.' );
  if ( dot == string::npos ){
    cerr << "missing experiment reference!" << endl;
    return UnknownAct;
  }
  pos = lookup( Buf.substr( 0, dot ) );
  if ( pos == -1 ){
    // we don't know this experiment
    return UnknownAct;
  }
  // a known experiment, so now find out what to do with it
  Buf = Buf.substr( dot+1 );
  for ( size_t k = 0; k < num_keywords; ++k ){
    const string keyword( exp_keywords[k].text );
    if ( compare_nocase_n( Buf, keyword, keyword.length() ) ){
      len = fill_params( pars, Buf.substr( keyword.length() ) );
      return exp_keywords[k].action;
    }
  }
  return UnknownAct;
}
void one_command( istream &in_file, int &line_count ) {
  // The actual "engine".
  // Reads one line from in_file, parses it and takes the appropriate
  // action, most of the time by directly calling a TimblAPI function on
  // the experiment that the line refers to.
  //
  // in_file:    the stream to read the script line from
  // line_count: is incremented for every line read (also for skipped
  //             lines); it is used in the messages
  //
  // Empty lines and lines that start with '#' are skipped silently.
  // Every other line is echoed on cerr before it is executed. Problems
  // with a line are reported on cerr and the line is skipped, except
  // for a failing NEW and for QUIT, which both end the program with
  // exit(1).

  // Storage for the parameters of the current line, shared by all calls.
  // One extra entry on top of MAX_PARAMS is kept, as before.
  static string params[MAX_PARAMS+1];
  int pos = -1;
  int len = 0;
  string Buffer;
  getline( in_file, Buffer );
  line_count++;
  if ( Buffer.empty() || Buffer[0] == '#' ){
    return;
  }
  cerr << "TSE script, executing line: " << line_count<< endl
       << "=== " << Buffer << endl;
  ActionType action = parse( Buffer, pos, params, len );
  if ( len >= MAX_PARAMS ){
    cerr << "Too many parameters, skipping....." << endl;
    return;
  }
  switch ( action ){
  case Quit:
    // NOTE(review): QUIT ends the program with exit status 1, the same
    // status that is used for errors. Kept as it was.
    exit(1);
    break;
  case New: {
    if ( exp_cnt == MAX_EXP ){
      cerr << "To many different experiments in one run" << endl;
      exit(1);
    }
    if ( len == 0 ){
      cerr << " Wrong number of parameters for New" << endl;
      exit(1);
    }
    // params[0] is the name, all other parameters form the option string
    string cmnd;
    if ( len == 1 ){
      cerr << "1 parameters " << params[0] << endl;
      cmnd = "-a IB1";
    }
    else {
      for ( int i=1; i < len; ++i )
        cmnd += params[i] + " ";
    }
    experiments[exp_cnt++] = new TimblAPI( cmnd, params[0] );
    cerr << "Created a new experiment: "
         << experiments[exp_cnt-1]->ExpName() << endl;
    break;
  }
  case Free:
    delete experiments[pos];
    exp_cnt--;
    // close the gap in the table
    for ( ; pos < exp_cnt; pos++ ){
      experiments[pos] = experiments[pos+1];
    }
    experiments[exp_cnt] = 0;
    break;
  case GetTree:
    if ( len == 0 )
      cerr << "missing filename to retrieve InstanceBase" << endl;
    else
      experiments[pos]->GetInstanceBase( params[0] );
    break;
  case SaveTree:
    if ( len == 0 ){
      // no filename given: use a default name
      params[0] = experiments[pos]->ExpName() + ".tree";
    }
    // the default name was never used before, because the call below
    // was skipped when no filename was given
    experiments[pos]->WriteInstanceBase( params[0] );
    break;
  case GetW:
    if ( len == 0 ) {
      // no filename given: use a default name
      params[0] = experiments[pos]->ExpName() + ".weights";
    }
    experiments[pos]->GetWeights( params[0] );
    break;
  case SaveW:
    if ( len == 0 ){
      // no filename given: use a default name
      params[0] = experiments[pos]->ExpName() + ".weights";
    }
    experiments[pos]->SaveWeights( params[0] );
    break;
  case Show:
    if ( len != 1 )
      cerr << "missing information about WHAT to show" << endl;
    else {
      // NOTE(review): compare_nocase() is declared outside this file.
      // Presumably it compares complete strings, in which case the
      // documented 'SHOW SETTINGS' was never recognized. So both
      // spellings are accepted here. TODO confirm
      if ( compare_nocase( params[0], "OPTIONS" ) )
        experiments[pos]->ShowOptions( cerr );
      else if ( compare_nocase( params[0], "SETTING" )
                || compare_nocase( params[0], "SETTINGS" ) )
        experiments[pos]->ShowSettings( cerr );
      else
        cerr << "don't know how to show '" << params[0] << "'" << endl;
    }
    break;
  case Train:
    if ( len == 1 )
      experiments[pos]->Learn(params[0]);
    else
      cerr << "missing filename for Train" << endl;
    break;
  case Expand:
    if ( len == 1 )
      experiments[pos]->Expand(params[0]);
    else
      cerr << "missing filename for Expand" << endl;
    break;
  case Remove:
    if ( len == 1 )
      experiments[pos]->Remove(params[0]);
    else
      cerr << "missing filename for Remove" << endl;
    break;
  case Test: {
    switch ( len ){
    case 0:
      cerr << "missing filename for Test" << endl;
      return;
      break;
    case 1:
      // no output file given: derive its name from the input file
      params[1] = params[0] + ".out";
      break;
    case 2:
      break;
    default:
      // only the first two parameters are used
      cerr << "too many parameters for Test, (ignored)" << endl;
    }
    experiments[pos]->Test( params[0], params[1] );
    break;
  }
  case Classify:
    if ( len == 1 ){
      const TargetValue *tv = experiments[pos]->Classify(params[0]);
      cout << "classify: " << params[0] << " ==> " << tv << endl;
    }
    else
      // this message used to mention 'Add'
      cerr << "missing instancestring for Classify" << endl;
    break;
  case Increment:
    if ( len == 1 )
      experiments[pos]->Increment(params[0]);
    else
      cerr << "missing instancestring for Add" << endl;
    break;
  case Decrement:
    if ( len == 1 )
      experiments[pos]->Decrement(params[0]);
    else
      cerr << "missing instancestring for Remove" << endl;
    break;
  case Set:
    if ( len != 1 ){
      // glue all parameters together into one option string
      // (without any separator)
      for ( int j=1; j < len; j++ )
        params[0] += params[j];
    }
    if ( !experiments[pos]->SetOptions( params[0] ) )
      cerr << "problem with Set " << params[0] << endl;
    break;
  case UnknownAct:
    // pos tells us what went wrong: parse() leaves it negative when no
    // experiment was found
    if ( pos < 0 )
      cerr << "[" << line_count << "]" << Buffer
           << " ==> Unknown experiment, skipped\n" << endl;
    else
      cerr << "[" << line_count << "] " << Buffer
           << " ==> Unknown action, skipped\n" << endl;
    break;
  }
}
int main(int argc, char *argv[] ){
  // Execute a TSE script, line by line.
  // The script is read from the file named by the first command line
  // argument, or from cin when no argument is given.
  // Exits with status 1 when the script file cannot be opened.

  // the following trick makes it possible to parse lines from cin
  // as well as from a user supplied file.
  istream *script_file = &cin;
  ifstream test_file;
  if ( argc > 1 ){
    test_file.open( argv[1], ios::in );
    if ( !test_file.good() ){
      cerr << argv[0] << " - couldn't open scriptfile " << argv[1] << endl;
      exit(1);
    }
    cout << "reading script from: " << argv[1] << endl;
    script_file = &test_file;
  }
  int line = 0;
  // Continue as long as the stream is usable. Testing for eof() only
  // would loop forever when the stream fails for another reason than
  // reaching the end of the input.
  while ( script_file->good() ){
    one_command( *script_file, line );
  }
  return 0;
}