/*
$Id: MBLClass.h 15565 2013-01-07 14:27:05Z sloot $
$URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/include/timbl/MBLClass.h $
Copyright (c) 1998 - 2013
ILK - Tilburg University
CLiPS - University of Antwerp
This file is part of timbl
timbl is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
timbl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
timbl@uvt.nl
*/
#ifndef TIMBL_MBLCLASS_H
#define TIMBL_MBLCLASS_H
// Forward declaration of the XML node type returned by
// MBLClass::settingsToXml(), so this header only needs the name.
// NOTE(review): presumably libxml2's xmlNode - confirm against the
// implementation file that builds the tree.
typedef struct _xmlNode xmlNode;
namespace Timbl {
// NOTE(review): this using-directive sits in a header, so every file that
// includes it gets all names of namespace Common made visible inside
// namespace Timbl. 'Common' itself is not declared in this header, so it
// must already be declared by whatever is included before it - confirm.
using namespace Common;
// Forward declarations: this header only uses pointers to these types.
class InstanceBase_base;
class TesterClass;
class Chopper;
// MBLClass bundles the option table, feature/target tables, instance base
// pointer, weighting/metric settings and test bookkeeping declared below.
// Its constructor and assignment operator are protected and its copy
// constructor is private, so objects are created through derived classes
// (or friends) rather than directly.
//
// NOTE(review): several std::vector declarations in this copy of the header
// carry no template argument list; they are reproduced unchanged here and
// should be checked against the repository version.
class MBLClass {
public:
  bool SetOption( const std::string& );
  xmlNode *settingsToXml() const;
  bool ShowWeights( std::ostream& ) const;

  // Verbosity handling: test ( & ), switch on ( |= ) and switch off ( &= ~ )
  // the given flag(s) in 'verbosity'.
  bool Verbosity( VerbosityFlags v ) const { return verbosity & v; }
  void SetVerbosityFlag( VerbosityFlags v ) { verbosity |= v; }
  void ResetVerbosityFlag( VerbosityFlags v ) { verbosity &= ~v; }

  // Getter/setter pair for the MBL_init flag.
  bool MBLInit() const { return MBL_init; }
  void MBLInit( bool b ) { MBL_init = b; }

  // Returns true when err_count is positive, false otherwise.
  // When it is positive and 'b' is true, InvalidMessage() is called before
  // returning.
  bool ExpInvalid( bool b = true ) const {
    if ( err_count <= 0 ){
      return false;
    }
    if ( b ){
      InvalidMessage();
    }
    return true;
  }

  WeightType CurrentWeighting() const { return Weighting; }
  InputFormatType InputFormat() const { return input_format; }
  bool connectToSocket( std::ostream * );
  std::ostream *sock_os;
  int getOcc() const { return doOcc; }

protected:
  // Values taken by 'runningPhase'; printable through the friend operator<<.
  enum PhaseValue { TrainWords, LearnWords, TestWords, TrainLearnWords };
  friend std::ostream& operator<< ( std::ostream&, const PhaseValue& );
  // Result type of IBStatus().
  enum IB_Stat { Invalid, Normal, Pruned };

  // Stream based readers and writers.
  bool writeArrays( std::ostream& );
  bool readArrays( std::istream& );
  bool writeMatrices( std::ostream& ) const;
  bool readMatrices( std::istream& );
  bool writeWeights( std::ostream& ) const;
  bool readWeights( std::istream&, WeightType );
  bool writeNamesFile( std::ostream& ) const;
  bool ShowOptions( std::ostream& ) const;
  bool ShowSettings( std::ostream& ) const;
  void writePermutation( std::ostream& ) const;
  void LearningInfo( std::ostream& );

  // Construction, destruction, assignment and (re)initialisation.
  MBLClass( const std::string& = "" );
  virtual ~MBLClass();
  void InitClass( const size_t );
  MBLClass& operator=( const MBLClass& );
  void Initialize( size_t = 0 );
  bool PutInstanceBase( std::ostream& ) const;

  // Whole-value access to 'verbosity' (the public functions work per flag).
  VerbosityFlags get_verbosity() const { return verbosity; }
  void set_verbosity( VerbosityFlags v ) { verbosity = v; }

  // Input handling.
  const Instance *chopped_to_instance( PhaseValue );
  bool Chop( const std::string& );
  bool HideInstance( const Instance& );
  bool UnHideInstance( const Instance& );
  std::string formatInstance( const std::vector&,
                              std::vector&,
                              size_t, size_t ) const;
  bool setInputFormat( const InputFormatType );
  size_t countFeatures( const std::string&,
                        const InputFormatType ) const;
  InputFormatType getInputFormat( const std::string& ) const;
  size_t examineData( const std::string& );
  void time_stamp( const char *, int =-1 ) const;

  // Testing and access to the results of a test.
  void TestInstance( const Instance& ,
                     InstanceBase_base * = NULL,
                     size_t = 0 );
  std::string get_org_input( ) const;
  const ValueDistribution *ExactMatch( const Instance& ) const;
  void fillNeighborSet( neighborSet& ) const;
  void addToNeighborSet( neighborSet& ns, size_t n ) const;
  double getBestDistance() const;
  WValueDistribution *getBestDistribution( unsigned int =0 );
  IB_Stat IBStatus() const;
  bool get_ranges( const std::string& );
  bool get_IB_Info( std::istream&, bool&, int&, bool&, std::string& );

  // Read-only views on private counters.
  size_t NumOfFeatures() const { return num_of_features; }
  size_t targetPos() const { return target_pos; }
  size_t NumNumFeatures() const { return num_of_num_features; }
  size_t EffectiveFeatures() const { return effective_feats; }
  void IBInfo( std::ostream& os ) const;
  void MatrixInfo( std::ostream& ) const;
  int RandomSeed() const { return random_seed; }

  // Message reporting.
  void Info( const std::string& ) const;
  void Warning( const std::string& ) const;
  void Error( const std::string& ) const;
  void FatalError( const std::string& ) const;

  size_t MaxFeats() const { return MaxFeatures; }
  int Progress() const { return progress; }
  void Progress( int p ){ progress = p; }

  // Data reachable from derived classes.
  Target *Targets;
  std::vector Features;
  std::vector PermFeatures;
  std::vector permutation;
  InstanceBase_base *InstanceBase;
  std::ostream *mylog;
  std::ostream *myerr;

  // Accessors for private settings.
  size_t TRIBL_offset() const { return tribl_offset; }
  unsigned int igOffset() const { return igThreshold; }
  unsigned int IB2_offset() const { return ib2_offset; }
  void IB2_offset( unsigned int n ) { ib2_offset = n; }
  bool Do_Sloppy_LOO() const { return do_sloppy_loo; }

  // True only when do_sample_weighting is set and do_ignore_samples is not.
  bool doSamples() const {
    if ( do_ignore_samples ){
      return false;
    }
    return do_sample_weighting;
  }

  bool Do_Exact() const { return do_exact_match; }
  void Do_Exact( bool b ) { do_exact_match = b; }
  void InitWeights();
  void diverseWeights();
  bool KeepDistributions() const { return keep_distributions; }
  void KeepDistributions( bool f ){ keep_distributions = f; }
  // Reports the is_copy flag.
  bool IsClone() const { return is_copy; }
  void default_order();
  void set_order(void);
  void calculatePermutation( const std::vector& );
  void calculate_fv_entropy( bool );

  // More state reachable from derived classes.
  OptionTableClass Options;
  PhaseValue runningPhase;
  WeightType Weighting;
  metricClass *GlobalMetric;
  OrdeningType TreeOrder;
  size_t num_of_neighbors;
  bool dynamic_neighbors;
  DecayType decay_flag;
  Hash::StringHash *TargetStrings;
  Hash::StringHash *FeatureStrings;
  std::string exp_name;
  Instance CurrInst;
  BestArray bestArray;
  size_t MaxBests;
  neighborSet nSet;
  decayStruct *decay;
  int beamSize;
  normType normalisation;
  double norm_factor;
  bool is_copy;
  bool is_synced;
  unsigned int ib2_offset;
  int random_seed;
  double decay_alfa;
  double decay_beta;
  bool MBL_init;
  bool tableFilled;
  MetricType globalMetricOption;
  bool do_diversify;

  bool initProbabilityArrays( bool );
  void calculatePrestored();
  void initDecay();
  void initTesters();
  Chopper *ChopInput;
  int F_length;

private:
  size_t MaxFeatures;
  std::vector UserOptions;
  InputFormatType input_format;
  VerbosityFlags verbosity;
  // mutable: may be modified from const member functions.
  mutable int err_count;
  size_t num_of_features;
  size_t num_of_num_features;
  size_t target_pos;
  size_t effective_feats;
  int clip_factor;
  int Bin_Size;
  int progress;
  size_t tribl_offset;
  unsigned igThreshold;
  int mvd_threshold;
  bool do_sloppy_loo;
  bool do_exact_match;
  bool do_silly_testing;
  bool hashed_trees;
  bool need_all_weights;
  bool do_sample_weighting;
  bool do_ignore_samples;
  bool no_samples_test;
  bool keep_distributions;
  double DBEntropy;
  TesterClass *tester;
  int doOcc;

  // True when do_sample_weighting is set, except in the TestWords phase
  // with no_samples_test set.
  bool chopExamples() const {
    const bool off_for_this_test =
      ( runningPhase == TestWords ) && no_samples_test;
    return do_sample_weighting && !off_for_this_test;
  }

  // Phase dependent test on doOcc:
  //   TestWords                                  -> doOcc > 1
  //   TrainWords / LearnWords / TrainLearnWords  -> doOcc is 1 or 3
  //   any other value                            -> false
  bool chopOcc() const {
    if ( runningPhase == TestWords ){
      return doOcc > 1;
    }
    const bool training_phase = ( runningPhase == TrainWords
                                  || runningPhase == LearnWords
                                  || runningPhase == TrainLearnWords );
    return training_phase && ( doOcc == 1 || doOcc == 3 );
  }

  void fill_table();
  void InvalidMessage() const ;
  double calculate_db_entropy( Target * );
  void do_numeric_statistics( );
  void test_instance( const Instance& ,
                      InstanceBase_base * = NULL,
                      size_t = 0 );
  void test_instance_sim( const Instance& ,
                          InstanceBase_base * = NULL,
                          size_t = 0 );
  void test_instance_ex( const Instance&,
                         InstanceBase_base * = NULL,
                         size_t = 0 );
  bool allocate_arrays();
  double RelativeWeight( unsigned int ) const;
  void writePermSpecial(std::ostream&) const;
  bool read_the_vals( std::istream& );

  // Private copy constructor: copy construction is not available outside
  // the class.
  MBLClass( const MBLClass& );
};
// Writes the symbolic name of a MBLClass::PhaseValue to 'os' and returns
// 'os'. Each label is spelled exactly like its enumerator; a value outside
// the four enumerators is written as "unknown phase".
inline std::ostream& operator<< ( std::ostream& os,
                                  const MBLClass::PhaseValue& ph ){
  const char *label = "unknown phase";
  switch( ph ){
  case MBLClass::TrainWords:
    label = "TrainWords";
    break;
  case MBLClass::LearnWords:
    label = "LearnWords";
    break;
  case MBLClass::TestWords:
    label = "TestWords";
    break;
  case MBLClass::TrainLearnWords:
    // Previously printed as "TrainlearnWords", the only label that did not
    // match its enumerator name.
    label = "TrainLearnWords";
    break;
  default:
    break;
  }
  return os << label;
}
// Declared here, defined elsewhere.
// NOTE(review): presumably reports whether the given input line counts as
// empty for the given input format - confirm against the definition.
bool empty_line( const std::string& , const InputFormatType );
}
#endif // TIMBL_MBLCLASS_H