/* $Id: IBtree.h 15865 2013-04-02 13:24:34Z sloot $ $URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/include/timbl/IBtree.h $ Copyright (c) 1998 - 2013 ILK - Tilburg University CLiPS - University of Antwerp This file is part of timbl timbl is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. timbl is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . For questions and suggestions, see: http://ilk.uvt.nl/software.html or send mail to: timbl@uvt.nl */ #ifndef TIMBL_IBTREE_H #define TIMBL_IBTREE_H #include "ticcutils/TreeHash.h" #include "ticcutils/XMLtools.h" //#define IBSTATS namespace Timbl { class IB_InstanceBase; class IG_InstanceBase; class TRIBL_InstanceBase; class TRIBL2_InstanceBase; class Feature; class FeatureValue; class Instance; class Target; class TargetValue; class ValueDistribution; class WValueDistribution; class IBtree { friend class InstanceBase_base; friend class IB_InstanceBase; friend class IG_InstanceBase; friend class TRIBL_InstanceBase; friend class TRIBL2_InstanceBase; friend std::ostream &operator<<( std::ostream&, const IBtree& ); friend std::ostream &operator<<( std::ostream&, const IBtree * ); friend xmlNode *to_xml( IBtree *pnt ); friend int count_next( const IBtree * ); public: const TargetValue* targetValue() const { return TValue; }; private: FeatureValue *FValue; const TargetValue *TValue; ValueDistribution *TDistribution; IBtree *link; IBtree *next; IBtree(); IBtree( FeatureValue * ); ~IBtree(); IBtree *Reduce( const TargetValue *, unsigned long&, long ); #ifdef IBSTATS inline IBtree *add_feat_val( FeatureValue *, unsigned int&, IBtree **, unsigned long& ); #else inline IBtree *add_feat_val( FeatureValue *, IBtree **, unsigned long& ); #endif inline ValueDistribution *sum_distributions( bool ); inline IBtree *make_unique( const TargetValue *, unsigned long& ); void cleanDistributions(); void re_assign_defaults( bool, bool ); void assign_defaults( bool, bool, size_t ); void redo_distributions(); void countBranches( unsigned int, std::vector&, std::vector& ); const ValueDistribution *exact_match( const Instance& ) const; protected: const IBtree *search_node( FeatureValue * ) const; IBtree( const IBtree& ); IBtree& operator=( const IBtree& ); }; typedef std::map FI_map; class InstanceBase_base: public MsgClass { friend class IG_InstanceBase; friend class TRIBL_InstanceBase; friend class TRIBL2_InstanceBase; InstanceBase_base( const InstanceBase_base& ); InstanceBase_base& operator=( const InstanceBase_base& ); friend std::ostream& operator<<( std::ostream &os, const InstanceBase_base& ); friend std::ostream& operator<<( std::ostream &os, const InstanceBase_base * ); public: InstanceBase_base( size_t, unsigned long&, bool, bool ); virtual ~InstanceBase_base( void ); void AssignDefaults( void ); void RedoDistributions(); bool AddInstance( const Instance& ); void RemoveInstance( const Instance& ); void summarizeNodes( std::vector&, std::vector& ); virtual bool MergeSub( InstanceBase_base * ); const ValueDistribution *ExactMatch( const Instance& I ) const { return InstBase->exact_match( I ); }; virtual const ValueDistribution *InitGraphTest( std::vector&, const std::vector *, size_t, size_t ); virtual const ValueDistribution *NextGraphTest( std::vector&, size_t& ); unsigned long int GetDistSize( ) const { return NumOfTails; }; virtual const ValueDistribution *IG_test( const Instance& , size_t&, bool&, const TargetValue *& ); virtual IB_InstanceBase *TRIBL_test( const Instance& , size_t, const TargetValue *&, const ValueDistribution *&, size_t& ); virtual IB_InstanceBase *TRIBL2_test( const Instance& , const ValueDistribution *&, size_t& ); bool read_hash( std::istream &, Hash::StringHash *, Hash::StringHash * ) const; virtual InstanceBase_base *Copy() const = 0; virtual InstanceBase_base *clone() const = 0; void Save( std::ostream &, bool=false ); void Save( std::ostream &, Hash::StringHash *, Hash::StringHash *, bool=false ); void toXML( std::ostream& ); void printStatsTree( std::ostream&, unsigned int startLevel ); virtual bool ReadIB( std::istream&, std::vector&, Target *, int ); virtual bool ReadIB( std::istream &, std::vector&, Target *, Hash::StringHash *, Hash::StringHash *, int ); virtual void Prune( const TargetValue *, long = 0 ); virtual bool IsPruned() const { return false; }; void CleanPartition( bool ); unsigned long int GetSizeInfo( unsigned long int&, double & ) const; const ValueDistribution *TopDist() const { return TopDistribution; }; bool HasDistributions() const; const TargetValue *TopTarget( bool & ); bool PersistentD() const { return PersistentDistributions; }; unsigned long int nodeCount() const { return ibCount;} ; const IBtree *instBase() const { return InstBase; }; #ifdef IBSTATS std::vector mismatch; #endif protected: bool DefAss; bool DefaultsValid; bool Random; bool PersistentDistributions; int Version; ValueDistribution *TopDistribution; WValueDistribution *WTop; const TargetValue *TopT; FI_map fast_index; bool tiedTop; IBtree *InstBase; IBtree *LastInstBasePos; const IBtree **RestartSearch; const IBtree **SkipSearch; const IBtree **InstPath; unsigned long int tree_size; unsigned long int& ibCount; size_t Depth; unsigned long int NumOfTails; IBtree *read_list( std::istream &, std::vector&, Target *, int ); IBtree *read_local( std::istream &, std::vector&, Target *, int ); IBtree *read_list_hashed( std::istream &, std::vector&, Target *, int ); IBtree *read_local_hashed( std::istream &, std::vector&, Target *, int ); void write_tree( std::ostream &os, const IBtree * ) const; void write_tree_hashed( std::ostream &os, const IBtree * ) const; bool read_IB( std::istream &, std::vector&, Target *, int ); bool read_IB( std::istream &, std::vector&, Target *, Hash::StringHash *, Hash::StringHash *, int ); void fill_index(); const IBtree *fast_search_node( FeatureValue * ); }; class IB_InstanceBase: public InstanceBase_base { public: IB_InstanceBase( size_t size, unsigned long& cnt, bool rand ): InstanceBase_base( size, cnt, rand , false ) { }; IB_InstanceBase *Copy() const; IB_InstanceBase *clone() const; const ValueDistribution *InitGraphTest( std::vector&, const std::vector *, size_t, size_t ); const ValueDistribution *NextGraphTest( std::vector&, size_t& ); private: size_t offSet; size_t effFeat; const std::vector *testInst; }; class IG_InstanceBase: public InstanceBase_base { public: IG_InstanceBase( size_t size, unsigned long& cnt, bool rand, bool pruned, bool keep_dists ): InstanceBase_base( size, cnt, rand, keep_dists ), Pruned( pruned ) {}; IG_InstanceBase *clone() const; IG_InstanceBase *Copy() const; void Prune( const TargetValue *, long = 0 ); void specialPrune( const TargetValue * ); bool IsPruned() const { return Pruned; }; const ValueDistribution *IG_test( const Instance& , size_t&, bool&, const TargetValue *& ); bool ReadIB( std::istream &, std::vector&, Target *, int ); bool ReadIB( std::istream &, std::vector&, Target *, Hash::StringHash *, Hash::StringHash *, int ); bool MergeSub( InstanceBase_base * ); protected: bool Pruned; }; class TRIBL_InstanceBase: public InstanceBase_base { public: TRIBL_InstanceBase( size_t size, unsigned long& cnt, bool rand, bool keep_dists ): InstanceBase_base( size, cnt, rand, keep_dists ), Threshold(0) {}; TRIBL_InstanceBase *clone() const; TRIBL_InstanceBase *Copy() const; IB_InstanceBase *TRIBL_test( const Instance&, size_t, const TargetValue *&, const ValueDistribution *&, size_t& ); private: IB_InstanceBase *IBPartition( IBtree * ) const; void AssignDefaults( size_t ); size_t Threshold; }; class TRIBL2_InstanceBase: public InstanceBase_base { public: TRIBL2_InstanceBase( size_t size, unsigned long& cnt, bool rand, bool keep_dists ): InstanceBase_base( size, cnt, rand, keep_dists ) { }; TRIBL2_InstanceBase *clone() const; TRIBL2_InstanceBase *Copy() const; IB_InstanceBase *TRIBL2_test( const Instance& , const ValueDistribution *&, size_t& ); private: IB_InstanceBase *IBPartition( IBtree * ) const; }; } #endif