/* $Id: Testers.cxx 15828 2013-03-28 11:55:53Z sloot $ $URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/src/Testers.cxx $ Copyright (c) 1998 - 2013 ILK - Tilburg University CLiPS - University of Antwerp This file is part of timbl timbl is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. timbl is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . For questions and suggestions, see: http://ilk.uvt.nl/software.html or send mail to: timbl@uvt.nl */ #include #include #include #include #include #include #include #include #include "timbl/Common.h" #include "timbl/Types.h" #include "timbl/Instance.h" #include "timbl/Metrics.h" #include "timbl/Testers.h" using namespace std; using namespace TiCC; using Common::Epsilon; using Common::Log2; namespace Timbl{ //#define DBGTEST double overlapTestFunction::test( FeatureValue *F, FeatureValue *G, Feature *Feat ) const { #ifdef DBGTEST cerr << "overlap_distance(" << F << "," << G << ") = "; #endif double result = Feat->fvDistance( F, G ); #ifdef DBGTEST cerr << result; #endif result *= Feat->Weight(); #ifdef DBGTEST cerr << " gewogen " << result << endl; #endif return result; } double valueDiffTestFunction::test( FeatureValue *F, FeatureValue *G, Feature *Feat ) const { #ifdef DBGTEST cerr << toString(Feat->getMetricType()) << "_distance(" << F << "," << G << ") = "; #endif double result = Feat->fvDistance( F, G, threshold ); #ifdef DBGTEST cerr << result; #endif result *= Feat->Weight(); #ifdef DBGTEST cerr << " gewogen " << result << endl; #endif return result; } TesterClass* getTester( MetricType m, const std::vector& features, const std::vector& permutation, int mvdThreshold ){ if ( m == Cosine ) return new CosineTester( features, permutation ); else if ( m == DotProduct ) return new DotProductTester( features, permutation ); else return new DistanceTester( features, permutation, mvdThreshold ); } TesterClass::TesterClass( const vector& feat, const vector& perm ): _size(feat.size()), features(feat), permutation(perm) { permFeatures.resize(_size,0); #ifdef DBGTEST cerr << "created TesterClass(" << _size << ")" << endl; #endif for ( size_t j=0; j < _size; ++j ){ permFeatures[j] = feat[perm[j]]; } distances.resize(_size+1, 0.0); } void TesterClass::init( const Instance& inst, size_t effective, size_t oset ){ #ifdef DBGTEST cerr << "tester Initialized!" << endl; #endif effSize = effective-oset; offSet = oset; FV = &inst.FV; } DistanceTester::~DistanceTester(){ for ( size_t i=0; i < _size; ++i ){ delete metricTest[i]; } delete [] metricTest; } DistanceTester::DistanceTester( const vector& feat, const vector& perm, int mvdmThreshold ): TesterClass( feat, perm ){ #ifdef DBGTEST cerr << "create a tester with threshold = " << mvdmThreshold << endl; #endif metricTest = new metricTestFunction*[_size]; for ( size_t i=0; i < _size; ++i ){ metricTest[i] = 0; #ifdef DBGTEST cerr << "set metric[" << i+1 << "]=" << toString(features[i]->getMetricType()) << endl; #endif if ( features[i]->Ignore() ) continue; if ( features[i]->isStorableMetric() ){ #ifdef DBGTEST cerr << "created valueDiffTestFunction " << endl; #endif metricTest[i] = new valueDiffTestFunction( mvdmThreshold ); } else { #ifdef DBGTEST cerr << "created overlapFunction " << endl; #endif metricTest[i] = new overlapTestFunction(); } } } size_t DistanceTester::test( vector& G, size_t CurPos, double Threshold ) { size_t i; size_t TrueF; for ( i=CurPos, TrueF = i + offSet; i < effSize; ++i,++TrueF ){ #ifdef DBGTEST cerr << "feature " << TrueF << " (perm=" << permutation[TrueF] << ")" << endl; #endif double result = metricTest[permutation[TrueF]]->test( (*FV)[TrueF], G[i], permFeatures[TrueF] ); distances[i+1] = distances[i] + result; if ( distances[i+1] > Threshold ){ #ifdef DBGTEST cerr << "threshold reached at " << i << " distance=" << distances[i+1] << endl; #endif return i; } } #ifdef DBGTEST cerr << "threshold reached at end, distance=" << distances[effSize] << endl; #endif return effSize; } double DistanceTester::getDistance( size_t pos ) const{ return distances[pos]; } inline bool FV_to_real( FeatureValue *FV, double &result ){ if ( FV ){ if ( stringTo( FV->Name(), result ) ) return true; } return false; } double innerProduct( FeatureValue *FV, FeatureValue *G ) { double r1, r2, result; #ifdef DBGTEST cerr << "innerproduct " << FV << " x " << G << endl; #endif if ( FV_to_real( FV, r1 ) && FV_to_real( G, r2 ) ){ #ifdef DBGTEST cerr << "innerproduct " << r1 << " x " << r2 << endl; #endif result = r1 * r2; } else result = 0.0; #ifdef DBGTEST cerr << " resultaat == " << result << endl; #endif return result; } size_t CosineTester::test( vector& G, size_t CurPos, double ){ double denom1 = 0.0; double denom2 = 0.0; double result = 0.0; size_t TrueF; size_t i; for ( i=CurPos, TrueF = i + offSet; i < effSize; ++i,++TrueF ){ double W = permFeatures[TrueF]->Weight(); denom1 += innerProduct( (*FV)[TrueF], (*FV)[TrueF] ) * W; denom2 += innerProduct( G[i], G[i] ) * W; result += innerProduct( (*FV)[TrueF], G[i] ) * W; } double denom = sqrt( denom1 * denom2 ); distances[effSize] = result/ (denom + Common::Epsilon); return effSize; } size_t DotProductTester::test( vector& G, size_t CurPos, double ) { double result; size_t TrueF; size_t i; for ( i=CurPos, TrueF = i + offSet; i < effSize; ++i,++TrueF ){ result = innerProduct( (*FV)[TrueF], G[i] ); result *= permFeatures[TrueF]->Weight(); distances[i+1] = distances[i] + result; #ifdef DBGTEST cerr << "gewogen result " << result << endl; cerr << "distance[" << i+1 << "]=" << distances[i+1] << endl; #endif } return effSize; } double SimilarityTester::getDistance( size_t pos ) const{ #ifdef DBGTEST cerr << "getDistance, maxSim = " << maxSimilarity << endl; cerr << " distances[" << pos << "]= " << distances[pos] << endl; #endif return maxSimilarity - distances[pos]; } }