/* $Id: Statistics.cxx 15565 2013-01-07 14:27:05Z sloot $ $URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/src/Statistics.cxx $ Copyright (c) 1998 - 2013 ILK - Tilburg University CLiPS - University of Antwerp This file is part of timbl timbl is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. timbl is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . For questions and suggestions, see: http://ilk.uvt.nl/software.html or send mail to: timbl@uvt.nl */ #include #include #include #include "timbl/Common.h" #include "timbl/MsgClass.h" #include "timbl/Types.h" #include "timbl/Instance.h" #include "timbl/Statistics.h" namespace Timbl { using std::bad_alloc; using std::ostream; using std::ios; using std::ios_base; using std::endl; using Common::Epsilon; ConfusionMatrix::ConfusionMatrix( size_t s ): size(s){ try { mat.resize(size+1); for ( size_t i=0; i <= size; ++i ){ mat[i].resize(size,0); } } catch( bad_alloc ){ Error ( "Not enough memory for ConfusionMatrix" ); throw; } } ConfusionMatrix::~ConfusionMatrix(){ for ( unsigned int i=0; i <= size; ++i ) mat[i].clear(); mat.clear(); } void ConfusionMatrix::Increment( const TargetValue *t1, const TargetValue *t2 ){ if ( t2 ){ if ( t1 ) ++mat[t1->Index()-1][t2->Index()-1]; else ++mat[size][t2->Index()-1]; } else throw std::out_of_range( "ConfusionMatrix, index out of range" ); } void ConfusionMatrix::Print( ostream& os, const Target *tg ) const { os << "Confusion Matrix:" << endl; os << " "; for ( unsigned int i=0; i < tg->ValuesArray.size(); ++i ){ // Print the class names. os.width(6); os.setf(ios::right, ios::adjustfield); os << tg->ValuesArray[i] << " "; } os << endl; os << " "; for ( unsigned int i=0; i < size; ++i ) os << "-------"; os << endl; for ( unsigned int i=0; i < tg->ValuesArray.size(); ++i ){ os.width(6); os.setf(ios::right, ios::adjustfield); os << tg->ValuesArray[i] << " | "; for ( unsigned int j=0; j < size; ++j ){ os.width(6); os.setf(ios::right, ios::adjustfield); os << mat[i][j] << " "; } os << endl; if ( i == tg->ValuesArray.size() - 1 ){ os << " -*- | "; for ( unsigned int j=0; j < size; ++j ){ os.width(6); os.setf(ios::right, ios::adjustfield); os << mat[size][j] << " "; } os << endl; } } os << endl; } void pf( ostream& os, size_t d ){ os.width(4); os << " \t" << d; } void pf( ostream& os, double d ){ if ( d < 0 ) os << " \t (nan)\t"; else { os.setf(ios::showpoint); os << " \t" << d; } } void ConfusionMatrix::FScore( ostream& os, const Target* tg, bool cs_too ) const { double maf = 0.0; double mif = 0.0; double maa = 0.0; double mia = 0.0; ios_base::fmtflags flags = os.flags(ios::fixed); int oldPrec = os.precision(5); size_t effF = 0; size_t testF = 0; size_t effA = 0; if ( cs_too ){ os << "Scores per Value Class:" << endl; os << "class |\tTP\tFP\tTN\tFN\tprecision\trecall(TPR)\tFPR\t\tF-score\t\tAUC" << endl; } for ( unsigned int i=0; i < tg->ValuesArray.size(); ++i ){ // so we loop over all known (trained) target values size_t TP = 0; size_t FP = 0; size_t FN = 0; size_t TN = 0; ValueClass *tv = tg->ValuesArray[i]; size_t valFreq = tv->ValFreq(); size_t testCount = 0; for ( unsigned int j=0; j < size; ++j ){ testCount += mat[i][j]; if ( i == j ){ TP = mat[i][j]; } else FN += mat[i][j]; } testF += testCount; for ( unsigned int j=0; j <= size; ++j ){ if ( j != i ) FP += mat[j][i]; } for ( unsigned int j=0; j <= size; ++j ){ if ( j != i ) for ( unsigned int k=0; k < size; ++k ){ if ( k != i ) TN += mat[j][k]; } } double precision; if ( TP + FP == 0 ) precision = -1; else precision = TP / double(TP + FP); double TPR; if ( TP + FN == 0 ) TPR = -1; else TPR = TP / double(TP + FN); double FPR; if ( FP + TN == 0 ) FPR = -1; else FPR = FP / double(FP + TN); double FScore; if ( precision < 0 || TPR < 0 || fabs(precision + TPR) < Epsilon ){ FScore = -1; } else { FScore = ( 2 * precision * TPR ) / (precision + TPR ); ++effF; maf += FScore; mif += (FScore * testCount); } double AUC; if ( TPR < 0 || FPR < 0 ){ AUC = -1; } else { AUC = ( 0.5 * TPR * FPR ) + ( TPR * ( 1.0 - FPR ) ) + ( 0.5 * ( ( 1.0 - TPR ) * ( 1.0 - FPR ) ) ); ++effA; maa += AUC; mia += (AUC * testCount); } if ( cs_too ){ os.width( 6 ); os << tv << " | "; os.width(0); pf(os,TP); pf(os,FP); pf(os,TN); pf(os,FN); pf(os,precision); pf(os,TPR); pf(os,FPR); pf(os,FScore); pf(os,AUC); os << endl; } } maf = maf / effF; mif = mif / testF; maa = maa / effA; mia = mia / testF; os.precision( oldPrec ); os.flags( flags ); os << "F-Score beta=1, microav: " << mif << endl; os << "F-Score beta=1, macroav: " << maf << endl; os << "AUC, microav: " << mia << endl; os << "AUC, macroav: " << maa << endl; } void ConfusionMatrix::merge( const ConfusionMatrix *cm ){ if ( cm ){ for ( size_t i=0; i <= size; ++i ) for ( size_t j=0; j < size; ++j ){ mat[i][j] += cm->mat[i][j]; } } } void StatisticsClass::merge( const StatisticsClass& in ){ _data += in._data; _skipped += in._skipped; _correct += in._correct; _tieOk += in._tieOk; _tieFalse += in._tieFalse; _exact += in._exact; } }