/*
$Id: BestArray.cxx 15868 2013-04-02 14:05:58Z sloot $
$URL: https://ilk.uvt.nl/svn/trunk/sources/Timbl6/src/BestArray.cxx $
Copyright (c) 1998 - 2013
ILK - Tilburg University
CLiPS - University of Antwerp
This file is part of timbl
timbl is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
timbl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see .
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
timbl@uvt.nl
*/
#include
#include
#include
#include
#include "timbl/Common.h"
#include "timbl/MsgClass.h"
#include "timbl/Types.h"
#include "timbl/Instance.h"
#include "timbl/neighborSet.h"
#include "ticcutils/XMLtools.h"
#include "timbl/BestArray.h"
namespace Timbl {
using namespace std;
using namespace TiCC;
using namespace Common;
BestRec::BestRec():
bestDistance( 0.0 )
{}
BestRec::~BestRec(){
for ( unsigned int i=0; i < bestDistributions.size(); ++i ){
delete bestDistributions[i];
}
}
BestArray::~BestArray(){
for ( unsigned int i=0; i < bestArray.size(); ++i )
delete bestArray[i];
}
ostream& operator<< ( ostream& os, const BestRec *b ){
if ( b ){
os << b->aggregateDist.DistToString();
int OldPrec = os.precision(DBL_DIG-1);
os.setf(ios::showpoint);
os << "\t" << b->bestDistance;
os.precision(OldPrec);
os << endl;
}
else {
os << "bestrec is null!" << endl;
}
return os;
}
void BestArray::init( unsigned int numN, unsigned int maxB,
bool storeI, bool showDi, bool showDb ){
_storeInstances = storeI;
_showDi = showDi;
_showDb = showDb;
maxBests = maxB;
// When necessary, take a larger array. (initialy it has 0 length)
// Also check if verbosity has changed and a BestInstances array
// is required.
//
size_t S = size;
size = numN;
if ( S < size ){
bestArray.reserve( size );
for ( size_t k=S; k < size; ++k ) {
bestArray.push_back( new BestRec() );
}
}
for ( size_t i = 0; i < size; ++i ) {
bestArray[i]->bestDistance = (DBL_MAX - numN) + i;
if ( bestArray[i]->bestInstances.empty() ){
if ( _storeInstances ){
bestArray[i]->bestInstances.reserve( maxBests );
bestArray[i]->bestDistributions.reserve( maxBests );
}
}
else {
for ( size_t j = 0; j < bestArray[i]->bestInstances.size(); ++j ){
delete bestArray[i]->bestDistributions[j];
}
bestArray[i]->bestInstances.clear();
bestArray[i]->bestDistributions.clear();
}
bestArray[i]->aggregateDist.clear();
}
}
double BestArray::addResult( double Distance,
const ValueDistribution *Distr,
const string& neighbor ){
// We have the similarity in Distance, and a num_of_neighbors
// dimensional array with best similarities.
// Check, and add/replace/move/whatever.
//
for ( unsigned int k = 0; k < size; ++k ) {
BestRec *best = bestArray[k];
if (fabs(Distance - best->bestDistance) < Epsilon) {
// Equal...just add to the end.
//
best->aggregateDist.Merge( *Distr );
if ( _storeInstances && best->bestInstances.size() < maxBests ){
best->bestInstances.push_back( neighbor );
best->bestDistributions.push_back( Distr->to_VD_Copy() );
}
break;
}
// Check if better than bests[k], insert (or replace if
// it's the lowest of the k bests).
//
/*
Example (no_n = 3):
k distance number
0 2 3
1 4 2
2 6 1
sim = 1 (dus beste)
*/
else if (Distance < best->bestDistance) {
if (k == size - 1) {
//
// Replace.
//
best->bestDistance = Distance;
if ( _storeInstances ){
for ( unsigned int j = 0; j < best->bestInstances.size(); ++j ){
delete best->bestDistributions[j];
}
best->bestInstances.clear();
best->bestDistributions.clear();
best->bestInstances.push_back( neighbor );
best->bestDistributions.push_back( Distr->to_VD_Copy() );
}
best->aggregateDist.clear();
best->aggregateDist.Merge( *Distr );
}
else {
//
// Insert. First shift the rest up.
//
BestRec *keep = bestArray[size-1];
for ( size_t i = size - 1; i > k; i--) {
bestArray[i] = bestArray[i-1];
} // i
//
// And now insert.
//
keep->bestDistance = Distance;
if ( _storeInstances ){
for ( unsigned int j = 0; j < keep->bestInstances.size(); ++j ){
delete keep->bestDistributions[j];
}
keep->bestInstances.clear();
keep->bestDistributions.clear();
keep->bestInstances.push_back( neighbor );
keep->bestDistributions.push_back( Distr->to_VD_Copy() );
}
keep->aggregateDist.clear();
keep->aggregateDist.Merge( *Distr );
bestArray[k] = keep;
}
break;
} // Distance < fBest
} // k
return bestArray[size-1]->bestDistance;
}
void BestArray::initNeighborSet( neighborSet& ns ) const {
ns.clear();
for ( unsigned int k = 0; k < size; ++k ) {
ns.push_back( bestArray[k]->bestDistance,
bestArray[k]->aggregateDist );
}
}
void BestArray::addToNeighborSet( neighborSet& ns, size_t n ) const {
ns.push_back( bestArray[n-1]->bestDistance,
bestArray[n-1]->aggregateDist );
}
xmlNode *BestArray::toXML() const {
xmlNode *top = XmlNewNode( "neighborset" );
for ( unsigned int k = 0; k < size; ++k ) {
BestRec *best = bestArray[k];
if ( _storeInstances ){
size_t totalBests = best->totalBests();
if ( totalBests == 0 )
break; // TRIBL algorithms do this!
xmlNode *nbs = XmlNewChild( top, "neighbors" );
XmlSetAttribute( nbs, "k", toString(k+1) );
XmlSetAttribute( nbs, "total", toString(totalBests) );
XmlSetAttribute( nbs, "distance", toString( best->bestDistance ) );
if ( maxBests < totalBests )
XmlSetAttribute( nbs, "limited", toString( maxBests ) );
for ( unsigned int m=0; m < best->bestInstances.size(); ++m ){
xmlNode *nb = XmlNewChild( nbs, "neighbor" );
XmlNewTextChild( nb, "instance", best->bestInstances[m] );
if ( _showDb )
XmlNewTextChild( nb, "distribution",
best->bestDistributions[m]->DistToString() );
}
}
else {
if ( best->aggregateDist.ZeroDist() )
break;
xmlNode *nbs = XmlNewChild( top, "neighbors" );
XmlSetAttribute( nbs, "k", toString(k+1) );
if ( _showDb ){
XmlNewTextChild( nbs, "distribution",
best->aggregateDist.DistToString() );
}
if ( _showDi ){
XmlNewTextChild( nbs, "distance",
toString(best->bestDistance) );
}
}
}
return top;
}
ostream& operator<< ( ostream& os, const BestArray& bA ){
for ( unsigned int k = 0; k < bA.size; ++k ) {
BestRec *best = bA.bestArray[k];
if ( bA._storeInstances ){
size_t totalBests = best->totalBests();
if ( totalBests == 0 )
break; // TRIBL algorithms do this!
os << "# k=" << k+1 << ", " << totalBests
<< " Neighbor(s) at distance: ";
int OldPrec = os.precision(DBL_DIG-1);
os.setf(ios::showpoint);
os << "\t" << best->bestDistance;
os.precision(OldPrec);
if ( bA.maxBests <= best->bestInstances.size() )
os << " (only " << bA.maxBests << " shown)";
os << endl;
for ( unsigned int m=0; m < best->bestInstances.size(); ++m ){
os << "#\t" << best->bestInstances[m];
if ( bA._showDb )
os << best->bestDistributions[m]->DistToString() << endl;
else
os << " -*-" << endl;
}
}
else {
if ( best->aggregateDist.ZeroDist() )
break;
os << "# k=" << k+1;
if ( bA._showDb ){
os << "\t" << best->aggregateDist.DistToString();
}
if ( bA._showDi ){
int OldPrec = os.precision(DBL_DIG-1);
os.setf(ios::showpoint);
os << "\t" << best->bestDistance;
os.precision(OldPrec);
}
os << endl;
}
}
return os;
}
}