OpenWalnut  1.4.0
WValueSetHistogram.cpp
00001 //---------------------------------------------------------------------------
00002 //
00003 // Project: OpenWalnut ( http://www.openwalnut.org )
00004 //
00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
00006 // For more information see http://www.openwalnut.org/copying
00007 //
00008 // This file is part of OpenWalnut.
00009 //
00010 // OpenWalnut is free software: you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as published by
00012 // the Free Software Foundation, either version 3 of the License, or
00013 // (at your option) any later version.
00014 //
00015 // OpenWalnut is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 // GNU Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public License
00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
00022 //
00023 //---------------------------------------------------------------------------
00024 
00025 #include <algorithm>
00026 #include <cstring> // memset()
00027 #include <numeric>
00028 #include <string>
00029 #include <utility>
00030 
00031 #include "../../common/WAssert.h"
00032 #include "../../common/WLimits.h"
00033 #include "../../common/exceptions/WOutOfBounds.h"
00034 #include "../WDataHandlerEnums.h"
00035 
00036 #include "WValueSetHistogram.h"
00037 
00038 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets ):
00039     WHistogram( valueSet->getMinimumValue(), valueSet->getMaximumValue(), buckets )
00040 {
00041     buildHistogram( *valueSet );
00042 }
00043 
00044 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets ):
00045     WHistogram( valueSet.getMinimumValue(), valueSet.getMaximumValue(), buckets )
00046 {
00047     buildHistogram( valueSet );
00048 }
00049 
00050 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, double min, double max, size_t buckets ):
00051     WHistogram( min, max, buckets )
00052 {
00053     buildHistogram( *valueSet );
00054 }
00055 
00056 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, double min, double max, size_t buckets ):
00057     WHistogram( min, max, buckets )
00058 {
00059     buildHistogram( valueSet );
00060 }
00061 
00062 void WValueSetHistogram::buildHistogram( const WValueSetBase& valueSet )
00063 {
00064     m_nbTotalElements = 0;
00065 
00066     // create base histogram
00067     WAssert( m_nbBuckets > 1, "WValueSetHistogram::buildHistogram : number of buckets needs to be larger than 1." );
00068     m_nInitialBuckets = m_nbBuckets - 1;
00069     m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets );
00070     WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::buildHistogram() : m_initialBucketSize to small." );
00071 
00072     // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
00073     // calculation of interval sizes, the value must not be incremented
00074     m_nInitialBuckets++;
00075 
00076     // create and initialize array to zero which finally contains the counts
00077     size_t* initialBuckets = new size_t[ m_nInitialBuckets ];
00078     memset( initialBuckets, 0, m_nInitialBuckets * sizeof( size_t ) );
00079     // *initialBuckets = { 0 }; // this should works with C++0x (instead memset), TEST IT!
00080 
00081     // the array can be shared among several instances of WValueSetHistogram.
00082     m_initialBuckets = boost::shared_array< size_t >( initialBuckets );
00083 
00084     // no mapping applied yet. Initial and mapped are equal
00085     m_nMappedBuckets = m_nInitialBuckets;
00086     m_mappedBuckets = m_initialBuckets;
00087     m_mappedBucketSize = m_initialBucketSize;
00088 
00089     // and finally create the histogram
00090     for( size_t i = 0; i < valueSet.size(); ++i )
00091     {
00092         double tmp = valueSet.getScalarDouble( i );
00093         insert( tmp );
00094     }
00095 
00096     m_nbTotalElements = valueSet.size();
00097 }
00098 
00099 WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets ):
00100     WHistogram( histogram ),
00101     m_initialBucketSize( histogram.m_initialBucketSize ),
00102     m_initialBuckets( histogram.m_initialBuckets ),
00103     m_nInitialBuckets( histogram.m_nInitialBuckets ),
00104     m_mappedBuckets( histogram.m_mappedBuckets ),
00105     m_nMappedBuckets( histogram.m_nMappedBuckets ),
00106     m_mappedBucketSize( histogram.m_mappedBucketSize ),
00107     m_nbTotalElements( histogram.m_nbTotalElements )
00108 {
00109     // apply modification of the histogram bucket size?
00110     if( ( buckets == 0 ) || ( buckets == m_nMappedBuckets ) )
00111     {
00112         return;
00113     }
00114 
00115     WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
00116     WAssert( buckets < m_nInitialBuckets,
00117                           "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be smaller than the initial bucket count." );
00118 
00119     // number of elements in the new mapped histogram = division + (round up)
00120     m_nMappedBuckets = buckets - 1;
00121     m_mappedBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nMappedBuckets );
00122 
00123     // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
00124     // calculation of interval sizes, the value must not be incremented
00125     m_nMappedBuckets++;
00126 
00127     size_t ratio = static_cast<size_t>( m_nInitialBuckets / m_nMappedBuckets );
00128 
00129     m_mappedBuckets.reset();
00130 
00131     size_t* mappedBuckets = new size_t[ m_nMappedBuckets ];
00132     memset( mappedBuckets, 0, m_nMappedBuckets * sizeof( size_t ) );
00133     // *mappedBuckets = { 0 }; // works with C++0x
00134     m_mappedBuckets = boost::shared_array< size_t >( mappedBuckets );
00135 
00136     // map it
00137     size_t index = 0;
00138     for( size_t i = 0; i < m_nInitialBuckets; ++i )
00139     {
00140         if( ( i % ratio == 0 ) && ( i != 0 ) && ( i != m_nInitialBuckets - 1 ) )
00141         {
00142             index++;
00143         }
00144         m_mappedBuckets[ index ] += m_initialBuckets[i];
00145     }
00146 }
00147 
00148 WValueSetHistogram& WValueSetHistogram::operator=( const WValueSetHistogram& other )
00149 {
00150     if( this != &other ) // protect against invalid self-assignment
00151     {
00152         m_minimum = other.m_minimum;
00153         m_maximum = other.m_maximum;
00154         m_initialBucketSize = other.m_initialBucketSize;
00155         m_nInitialBuckets = other.m_nInitialBuckets;
00156         m_nMappedBuckets = other.m_nMappedBuckets;
00157         m_mappedBucketSize = other.m_mappedBucketSize;
00158 
00159         // copy the initial/mapped buckets reference, no deep copy here!
00160         m_initialBuckets = other.m_initialBuckets;
00161         m_mappedBuckets =  other.m_mappedBuckets;
00162     }
00163 
00164     return *this;
00165 }
00166 
00167 WValueSetHistogram::~WValueSetHistogram()
00168 {
00169 }
00170 
00171 boost::shared_array< size_t > WValueSetHistogram::getInitialBuckets() const
00172 {
00173     return m_initialBuckets;
00174 }
00175 
00176 size_t WValueSetHistogram::getNInitialBuckets() const
00177 {
00178     return m_nInitialBuckets;
00179 }
00180 
00181 double WValueSetHistogram::getInitialBucketSize() const
00182 {
00183     return m_initialBucketSize;
00184 }
00185 
00186 double WValueSetHistogram::getBucketSize( size_t /* index */ ) const
00187 {
00188     // ignore index as each bucket has the same width
00189     return m_mappedBucketSize;
00190 }
00191 
00192 void WValueSetHistogram::insert( double value )
00193 {
00194     m_mappedBuckets[ getIndexForValue( value ) ]++;
00195 }
00196 
00197 size_t WValueSetHistogram::operator[]( size_t index ) const
00198 {
00199     // if no mapping has been applied, mappedBuckets points to the initial bucket
00200     return m_mappedBuckets[ index ];
00201 }
00202 
00203 size_t WValueSetHistogram::at( size_t index ) const
00204 {
00205     WAssert( index < m_nMappedBuckets, "WValueSetHistogram::at() : index out of range." );
00206 
00207     // if no mapping has been applied, mappedBuckets points to the initial bucket
00208     return m_mappedBuckets[ index ];
00209 }
00210 
00211 size_t WValueSetHistogram::size() const
00212 {
00213     return m_nMappedBuckets;    // overwrite the WHistogram::size here as we have our own size.
00214 }
00215 
00216 size_t WValueSetHistogram::getTotalElementCount() const
00217 {
00218     return m_nbTotalElements;
00219 }
00220 
00221 std::pair< double, double > WValueSetHistogram::getIntervalForIndex( size_t index ) const
00222 {
00223     double first = m_minimum + m_mappedBucketSize * index;
00224     double second =  m_minimum + m_mappedBucketSize * ( index + 1 );
00225     return std::make_pair( first, second );
00226 }
00227 
00228 size_t WValueSetHistogram::accumulate( size_t startIndex, size_t endIndex ) const
00229 {
00230     if( startIndex > endIndex )
00231     {
00232         std::swap( startIndex, endIndex );
00233     }
00234 
00235     // valid index?
00236     if( endIndex > size() )    // as endIndex is exclusive, it is allowed to equal size()
00237     {
00238         throw WOutOfBounds( std::string( "The specified endIndex is out of bounds." ) );
00239     }
00240 
00241     // unfortunately, shared_array can't be used for std::accumulate
00242     size_t acc = 0;
00243     while( startIndex != endIndex )
00244     {
00245         acc += m_mappedBuckets[ startIndex++ ];
00246     }
00247 
00248     return acc;
00249 }
00250 
00251 std::ostream& operator<<( std::ostream& out, const WValueSetHistogram& h )
00252 {
00253     for( size_t i = 0; i < h.size() - 1; ++i )
00254     {
00255         std::pair< double, double > interval = h.getIntervalForIndex( i );
00256         // NOTE: the notation for open intervals is [a,b) or alternatively [a,b[.
00257         //out << i << " = [" << interval.first << ", " << interval.second << ") = " << h[ i ] << std::endl;
00258         out << interval.first << " " <<  interval.second << " " << std::min( h[ i ], size_t( 3000 ) ) << std::endl;
00259     }
00260     // the last interval is handled special
00261     //out << h.size() - 1 << " = [" << h.getIntervalForIndex( h.size() - 1 ).first << ", inf) = " << h[ h.size() - 1 ] << std::endl;
00262     out << h.getIntervalForIndex( h.size() - 1 ).first << " inf " << std::min( h[ h.size() - 1 ], size_t( 3000 ) ) << std::endl;
00263 
00264     return out;
00265 }
00266