OpenWalnut
1.4.0
|
00001 //--------------------------------------------------------------------------- 00002 // 00003 // Project: OpenWalnut ( http://www.openwalnut.org ) 00004 // 00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS 00006 // For more information see http://www.openwalnut.org/copying 00007 // 00008 // This file is part of OpenWalnut. 00009 // 00010 // OpenWalnut is free software: you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as published by 00012 // the Free Software Foundation, either version 3 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // OpenWalnut is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 // GNU Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public License 00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>. 00022 // 00023 //--------------------------------------------------------------------------- 00024 00025 #include <algorithm> 00026 #include <cstring> // memset() 00027 #include <numeric> 00028 #include <string> 00029 #include <utility> 00030 00031 #include "../../common/WAssert.h" 00032 #include "../../common/WLimits.h" 00033 #include "../../common/exceptions/WOutOfBounds.h" 00034 #include "../WDataHandlerEnums.h" 00035 00036 #include "WValueSetHistogram.h" 00037 00038 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets ): 00039 WHistogram( valueSet->getMinimumValue(), valueSet->getMaximumValue(), buckets ) 00040 { 00041 buildHistogram( *valueSet ); 00042 } 00043 00044 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets ): 00045 WHistogram( valueSet.getMinimumValue(), valueSet.getMaximumValue(), buckets ) 00046 { 00047 buildHistogram( valueSet ); 00048 } 00049 00050 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, double min, double max, size_t buckets ): 00051 WHistogram( min, max, buckets ) 00052 { 00053 buildHistogram( *valueSet ); 00054 } 00055 00056 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, double min, double max, size_t buckets ): 00057 WHistogram( min, max, buckets ) 00058 { 00059 buildHistogram( valueSet ); 00060 } 00061 00062 void WValueSetHistogram::buildHistogram( const WValueSetBase& valueSet ) 00063 { 00064 m_nbTotalElements = 0; 00065 00066 // create base histogram 00067 WAssert( m_nbBuckets > 1, "WValueSetHistogram::buildHistogram : number of buckets needs to be larger than 1." ); 00068 m_nInitialBuckets = m_nbBuckets - 1; 00069 m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets ); 00070 WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::buildHistogram() : m_initialBucketSize to small." ); 00071 00072 // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the 00073 // calculation of interval sizes, the value must not be incremented 00074 m_nInitialBuckets++; 00075 00076 // create and initialize array to zero which finally contains the counts 00077 size_t* initialBuckets = new size_t[ m_nInitialBuckets ]; 00078 memset( initialBuckets, 0, m_nInitialBuckets * sizeof( size_t ) ); 00079 // *initialBuckets = { 0 }; // this should works with C++0x (instead memset), TEST IT! 00080 00081 // the array can be shared among several instances of WValueSetHistogram. 00082 m_initialBuckets = boost::shared_array< size_t >( initialBuckets ); 00083 00084 // no mapping applied yet. Initial and mapped are equal 00085 m_nMappedBuckets = m_nInitialBuckets; 00086 m_mappedBuckets = m_initialBuckets; 00087 m_mappedBucketSize = m_initialBucketSize; 00088 00089 // and finally create the histogram 00090 for( size_t i = 0; i < valueSet.size(); ++i ) 00091 { 00092 double tmp = valueSet.getScalarDouble( i ); 00093 insert( tmp ); 00094 } 00095 00096 m_nbTotalElements = valueSet.size(); 00097 } 00098 00099 WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets ): 00100 WHistogram( histogram ), 00101 m_initialBucketSize( histogram.m_initialBucketSize ), 00102 m_initialBuckets( histogram.m_initialBuckets ), 00103 m_nInitialBuckets( histogram.m_nInitialBuckets ), 00104 m_mappedBuckets( histogram.m_mappedBuckets ), 00105 m_nMappedBuckets( histogram.m_nMappedBuckets ), 00106 m_mappedBucketSize( histogram.m_mappedBucketSize ), 00107 m_nbTotalElements( histogram.m_nbTotalElements ) 00108 { 00109 // apply modification of the histogram bucket size? 00110 if( ( buckets == 0 ) || ( buckets == m_nMappedBuckets ) ) 00111 { 00112 return; 00113 } 00114 00115 WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." ); 00116 WAssert( buckets < m_nInitialBuckets, 00117 "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be smaller than the initial bucket count." ); 00118 00119 // number of elements in the new mapped histogram = division + (round up) 00120 m_nMappedBuckets = buckets - 1; 00121 m_mappedBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nMappedBuckets ); 00122 00123 // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the 00124 // calculation of interval sizes, the value must not be incremented 00125 m_nMappedBuckets++; 00126 00127 size_t ratio = static_cast<size_t>( m_nInitialBuckets / m_nMappedBuckets ); 00128 00129 m_mappedBuckets.reset(); 00130 00131 size_t* mappedBuckets = new size_t[ m_nMappedBuckets ]; 00132 memset( mappedBuckets, 0, m_nMappedBuckets * sizeof( size_t ) ); 00133 // *mappedBuckets = { 0 }; // works with C++0x 00134 m_mappedBuckets = boost::shared_array< size_t >( mappedBuckets ); 00135 00136 // map it 00137 size_t index = 0; 00138 for( size_t i = 0; i < m_nInitialBuckets; ++i ) 00139 { 00140 if( ( i % ratio == 0 ) && ( i != 0 ) && ( i != m_nInitialBuckets - 1 ) ) 00141 { 00142 index++; 00143 } 00144 m_mappedBuckets[ index ] += m_initialBuckets[i]; 00145 } 00146 } 00147 00148 WValueSetHistogram& WValueSetHistogram::operator=( const WValueSetHistogram& other ) 00149 { 00150 if( this != &other ) // protect against invalid self-assignment 00151 { 00152 m_minimum = other.m_minimum; 00153 m_maximum = other.m_maximum; 00154 m_initialBucketSize = other.m_initialBucketSize; 00155 m_nInitialBuckets = other.m_nInitialBuckets; 00156 m_nMappedBuckets = other.m_nMappedBuckets; 00157 m_mappedBucketSize = other.m_mappedBucketSize; 00158 00159 // copy the initial/mapped buckets reference, no deep copy here! 00160 m_initialBuckets = other.m_initialBuckets; 00161 m_mappedBuckets = other.m_mappedBuckets; 00162 } 00163 00164 return *this; 00165 } 00166 00167 WValueSetHistogram::~WValueSetHistogram() 00168 { 00169 } 00170 00171 boost::shared_array< size_t > WValueSetHistogram::getInitialBuckets() const 00172 { 00173 return m_initialBuckets; 00174 } 00175 00176 size_t WValueSetHistogram::getNInitialBuckets() const 00177 { 00178 return m_nInitialBuckets; 00179 } 00180 00181 double WValueSetHistogram::getInitialBucketSize() const 00182 { 00183 return m_initialBucketSize; 00184 } 00185 00186 double WValueSetHistogram::getBucketSize( size_t /* index */ ) const 00187 { 00188 // ignore index as each bucket has the same width 00189 return m_mappedBucketSize; 00190 } 00191 00192 void WValueSetHistogram::insert( double value ) 00193 { 00194 m_mappedBuckets[ getIndexForValue( value ) ]++; 00195 } 00196 00197 size_t WValueSetHistogram::operator[]( size_t index ) const 00198 { 00199 // if no mapping has been applied, mappedBuckets points to the initial bucket 00200 return m_mappedBuckets[ index ]; 00201 } 00202 00203 size_t WValueSetHistogram::at( size_t index ) const 00204 { 00205 WAssert( index < m_nMappedBuckets, "WValueSetHistogram::at() : index out of range." ); 00206 00207 // if no mapping has been applied, mappedBuckets points to the initial bucket 00208 return m_mappedBuckets[ index ]; 00209 } 00210 00211 size_t WValueSetHistogram::size() const 00212 { 00213 return m_nMappedBuckets; // overwrite the WHistogram::size here as we have our own size. 00214 } 00215 00216 size_t WValueSetHistogram::getTotalElementCount() const 00217 { 00218 return m_nbTotalElements; 00219 } 00220 00221 std::pair< double, double > WValueSetHistogram::getIntervalForIndex( size_t index ) const 00222 { 00223 double first = m_minimum + m_mappedBucketSize * index; 00224 double second = m_minimum + m_mappedBucketSize * ( index + 1 ); 00225 return std::make_pair( first, second ); 00226 } 00227 00228 size_t WValueSetHistogram::accumulate( size_t startIndex, size_t endIndex ) const 00229 { 00230 if( startIndex > endIndex ) 00231 { 00232 std::swap( startIndex, endIndex ); 00233 } 00234 00235 // valid index? 00236 if( endIndex > size() ) // as endIndex is exclusive, it is allowed to equal size() 00237 { 00238 throw WOutOfBounds( std::string( "The specified endIndex is out of bounds." ) ); 00239 } 00240 00241 // unfortunately, shared_array can't be used for std::accumulate 00242 size_t acc = 0; 00243 while( startIndex != endIndex ) 00244 { 00245 acc += m_mappedBuckets[ startIndex++ ]; 00246 } 00247 00248 return acc; 00249 } 00250 00251 std::ostream& operator<<( std::ostream& out, const WValueSetHistogram& h ) 00252 { 00253 for( size_t i = 0; i < h.size() - 1; ++i ) 00254 { 00255 std::pair< double, double > interval = h.getIntervalForIndex( i ); 00256 // NOTE: the notation for open intervals is [a,b) or alternatively [a,b[. 00257 //out << i << " = [" << interval.first << ", " << interval.second << ") = " << h[ i ] << std::endl; 00258 out << interval.first << " " << interval.second << " " << std::min( h[ i ], size_t( 3000 ) ) << std::endl; 00259 } 00260 // the last interval is handled special 00261 //out << h.size() - 1 << " = [" << h.getIntervalForIndex( h.size() - 1 ).first << ", inf) = " << h[ h.size() - 1 ] << std::endl; 00262 out << h.getIntervalForIndex( h.size() - 1 ).first << " inf " << std::min( h[ h.size() - 1 ], size_t( 3000 ) ) << std::endl; 00263 00264 return out; 00265 } 00266