OpenWalnut  1.4.0
WStringUtils.h
00001 //---------------------------------------------------------------------------
00002 //
00003 // Project: OpenWalnut ( http://www.openwalnut.org )
00004 //
00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
00006 // For more information see http://www.openwalnut.org/copying
00007 //
00008 // This file is part of OpenWalnut.
00009 //
00010 // OpenWalnut is free software: you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as published by
00012 // the Free Software Foundation, either version 3 of the License, or
00013 // (at your option) any later version.
00014 //
00015 // OpenWalnut is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 // GNU Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public License
00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
00022 //
00023 //---------------------------------------------------------------------------
00024 
00025 #ifndef WSTRINGUTILS_H
00026 #define WSTRINGUTILS_H
00027 
00028 #include <algorithm>
00029 #include <iostream>
00030 #include <iomanip>
00031 #include <iterator>
00032 #include <list>
00033 #include <set>
00034 #include <sstream>
00035 #include <string>
00036 #include <vector>
00037 
00038 #include "exceptions/WTypeMismatch.h"
00039 
/**
 * Some utilities for string manipulation and output operations. Please note
 * that the overloaded ostream output operators are not in a separate namespace,
 * whereas the string manipulation functions are. This allows the short
 * <tt><<</tt> notation instead of <tt>string_utils::operator<<( cout,
 * myVector )</tt>.
 * NOTE(review): in this header the stream operators are declared inside
 * string_utils as well, so the remark above may be outdated - please confirm.
 *
 * The reason for not using the Boost trimming functions is that Boost only
 * provides whitespace trimming that depends on the current locale, but we
 * might want to trim other character sets too.
 *
 * The reason for not using the Boost case switching functions is that we want
 * those functions to return a modified <tt>std::string</tt> copy, which makes
 * call chains like <tt>foo( rTrim( toLower( str ), "bar" ) );</tt> possible.
 *
 * The reason for not using Boost's Tokenizer is that this tokenizer is much
 * simpler to use :).
 */
00058 namespace string_utils
00059 {
00060     /**
00061      * Conversion class to convert a string to a given target type. We place this in separate classes as we are not allowed to specialize
00062      * function templates. But we need specializations for certain cases.
00063      *
00064      * \tparam Target target type
00065      */
00066     template< typename Target >
00067     struct fromStringImpl
00068     {
00069         /**
00070          * Convert a given string to the target type. If this fails, a exception is thrown.
00071          *
00072          * \throw WTypeMismatch if source string cannot be converted to target type value.
00073          *
00074          * \param from source string
00075          *
00076          * \return target value
00077          */
00078         static Target fromString( const std::string& from )
00079         {
00080             std::stringstream ss( from );
00081             Target value;
00082             ss >> value;
00083             if( ss.fail() )
00084             {
00085                 throw WTypeMismatch( "Specified string could not be converted to target type." );
00086             }
00087 
00088             return value;
00089         }
00090     };
00091 
00092     /**
00093      * Conversion class to convert a string to a given target type. This is a specialization whenever a string is given as input type
00094      */
00095     template<>
00096     struct fromStringImpl< std::string >
00097     {
00098         /**
00099          * Convert a given string to the target type. Never fails.
00100          *
00101          * \param from source string
00102          *
00103          * \return copy of the source string
00104          */
00105         static std::string fromString( const std::string& from )
00106         {
00107             return from;
00108         }
00109     };
00110 
00111     /**
00112      * Convert a given value to a string. The input value must provide a operator<< or be a standard scalar type.
00113      *
00114      * \tparam T the source type. You do not need to specify this directly as it can be deducted from the given parameter
00115      * \param value the value to cast to string
00116      *
00117      * \return the string.
00118      */
00119     template< typename T >
00120     inline std::string toString( const T& value )
00121     {
00122         std::stringstream ss;
00123         ss << value;
00124         return ss.str();
00125     }
00126 
00127     /**
00128      * Convert a given value to a string. The input value must provide a operator<< or be a standard scalar type.
00129      *
00130      * \param value the value to cast to string
00131      *
00132      * \return the string.
00133      */
00134     inline std::string toString( const unsigned char& value )   // NOLINT: stylechecker complains about non const ref!?
00135     {
00136         std::stringstream ss;
00137         // NOTE: unsigned chars are interpreted as ASCII chars. We want it to be used as number.
00138         ss << static_cast< int >( value );
00139         return ss.str();
00140     }
00141 
00142     /**
00143      * Convert a given value to a string. The input value must provide a operator<< or be a standard scalar type. This method additionally allows
00144      * setting width and precision flags of the used std::stringstream.
00145      *
00146      * \tparam T the source type. You do not need to specify this directly as it can be deducted from the given parameter
00147      * \param value the value to cast to string
00148      * \param precision the precision
00149      * \param width the width
00150      *
00151      * \return the string.
00152      */
00153     template< typename T >
00154     inline std::string toString( const T& value, const size_t width, const size_t precision )
00155     {
00156         std::stringstream ss;
00157         ss.width( width );
00158         ss.precision( precision );
00159         ss << value;
00160         return ss.str();
00161     }
00162 
00163     /**
00164      * Convert a given string to a value of a certain type. The target type must provide a operator>> to work or be a standard scalar type.
00165      *
00166      * \tparam T the source type.
00167      * \param str the value to cast to string
00168      *
00169      * \throw WTypeMismatch if the string cannot be converted properly.
00170      * \return the string.
00171      */
00172     template< typename T >
00173     inline T fromString( const std::string& str )
00174     {
00175         return fromStringImpl< T >::fromString( str );
00176     }
00177 
    /**
     * Character set used as default argument by the trimming and tokenizing functions declared in this namespace. The definition is not
     * part of this header. We consider the following characters as whitespace:
     *  - <tt>\\r</tt> carriage return
     *  - <tt>\\n</tt> newline
     *  - <tt>\\t</tt> tab
     *  - <tt>' '</tt> space
     */
    extern const std::string WHITESPACE;
00185 
    /**
     * Trims any occurrence of each character given in parameter t from the end
     * (or right side) of the given string. The source string is not modified.
     *
     * \param source String to trim
     * \param t String representing a set containing all trimmable characters. Defaults to WHITESPACE.
     * \return A copy of the trimmed string
     */

    std::string rTrim( const std::string& source, const std::string& t = WHITESPACE );
00196 
    /**
     * Trims any occurrence of each character given in parameter t from the
     * start (or left side) of the given string. The source string is not modified.
     *
     * \param source String to trim
     * \param t String representing a set containing all trimmable characters. Defaults to WHITESPACE.
     * \return A copy of the trimmed string
     */
    std::string lTrim( const std::string& source, const std::string& t =
            WHITESPACE );
00207 
    /**
     * Trims any occurrence of each character given in parameter t from both
     * ends (right and left side) of the given string. The source string is not modified.
     *
     * \param source String to trim
     * \param t String representing a set containing all trimmable characters. Defaults to WHITESPACE.
     * \return A copy of the trimmed string
     */
    std::string trim( const std::string& source, const std::string& t = WHITESPACE );
00217 
    /**
     * Transforms all characters in the given string into upper case
     * characters. The source string is not modified.
     *
     * \param source String to transform.
     * \return A copy of the string containing upper case characters only
     */
    std::string toUpper( const std::string& source );
00226 
    /**
     * Transforms all characters in the given string into lower case
     * characters. The source string is not modified.
     *
     * \param source String to transform.
     * \return A copy of the string containing lower case characters only
     */
    std::string toLower( const std::string& source );
00235 
    /**
     * Splits the given string into a vector of strings (so called tokens).
     *
     * \param source String to tokenize
     * \param delim String representing a set containing all characters considered
     * as delimiters between tokens. Defaults to WHITESPACE.
     * \param compress If true, delimiter characters following each other between two
     * tokens are collapsed and handled as just one delimiter. Defaults to true.
     * \return A vector of strings containing the tokens.
     */
    std::vector< std::string > tokenize( const std::string& source,
                                         const std::string& delim = WHITESPACE,
                                         bool compress = true );
00249 
00250     /**
00251      * Writes every vector to an output stream such as cout, if its elements
00252      * have an output operator defined.
00253      *
00254      * \param os The output stream where the elements are written to
00255      * \param v Vector containing the elements
00256      * \return The output stream again.
00257      */
00258     template< class T > std::ostream& operator<<( std::ostream& os, const std::vector< T >& v )
00259     {
00260         std::stringstream result;
00261         result << "[" << std::scientific << std::setprecision( 16 );
00262         std::copy( v.begin(), v.end(), std::ostream_iterator< T >( result, ", " ) );
00263         os << rTrim( result.str(), ", " ) << "]";
00264         return os;
00265     }
00266 
00267     /**
00268      * Write an input stream into the given vector. The delimiter is implicitly set to ", ".
00269      * Also wrapping brackets '[' ']' are expected. In general this is the opposite of the
00270      * output operator above.
00271      * \warning The inputstream is first written into a string then the convertion into T
00272      * via fromString takes place.
00273      * \warning The delimiter should not be in an elements string representation since then
00274      * the tokenizer may gets confused
00275      *
00276      * \param in Input stream
00277      * \param v Vector where to store the elements.
00278      *
00279      * \return The input stream again
00280      */
00281     template< class T > std::istream& operator>>( std::istream& in, std::vector< T >& v )
00282     {
00283         std::string str;
00284         in >> str;
00285         trim( str, "[]" ); // remove preceeding and trailing brackets '[', ']' if any
00286         std::vector< std::string > tokens = tokenize( str, ", " );
00287         v.resize( 0 ); // clear would deallocate
00288         v.reserve( tokens.size() );
00289         for( size_t i = 0; i < tokens.size(); ++i )
00290         {
00291             v.push_back( fromString< T >( tokens[i] ) );
00292         }
00293         return in;
00294     }
00295 
00296     /**
00297      * Writes every list to an output stream such as cout, if its elements have
00298      * an output operator defined.
00299      *
00300      * \param os The output stream where the elements are written to
00301      * \param l List containing the elements
00302      * \return The output stream again.
00303      */
00304     template< class T > std::ostream& operator<<( std::ostream& os, const std::list< T >& l )
00305     {
00306         std::stringstream result;
00307         result << "<" << std::scientific;
00308         std::copy( l.begin(), l.end(), std::ostream_iterator< T >( result, ", " ) );
00309         os << rTrim( result.str(), ", " ) << ">";
00310         return os;
00311     }
00312 
00313     /**
00314      * Writes every set to an output stream such as cout, if its elements have
00315      * an output operator defined.
00316      *
00317      * \param os The output stream where the elements are written to
00318      * \param s set containing the elements
00319      * \return The output stream again.
00320      */
00321     template< class T > std::ostream& operator<<( std::ostream& os, const std::set< T >& s )
00322     {
00323         std::stringstream result;
00324         result << "{" << std::scientific;
00325         std::copy( s.begin(), s.end(), std::ostream_iterator< T >( result, ", " ) );
00326         os << rTrim( result.str(), ", " ) << "}";
00327         return os;
00328     }
00329 }  // end of namespace
00330 
00331 #endif  // WSTRINGUTILS_H