OpenWalnut  1.4.0
WStructuredTextParser.h
00001 //---------------------------------------------------------------------------
00002 //
00003 // Project: OpenWalnut ( http://www.openwalnut.org )
00004 //
00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
00006 // For more information see http://www.openwalnut.org/copying
00007 //
00008 // This file is part of OpenWalnut.
00009 //
00010 // OpenWalnut is free software: you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as published by
00012 // the Free Software Foundation, either version 3 of the License, or
00013 // (at your option) any later version.
00014 //
00015 // OpenWalnut is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 // GNU Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public License
00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
00022 //
00023 //---------------------------------------------------------------------------
00024 
00025 #ifndef WSTRUCTUREDTEXTPARSER_H
00026 #define WSTRUCTUREDTEXTPARSER_H
00027 
00028 #include <algorithm>
00029 #include <iostream>
00030 #include <map>
00031 #include <ostream>
00032 #include <string>
00033 #include <vector>
00034 
00035 #include <boost/config/warning_disable.hpp>
00036 #include <boost/spirit/include/qi.hpp>
00037 #include <boost/spirit/include/phoenix_core.hpp>
00038 #include <boost/spirit/include/phoenix_operator.hpp>
00039 #include <boost/spirit/include/phoenix_fusion.hpp>
00040 #include <boost/spirit/include/phoenix_stl.hpp>
00041 #include <boost/spirit/include/phoenix_object.hpp>
00042 #include <boost/fusion/include/adapt_struct.hpp>
00043 #include <boost/variant/recursive_variant.hpp>
00044 #include <boost/fusion/include/io.hpp>
00045 #include <boost/filesystem/path.hpp>
00046 
00047 #include "WStringUtils.h"
00048 #include "exceptions/WTypeMismatch.h"
00049 #include "exceptions/WNotFound.h"
00050 
00051 /**
00052  * This namespace contains the WStructuredTextParser data types and the parser. The parser builds up the abstract syntax tree (AST)
00053  * for the given input, which can be traversed later.
00054  */
00055 namespace WStructuredTextParser
00056 {
00057     //! Short alias for the Boost.Spirit Qi namespace, as we use it quite often.
00058     namespace qi = boost::spirit::qi;
00059 
00060     //! Short alias for the Boost.Fusion namespace, as we use it quite often.
00061     namespace fusion = boost::fusion;
00062 
00063     //! Short alias for the Boost.Spirit ASCII namespace, as we use it quite often.
00064     namespace ascii = boost::spirit::ascii;
00065 
00066     //! Short alias for the Boost.Phoenix namespace, as we use it quite often.
00067     namespace phoenix = boost::phoenix;
00068 
00069     //! Short alias for the Boost.Spirit namespace, as we use it quite often.
00070     namespace spirit = boost::spirit;
00071 
00072     /**
00073      * The type used for keys. Keys are plain strings.
00074      */
00075     typedef std::string KeyType;
00076 
00077     /**
00078      * The type used for values. Values are kept as plain strings.
00079      */
00080     typedef std::string ValueType;
00081 
00082     /**
00083      * The type used for comments. Comments are kept as plain strings.
00084      */
00085     typedef std::string CommentType;
00086 
00087     /**
00088      * Forward declare the object type. It is needed by MemberType before ObjectType itself can be defined.
00089      */
00090     struct ObjectType;
00091 
00092     /**
00093      * KeyValueType - a pair containing a name and a value, both stored as strings.
00094      */
00095     struct KeyValueType
00096     {
00097         /**
00098          * Name string.
00099          */
00100         std::string m_name;
00101         /**
00102          * Value string.
00103          */
00104         std::string m_value;
00105     };
00106 
00107     /**
00108      * A node inside the AST is either another object, a key-value pair or a comment. ObjectType is only forward declared here, hence the recursive_wrapper.
00109      */
00110     typedef
00111         boost::variant<
00112             boost::recursive_wrapper< ObjectType >,
00113             KeyValueType,
00114             CommentType
00115         >
00116     MemberType;
00117 
00118     /**
00119      * An object always has a name and contains several further nodes (nested objects, key-value pairs or comments).
00120      */
00121     struct ObjectType
00122     {
00123         /**
00124          * Name of the object
00125          */
00126         std::string m_name;
00127 
00128         /**
00129          * The object's members, each being one alternative of MemberType.
00130          */
00131         std::vector< MemberType > m_nodes;
00132     };
00133 
00134     /**
00135      * The list of all nodes (objects, key-value pairs and comments) on file level.
00136      */
00137     typedef std::vector< MemberType > FileType;
00138 }
00139 
00140 
00141 // Doxygen has problems with the following macro invocations, so they are excluded from the documentation run
00142 // \cond Suppress_Doxygen
00143 /**
00144  * Tell boost::fusion about ObjectType: it is adapted as a sequence of its name followed by its nodes.
00145  */
00146 BOOST_FUSION_ADAPT_STRUCT(
00147     WStructuredTextParser::ObjectType,
00148     ( std::string, m_name )
00149     ( std::vector< WStructuredTextParser::MemberType >, m_nodes )
00150     )
00151 
00152 /**
00153  * Tell boost::fusion about KeyValueType: it is adapted as a sequence of its name followed by its value.
00154  */
00155 BOOST_FUSION_ADAPT_STRUCT(
00156     WStructuredTextParser::KeyValueType,
00157     ( std::string, m_name )
00158     ( std::string, m_value )
00159     )
00160 // \endcond
00161 
00162 namespace WStructuredTextParser
00163 {
00164     /**
00165      * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to
00166      * successfully parse a file:
00167      * <ul>
00168      *  <li>Key: identifier, needs to be a-z,A-Z,0-9,_ and must not begin with a digit
00169      *  <li>Object: defined as key + { ... }. A quoted value is accepted as object name too.
00170      *  <li> ";" is optional after objects
00171      *  <li>Key-Value Pair: is a member of an object or of the file itself and is defined as key="value";
00172      *  <li>Comments begin with //
00173      * </ul>
00174      * For more details please see the test fixture file in core/common/test/fixtures/WStructuredTextParser_test.txt.
00175      *
00176      * \tparam Iterator the iterator, used to get the input stream
00177      */
00178     template <typename Iterator>
00179     struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type >
00180     {
00181         /**
00182          * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse. The start rule is "file".
00183          *
00184          * \param error the stream the registered error handlers write their message to
00185          */
00186         explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref
00187         {
00188             // a key begins with a letter or an underscore, followed by any number of letters, digits and underscores
00189             key    %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" );
00190             // a value is a quoted string: everything up to the next double quote. Multi-line strings possible
00191             value  %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"';
00192 
00193             // a pair is: key = value, terminated by a mandatory semicolon
00194             kvpair %= key >> '=' >> value >> ';';
00195             // a comment is // + an arbitrary number of the symbols listed in the set below
00196             comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]§!+-" ) ];
00197             // a object is a name (key or quoted value), and a set of nested objects, key-value pairs or comments in braces
00198             object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" );
00199             // a file is basically an object without name.
00200             file %= *( object | kvpair | comment );
00201 
00202             // provide names for these objects for better readability of parse errors
00203             object.name( "object" );
00204             kvpair.name( "key-value pair" );
00205             key.name( "key" );
00206             value.name( "value" );
00207             file.name( "file" );
00208             comment.name( "comment" );
00209 
00210             // provide error handlers which write "Error: " and the description of what was expected (qi::_4) to the given stream
00211             // NOTE: Qi calls on_error handlers only if an expectation point (operator >) fails. The rules above only use operator >>, so they never fire.
00212             qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 );
00213             qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 );
00214             qi::on_error< qi::fail >( key,    error << phoenix::val( "Error: " ) << qi::_4 );
00215             qi::on_error< qi::fail >( value,  error << phoenix::val( "Error: " ) << qi::_4 );
00216             qi::on_error< qi::fail >( comment,  error << phoenix::val( "Error: " ) << qi::_4 );
00217             qi::on_error< qi::fail >( file,  error << phoenix::val( "Error: " ) << qi::_4 );
00218        }
00219 
00220         // Rules we use
00221 
00222         /**
00223          * Rule for objects. Attribute is ObjectType. Objects may be nested. See constructor for exact definition.
00224          */
00225         qi::rule< Iterator, ObjectType(), ascii::space_type > object;
00226 
00227         /**
00228          * Rule for files. Basically the same as an object but without name. This is the start rule of the grammar.
00229          */
00230         qi::rule< Iterator, FileType(), ascii::space_type > file;
00231 
00232         /**
00233          * Rule for comments. Attribute is CommentType. See constructor for exact definition.
00234          */
00235         qi::rule< Iterator, CommentType(), ascii::space_type > comment;
00236 
00237         /**
00238          * Key-value pair rule. See constructor for exact definition.
00239          */
00240         qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair;
00241 
00242         /**
00243          * Key rule. Declared without a skipper. See constructor for exact definition.
00244          */
00245         qi::rule< Iterator, KeyType() > key;
00246 
00247         /**
00248          * Value rule. Declared without a skipper. See constructor for exact definition.
00249          */
00250         qi::rule< Iterator, ValueType() > value;
00251     };
00252 
00253     /**
00254      * This simplifies working with a tree in a \ref WStructuredTextParser::FileType instance. It provides easy query and check methods. It does not
00255      * provide any semantic options. So checking the validity of the contents and structure of the tree is the job of the using class/derived class. As
00256      * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify
00257      * the type.
00258      *
00259      * This tree uses the types in the WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive
00260      * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the
00261      * \ref WStructuredTextParser::ObjectType tree.
00262      *
00263      * \note The syntax of the parsed files is defined by the parser itself. See WStructuredTextParser::Grammar for details.
00264      * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes
00265      * it.
00266      */
00267     class StructuredValueTree
00268     {
00269         friend class WStructuredTextParserTest;
00270     public:
00271         /**
00272          * This string is used as separator for identifying values in the tree. NEVER change this value.
00273          */
00274         static const std::string Separator;
00275 
00276         /**
00277          * Construct the instance given the original parsing structure.
00278          *
00279          * \param file the parsing result structure (the root node).
00280          */
00281         explicit StructuredValueTree( const FileType& file );
00282 
00283         /**
00284          * Construct the instance given a text as string.
00285          *
00286          * \param toParse the text to parse
00287          */
00288         explicit StructuredValueTree( const std::string& toParse );
00289 
00290         /**
00291          * Construct the instance given a path to a file to load.
00292          *
00293          * \param file the path to a file to load.
00294          */
00295         explicit StructuredValueTree( const boost::filesystem::path& file );
00296 
00297         /**
00298          * Creates an empty tree. It will contain no information at all.
00299          */
00300         StructuredValueTree();
00301 
00302         /**
00303          * Cleanup.
00304          */
00305         virtual ~StructuredValueTree();
00306 
00307         /**
00308          * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to
00309          * true.
00310          *
00311          * \param key path to the value
00312          * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function
00313          * to return true.
00314          *
00315          * \return true if existing.
00316          */
00317         bool exists( std::string key, bool valuesOnly = false ) const;
00318 
00319         /**
00320          * It is possible that there are multiple values matching a key. This method counts them.
00321          *
00322          * \param key path to the values to count
00323          * \param valuesOnly if true, it only counts values matching the given name.
00324          *
00325          * \return the number of found values.
00326          */
00327         size_t count( std::string key, bool valuesOnly = false ) const;
00328 
00329         /**
00330          * Queries the value with the given name. If it is not found, the default value will be returned.
00331          *
00332          * \param key path to the value. Paths to whole objects are invalid.
00333          * \param defaultValue the default if no value was found
00334          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00335          *
00336          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00337          *
00338          * \return the first value found or the default value
00339          *
00340          * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
00341          */
00342         template< typename T >
00343         T getValue( std::string key, const T& defaultValue ) const;
00344 
00345         /**
00346          * Queries the list of values matching the given path. If nothing is found, the default values will be returned.
00347          *
00348          * \param key path to the value. Paths to whole objects are invalid.
00349          * \param defaults the defaults if no value was found
00350          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00351          *
00352          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00353          *
00354          * \return the matching values, or the defaults if nothing matched
00355          *
00356          * \note this does not return a reference as the default values might be returned. It returns a copy of the values.
00357          */
00358         template< typename T >
00359         std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const;
00360 
00361         /**
00362          * Queries the list of values matching the given path. If it is not found, an empty results vector is returned.
00363          *
00364          * \param key path to the value. Paths to whole objects are invalid.
00365          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00366          *
00367          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00368          *
00369          * \return the value vector. Might be empty if no elements were found.
00370          *
00371          * \note this does not return references to the stored values. It returns converted copies of them.
00372          */
00373         template< typename T >
00374         std::vector< T > getValues( std::string key ) const;
00375 
00376         /**
00377          * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first
00378          * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count
00379          *
00380          * \param key path to the value. Paths to whole objects are invalid.
00381          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00382          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00383          * \throw WNotFound if the key:value pair does not exist
00384          *
00385          * \return the value as copy to avoid any const_cast which would allow modification.
00386          */
00387         template< typename T >
00388         T operator[]( std::string key ) const;
00389 
00390         /**
00391          * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are
00392          * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If the key
00393          * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
00394          *
00395          * \param key key to search.
00396          *
00397          * \return the structured value tree.
00398          */
00399         StructuredValueTree getSubTree( std::string key ) const;
00400 
00401         /**
00402          * Gets all matching subtrees. The subtrees returned contain the node you have searched. If multiple objects match the key, a list of
00403          * subtrees is returned. If nothing matches, the returned list is empty. If the key
00404          * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
00405          *
00406          * \param key key to search.
00407          *
00408          * \return the structured value trees.
00409          */
00410         std::vector< StructuredValueTree > getSubTrees( std::string key ) const;
00411 
00412     protected:
00413     private:
00414         /**
00415          * The nodes on file level (the root of the tree).
00416          */
00417         FileType m_file;
00418 
00419         /**
00420          * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
00421          * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
00422          *
00423          * \param current current element to check and recursively traverse
00424          * \param keyIter the current path element
00425          * \param keyEnd the end iter. Just used to stop iteration if the key has no further elements
00426          * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
00427          * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
00428          */
00429         void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter,
00430                                            std::vector< std::string >::const_iterator keyEnd,
00431                                            std::vector< ObjectType >& resultObjects,
00432                                            std::vector< KeyValueType >& resultValues ) const;
00433 
00434         /**
00435          * Recursively fills a result vector using a given path. It checks whether the elements on file level match the key. If yes,
00436          * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
00437          *
00438          * \param current the file-level list of elements to check and recursively traverse
00439          * \param key the path
00440          * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
00441          * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
00442          */
00443         void traverse( FileType current, std::string key,
00444                                          std::vector< ObjectType >& resultObjects,
00445                                          std::vector< KeyValueType >& resultValues ) const;
00446     };
00447 
00448     /**
00449      * Parse the given input string and return the syntax tree. Throws an exception WParseError on error.
00450      *
00451      * \param input the input to parse.
00452      *
00453      * \return the syntax tree in plain format. You should use StructuredValueTree to work with it.
00454      *
00455      * \throw WParseError on parse error
00456      */
00457     FileType parseFromString( std::string input );
00458 
00459     /**
00460      * Parse the given file and return the syntax tree. Throws an exception WParseError on error.
00461      *
00462      * \param path the file to parse
00463      *
00464      * \return the syntax tree in plain format. You should use StructuredValueTree to work with it.
00465      *
00466      * \throw WParseError on parse error
00467      * \throw WFileNotFound in case the specified file could not be opened
00468      */
00469     FileType parseFromFile( boost::filesystem::path path );
00470 
00471     template< typename T >
00472     T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const
00473     {
00474         const std::vector< T > fallback( 1, defaultValue );  // a one-element fallback keeps the result of getValues() non-empty
00475         return getValues< T >( key, fallback ).front();
00476     }
00477 
00478     template< typename T >
00479     std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const
00480     {
00481         // look the path up first; the defaults only matter if this comes back empty
00482         std::vector< T > matches = getValues< T >( key );
00483         if( matches.empty() )
00484         {
00485             // nothing matched the path: answer with a copy of the caller's defaults
00486             return defaults;
00487         }
00488         // at least one value matched, so the defaults are ignored
00489         return matches;
00490     }
00491 
00492     template< typename T >
00493     T StructuredValueTree::operator[]( std::string key ) const
00494     {
00495         std::vector< T > r = getValues< T >( key );
00496         if( r.size() )
00497         {
00498             return *r.begin();
00499         }
00500         else
00501         {
00502             throw WNotFound( "The key \"" + key + "\" was not found." );
00503         }
00504     }
00505 
00506     /**
00507      * Visitor telling whether a \ref WStructuredTextParser::MemberType variant currently holds a key-value pair (a leaf) or something else.
00508      */
00509     class IsLeafVisitor: public boost::static_visitor< bool >
00510     {
00511     public:
00512         /**
00513          * Catch-all for every alternative that is not a key-value pair.
00514          *
00515          * \tparam T the visited type, i.e. \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::CommentType
00516          * \return always false, as objects and comments are no leaves
00517          */
00518         template< typename T >
00519         bool operator()( const T& /* element */ ) const
00520         {
00521             return false;
00522         }
00523 
00524         /**
00525          * Chosen for key-value pairs, which are the leaves of the tree.
00526          *
00527          * \return always true, as a key-value pair was visited
00528          */
00529         bool operator()( const KeyValueType& /* element */ ) const
00530         {
00531             return true;
00532         }
00533     };
00534 
00535     /**
00536      * Visitor telling whether a \ref WStructuredTextParser::MemberType variant currently holds a comment.
00537      */
00538     class IsCommentVisitor: public boost::static_visitor< bool >
00539     {
00540     public:
00541         /**
00542          * Catch-all for every alternative that is not a comment.
00543          *
00544          * \tparam T the visited type, i.e. \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::KeyValueType
00545          * \return always false, as objects and key-value pairs are no comments
00546          */
00547         template< typename T >
00548         bool operator()( const T& /* element */ ) const
00549         {
00550             return false;
00551         }
00552 
00553         /**
00554          * Chosen for comments.
00555          *
00556          * \return always true, as a comment was visited
00557          */
00558         bool operator()( const CommentType& /* element */ ) const
00559         {
00560             return true;
00561         }
00562     };
00563 
00564     /**
00565      * Visitor reading the m_name member of \ref WStructuredTextParser::ObjectType and \ref WStructuredTextParser::KeyValueType nodes.
00566      */
00567     class NameQueryVisitor: public boost::static_visitor< std::string >
00568     {
00569     public:
00570         /**
00571          * Chosen for every alternative that has an m_name member, i.e. objects and key-value pairs.
00572          *
00573          * \param element the visited object or key-value pair.
00574          *
00575          * \tparam T one of the named types of the \ref WStructuredTextParser::MemberType variant
00576          * \return a copy of the element's m_name member
00577          */
00578         template< typename T >
00579         std::string operator()( const T& element ) const
00580         {
00581             return element.m_name;
00582         }
00583 
00584         /**
00585          * Chosen for comments, which have no name.
00586          *
00587          * \return an empty string.
00588          */
00589         std::string operator()( const CommentType& /* element */ ) const
00590         {
00591             return std::string();
00592         }
00593     };
00594 
00595     template< typename T >
00596     std::vector< T > StructuredValueTree::getValues( std::string key ) const
00597     {
00598         // traverse() reports matching objects and key-value pairs separately; only the pairs are used below
00599         std::vector< ObjectType > matchedObjects;
00600         std::vector< KeyValueType > matchedPairs;
00601         traverse( m_file, key, matchedObjects, matchedPairs );
00602 
00603         // convert each matched value string to T, keeping the order traverse() produced
00604         std::vector< T > converted;
00605         typedef std::vector< KeyValueType >::size_type IndexType;
00606         for( IndexType idx = 0; idx < matchedPairs.size(); ++idx )
00607         {
00608             try
00609             {
00610                 const std::string& text = matchedPairs[ idx ].m_value;
00611                 converted.push_back( string_utils::fromString< T >( text ) );
00612             }
00613             catch( ... )
00614             {
00615                 // whatever was thrown during conversion is reported to the caller as a WTypeMismatch
00616                 throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." );
00617             }
00618         }
00619 
00620         // every matched value has been converted
00621         return converted;
00622     }
00623 }
00624 
00625 #endif  // WSTRUCTUREDTEXTPARSER_H
00626