OpenWalnut 1.3.1
WStructuredTextParser.h
00001 //---------------------------------------------------------------------------
00002 //
00003 // Project: OpenWalnut ( http://www.openwalnut.org )
00004 //
00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
00006 // For more information see http://www.openwalnut.org/copying
00007 //
00008 // This file is part of OpenWalnut.
00009 //
00010 // OpenWalnut is free software: you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as published by
00012 // the Free Software Foundation, either version 3 of the License, or
00013 // (at your option) any later version.
00014 //
00015 // OpenWalnut is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 // GNU Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public License
00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
00022 //
00023 //---------------------------------------------------------------------------
00024 
00025 #ifndef WSTRUCTUREDTEXTPARSER_H
00026 #define WSTRUCTUREDTEXTPARSER_H
00027 
00028 #include <algorithm>
00029 #include <iostream>
00030 #include <map>
00031 #include <ostream>
00032 #include <string>
00033 #include <vector>
00034 
00035 #include <boost/config/warning_disable.hpp>
00036 #include <boost/spirit/include/qi.hpp>
00037 #include <boost/spirit/include/phoenix_core.hpp>
00038 #include <boost/spirit/include/phoenix_operator.hpp>
00039 #include <boost/spirit/include/phoenix_fusion.hpp>
00040 #include <boost/spirit/include/phoenix_stl.hpp>
00041 #include <boost/spirit/include/phoenix_object.hpp>
00042 #include <boost/fusion/include/adapt_struct.hpp>
00043 #include <boost/variant/recursive_variant.hpp>
00044 #include <boost/fusion/include/io.hpp>
00045 #include <boost/filesystem/path.hpp>
00046 
00047 #include "WStringUtils.h"
00048 #include "exceptions/WTypeMismatch.h"
00049 #include "exceptions/WNotFound.h"
00050 
00051 /**
00052  * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST)
00053  * for the given input which later can be traversed.
00054  */
00055 namespace WStructuredTextParser
00056 {
00057     //! we use these quite often, so define some short alias for them:
00058     namespace qi = boost::spirit::qi;
00059 
00060     //! we use these quite often, so define some short alias for them:
00061     namespace fusion = boost::fusion;
00062 
00063     //! we use these quite often, so define some short alias for them:
00064     namespace ascii = boost::spirit::ascii;
00065 
00066     //! we use these quite often, so define some short alias for them:
00067     namespace phoenix = boost::phoenix;
00068 
00069     //! we use these quite often, so define some short alias for them:
00070     namespace spirit = boost::spirit;
00071 
00072     /**
00073      * The type used for keys
00074      */
00075     typedef std::string KeyType;
00076 
00077     /**
00078      * The type used for values
00079      */
00080     typedef std::string ValueType;
00081 
00082     /**
00083      * The type used for comments
00084      */
00085     typedef std::string CommentType;
00086 
00087     /**
00088      * Forward declare the object type.
00089      */
00090     struct ObjectType;
00091 
00092     /**
00093      * KeyValueType - a tuple containing name and value
00094      */
00095     struct KeyValueType
00096     {
00097         /**
00098          * Name string.
00099          */
00100         std::string m_name;
00101         /**
00102          * Value string.
00103          */
00104         std::string m_value;
00105     };
00106 
00107     /**
00108      * A node inside the AST is either another object or a key-value pair.
00109      */
00110     typedef
00111         boost::variant<
00112             boost::recursive_wrapper< ObjectType >,
00113             KeyValueType,
00114             CommentType
00115         >
00116     MemberType;
00117 
00118     /**
00119      * An object is always a name and contains several further nodes
00120      */
00121     struct ObjectType
00122     {
00123         /**
00124          * Name of the object
00125          */
00126         std::string m_name;
00127 
00128         /**
00129          * Object's members
00130          */
00131         std::vector< MemberType > m_nodes;
00132     };
00133 
00134     /**
00135      * An object representing all objects and comments on file level.
00136      */
00137     typedef std::vector< MemberType > FileType;
00138 }
00139 
00140 
00141 // Doxygen has problems with the following
00142 // \cond Suppress_Doxygen
00143 /**
00144  * Tell boost::fusion about our types.
00145  */
00146 BOOST_FUSION_ADAPT_STRUCT(
00147     WStructuredTextParser::ObjectType,
00148     ( std::string, m_name )
00149     ( std::vector< WStructuredTextParser::MemberType >, m_nodes )
00150     )
00151 
00152 /**
00153  * Tell boost::fusion about our types.
00154  */
00155 BOOST_FUSION_ADAPT_STRUCT(
00156     WStructuredTextParser::KeyValueType,
00157     ( std::string, m_name )
00158     ( std::string, m_value )
00159     )
00160 // \endcond
00161 
00162 namespace WStructuredTextParser
00163 {
00164     /**
00165      * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to
00166      * successfully parse a file:
00167      * <ul>
00168      *  <li>Key: identifier, needs to be a-z,A-Z,0-9,_
00169      *  <li>Object: defined as key + { ... }
00170      *  <li> ";" is optional after objects
00171      *  <li>Key-Value Pair: is a member of an object and is defines as key="value".
00172      *  <li>Comments begin with //
00173      * </ul>
00174      * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt.
00175      *
00176      * \tparam Iterator the iterator, used to get the input stream
00177      */
00178     template <typename Iterator>
00179     struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type >
00180     {
00181         /**
00182          * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse.
00183          *
00184          * \param error Will contain error message if any occurs during functions execution
00185          */
00186         explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref
00187         {
00188             // a key begins with a letter
00189             key    %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" );
00190             // a value is a quoted string. Multi-line strings possible
00191             value  %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"';
00192 
00193             // a pair is: key = value
00194             kvpair %= key >> '=' >> value >> ';';
00195             // a comment is // + arbitrary symbols
00196             comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ];
00197             // a object is a name, and a set of nested objects or key-value pairs
00198             object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" );
00199             // a file is basically an object without name.
00200             file %= *( object | kvpair | comment );
00201 
00202             // provide names for these objects for better readability of parse errors
00203             object.name( "object" );
00204             kvpair.name( "key-value pair" );
00205             key.name( "key" );
00206             value.name( "value" );
00207             file.name( "file" );
00208             comment.name( "comment" );
00209 
00210             // provide error handlers
00211             // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work.
00212             qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 );
00213             qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 );
00214             qi::on_error< qi::fail >( key,    error << phoenix::val( "Error: " ) << qi::_4 );
00215             qi::on_error< qi::fail >( value,  error << phoenix::val( "Error: " ) << qi::_4 );
00216             qi::on_error< qi::fail >( comment,  error << phoenix::val( "Error: " ) << qi::_4 );
00217             qi::on_error< qi::fail >( file,  error << phoenix::val( "Error: " ) << qi::_4 );
00218        }
00219 
00220         // Rules we use
00221 
00222         /**
00223          * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition.
00224          */
00225         qi::rule< Iterator, ObjectType(), ascii::space_type > object;
00226 
00227         /**
00228          * Rule for files. Basically the same as an object but without name
00229          */
00230         qi::rule< Iterator, FileType(), ascii::space_type > file;
00231 
00232         /**
00233          * Rule for comments. Ignored.
00234          */
00235         qi::rule< Iterator, CommentType(), ascii::space_type > comment;
00236 
00237         /**
00238          * Key-value pair rule. See constructor for exact definition.
00239          */
00240         qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair;
00241 
00242         /**
00243          * Key rule. See constructor for exact definition.
00244          */
00245         qi::rule< Iterator, KeyType() > key;
00246 
00247         /**
00248          * Value rule. See constructor for exact definition.
00249          */
00250         qi::rule< Iterator, ValueType() > value;
00251     };
00252 
00253     /**
00254      * This simplifies working with a tree in a \ref FileType instance. It provides easy query and check methods. It does not
00255      * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As
00256      * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify
00257      * the type.
00258      *
00259      * This tree uses the types in the \ref WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive
00260      * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the \ref ObjectType tree.
00261      *
00262      * \note The syntax of the parsed files is defined by the parser itself. See \ref Grammar for details.
00263      * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes
00264      * it.
00265      */
00266     class StructuredValueTree
00267     {
00268         friend class WStructuredTextParserTest;
00269     public:
00270         /**
00271          * This char is used as separator for identifying values in the tree. NEVER change this value.
00272          */
00273         static const std::string Separator;
00274 
00275         /**
00276          * Construct the instance given the original parsing structure.
00277          *
00278          * \param file the parsing result structure (the root node).
00279          */
00280         explicit StructuredValueTree( const FileType& file );
00281 
00282         /**
00283          * Construct the instance given a text as string.
00284          *
00285          * \param toParse the text to parse
00286          */
00287         explicit StructuredValueTree( const std::string& toParse );
00288 
00289         /**
00290          * Construct the instance given a path to a file to load.
00291          *
00292          * \param file the path to a file to load.
00293          */
00294         explicit StructuredValueTree( const boost::filesystem::path& file );
00295 
00296         /**
00297          * Creates an empty tree. It will contain no information at all.
00298          */
00299         StructuredValueTree();
00300 
00301         /**
00302          * Cleanup.
00303          */
00304         virtual ~StructuredValueTree();
00305 
00306         /**
00307          * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to
00308          * true.
00309          *
00310          * \param key path to the value
00311          * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function
00312          * to return true.
00313          *
00314          * \return true if existing.
00315          */
00316         bool exists( std::string key, bool valuesOnly = false ) const;
00317 
00318         /**
00319          * It is possible that there are multiple values matching a key. This method counts them.
00320          *
00321          * \param key path to the values to count
00322          * \param valuesOnly if true, it only counts values matching the given name.
00323          *
00324          * \return the number of found values.
00325          */
00326         size_t count( std::string key, bool valuesOnly = false ) const;
00327 
00328         /**
00329          * Queries the value with the given name. If it is not found, the default value will be returned.
00330          *
00331          * \param key path to the value. Paths to whole objects are invalid.
00332          * \param defaultValue the default if no value was found
00333          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00334          *
00335          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00336          *
00337          * \return the value
00338          *
00339          * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
00340          */
00341         template< typename T >
00342         T getValue( std::string key, const T& defaultValue ) const;
00343 
00344         /**
00345          * Queries the list of values matching the given path. If it is not found, the default value will be returned.
00346          *
00347          * \param key path to the value. Paths to whole objects are invalid.
00348          * \param defaults the defaults if no value was found
00349          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00350          *
00351          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00352          *
00353          * \return the value
00354          *
00355          * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
00356          */
00357         template< typename T >
00358         std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const;
00359 
00360         /**
00361          * Queries the list of values matching the given path. If it is not found, an empty results vector is returned.
00362          *
00363          * \param key path to the value. Paths to whole objects are invalid.
00364          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00365          *
00366          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00367          *
00368          * \return the value vector. Might be empty if no elements where found.
00369          *
00370          * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
00371          */
00372         template< typename T >
00373         std::vector< T > getValues( std::string key ) const;
00374 
00375         /**
00376          * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first
00377          * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count
00378          *
00379          * \param key path to the value. Paths to whole objects are invalid.
00380          * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
00381          * \throw WTypeMismatch if the value cannot be cast to the specified target type
00382          * \throw WNotFound if the key:value pair does not exist
00383          *
00384          * \return the value as copy to avoid any const_cast which would allow modification.
00385          */
00386         template< typename T >
00387         T operator[]( std::string key ) const;
00388 
00389         /**
00390          * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are
00391          * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key
00392          * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
00393          *
00394          * \param key key to search.
00395          *
00396          * \return the structured value tree.
00397          */
00398         StructuredValueTree getSubTree( std::string key ) const;
00399 
00400         /**
00401          * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of
00402          * subtrees is returned. If nothing matches, the returned list is empty. If they key
00403          * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
00404          *
00405          * \param key key to search.
00406          *
00407          * \return the structured value trees.
00408          */
00409         std::vector< StructuredValueTree > getSubTrees( std::string key ) const;
00410 
00411     protected:
00412     private:
00413         /**
00414          * The named values.
00415          */
00416         FileType m_file;
00417 
00418         /**
00419          * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
00420          * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
00421          *
00422          * \param current current element to check and recursively traverse
00423          * \param keyIter the current path element
00424          * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements
00425          * \param resultObjects all matching instances of type \ref ObjectType
00426          * \param resultValues all matching instances of type \ref KeyValueType
00427          */
00428         void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter,
00429                                            std::vector< std::string >::const_iterator keyEnd,
00430                                            std::vector< ObjectType >& resultObjects,
00431                                            std::vector< KeyValueType >& resultValues ) const;
00432 
00433         /**
00434          * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
00435          * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
00436          *
00437          * \param current current element to check and recursively traverse
00438          * \param key the path
00439          * \param resultObjects all matching instances of type \ref ObjectType
00440          * \param resultValues all matching instances of type \ref KeyValueType
00441          */
00442         void traverse( FileType current, std::string key,
00443                                          std::vector< ObjectType >& resultObjects,
00444                                          std::vector< KeyValueType >& resultValues ) const;
00445     };
00446 
00447     /**
00448      * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
00449      *
00450      * \param input the input to parse.
00451      *
00452      * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
00453      *
00454      * \throw WParseError on parse error
00455      */
00456     FileType parseFromString( std::string input );
00457 
00458     /**
00459      * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
00460      *
00461      * \param path the file to parse
00462      *
00463      * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
00464      *
00465      * \throw WParseError on parse error
00466      * \throw WFileNotFOund in case the specified file could not be opened
00467      */
00468     FileType parseFromFile( boost::filesystem::path path );
00469 
00470     template< typename T >
00471     T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const
00472     {
00473         // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator
00474         return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin();
00475     }
00476 
00477     template< typename T >
00478     std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const
00479     {
00480         std::vector< T > r = getValues< T >( key );
00481         if( r.size() )
00482         {
00483             return r;
00484         }
00485         else
00486         {
00487             return defaults;
00488         }
00489     }
00490 
00491     template< typename T >
00492     T StructuredValueTree::operator[]( std::string key ) const
00493     {
00494         std::vector< T > r = getValues< T >( key );
00495         if( r.size() )
00496         {
00497             return *r.begin();
00498         }
00499         else
00500         {
00501             throw WNotFound( "The key \"" + key + "\" was not found." );
00502         }
00503     }
00504 
00505     /**
00506      * Visitor to identify whether the given variant of type \ref MemberType is a object or key-value pair.
00507      */
00508     class IsLeafVisitor: public boost::static_visitor< bool >
00509     {
00510     public:
00511         /**
00512          * Returns always true as it is only called for key-value pairs.
00513          *
00514          * \return always true since it identified an key-value pair
00515          */
00516         bool operator()( const KeyValueType& /* element */ ) const
00517         {
00518             return true;
00519         }
00520 
00521         /**
00522          * Returns always false as it is only called for objects.
00523          *
00524          * \tparam T the type. Should be \ref ObjectType or \ref CommentType
00525          * \return always false since it identified an Object/comment
00526          */
00527         template< typename T >
00528         bool operator()( const T& /* element */ ) const
00529         {
00530             return false;
00531         }
00532     };
00533 
00534     /**
00535      * Visitor to identify whether the given variant of type \ref MemberType is a comment.
00536      */
00537     class IsCommentVisitor: public boost::static_visitor< bool >
00538     {
00539     public:
00540         /**
00541          * Returns always true as it is only called for comments.
00542          *
00543          * \return always true
00544          */
00545         bool operator()( const CommentType& /* element */ ) const
00546         {
00547             return true;
00548         }
00549 
00550         /**
00551          * Returns always false as it is only called for objects and key-value pairs.
00552          *
00553          * \tparam T the type. Should be \ref ObjectType or \ref KeyValueType
00554          * \return always false since it identified an Object/KeyValueType
00555          */
00556         template< typename T >
00557         bool operator()( const T& /* element */ ) const
00558         {
00559             return false;
00560         }
00561     };
00562 
00563     /**
00564      * Visitor to query the m_name member of \ref ObjectType and \ref KeyValueType.
00565      */
00566     class NameQueryVisitor: public boost::static_visitor< std::string >
00567     {
00568     public:
00569         /**
00570          * Comments have no name.
00571          *
00572          * \return empty string.
00573          */
00574         std::string operator()( const CommentType& /* element */ ) const
00575         {
00576             return "";
00577         }
00578 
00579         /**
00580          * Returns the m_name member of the specified object or key-valuev pair.
00581          *
00582          * \param element Specified object.
00583          *
00584          * \tparam T one of the types of the \ref MemberType variant
00585          * \return always true since it identified an key-value pair
00586          */
00587         template< typename T >
00588         std::string operator()( const T& element ) const
00589         {
00590             return element.m_name;
00591         }
00592     };
00593 
00594     template< typename T >
00595     std::vector< T > StructuredValueTree::getValues( std::string key ) const
00596     {
00597         // traverse the tree
00598         std::vector< ObjectType > rObj;
00599         std::vector< KeyValueType > rKV;
00600 
00601         // traverse
00602         traverse( m_file, key, rObj, rKV );
00603 
00604         // copy to result vector and cast
00605         std::vector< T > r;
00606         for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i )
00607         {
00608             try
00609             {
00610                 r.push_back( string_utils::fromString< T >( ( *i ).m_value ) );
00611             }
00612             catch( ... )
00613             {
00614                 // convert the standard exception (if cannot convert) to a WTypeMismnatch.
00615                 throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." );
00616             }
00617         }
00618 
00619         // done
00620         return r;
00621     }
00622 }
00623 
00624 #endif  // WSTRUCTUREDTEXTPARSER_H
00625