OpenWalnut
1.4.0
|
00001 //--------------------------------------------------------------------------- 00002 // 00003 // Project: OpenWalnut ( http://www.openwalnut.org ) 00004 // 00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS 00006 // For more information see http://www.openwalnut.org/copying 00007 // 00008 // This file is part of OpenWalnut. 00009 // 00010 // OpenWalnut is free software: you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as published by 00012 // the Free Software Foundation, either version 3 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // OpenWalnut is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 // GNU Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public License 00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>. 00022 // 00023 //--------------------------------------------------------------------------- 00024 00025 #ifndef WSTRUCTUREDTEXTPARSER_H 00026 #define WSTRUCTUREDTEXTPARSER_H 00027 00028 #include <algorithm> 00029 #include <iostream> 00030 #include <map> 00031 #include <ostream> 00032 #include <string> 00033 #include <vector> 00034 00035 #include <boost/config/warning_disable.hpp> 00036 #include <boost/spirit/include/qi.hpp> 00037 #include <boost/spirit/include/phoenix_core.hpp> 00038 #include <boost/spirit/include/phoenix_operator.hpp> 00039 #include <boost/spirit/include/phoenix_fusion.hpp> 00040 #include <boost/spirit/include/phoenix_stl.hpp> 00041 #include <boost/spirit/include/phoenix_object.hpp> 00042 #include <boost/fusion/include/adapt_struct.hpp> 00043 #include <boost/variant/recursive_variant.hpp> 00044 #include <boost/fusion/include/io.hpp> 00045 #include <boost/filesystem/path.hpp> 00046 00047 #include "WStringUtils.h" 00048 #include "exceptions/WTypeMismatch.h" 00049 #include "exceptions/WNotFound.h" 00050 00051 /** 00052 * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST) 00053 * for the given input which later can be traversed. 00054 */ 00055 namespace WStructuredTextParser 00056 { 00057 //! we use these quite often, so define some short alias for them: 00058 namespace qi = boost::spirit::qi; 00059 00060 //! we use these quite often, so define some short alias for them: 00061 namespace fusion = boost::fusion; 00062 00063 //! we use these quite often, so define some short alias for them: 00064 namespace ascii = boost::spirit::ascii; 00065 00066 //! we use these quite often, so define some short alias for them: 00067 namespace phoenix = boost::phoenix; 00068 00069 //! we use these quite often, so define some short alias for them: 00070 namespace spirit = boost::spirit; 00071 00072 /** 00073 * The type used for keys 00074 */ 00075 typedef std::string KeyType; 00076 00077 /** 00078 * The type used for values 00079 */ 00080 typedef std::string ValueType; 00081 00082 /** 00083 * The type used for comments 00084 */ 00085 typedef std::string CommentType; 00086 00087 /** 00088 * Forward declare the object type. 00089 */ 00090 struct ObjectType; 00091 00092 /** 00093 * KeyValueType - a tuple containing name and value 00094 */ 00095 struct KeyValueType 00096 { 00097 /** 00098 * Name string. 00099 */ 00100 std::string m_name; 00101 /** 00102 * Value string. 00103 */ 00104 std::string m_value; 00105 }; 00106 00107 /** 00108 * A node inside the AST is either another object or a key-value pair. 00109 */ 00110 typedef 00111 boost::variant< 00112 boost::recursive_wrapper< ObjectType >, 00113 KeyValueType, 00114 CommentType 00115 > 00116 MemberType; 00117 00118 /** 00119 * An object is always a name and contains several further nodes 00120 */ 00121 struct ObjectType 00122 { 00123 /** 00124 * Name of the object 00125 */ 00126 std::string m_name; 00127 00128 /** 00129 * Object's members 00130 */ 00131 std::vector< MemberType > m_nodes; 00132 }; 00133 00134 /** 00135 * An object representing all objects and comments on file level. 00136 */ 00137 typedef std::vector< MemberType > FileType; 00138 } 00139 00140 00141 // Doxygen has problems with the following 00142 // \cond Suppress_Doxygen 00143 /** 00144 * Tell boost::fusion about our types. 00145 */ 00146 BOOST_FUSION_ADAPT_STRUCT( 00147 WStructuredTextParser::ObjectType, 00148 ( std::string, m_name ) 00149 ( std::vector< WStructuredTextParser::MemberType >, m_nodes ) 00150 ) 00151 00152 /** 00153 * Tell boost::fusion about our types. 00154 */ 00155 BOOST_FUSION_ADAPT_STRUCT( 00156 WStructuredTextParser::KeyValueType, 00157 ( std::string, m_name ) 00158 ( std::string, m_value ) 00159 ) 00160 // \endcond 00161 00162 namespace WStructuredTextParser 00163 { 00164 /** 00165 * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to 00166 * successfully parse a file: 00167 * <ul> 00168 * <li>Key: identifier, needs to be a-z,A-Z,0-9,_ 00169 * <li>Object: defined as key + { ... } 00170 * <li> ";" is optional after objects 00171 * <li>Key-Value Pair: is a member of an object and is defines as key="value". 00172 * <li>Comments begin with // 00173 * </ul> 00174 * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt. 00175 * 00176 * \tparam Iterator the iterator, used to get the input stream 00177 */ 00178 template <typename Iterator> 00179 struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type > 00180 { 00181 /** 00182 * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse. 00183 * 00184 * \param error Will contain error message if any occurs during functions execution 00185 */ 00186 explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref 00187 { 00188 // a key begins with a letter 00189 key %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" ); 00190 // a value is a quoted string. Multi-line strings possible 00191 value %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"'; 00192 00193 // a pair is: key = value 00194 kvpair %= key >> '=' >> value >> ';'; 00195 // a comment is // + arbitrary symbols 00196 comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ]; 00197 // a object is a name, and a set of nested objects or key-value pairs 00198 object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" ); 00199 // a file is basically an object without name. 00200 file %= *( object | kvpair | comment ); 00201 00202 // provide names for these objects for better readability of parse errors 00203 object.name( "object" ); 00204 kvpair.name( "key-value pair" ); 00205 key.name( "key" ); 00206 value.name( "value" ); 00207 file.name( "file" ); 00208 comment.name( "comment" ); 00209 00210 // provide error handlers 00211 // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work. 00212 qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 ); 00213 qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 ); 00214 qi::on_error< qi::fail >( key, error << phoenix::val( "Error: " ) << qi::_4 ); 00215 qi::on_error< qi::fail >( value, error << phoenix::val( "Error: " ) << qi::_4 ); 00216 qi::on_error< qi::fail >( comment, error << phoenix::val( "Error: " ) << qi::_4 ); 00217 qi::on_error< qi::fail >( file, error << phoenix::val( "Error: " ) << qi::_4 ); 00218 } 00219 00220 // Rules we use 00221 00222 /** 00223 * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition. 00224 */ 00225 qi::rule< Iterator, ObjectType(), ascii::space_type > object; 00226 00227 /** 00228 * Rule for files. Basically the same as an object but without name 00229 */ 00230 qi::rule< Iterator, FileType(), ascii::space_type > file; 00231 00232 /** 00233 * Rule for comments. Ignored. 00234 */ 00235 qi::rule< Iterator, CommentType(), ascii::space_type > comment; 00236 00237 /** 00238 * Key-value pair rule. See constructor for exact definition. 00239 */ 00240 qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair; 00241 00242 /** 00243 * Key rule. See constructor for exact definition. 00244 */ 00245 qi::rule< Iterator, KeyType() > key; 00246 00247 /** 00248 * Value rule. See constructor for exact definition. 00249 */ 00250 qi::rule< Iterator, ValueType() > value; 00251 }; 00252 00253 /** 00254 * This simplifies working with a tree in a \ref WStructuredTextParser::FileType instance. It provides easy query and check methods. It does not 00255 * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As 00256 * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify 00257 * the type. 00258 * 00259 * This tree uses the types in the WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive 00260 * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the 00261 * \ref WStructuredTextParser::ObjectType tree. 00262 * 00263 * \note The syntax of the parsed files is defined by the parser itself. See WStructuredTextParser::Grammar for details. 00264 * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes 00265 * it. 00266 */ 00267 class StructuredValueTree 00268 { 00269 friend class WStructuredTextParserTest; 00270 public: 00271 /** 00272 * This char is used as separator for identifying values in the tree. NEVER change this value. 00273 */ 00274 static const std::string Separator; 00275 00276 /** 00277 * Construct the instance given the original parsing structure. 00278 * 00279 * \param file the parsing result structure (the root node). 00280 */ 00281 explicit StructuredValueTree( const FileType& file ); 00282 00283 /** 00284 * Construct the instance given a text as string. 00285 * 00286 * \param toParse the text to parse 00287 */ 00288 explicit StructuredValueTree( const std::string& toParse ); 00289 00290 /** 00291 * Construct the instance given a path to a file to load. 00292 * 00293 * \param file the path to a file to load. 00294 */ 00295 explicit StructuredValueTree( const boost::filesystem::path& file ); 00296 00297 /** 00298 * Creates an empty tree. It will contain no information at all. 00299 */ 00300 StructuredValueTree(); 00301 00302 /** 00303 * Cleanup. 00304 */ 00305 virtual ~StructuredValueTree(); 00306 00307 /** 00308 * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to 00309 * true. 00310 * 00311 * \param key path to the value 00312 * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function 00313 * to return true. 00314 * 00315 * \return true if existing. 00316 */ 00317 bool exists( std::string key, bool valuesOnly = false ) const; 00318 00319 /** 00320 * It is possible that there are multiple values matching a key. This method counts them. 00321 * 00322 * \param key path to the values to count 00323 * \param valuesOnly if true, it only counts values matching the given name. 00324 * 00325 * \return the number of found values. 00326 */ 00327 size_t count( std::string key, bool valuesOnly = false ) const; 00328 00329 /** 00330 * Queries the value with the given name. If it is not found, the default value will be returned. 00331 * 00332 * \param key path to the value. Paths to whole objects are invalid. 00333 * \param defaultValue the default if no value was found 00334 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00335 * 00336 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00337 * 00338 * \return the value 00339 * 00340 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00341 */ 00342 template< typename T > 00343 T getValue( std::string key, const T& defaultValue ) const; 00344 00345 /** 00346 * Queries the list of values matching the given path. If it is not found, the default value will be returned. 00347 * 00348 * \param key path to the value. Paths to whole objects are invalid. 00349 * \param defaults the defaults if no value was found 00350 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00351 * 00352 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00353 * 00354 * \return the value 00355 * 00356 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00357 */ 00358 template< typename T > 00359 std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const; 00360 00361 /** 00362 * Queries the list of values matching the given path. If it is not found, an empty results vector is returned. 00363 * 00364 * \param key path to the value. Paths to whole objects are invalid. 00365 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00366 * 00367 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00368 * 00369 * \return the value vector. Might be empty if no elements where found. 00370 * 00371 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00372 */ 00373 template< typename T > 00374 std::vector< T > getValues( std::string key ) const; 00375 00376 /** 00377 * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first 00378 * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count 00379 * 00380 * \param key path to the value. Paths to whole objects are invalid. 00381 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00382 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00383 * \throw WNotFound if the key:value pair does not exist 00384 * 00385 * \return the value as copy to avoid any const_cast which would allow modification. 00386 */ 00387 template< typename T > 00388 T operator[]( std::string key ) const; 00389 00390 /** 00391 * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are 00392 * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key 00393 * matches a key-value pair, nothing is returned. This means, this method is only useful for objects. 00394 * 00395 * \param key key to search. 00396 * 00397 * \return the structured value tree. 00398 */ 00399 StructuredValueTree getSubTree( std::string key ) const; 00400 00401 /** 00402 * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of 00403 * subtrees is returned. If nothing matches, the returned list is empty. If they key 00404 * matches a key-value pair, nothing is returned. This means, this method is only useful for objects. 00405 * 00406 * \param key key to search. 00407 * 00408 * \return the structured value trees. 00409 */ 00410 std::vector< StructuredValueTree > getSubTrees( std::string key ) const; 00411 00412 protected: 00413 private: 00414 /** 00415 * The named values. 00416 */ 00417 FileType m_file; 00418 00419 /** 00420 * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes, 00421 * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key. 00422 * 00423 * \param current current element to check and recursively traverse 00424 * \param keyIter the current path element 00425 * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements 00426 * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType 00427 * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType 00428 */ 00429 void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter, 00430 std::vector< std::string >::const_iterator keyEnd, 00431 std::vector< ObjectType >& resultObjects, 00432 std::vector< KeyValueType >& resultValues ) const; 00433 00434 /** 00435 * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes, 00436 * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key. 00437 * 00438 * \param current current element to check and recursively traverse 00439 * \param key the path 00440 * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType 00441 * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType 00442 */ 00443 void traverse( FileType current, std::string key, 00444 std::vector< ObjectType >& resultObjects, 00445 std::vector< KeyValueType >& resultValues ) const; 00446 }; 00447 00448 /** 00449 * Parse the given input and return the syntax tree. Throws an exception WParseError on error. 00450 * 00451 * \param input the input to parse. 00452 * 00453 * \return the syntax tree in plain format. You should use WStructuredValueTree to use this. 00454 * 00455 * \throw WParseError on parse error 00456 */ 00457 FileType parseFromString( std::string input ); 00458 00459 /** 00460 * Parse the given input and return the syntax tree. Throws an exception WParseError on error. 00461 * 00462 * \param path the file to parse 00463 * 00464 * \return the syntax tree in plain format. You should use WStructuredValueTree to use this. 00465 * 00466 * \throw WParseError on parse error 00467 * \throw WFileNotFOund in case the specified file could not be opened 00468 */ 00469 FileType parseFromFile( boost::filesystem::path path ); 00470 00471 template< typename T > 00472 T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const 00473 { 00474 // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator 00475 return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin(); 00476 } 00477 00478 template< typename T > 00479 std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const 00480 { 00481 std::vector< T > r = getValues< T >( key ); 00482 if( r.size() ) 00483 { 00484 return r; 00485 } 00486 else 00487 { 00488 return defaults; 00489 } 00490 } 00491 00492 template< typename T > 00493 T StructuredValueTree::operator[]( std::string key ) const 00494 { 00495 std::vector< T > r = getValues< T >( key ); 00496 if( r.size() ) 00497 { 00498 return *r.begin(); 00499 } 00500 else 00501 { 00502 throw WNotFound( "The key \"" + key + "\" was not found." ); 00503 } 00504 } 00505 00506 /** 00507 * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a object or key-value pair. 00508 */ 00509 class IsLeafVisitor: public boost::static_visitor< bool > 00510 { 00511 public: 00512 /** 00513 * Returns always true as it is only called for key-value pairs. 00514 * 00515 * \return always true since it identified an key-value pair 00516 */ 00517 bool operator()( const KeyValueType& /* element */ ) const 00518 { 00519 return true; 00520 } 00521 00522 /** 00523 * Returns always false as it is only called for objects. 00524 * 00525 * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::CommentType 00526 * \return always false since it identified an Object/comment 00527 */ 00528 template< typename T > 00529 bool operator()( const T& /* element */ ) const 00530 { 00531 return false; 00532 } 00533 }; 00534 00535 /** 00536 * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a comment. 00537 */ 00538 class IsCommentVisitor: public boost::static_visitor< bool > 00539 { 00540 public: 00541 /** 00542 * Returns always true as it is only called for comments. 00543 * 00544 * \return always true 00545 */ 00546 bool operator()( const CommentType& /* element */ ) const 00547 { 00548 return true; 00549 } 00550 00551 /** 00552 * Returns always false as it is only called for objects and key-value pairs. 00553 * 00554 * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::KeyValueType 00555 * \return always false since it identified an Object/KeyValueType 00556 */ 00557 template< typename T > 00558 bool operator()( const T& /* element */ ) const 00559 { 00560 return false; 00561 } 00562 }; 00563 00564 /** 00565 * Visitor to query the m_name member of \ref WStructuredTextParser::ObjectType and \ref WStructuredTextParser::KeyValueType. 00566 */ 00567 class NameQueryVisitor: public boost::static_visitor< std::string > 00568 { 00569 public: 00570 /** 00571 * Comments have no name. 00572 * 00573 * \return empty string. 00574 */ 00575 std::string operator()( const CommentType& /* element */ ) const 00576 { 00577 return ""; 00578 } 00579 00580 /** 00581 * Returns the m_name member of the specified object or key-valuev pair. 00582 * 00583 * \param element Specified object. 00584 * 00585 * \tparam T one of the types of the \ref WStructuredTextParser::MemberType variant 00586 * \return always true since it identified an key-value pair 00587 */ 00588 template< typename T > 00589 std::string operator()( const T& element ) const 00590 { 00591 return element.m_name; 00592 } 00593 }; 00594 00595 template< typename T > 00596 std::vector< T > StructuredValueTree::getValues( std::string key ) const 00597 { 00598 // traverse the tree 00599 std::vector< ObjectType > rObj; 00600 std::vector< KeyValueType > rKV; 00601 00602 // traverse 00603 traverse( m_file, key, rObj, rKV ); 00604 00605 // copy to result vector and cast 00606 std::vector< T > r; 00607 for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i ) 00608 { 00609 try 00610 { 00611 r.push_back( string_utils::fromString< T >( ( *i ).m_value ) ); 00612 } 00613 catch( ... ) 00614 { 00615 // convert the standard exception (if cannot convert) to a WTypeMismnatch. 00616 throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." ); 00617 } 00618 } 00619 00620 // done 00621 return r; 00622 } 00623 } 00624 00625 #endif // WSTRUCTUREDTEXTPARSER_H 00626