OpenWalnut 1.3.1
|
00001 //--------------------------------------------------------------------------- 00002 // 00003 // Project: OpenWalnut ( http://www.openwalnut.org ) 00004 // 00005 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS 00006 // For more information see http://www.openwalnut.org/copying 00007 // 00008 // This file is part of OpenWalnut. 00009 // 00010 // OpenWalnut is free software: you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as published by 00012 // the Free Software Foundation, either version 3 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // OpenWalnut is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 // GNU Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public License 00021 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>. 00022 // 00023 //--------------------------------------------------------------------------- 00024 00025 #ifndef WSTRUCTUREDTEXTPARSER_H 00026 #define WSTRUCTUREDTEXTPARSER_H 00027 00028 #include <algorithm> 00029 #include <iostream> 00030 #include <map> 00031 #include <ostream> 00032 #include <string> 00033 #include <vector> 00034 00035 #include <boost/config/warning_disable.hpp> 00036 #include <boost/spirit/include/qi.hpp> 00037 #include <boost/spirit/include/phoenix_core.hpp> 00038 #include <boost/spirit/include/phoenix_operator.hpp> 00039 #include <boost/spirit/include/phoenix_fusion.hpp> 00040 #include <boost/spirit/include/phoenix_stl.hpp> 00041 #include <boost/spirit/include/phoenix_object.hpp> 00042 #include <boost/fusion/include/adapt_struct.hpp> 00043 #include <boost/variant/recursive_variant.hpp> 00044 #include <boost/fusion/include/io.hpp> 00045 #include <boost/filesystem/path.hpp> 00046 00047 #include "WStringUtils.h" 00048 #include "exceptions/WTypeMismatch.h" 00049 #include "exceptions/WNotFound.h" 00050 00051 /** 00052 * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST) 00053 * for the given input which later can be traversed. 00054 */ 00055 namespace WStructuredTextParser 00056 { 00057 //! we use these quite often, so define some short alias for them: 00058 namespace qi = boost::spirit::qi; 00059 00060 //! we use these quite often, so define some short alias for them: 00061 namespace fusion = boost::fusion; 00062 00063 //! we use these quite often, so define some short alias for them: 00064 namespace ascii = boost::spirit::ascii; 00065 00066 //! we use these quite often, so define some short alias for them: 00067 namespace phoenix = boost::phoenix; 00068 00069 //! we use these quite often, so define some short alias for them: 00070 namespace spirit = boost::spirit; 00071 00072 /** 00073 * The type used for keys 00074 */ 00075 typedef std::string KeyType; 00076 00077 /** 00078 * The type used for values 00079 */ 00080 typedef std::string ValueType; 00081 00082 /** 00083 * The type used for comments 00084 */ 00085 typedef std::string CommentType; 00086 00087 /** 00088 * Forward declare the object type. 00089 */ 00090 struct ObjectType; 00091 00092 /** 00093 * KeyValueType - a tuple containing name and value 00094 */ 00095 struct KeyValueType 00096 { 00097 /** 00098 * Name string. 00099 */ 00100 std::string m_name; 00101 /** 00102 * Value string. 00103 */ 00104 std::string m_value; 00105 }; 00106 00107 /** 00108 * A node inside the AST is either another object or a key-value pair. 00109 */ 00110 typedef 00111 boost::variant< 00112 boost::recursive_wrapper< ObjectType >, 00113 KeyValueType, 00114 CommentType 00115 > 00116 MemberType; 00117 00118 /** 00119 * An object is always a name and contains several further nodes 00120 */ 00121 struct ObjectType 00122 { 00123 /** 00124 * Name of the object 00125 */ 00126 std::string m_name; 00127 00128 /** 00129 * Object's members 00130 */ 00131 std::vector< MemberType > m_nodes; 00132 }; 00133 00134 /** 00135 * An object representing all objects and comments on file level. 00136 */ 00137 typedef std::vector< MemberType > FileType; 00138 } 00139 00140 00141 // Doxygen has problems with the following 00142 // \cond Suppress_Doxygen 00143 /** 00144 * Tell boost::fusion about our types. 00145 */ 00146 BOOST_FUSION_ADAPT_STRUCT( 00147 WStructuredTextParser::ObjectType, 00148 ( std::string, m_name ) 00149 ( std::vector< WStructuredTextParser::MemberType >, m_nodes ) 00150 ) 00151 00152 /** 00153 * Tell boost::fusion about our types. 00154 */ 00155 BOOST_FUSION_ADAPT_STRUCT( 00156 WStructuredTextParser::KeyValueType, 00157 ( std::string, m_name ) 00158 ( std::string, m_value ) 00159 ) 00160 // \endcond 00161 00162 namespace WStructuredTextParser 00163 { 00164 /** 00165 * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to 00166 * successfully parse a file: 00167 * <ul> 00168 * <li>Key: identifier, needs to be a-z,A-Z,0-9,_ 00169 * <li>Object: defined as key + { ... } 00170 * <li> ";" is optional after objects 00171 * <li>Key-Value Pair: is a member of an object and is defines as key="value". 00172 * <li>Comments begin with // 00173 * </ul> 00174 * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt. 00175 * 00176 * \tparam Iterator the iterator, used to get the input stream 00177 */ 00178 template <typename Iterator> 00179 struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type > 00180 { 00181 /** 00182 * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse. 00183 * 00184 * \param error Will contain error message if any occurs during functions execution 00185 */ 00186 explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref 00187 { 00188 // a key begins with a letter 00189 key %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" ); 00190 // a value is a quoted string. Multi-line strings possible 00191 value %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"'; 00192 00193 // a pair is: key = value 00194 kvpair %= key >> '=' >> value >> ';'; 00195 // a comment is // + arbitrary symbols 00196 comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ]; 00197 // a object is a name, and a set of nested objects or key-value pairs 00198 object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" ); 00199 // a file is basically an object without name. 00200 file %= *( object | kvpair | comment ); 00201 00202 // provide names for these objects for better readability of parse errors 00203 object.name( "object" ); 00204 kvpair.name( "key-value pair" ); 00205 key.name( "key" ); 00206 value.name( "value" ); 00207 file.name( "file" ); 00208 comment.name( "comment" ); 00209 00210 // provide error handlers 00211 // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work. 00212 qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 ); 00213 qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 ); 00214 qi::on_error< qi::fail >( key, error << phoenix::val( "Error: " ) << qi::_4 ); 00215 qi::on_error< qi::fail >( value, error << phoenix::val( "Error: " ) << qi::_4 ); 00216 qi::on_error< qi::fail >( comment, error << phoenix::val( "Error: " ) << qi::_4 ); 00217 qi::on_error< qi::fail >( file, error << phoenix::val( "Error: " ) << qi::_4 ); 00218 } 00219 00220 // Rules we use 00221 00222 /** 00223 * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition. 00224 */ 00225 qi::rule< Iterator, ObjectType(), ascii::space_type > object; 00226 00227 /** 00228 * Rule for files. Basically the same as an object but without name 00229 */ 00230 qi::rule< Iterator, FileType(), ascii::space_type > file; 00231 00232 /** 00233 * Rule for comments. Ignored. 00234 */ 00235 qi::rule< Iterator, CommentType(), ascii::space_type > comment; 00236 00237 /** 00238 * Key-value pair rule. See constructor for exact definition. 00239 */ 00240 qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair; 00241 00242 /** 00243 * Key rule. See constructor for exact definition. 00244 */ 00245 qi::rule< Iterator, KeyType() > key; 00246 00247 /** 00248 * Value rule. See constructor for exact definition. 00249 */ 00250 qi::rule< Iterator, ValueType() > value; 00251 }; 00252 00253 /** 00254 * This simplifies working with a tree in a \ref FileType instance. It provides easy query and check methods. It does not 00255 * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As 00256 * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify 00257 * the type. 00258 * 00259 * This tree uses the types in the \ref WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive 00260 * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the \ref ObjectType tree. 00261 * 00262 * \note The syntax of the parsed files is defined by the parser itself. See \ref Grammar for details. 00263 * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes 00264 * it. 00265 */ 00266 class StructuredValueTree 00267 { 00268 friend class WStructuredTextParserTest; 00269 public: 00270 /** 00271 * This char is used as separator for identifying values in the tree. NEVER change this value. 00272 */ 00273 static const std::string Separator; 00274 00275 /** 00276 * Construct the instance given the original parsing structure. 00277 * 00278 * \param file the parsing result structure (the root node). 00279 */ 00280 explicit StructuredValueTree( const FileType& file ); 00281 00282 /** 00283 * Construct the instance given a text as string. 00284 * 00285 * \param toParse the text to parse 00286 */ 00287 explicit StructuredValueTree( const std::string& toParse ); 00288 00289 /** 00290 * Construct the instance given a path to a file to load. 00291 * 00292 * \param file the path to a file to load. 00293 */ 00294 explicit StructuredValueTree( const boost::filesystem::path& file ); 00295 00296 /** 00297 * Creates an empty tree. It will contain no information at all. 00298 */ 00299 StructuredValueTree(); 00300 00301 /** 00302 * Cleanup. 00303 */ 00304 virtual ~StructuredValueTree(); 00305 00306 /** 00307 * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to 00308 * true. 00309 * 00310 * \param key path to the value 00311 * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function 00312 * to return true. 00313 * 00314 * \return true if existing. 00315 */ 00316 bool exists( std::string key, bool valuesOnly = false ) const; 00317 00318 /** 00319 * It is possible that there are multiple values matching a key. This method counts them. 00320 * 00321 * \param key path to the values to count 00322 * \param valuesOnly if true, it only counts values matching the given name. 00323 * 00324 * \return the number of found values. 00325 */ 00326 size_t count( std::string key, bool valuesOnly = false ) const; 00327 00328 /** 00329 * Queries the value with the given name. If it is not found, the default value will be returned. 00330 * 00331 * \param key path to the value. Paths to whole objects are invalid. 00332 * \param defaultValue the default if no value was found 00333 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00334 * 00335 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00336 * 00337 * \return the value 00338 * 00339 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00340 */ 00341 template< typename T > 00342 T getValue( std::string key, const T& defaultValue ) const; 00343 00344 /** 00345 * Queries the list of values matching the given path. If it is not found, the default value will be returned. 00346 * 00347 * \param key path to the value. Paths to whole objects are invalid. 00348 * \param defaults the defaults if no value was found 00349 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00350 * 00351 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00352 * 00353 * \return the value 00354 * 00355 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00356 */ 00357 template< typename T > 00358 std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const; 00359 00360 /** 00361 * Queries the list of values matching the given path. If it is not found, an empty results vector is returned. 00362 * 00363 * \param key path to the value. Paths to whole objects are invalid. 00364 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00365 * 00366 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00367 * 00368 * \return the value vector. Might be empty if no elements where found. 00369 * 00370 * \note this does not return a reference as the default value might be returned. It returns a copy of the value. 00371 */ 00372 template< typename T > 00373 std::vector< T > getValues( std::string key ) const; 00374 00375 /** 00376 * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first 00377 * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count 00378 * 00379 * \param key path to the value. Paths to whole objects are invalid. 00380 * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid. 00381 * \throw WTypeMismatch if the value cannot be cast to the specified target type 00382 * \throw WNotFound if the key:value pair does not exist 00383 * 00384 * \return the value as copy to avoid any const_cast which would allow modification. 00385 */ 00386 template< typename T > 00387 T operator[]( std::string key ) const; 00388 00389 /** 00390 * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are 00391 * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key 00392 * matches a key-value pair, nothing is returned. This means, this method is only useful for objects. 00393 * 00394 * \param key key to search. 00395 * 00396 * \return the structured value tree. 00397 */ 00398 StructuredValueTree getSubTree( std::string key ) const; 00399 00400 /** 00401 * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of 00402 * subtrees is returned. If nothing matches, the returned list is empty. If they key 00403 * matches a key-value pair, nothing is returned. This means, this method is only useful for objects. 00404 * 00405 * \param key key to search. 00406 * 00407 * \return the structured value trees. 00408 */ 00409 std::vector< StructuredValueTree > getSubTrees( std::string key ) const; 00410 00411 protected: 00412 private: 00413 /** 00414 * The named values. 00415 */ 00416 FileType m_file; 00417 00418 /** 00419 * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes, 00420 * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key. 00421 * 00422 * \param current current element to check and recursively traverse 00423 * \param keyIter the current path element 00424 * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements 00425 * \param resultObjects all matching instances of type \ref ObjectType 00426 * \param resultValues all matching instances of type \ref KeyValueType 00427 */ 00428 void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter, 00429 std::vector< std::string >::const_iterator keyEnd, 00430 std::vector< ObjectType >& resultObjects, 00431 std::vector< KeyValueType >& resultValues ) const; 00432 00433 /** 00434 * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes, 00435 * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key. 00436 * 00437 * \param current current element to check and recursively traverse 00438 * \param key the path 00439 * \param resultObjects all matching instances of type \ref ObjectType 00440 * \param resultValues all matching instances of type \ref KeyValueType 00441 */ 00442 void traverse( FileType current, std::string key, 00443 std::vector< ObjectType >& resultObjects, 00444 std::vector< KeyValueType >& resultValues ) const; 00445 }; 00446 00447 /** 00448 * Parse the given input and return the syntax tree. Throws an exception WParseError on error. 00449 * 00450 * \param input the input to parse. 00451 * 00452 * \return the syntax tree in plain format. You should use WStructuredValueTree to use this. 00453 * 00454 * \throw WParseError on parse error 00455 */ 00456 FileType parseFromString( std::string input ); 00457 00458 /** 00459 * Parse the given input and return the syntax tree. Throws an exception WParseError on error. 00460 * 00461 * \param path the file to parse 00462 * 00463 * \return the syntax tree in plain format. You should use WStructuredValueTree to use this. 00464 * 00465 * \throw WParseError on parse error 00466 * \throw WFileNotFOund in case the specified file could not be opened 00467 */ 00468 FileType parseFromFile( boost::filesystem::path path ); 00469 00470 template< typename T > 00471 T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const 00472 { 00473 // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator 00474 return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin(); 00475 } 00476 00477 template< typename T > 00478 std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const 00479 { 00480 std::vector< T > r = getValues< T >( key ); 00481 if( r.size() ) 00482 { 00483 return r; 00484 } 00485 else 00486 { 00487 return defaults; 00488 } 00489 } 00490 00491 template< typename T > 00492 T StructuredValueTree::operator[]( std::string key ) const 00493 { 00494 std::vector< T > r = getValues< T >( key ); 00495 if( r.size() ) 00496 { 00497 return *r.begin(); 00498 } 00499 else 00500 { 00501 throw WNotFound( "The key \"" + key + "\" was not found." ); 00502 } 00503 } 00504 00505 /** 00506 * Visitor to identify whether the given variant of type \ref MemberType is a object or key-value pair. 00507 */ 00508 class IsLeafVisitor: public boost::static_visitor< bool > 00509 { 00510 public: 00511 /** 00512 * Returns always true as it is only called for key-value pairs. 00513 * 00514 * \return always true since it identified an key-value pair 00515 */ 00516 bool operator()( const KeyValueType& /* element */ ) const 00517 { 00518 return true; 00519 } 00520 00521 /** 00522 * Returns always false as it is only called for objects. 00523 * 00524 * \tparam T the type. Should be \ref ObjectType or \ref CommentType 00525 * \return always false since it identified an Object/comment 00526 */ 00527 template< typename T > 00528 bool operator()( const T& /* element */ ) const 00529 { 00530 return false; 00531 } 00532 }; 00533 00534 /** 00535 * Visitor to identify whether the given variant of type \ref MemberType is a comment. 00536 */ 00537 class IsCommentVisitor: public boost::static_visitor< bool > 00538 { 00539 public: 00540 /** 00541 * Returns always true as it is only called for comments. 00542 * 00543 * \return always true 00544 */ 00545 bool operator()( const CommentType& /* element */ ) const 00546 { 00547 return true; 00548 } 00549 00550 /** 00551 * Returns always false as it is only called for objects and key-value pairs. 00552 * 00553 * \tparam T the type. Should be \ref ObjectType or \ref KeyValueType 00554 * \return always false since it identified an Object/KeyValueType 00555 */ 00556 template< typename T > 00557 bool operator()( const T& /* element */ ) const 00558 { 00559 return false; 00560 } 00561 }; 00562 00563 /** 00564 * Visitor to query the m_name member of \ref ObjectType and \ref KeyValueType. 00565 */ 00566 class NameQueryVisitor: public boost::static_visitor< std::string > 00567 { 00568 public: 00569 /** 00570 * Comments have no name. 00571 * 00572 * \return empty string. 00573 */ 00574 std::string operator()( const CommentType& /* element */ ) const 00575 { 00576 return ""; 00577 } 00578 00579 /** 00580 * Returns the m_name member of the specified object or key-valuev pair. 00581 * 00582 * \param element Specified object. 00583 * 00584 * \tparam T one of the types of the \ref MemberType variant 00585 * \return always true since it identified an key-value pair 00586 */ 00587 template< typename T > 00588 std::string operator()( const T& element ) const 00589 { 00590 return element.m_name; 00591 } 00592 }; 00593 00594 template< typename T > 00595 std::vector< T > StructuredValueTree::getValues( std::string key ) const 00596 { 00597 // traverse the tree 00598 std::vector< ObjectType > rObj; 00599 std::vector< KeyValueType > rKV; 00600 00601 // traverse 00602 traverse( m_file, key, rObj, rKV ); 00603 00604 // copy to result vector and cast 00605 std::vector< T > r; 00606 for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i ) 00607 { 00608 try 00609 { 00610 r.push_back( string_utils::fromString< T >( ( *i ).m_value ) ); 00611 } 00612 catch( ... ) 00613 { 00614 // convert the standard exception (if cannot convert) to a WTypeMismnatch. 00615 throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." ); 00616 } 00617 } 00618 00619 // done 00620 return r; 00621 } 00622 } 00623 00624 #endif // WSTRUCTUREDTEXTPARSER_H 00625