OpenWalnut  1.4.0
WStructuredTextParser.h
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #ifndef WSTRUCTUREDTEXTPARSER_H
26 #define WSTRUCTUREDTEXTPARSER_H
27 
28 #include <algorithm>
29 #include <iostream>
30 #include <map>
31 #include <ostream>
32 #include <string>
33 #include <vector>
34 
35 #include <boost/config/warning_disable.hpp>
36 #include <boost/spirit/include/qi.hpp>
37 #include <boost/spirit/include/phoenix_core.hpp>
38 #include <boost/spirit/include/phoenix_operator.hpp>
39 #include <boost/spirit/include/phoenix_fusion.hpp>
40 #include <boost/spirit/include/phoenix_stl.hpp>
41 #include <boost/spirit/include/phoenix_object.hpp>
42 #include <boost/fusion/include/adapt_struct.hpp>
43 #include <boost/variant/recursive_variant.hpp>
44 #include <boost/fusion/include/io.hpp>
45 #include <boost/filesystem/path.hpp>
46 
47 #include "WStringUtils.h"
48 #include "exceptions/WTypeMismatch.h"
49 #include "exceptions/WNotFound.h"
50 
51 /**
52  * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST)
53  * for the given input which later can be traversed.
54  */
55 namespace WStructuredTextParser
56 {
57  //! we use these quite often, so define some short alias for them:
58  namespace qi = boost::spirit::qi;
59 
60  //! we use these quite often, so define some short alias for them:
61  namespace fusion = boost::fusion;
62 
63  //! we use these quite often, so define some short alias for them:
64  namespace ascii = boost::spirit::ascii;
65 
66  //! we use these quite often, so define some short alias for them:
67  namespace phoenix = boost::phoenix;
68 
69  //! we use these quite often, so define some short alias for them:
70  namespace spirit = boost::spirit;
71 
72  /**
73  * The type used for keys
74  */
75  typedef std::string KeyType;
76 
77  /**
78  * The type used for values
79  */
80  typedef std::string ValueType;
81 
82  /**
83  * The type used for comments
84  */
85  typedef std::string CommentType;
86 
87  /**
88  * Forward declare the object type.
89  */
90  struct ObjectType;
91 
92  /**
93  * KeyValueType - a tuple containing name and value
94  */
95  struct KeyValueType
96  {
97  /**
98  * Name string.
99  */
100  std::string m_name;
101  /**
102  * Value string.
103  */
104  std::string m_value;
105  };
106 
107  /**
108  * A node inside the AST is either another object or a key-value pair.
109  */
110  typedef
111  boost::variant<
112  boost::recursive_wrapper< ObjectType >,
113  KeyValueType,
114  CommentType
115  >
117 
118  /**
119  * An object is always a name and contains several further nodes
120  */
121  struct ObjectType
122  {
123  /**
124  * Name of the object
125  */
126  std::string m_name;
127 
128  /**
129  * Object's members
130  */
131  std::vector< MemberType > m_nodes;
132  };
133 
134  /**
135  * An object representing all objects and comments on file level.
136  */
137  typedef std::vector< MemberType > FileType;
138 }
139 
140 
141 // Doxygen has problems with the following
142 // \cond Suppress_Doxygen
143 /**
144  * Tell boost::fusion about our types.
145  */
146 BOOST_FUSION_ADAPT_STRUCT(
148  ( std::string, m_name )
149  ( std::vector< WStructuredTextParser::MemberType >, m_nodes )
150  )
151 
152 /**
153  * Tell boost::fusion about our types.
154  */
155 BOOST_FUSION_ADAPT_STRUCT(
156  WStructuredTextParser::KeyValueType,
157  ( std::string, m_name )
158  ( std::string, m_value )
159  )
160 // \endcond
161 
162 namespace WStructuredTextParser
163 {
164  /**
165  * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to
166  * successfully parse a file:
167  * <ul>
168  * <li>Key: identifier, needs to be a-z,A-Z,0-9,_
169  * <li>Object: defined as key + { ... }
170  * <li> ";" is optional after objects
 * <li>Key-Value Pair: is a member of an object and is defined as key="value".
172  * <li>Comments begin with //
173  * </ul>
174  * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt.
175  *
176  * \tparam Iterator the iterator, used to get the input stream
177  */
178  template <typename Iterator>
179  struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type >
180  {
181  /**
182  * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse.
183  *
184  * \param error Will contain error message if any occurs during functions execution
185  */
186  explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref
187  {
188  // a key begins with a letter
189  key %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" );
190  // a value is a quoted string. Multi-line strings possible
191  value %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"';
192 
193  // a pair is: key = value
194  kvpair %= key >> '=' >> value >> ';';
195  // a comment is // + arbitrary symbols
196  comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ];
197  // a object is a name, and a set of nested objects or key-value pairs
198  object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" );
199  // a file is basically an object without name.
200  file %= *( object | kvpair | comment );
201 
202  // provide names for these objects for better readability of parse errors
203  object.name( "object" );
204  kvpair.name( "key-value pair" );
205  key.name( "key" );
206  value.name( "value" );
207  file.name( "file" );
208  comment.name( "comment" );
209 
210  // provide error handlers
211  // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work.
212  qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 );
213  qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 );
214  qi::on_error< qi::fail >( key, error << phoenix::val( "Error: " ) << qi::_4 );
215  qi::on_error< qi::fail >( value, error << phoenix::val( "Error: " ) << qi::_4 );
216  qi::on_error< qi::fail >( comment, error << phoenix::val( "Error: " ) << qi::_4 );
217  qi::on_error< qi::fail >( file, error << phoenix::val( "Error: " ) << qi::_4 );
218  }
219 
220  // Rules we use
221 
222  /**
223  * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition.
224  */
225  qi::rule< Iterator, ObjectType(), ascii::space_type > object;
226 
227  /**
228  * Rule for files. Basically the same as an object but without name
229  */
230  qi::rule< Iterator, FileType(), ascii::space_type > file;
231 
232  /**
233  * Rule for comments. Ignored.
234  */
235  qi::rule< Iterator, CommentType(), ascii::space_type > comment;
236 
237  /**
238  * Key-value pair rule. See constructor for exact definition.
239  */
240  qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair;
241 
242  /**
243  * Key rule. See constructor for exact definition.
244  */
245  qi::rule< Iterator, KeyType() > key;
246 
247  /**
248  * Value rule. See constructor for exact definition.
249  */
250  qi::rule< Iterator, ValueType() > value;
251  };
252 
253  /**
254  * This simplifies working with a tree in a \ref WStructuredTextParser::FileType instance. It provides easy query and check methods. It does not
255  * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As
256  * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify
257  * the type.
258  *
259  * This tree uses the types in the WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive
260  * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the
261  * \ref WStructuredTextParser::ObjectType tree.
262  *
263  * \note The syntax of the parsed files is defined by the parser itself. See WStructuredTextParser::Grammar for details.
264  * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes
265  * it.
266  */
268  {
269  friend class WStructuredTextParserTest;
270  public:
271  /**
272  * This char is used as separator for identifying values in the tree. NEVER change this value.
273  */
274  static const std::string Separator;
275 
276  /**
277  * Construct the instance given the original parsing structure.
278  *
279  * \param file the parsing result structure (the root node).
280  */
281  explicit StructuredValueTree( const FileType& file );
282 
283  /**
284  * Construct the instance given a text as string.
285  *
286  * \param toParse the text to parse
287  */
288  explicit StructuredValueTree( const std::string& toParse );
289 
290  /**
291  * Construct the instance given a path to a file to load.
292  *
293  * \param file the path to a file to load.
294  */
295  explicit StructuredValueTree( const boost::filesystem::path& file );
296 
297  /**
298  * Creates an empty tree. It will contain no information at all.
299  */
301 
302  /**
303  * Cleanup.
304  */
305  virtual ~StructuredValueTree();
306 
307  /**
308  * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to
309  * true.
310  *
311  * \param key path to the value
312  * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function
313  * to return true.
314  *
315  * \return true if existing.
316  */
317  bool exists( std::string key, bool valuesOnly = false ) const;
318 
319  /**
320  * It is possible that there are multiple values matching a key. This method counts them.
321  *
322  * \param key path to the values to count
323  * \param valuesOnly if true, it only counts values matching the given name.
324  *
325  * \return the number of found values.
326  */
327  size_t count( std::string key, bool valuesOnly = false ) const;
328 
329  /**
330  * Queries the value with the given name. If it is not found, the default value will be returned.
331  *
332  * \param key path to the value. Paths to whole objects are invalid.
333  * \param defaultValue the default if no value was found
334  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
335  *
336  * \throw WTypeMismatch if the value cannot be cast to the specified target type
337  *
338  * \return the value
339  *
340  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
341  */
342  template< typename T >
343  T getValue( std::string key, const T& defaultValue ) const;
344 
345  /**
346  * Queries the list of values matching the given path. If it is not found, the default value will be returned.
347  *
348  * \param key path to the value. Paths to whole objects are invalid.
349  * \param defaults the defaults if no value was found
350  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
351  *
352  * \throw WTypeMismatch if the value cannot be cast to the specified target type
353  *
354  * \return the value
355  *
356  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
357  */
358  template< typename T >
359  std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const;
360 
361  /**
362  * Queries the list of values matching the given path. If it is not found, an empty results vector is returned.
363  *
364  * \param key path to the value. Paths to whole objects are invalid.
365  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
366  *
367  * \throw WTypeMismatch if the value cannot be cast to the specified target type
368  *
369  * \return the value vector. Might be empty if no elements where found.
370  *
371  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
372  */
373  template< typename T >
374  std::vector< T > getValues( std::string key ) const;
375 
376  /**
377  * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first
378  * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count
379  *
380  * \param key path to the value. Paths to whole objects are invalid.
381  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
382  * \throw WTypeMismatch if the value cannot be cast to the specified target type
383  * \throw WNotFound if the key:value pair does not exist
384  *
385  * \return the value as copy to avoid any const_cast which would allow modification.
386  */
387  template< typename T >
388  T operator[]( std::string key ) const;
389 
390  /**
391  * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are
392  * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key
393  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
394  *
395  * \param key key to search.
396  *
397  * \return the structured value tree.
398  */
399  StructuredValueTree getSubTree( std::string key ) const;
400 
401  /**
402  * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of
403  * subtrees is returned. If nothing matches, the returned list is empty. If they key
404  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
405  *
406  * \param key key to search.
407  *
408  * \return the structured value trees.
409  */
410  std::vector< StructuredValueTree > getSubTrees( std::string key ) const;
411 
412  protected:
413  private:
414  /**
415  * The named values.
416  */
417  FileType m_file;
418 
419  /**
420  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
421  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
422  *
423  * \param current current element to check and recursively traverse
424  * \param keyIter the current path element
425  * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements
426  * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
427  * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
428  */
429  void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter,
430  std::vector< std::string >::const_iterator keyEnd,
431  std::vector< ObjectType >& resultObjects,
432  std::vector< KeyValueType >& resultValues ) const;
433 
434  /**
435  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
436  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
437  *
438  * \param current current element to check and recursively traverse
439  * \param key the path
440  * \param resultObjects all matching instances of type \ref WStructuredTextParser::ObjectType
441  * \param resultValues all matching instances of type \ref WStructuredTextParser::KeyValueType
442  */
443  void traverse( FileType current, std::string key,
444  std::vector< ObjectType >& resultObjects,
445  std::vector< KeyValueType >& resultValues ) const;
446  };
447 
448  /**
449  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
450  *
451  * \param input the input to parse.
452  *
 * \return the syntax tree in plain format. Use StructuredValueTree to work with it.
454  *
455  * \throw WParseError on parse error
456  */
457  FileType parseFromString( std::string input );
458 
459  /**
460  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
461  *
462  * \param path the file to parse
463  *
 * \return the syntax tree in plain format. Use StructuredValueTree to work with it.
465  *
466  * \throw WParseError on parse error
 * \throw WFileNotFound in case the specified file could not be opened
468  */
469  FileType parseFromFile( boost::filesystem::path path );
470 
471  template< typename T >
472  T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const
473  {
474  // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator
475  return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin();
476  }
477 
478  template< typename T >
479  std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const
480  {
481  std::vector< T > r = getValues< T >( key );
482  if( r.size() )
483  {
484  return r;
485  }
486  else
487  {
488  return defaults;
489  }
490  }
491 
492  template< typename T >
493  T StructuredValueTree::operator[]( std::string key ) const
494  {
495  std::vector< T > r = getValues< T >( key );
496  if( r.size() )
497  {
498  return *r.begin();
499  }
500  else
501  {
502  throw WNotFound( "The key \"" + key + "\" was not found." );
503  }
504  }
505 
506  /**
507  * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a object or key-value pair.
508  */
509  class IsLeafVisitor: public boost::static_visitor< bool >
510  {
511  public:
512  /**
513  * Returns always true as it is only called for key-value pairs.
514  *
515  * \return always true since it identified an key-value pair
516  */
517  bool operator()( const KeyValueType& /* element */ ) const
518  {
519  return true;
520  }
521 
522  /**
523  * Returns always false as it is only called for objects.
524  *
525  * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::CommentType
526  * \return always false since it identified an Object/comment
527  */
528  template< typename T >
529  bool operator()( const T& /* element */ ) const
530  {
531  return false;
532  }
533  };
534 
535  /**
536  * Visitor to identify whether the given variant of type \ref WStructuredTextParser::MemberType is a comment.
537  */
538  class IsCommentVisitor: public boost::static_visitor< bool >
539  {
540  public:
541  /**
542  * Returns always true as it is only called for comments.
543  *
544  * \return always true
545  */
546  bool operator()( const CommentType& /* element */ ) const
547  {
548  return true;
549  }
550 
551  /**
552  * Returns always false as it is only called for objects and key-value pairs.
553  *
554  * \tparam T the type. Should be \ref WStructuredTextParser::ObjectType or \ref WStructuredTextParser::KeyValueType
555  * \return always false since it identified an Object/KeyValueType
556  */
557  template< typename T >
558  bool operator()( const T& /* element */ ) const
559  {
560  return false;
561  }
562  };
563 
564  /**
565  * Visitor to query the m_name member of \ref WStructuredTextParser::ObjectType and \ref WStructuredTextParser::KeyValueType.
566  */
567  class NameQueryVisitor: public boost::static_visitor< std::string >
568  {
569  public:
570  /**
571  * Comments have no name.
572  *
573  * \return empty string.
574  */
575  std::string operator()( const CommentType& /* element */ ) const
576  {
577  return "";
578  }
579 
580  /**
581  * Returns the m_name member of the specified object or key-valuev pair.
582  *
583  * \param element Specified object.
584  *
585  * \tparam T one of the types of the \ref WStructuredTextParser::MemberType variant
586  * \return always true since it identified an key-value pair
587  */
588  template< typename T >
589  std::string operator()( const T& element ) const
590  {
591  return element.m_name;
592  }
593  };
594 
595  template< typename T >
596  std::vector< T > StructuredValueTree::getValues( std::string key ) const
597  {
598  // traverse the tree
599  std::vector< ObjectType > rObj;
600  std::vector< KeyValueType > rKV;
601 
602  // traverse
603  traverse( m_file, key, rObj, rKV );
604 
605  // copy to result vector and cast
606  std::vector< T > r;
607  for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i )
608  {
609  try
610  {
611  r.push_back( string_utils::fromString< T >( ( *i ).m_value ) );
612  }
613  catch( ... )
614  {
615  // convert the standard exception (if cannot convert) to a WTypeMismnatch.
616  throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." );
617  }
618  }
619 
620  // done
621  return r;
622  }
623 }
624 
625 #endif // WSTRUCTUREDTEXTPARSER_H
626 
Indicates invalid type of something.
Definition: WTypeMismatch.h:36
qi::rule< Iterator, ValueType() > value
Value rule.
Indicates invalid value which could not be found.
Definition: WNotFound.h:35
size_t count(std::string key, bool valuesOnly=false) const
It is possible that there are multiple values matching a key.
std::vector< T > getValues(std::string key, const std::vector< T > &defaults) const
Queries the list of values matching the given path.
StructuredValueTree getSubTree(std::string key) const
Gets a subtree.
bool exists(std::string key, bool valuesOnly=false) const
Checks whether the given value or object exists.
FileType m_file
The named values.
std::string operator()(const T &element) const
Returns the m_name member of the specified object or key-valuev pair.
T operator[](std::string key) const
Queries the value with the given name.
StructuredValueTree()
Creates an empty tree.
STL namespace.
std::string KeyType
we use these quite often, so define some short alias for them:
Visitor to identify whether the given variant of type WStructuredTextParser::MemberType is a object o...
bool operator()(const CommentType &) const
Returns always true as it is only called for comments.
std::string ValueType
The type used for values.
namespace WStructuredTextParser { /** The grammar describing the structured format.
qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair
Key-value pair rule.
This namespace contains the WStructuredTextParser data types and the parser.
void traverse(MemberType current, std::vector< std::string >::const_iterator keyIter, std::vector< std::string >::const_iterator keyEnd, std::vector< ObjectType > &resultObjects, std::vector< KeyValueType > &resultValues) const
Recursively fills a result vector using a given path iterator.
qi::rule< Iterator, ObjectType(), ascii::space_type > object
Rule for objects.
std::string CommentType
The type used for comments.
std::vector< MemberType > m_nodes
Object's members.
std::vector< MemberType > FileType
An object representing all objects and comments on file level.
static const std::string Separator
This char is used as separator for identifying values in the tree.
qi::rule< Iterator, FileType(), ascii::space_type > file
Rule for files.
Grammar(std::ostream &error)
Constructor and grammar description.
bool operator()(const T &) const
Returns always false as it is only called for objects.
KeyValueType - a tuple containing name and value.
An object is always a name and contains several further nodes.
std::string operator()(const CommentType &) const
Comments have no name.
Visitor to query the m_name member of WStructuredTextParser::ObjectType and WStructuredTextParser::Ke...
std::vector< StructuredValueTree > getSubTrees(std::string key) const
Gets all matching subtrees.
boost::variant< boost::recursive_wrapper< ObjectType >, KeyValueType, CommentType > MemberType
A node inside the AST is either another object or a key-value pair.
This simplifies working with a tree in a WStructuredTextParser::FileType instance.
virtual ~StructuredValueTree()
Cleanup.
bool operator()(const T &) const
Returns always false as it is only called for objects and key-value pairs.
Visitor to identify whether the given variant of type WStructuredTextParser::MemberType is a comment...
T getValue(std::string key, const T &defaultValue) const
Queries the value with the given name.
Test parsing and query functionality.
bool operator()(const KeyValueType &) const
Returns always true as it is only called for key-value pairs.
std::string m_name
Name of the object.
qi::rule< Iterator, KeyType() > key
Key rule.
qi::rule< Iterator, CommentType(), ascii::space_type > comment
Rule for comments.