MultiAgentDecisionProcess  Release 0.2.1
DICEPSPlanner.h
Go to the documentation of this file.
1 
28 /* Only include this header file once. */
29 #ifndef _DICEPSPlannerPLANNER_H_
30 #define _DICEPSPlannerPLANNER_H_ 1
31 
32 /* the include directives */
33 #include <iostream>
34 #include <vector>
35 #include <list>
36 #include <fstream>
37 
38 #include "Globals.h"
41 #include "JointPolicyPureVector.h"
42 #include "JointPolicy.h"
43 #include "TimedAlgorithm.h"
44 
45 
46 class JPPVValuePair;
47 
48 using std::vector;
49 using std::list;
50 
// NOTE(review): this block is a Doxygen source listing — the leading number on
// each line is Doxygen's original source line number, and the gaps in that
// numbering (e.g. 68 -> 73, 98 -> 102, 162 -> 164) show that several source
// lines are MISSING from this view: some member names, the constructor-name
// lines and the getter signatures are not visible here. Comments below are
// hedged wherever the missing lines matter.
//
// DICEPSPlanner: judging from the parameter vocabulary (nrRestarts,
// nrIterations, nrSamples, CEalpha "the learning rate", "gamma in CE papers")
// this looks like a Direct Cross-Entropy (CE) policy-search planner for
// Dec-POMDPs — TODO confirm against the MADP toolbox documentation.
56 class DICEPSPlanner :
58  public TimedAlgorithm
59 {
60 
61 private:
62  //output settings:
// Stream receiving per-iteration convergence statistics; set via the
// constructors that take a convergenceStatsFile reference. Presumably a
// non-owning pointer — confirm ownership in the .cpp.
64  std::ofstream* _m_outputConvergenceFile;
66 
67  // CE Settings
// Number of independent CE restarts to perform.
68  size_t _m_nrRestarts;
// CE learning rate (the CEalpha constructor argument).
73  double _m_alpha;
// Simulation runs per policy evaluation; per the constructor comments, 0
// appears to select exact evaluation instead.
74  size_t _m_nrEvalRuns;
75 
76  //the best found policy
78  //the expected reward of the best found policy
80 
81 protected:
82 
// Samples a single agent's pure policy; ohistActionProbs presumably holds a
// probability distribution over actions per observation history — TODO
// confirm against the implementation.
83  static void SampleIndividualPolicy(PolicyPureVector& pol,
84  const vector< vector<double> >& ohistActionProbs );
// Inserts pv into l while keeping the list ordered (presumably sorted by the
// pair's value so the elite samples are easy to read off — verify in .cpp).
85  static void OrderedInsertJPPVValuePair( JPPVValuePair* pv,
86  list< JPPVValuePair*>& l );
// Debug/trace helper: prints the current list of best (policy, value) pairs.
87  static void PrintBestSamples( const list< JPPVValuePair*>& l );
88 
// NOTE(review): the line declaring this member's name and return type
// (Doxygen line 89) is missing from this listing; only the parameter tail is
// visible. It presumably updates the CE sampling distribution Xi from the
// elite samples — confirm.
90  vector< vector< vector<double> > >& Xi,
91  const list<JPPVValuePair* >& best_samples);
// Estimates the value of jpol by simulation over nrRuns runs (as opposed to
// the exact evaluation apparently used when nrEvalRuns == 0).
92  double ApproximateEvaluate(JointPolicyDiscrete &jpol, int nrRuns);
93 
94  public:
95 
96  // Constructor, destructor and copy assignment.
97  // (default) Constructor
98  //DICEPSPlanner();
// NOTE(review): the constructor-name/first-argument lines (Doxygen lines
// 99-101, 112-113, 124-126, 139-140) are missing from this listing; only the
// shared parameter tails of the four overloads are visible below. The first
// missing argument is presumably the problem/planning-unit object — confirm.
102  size_t horizon,
103  size_t nrRestarts,
104  size_t nrIterations,
105  size_t nrSamples,
106  size_t nrSamplesForUpdate,
107  bool use_hard_threshold, //(gamma in CE papers)
108  double CEalpha, //the learning rate
109  size_t nrEvalRuns, // policy evaluation runs (set 0 for exact eval)
110  int verbose = 0
111  );
// Overload taking the horizon as int rather than size_t.
114  int horizon,
115  size_t nrRestarts,
116  size_t nrIterations,
117  size_t nrSamples,
118  size_t nrSamplesForUpdate,
119  bool use_hard_threshold, //(gamma in CE papers)
120  double CEalpha, //the learning rate
121  size_t nrEvalRuns, // policy evaluation runs (set 0 for exact eval)
122  int verbose = 0
123  );
// Overload additionally taking convergence-statistics output options.
127  size_t horizon,
128  size_t nrRestarts,
129  size_t nrIterations,
130  size_t nrSamples,
131  size_t nrSamplesForUpdate,
132  bool use_hard_threshold, //(gamma in CE papers)
133  double CEalpha, //the learning rate
134  size_t nrEvalRuns, // policy evaluation runs (set 0 for exact eval)
135  bool convergenceStats,
136  std::ofstream & convergenceStatsFile,
137  int verbose = 0
138  );
// Overload with int horizon and convergence-statistics output options.
141  int horizon,
142  size_t nrRestarts,
143  size_t nrIterations,
144  size_t nrSamples,
145  size_t nrSamplesForUpdate,
146  bool use_hard_threshold, //(gamma in CE papers)
147  double CEalpha, //the learning rate
148  size_t nrEvalRuns, // policy evaluation runs (set 0 for exact eval)
149  bool convergenceStats,
150  std::ofstream & convergenceStatsFile,
151  int verbose = 0
152  );
153 
154  //operators:
155 
156  //data manipulation (set) functions:
// Runs the planning algorithm; presumably fills the best-found policy and its
// expected reward, which the getters below return — confirm in the .cpp.
160  void Plan();
161 
162  //get (data) functions:
// NOTE(review): the getter declaration lines (Doxygen 163, 165, 167) are
// missing; only the inline bodies returning &_m_foundPolicy remain visible.
// The _m_foundPolicy member itself is also on a missing line (see "the best
// found policy" comment in the private section above).
164  { return(&_m_foundPolicy); }
166  { return(&_m_foundPolicy); }
168  { return(&_m_foundPolicy); }
// Returns the expected reward of the best policy found by Plan().
169  double GetExpectedReward(void) const
170  { return(_m_expectedRewardFoundPolicy); }
171 
172 };
173 
174 
175 #endif /* !_DICEPSPlannerPLANNER_H_ */
176 
177 
178 // Local Variables: ***
179 // mode:c++ ***
180 // End: ***