MultiAgentDecisionProcess  Release 0.2.1
MDPValueIteration.h
Go to the documentation of this file.
1 
28 /* Only include this header file once. */
29 #ifndef _MDPVALUEITERATION_H_
30 #define _MDPVALUEITERATION_H_ 1
31 
32 /* the include directives */
33 #include <iostream>
34 #include <float.h>
35 #include "Globals.h"
36 
38 #include "MDPSolver.h"
39 #include "TimedAlgorithm.h"
40 #include "TransitionModelMapping.h"
42 
43 #define DEBUG_MDPValueIteration 0
44 
48  public TimedAlgorithm
49 {
50 private:
51 
56 
59 
62 
63  void Initialize();
64 
67  template <class M>
68  void Plan(std::vector<const M*> T)
69  {
70 
71  size_t horizon = GetPU()->GetHorizon();
72  size_t nrS = GetPU()->GetNrStates();
73  size_t nrJA = GetPU()->GetNrJointActions();
74 
75  Index sI,ssucI;
76  double gamma=GetPU()->GetDiscount();
77  double R_i,R_f,maxQsuc;
78 
79  // cache immediate reward for speed
80  QTable immReward(nrS,nrJA);
81  for(Index sI = 0; sI < nrS; sI++)
82  for(Index jaI = 0; jaI < nrJA; jaI++)
83  immReward(sI,jaI)=GetPU()->GetReward(sI, jaI);
84 
86  {
87  for(size_t t = horizon - 1; true; t--)
88  {
89  StartTimer("Iteration");
90  for(Index jaI = 0; jaI < nrJA; jaI++)
91  {
92  for(typename M::const_iterator1 ri=T[jaI]->begin1();
93  ri!=T[jaI]->end1(); ++ri)
94  {
95  sI=ri.index1();
96 
97  //calc. expected immediate reward
98  R_i = immReward(sI,jaI);
99  R_f = 0.0;
100  if(t < horizon - 1)
101  {
102  //calc. expected future reward
103  for (typename M::const_iterator2 ci = ri.begin();
104  ci != ri.end(); ++ci)
105  {
106  ssucI=ci.index2();
107  //find the best action at ssucI
108  maxQsuc = -DBL_MAX;
109  for(Index jasucI = 0; jasucI < nrJA; jasucI++)
110  maxQsuc = std::max( _m_QValues[t+1](ssucI,jasucI),
111  maxQsuc);
112 
113  R_f += *ci * maxQsuc;
114  }//done calc. expected future reward
115  }
116  _m_QValues[t](sI,jaI) = R_i + gamma*R_f;
117  }//end for jaI
118  }//end for sI
119  StopTimer("Iteration");
120  if(t == 0) //escape from (loop t is unsigned!)
121  break;
122  }
123  }
124  else // infinite horizon problem
125  {
126  double maxDelta=DBL_MAX;
127  QTable oldQtable;
128 
129  while(maxDelta>1e-4)
130  {
131  StartTimer("Iteration");
132  maxDelta=0;
133  oldQtable=_m_QValues[0];
134  for(Index jaI = 0; jaI < nrJA; jaI++)
135  {
136  for(typename M::const_iterator1 ri=T[jaI]->begin1();
137  ri!=T[jaI]->end1(); ++ri)
138  {
139  sI=ri.index1();
140 
141  //calc. expected immediate reward
142  R_i = immReward(sI,jaI);
143  R_f = 0.0;
144  //calc. expected future reward
145 
146  for (typename M::const_iterator2 ci = ri.begin();
147  ci != ri.end(); ++ci)
148  {
149  ssucI=ci.index2();
150 
151  //find the best action at ssucI
152  maxQsuc = -DBL_MAX;
153  for(Index jasucI = 0; jasucI < nrJA; jasucI++)
154  maxQsuc = std::max( oldQtable(ssucI,jasucI),
155  maxQsuc);
156 
157  R_f += *ci * maxQsuc;
158  }//done calc. expected future reward
159 
160  _m_QValues[0](sI,jaI) = R_i + gamma*R_f;
161  maxDelta=std::max(maxDelta,
162  std::abs(oldQtable(sI,jaI)-
163  _m_QValues[0](sI,jaI)));
164  }//end for jaI
165  }//end for sI
166 
167  StopTimer("Iteration");
168 
169 #if DEBUG_MDPValueIteration
170  cout << "delta " << maxDelta << endl;
172 #endif
173  }
174  }
175  }
176 
178  void PlanSlow();
179 
180 protected:
181 
182 public:
183  // Constructor, destructor and copy assignment.
186 
190 
191  void Plan()
192  {
193  if(!_m_initialized)
194  Initialize();
195 
196  StartTimer("Plan");
197 
198  size_t nrJA = GetPU()->GetNrJointActions();
199  const TransitionModelMappingSparse *tms=0;
200  const TransitionModelMapping *tm=0;
201  const TransitionModelDiscrete *tmd=
203 
204  if(tmd==0)
205  PlanSlow(); // just use GetTransitionProbability()
206  else if((tms=dynamic_cast<const TransitionModelMappingSparse *>(tmd)))
207  {
208  std::vector<const TransitionModelMappingSparse::SparseMatrix *> T;
209  for(unsigned int a=0;a!=nrJA;++a)
210  T.push_back(tms->GetMatrixPtr(a));
211  Plan(T);
212  }
213  else if((tm=dynamic_cast<const TransitionModelMapping *>(tmd)))
214  {
215  std::vector<const TransitionModelMapping::Matrix *> T;
216  for(unsigned int a=0;a!=nrJA;++a)
217  T.push_back(tm->GetMatrixPtr(a));
218  Plan(T);
219  }
220  else
221  throw(E("MDPValueIteration::Plan() TransitionModelDiscretePtr not handled"));
222 
223  StopTimer("Plan");
224  }
225 
226  double GetQ(Index time_step, Index sI,
227  Index jaI) const
228  { return(_m_QValues[time_step](sI,jaI)); }
229 
230  double GetQ(Index sI, Index jaI) const
231  { return(_m_QValues[0](sI,jaI)); }
232 
233  QTables GetQTables() const;
234  QTable GetQTable(Index time_step) const;
235 
236  void SetQTables(const QTables &Qs);
237  void SetQTable(const QTable &Q, Index time_step);
238 
239 };
240 
241 
242 #endif /* !_MDPVALUEITERATION_H_ */
243 
244 // Local Variables: ***
245 // mode:c++ ***
246 // End: ***