// Include guard for this header.
29 #ifndef _MDPVALUEITERATION_H_
30 #define _MDPVALUEITERATION_H_ 1
// Compile-time debug switch: when non-zero, the "#if DEBUG_MDPValueIteration"
// section further down prints the current maxDelta ("delta ...") to cout.
43 #define DEBUG_MDPValueIteration 0
// Computes Q-values from a list of matrices, one per joint action:
// T[jaI] is the matrix used for joint action index jaI. The type M must
// provide the two-level iterator interface used below
// (begin1()/end1() yielding const_iterator1, then ri.begin()/ri.end()
// yielding const_iterator2) -- presumably a Boost uBLAS-style matrix whose
// entries are transition probabilities; TODO confirm against the callers.
// NOTE(review): T is taken by value, so the vector of pointers is copied
// on every call.
68 void Plan(std::vector<const M*> T)
// Scratch values for one backup: R_i is the immediate reward read from
// immReward, R_f accumulates (*ci * maxQsuc) over the inner iterator, and
// maxQsuc is the running maximum over successor joint actions.
77 double R_i,R_f,maxQsuc;
// Table of size nrS x nrJA indexed as immReward(sI,jaI). The double loop
// below visits every (sI,jaI) pair -- presumably to fill the table; the
// loop body is not part of this excerpt.
80 QTable immReward(nrS,nrJA);
81 for(
Index sI = 0; sI < nrS; sI++)
82 for(
Index jaI = 0; jaI < nrJA; jaI++)
// Counts t down starting at horizon-1. The loop condition is the constant
// 'true' and t is an unsigned size_t, so termination has to come from
// inside the loop body (not visible here) before t-- would wrap around.
87 for(
size_t t = horizon - 1;
true; t--)
// For every joint action, walk the outer (iterator1) dimension of its matrix.
90 for(
Index jaI = 0; jaI < nrJA; jaI++)
92 for(
typename M::const_iterator1 ri=T[jaI]->begin1();
93 ri!=T[jaI]->end1(); ++ri)
// Immediate reward for the current (sI,jaI) pair. sI is set in lines not
// shown here -- presumably from the position of ri; TODO confirm.
98 R_i = immReward(sI,jaI);
// Walk the entries reachable from ri (inner, iterator2 dimension).
103 for (
typename M::const_iterator2 ci = ri.begin();
104 ci != ri.end(); ++ci)
// Running maximum of _m_QValues[t+1](ssucI,jasucI) over all successor joint
// actions jasucI. The second argument of std::max lies outside this
// excerpt -- presumably maxQsuc itself.
109 for(
Index jasucI = 0; jasucI < nrJA; jasucI++)
110 maxQsuc = std::max(
_m_QValues[t+1](ssucI,jasucI),
// Weight the best successor value by the matrix entry *ci and accumulate.
113 R_f += *ci * maxQsuc;
// Largest change seen, starting at DBL_MAX.
// NOTE(review): further down maxDelta is only ever updated through
// std::max(maxDelta, ...), so it must be reset to a small value somewhere
// in the lines not shown, otherwise it could never drop below DBL_MAX --
// verify.
126 double maxDelta=DBL_MAX;
// Same backup structure as above, but successor values are read from
// oldQtable instead of _m_QValues[t+1].
134 for(
Index jaI = 0; jaI < nrJA; jaI++)
136 for(
typename M::const_iterator1 ri=T[jaI]->begin1();
137 ri!=T[jaI]->end1(); ++ri)
142 R_i = immReward(sI,jaI);
146 for (
typename M::const_iterator2 ci = ri.begin();
147 ci != ri.end(); ++ci)
// Running maximum of oldQtable(ssucI,jasucI) over successor joint actions
// (second std::max argument not visible in this excerpt).
153 for(
Index jasucI = 0; jasucI < nrJA; jasucI++)
154 maxQsuc = std::max( oldQtable(ssucI,jasucI),
157 R_f += *ci * maxQsuc;
// Track the largest absolute difference between the old table entry for
// (sI,jaI) and a second operand that is cut off in this excerpt --
// presumably the freshly computed Q-value.
161 maxDelta=std::max(maxDelta,
162 std::abs(oldQtable(sI,jaI)-
// Debug output, compiled in only when DEBUG_MDPValueIteration is non-zero.
169 #if DEBUG_MDPValueIteration
170 cout <<
"delta " << maxDelta << endl;
// Run-time dispatch on the concrete type of tmd (continuation of an
// if/else-if chain whose first branch is outside this excerpt).
// Sparse case: tmd is a TransitionModelMappingSparse. A vector of pointers
// to const SparseMatrix is declared and a loop runs over a in [0,nrJA) --
// presumably collecting one matrix per joint action before calling the
// templated Plan; the loop body is not visible here.
206 else if((tms=dynamic_cast<const TransitionModelMappingSparse *>(tmd)))
208 std::vector<const TransitionModelMappingSparse::SparseMatrix *> T;
209 for(
unsigned int a=0;a!=nrJA;++a)
// Dense case: same pattern with TransitionModelMapping::Matrix.
213 else if((tm=dynamic_cast<const TransitionModelMapping *>(tmd)))
215 std::vector<const TransitionModelMapping::Matrix *> T;
216 for(
unsigned int a=0;a!=nrJA;++a)
// Fallback: raises E with a message saying the TransitionModelDiscretePtr
// is not handled -- presumably reached when none of the casts above
// succeeded.
221 throw(
E(
"MDPValueIteration::Plan() TransitionModelDiscretePtr not handled"));