// Compile-time debug switches; all three are defined as 0 here.
// NOTE(review): no use of DEBUG_QPOMDP is visible in this excerpt - confirm
// where it is checked.
39 #define DEBUG_QPOMDP 0
// NOTE(review): no use of DEBUG_QPOMDP_COMP is visible in this excerpt -
// confirm where it is checked.
40 #define DEBUG_QPOMDP_COMP 0
// Tested with `#if DEBUG_QPOMDP_COMPREC` further down, in front of the cout
// trace statements that print "QPOMDP::ComputeRecursively:".
41 #define DEBUG_QPOMDP_COMPREC 0
43 #if DEBUG_QPOMDP_COMPREC
61 #if QFunctionJAOH_useIndices
81 bool last_t = ( (time_step + 1) == GetPU()->GetHorizon()) ;
83 #if !QFunctionJAOH_useIndices
85 GetJointActionObservationHistory();
89 #if DEBUG_QPOMDP_COMPREC
90 cout <<
"QPOMDP::ComputeRecursively:"<< endl
91 <<
"time_step t="<<time_step <<
", prev. jaoh index jaoh^(t-1)="<<jaohI
92 <<
", prev. ja="<<lastJAI <<endl;
95 GetPU()->GetNrActions(),
96 GetPU()->GetNrObservations());
100 double discount = GetPU()->GetDiscount();
107 for(
Index newJOI=0; newJOI < GetPU()->GetNrJointObservations(); newJOI++)
110 cout <<
"looking for joint observationI="<<newJOI <<
" (=";
111 GetPU()->GetJointObservation(newJOI)->
Print();cout <<
")"<<endl;}
114 #if QFunctionJAOH_useIndices
115 new_jaohI = GetPU()->GetSuccessorJAOHI(jaohI, lastJAI, newJOI);
133 double Po_ba = GetPU()->GetJAOHProbs(newJB, new_jaohI, jaohI);
142 cout <<
"the new jaoh (for this joint observationI="<<newJOI<<
")\n"
143 "new jaohI="<< new_jaohI <<
144 endl<<
" new belief newJB="<< newJB->
SoftPrint() << endl; }
145 #if DEBUG_QPOMDP_COMPREC
148 double maxQ = -DBL_MAX;
149 for(
Index newJAI=0; newJAI < GetPU()->GetNrJointActions(); newJAI++)
152 double exp_imm_R = 0.0;
154 #if USE_BeliefIteratorGeneric
160 for(
Index sI=0; sI < GetPU()->GetNrStates(); sI++)
169 exp_imm_R += newJB->
Get(sI)*GetPU()->GetReward(sI, newJAI);
174 double exp_fut_R = 0.0;
176 #if QFunctionJAOH_useIndices
177 exp_fut_R = ComputeRecursively(time_step+1, new_jaohI, newJAI);
179 exp_fut_R = ComputeRecursively(time_step+1, new_jaoht, newJAI);
181 double Q = exp_imm_R + discount * exp_fut_R;
184 _m_QValues(new_jaohI,newJAI)=Q;
185 #if DEBUG_QPOMDP_COMPREC
189 #if DEBUG_QPOMDP_COMPREC
193 cout <<
"->max = " << maxQ<<endl;
204 cout <<
"QPOMDP::ComputeRecursively:"<< endl <<
"time_step t="<<
205 time_step <<
", prev. jaoh index jaoh^(t-1)="<<jaohI
206 <<
", prev. ja="<<lastJAI <<endl
207 <<
"FINISHED - v="<<v<<endl<<endl;