42 #define DEBUG_QBG_COMP 0
43 #define DEBUG_QBG_COMPREC 0
65 #if QFunctionJAOH_useIndices
82 if( (time_step + 1) == GetPU()->GetHorizon())
85 #if !QFunctionJAOH_useIndices
87 GetJointActionObservationHistory();
92 cout <<
"QBG::ComputeRecursively:"<< endl <<
"time_step t="
93 << time_step <<
", prev. jaoh index jaoh^(t-1)="<<jaohI
94 <<
", prev. ja="<<lastJAI<<
"="<<
95 GetPU()->GetJointAction(lastJAI)->SoftPrint()
96 <<
", now starting the computation of the future reward."
106 GetPU()->GetNrActions(), GetPU()->GetNrObservations());
108 double discount = GetPU()->GetDiscount();
112 for(
Index newJOI=0; newJOI < GetPU()->GetNrJointObservations(); newJOI++)
115 cout <<
"looking for joint observationI="<<newJOI <<
" (=";
116 cout << GetPU()->GetJointObservation(newJOI)->SoftPrint();
121 #if QFunctionJAOH_useIndices
122 new_jaohI = GetPU()->GetSuccessorJAOHI(jaohI, lastJAI, newJOI);
135 double Po_ba = GetPU()->GetJAOHProbGivenPred(new_jaohI);
144 cout <<
"new belief newJB=";
150 for(
Index newJAI=0; newJAI < GetPU()->GetNrJointActions(); newJAI++)
153 double exp_imm_R = 0.0;
155 #if USE_BeliefIteratorGeneric
159 double r_s_ja = GetPU()->GetReward(it.
GetStateIndex(), newJAI);
161 exp_imm_R += r_s_ja * prob_s;
164 for(
Index sI=0; sI < GetPU()->GetNrStates(); sI++)
166 double r_s_ja = GetPU()->GetReward(sI, newJAI);
167 double prob_s = newJB.
Get(sI);
168 exp_imm_R += r_s_ja * prob_s;
172 cout <<
"Expected imm reward for new JA"<<
173 GetPU()->GetJointAction(newJAI)->SoftPrint()
174 <<
"exp_imm_R="<< exp_imm_R << endl
175 <<
"about to start recursively computing future reward..."<<
182 double exp_fut_R = 0.0;
184 #if QFunctionJAOH_useIndices
185 exp_fut_R = ComputeRecursively(time_step+1, new_jaohI, newJAI);
187 exp_fut_R = ComputeRecursively(time_step+1, new_jaoht, newJAI);
189 double Q = exp_imm_R + discount * exp_fut_R;
191 cout <<
"Returned to QBG::ComputeRecursively(ts="<<
192 time_step <<
", prev. jaohI="<<jaohI
193 <<
", prev. ja="<<lastJAI <<
")"<<endl
194 <<
"computed the future reward for "
195 << GetPU()->GetJointObservation(newJOI)->SoftPrint()
196 <<
"and "<< GetPU()->GetJointAction(newJAI)->SoftPrint()
198 cout <<
"Q = exp_imm_R + discount * exp_fut_R = "
200 << exp_imm_R <<
" + "
205 _m_QValues(new_jaohI,newJAI)=Q;
216 double v = bgs.
Solve();
218 cout <<
"QBG::ComputeRecursively:"<< endl <<
"time_step t="<<
219 time_step <<
", prev. jaoh index jaoh^(t-1)="<<jaohI
220 <<
", prev. ja="<<lastJAI <<endl
222 bg_time_step.
Print();
223 cout <<
"Expected reward under best policy for sub-BG="<<v<<endl<< endl;
232 throw (
"not implemented - should be copy/pasted from QPOMDP::ComputeNoCache() and then have minor adjustments");
249 if( (time_step + 1) ==
GetPU()->GetHorizon())
254 cout <<
"QBG::ComputeRecursively("<< endl <<
"time_step="<<
255 time_step <<
", jahI="<< jahI <<
", johI="<< johI
256 <<
", JB, lastJAI="<<lastJAI
257 <<
") called, with JB=";
269 GetPU()->GetNrActions(),
GetPU()->GetNrObservations());
290 GetSuccessor(lastJAI);
291 new_jahI = new_jaht->GetIndex();
293 GetSuccessor(newJOI);
302 double Po_ba = newJB.
Update(*
GetPU()->GetReferred(), lastJAI, newJOI);
305 cout <<
"new belief newJB=";
316 double exp_imm_R = 0.0;
323 double exp_fut_R = 0.0;
326 new_johI, newJB, newJAI);
327 double Q = exp_imm_R + discount * exp_fut_R;
335 cout <<
"QBG::ComputeRecursively for..."<<endl<<
337 time_step <<
", Index jahI="<< jahI <<
", Index johI="<< johI
338 <<
",const vector<double> JB, Index lastJAI="<<lastJAI
339 <<
") called, with JB=";
342 bg_time_step.
Print();
347 double v = bgs.
Solve();
349 cout <<
"Expected reward under best policy for sub-BG="<<v<<endl<< endl;