// Four debug-switch macros for this translation unit, all defined as 0 here.
// NOTE(review): no preprocessor conditional testing these macros is visible in
// this excerpt -- presumably each one gates a separate group of diagnostic
// output; confirm against the full file before relying on that.
39 #define DEBUG_BG4DECPOMDP1 0
40 #define DEBUG_BG4DECPOMDP2 0
41 #define DEBUG_BG4DECPOMDP3 0
42 #define DEBUG_BG4DECPOMDP4 0
61 pu->GetNrObservationHistoriesVector( pastJPol->GetDepth() )
68 ,_m_JBs( GetNrJointTypes() )
69 ,_m_areCachedImmediateRewards(false)
83 pu->GetNrObservationHistoriesVector(0)
88 ,_m_areCachedImmediateRewards(false)
96 const vector<size_t>& nrActions,
97 const vector<size_t>& nrTypes
106 ,_m_JBs( GetNrJointTypes() )
107 ,_m_areCachedImmediateRewards(false)
123 ,_m_qHeuristic(o._m_qHeuristic)
125 ,_m_areCachedImmediateRewards(o._m_areCachedImmediateRewards)
134 throw(
E(
"BayesianGameForDecPOMDPStage copy constructor not fully implemented yet.") );
142 std::vector< JointBeliefInterface* >::iterator it =
_m_JBs.begin();
143 std::vector< JointBeliefInterface* >::iterator last =
_m_JBs.end();
153 if (
this == &o)
return *
this;
162 throw E(
"BayesianGameForDecPOMDPStage::operator= not fully implemented yet...");
170 vector<Index>& firstOHtsI)
183 firstOHtsI.push_back(fI);
193 vector<size_t>& nrOHts,
194 vector<Index>& firstOHtsI
199 throw E(
"BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::OHIST_INDEX ");
201 throw E(
"BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::TYPE_INDEX ");
210 for(
Index type = 0; type < nrOHts[agentI]; type++)
212 Index ohI = type + firstOHtsI[agentI];
221 const vector<Index>& indTypes,
222 const vector<Index>& firstOHtsI,
226 vector<Index> indOHI = vector<Index>(indTypes);
228 vector< vector<Index> > indivObservations(ts,vector<Index>(
GetNrAgents()) );
231 indOHI[agentI] += firstOHtsI[agentI];
235 for(
Index tI=0; tI < ts; tI++)
236 indivObservations.at(tI).at(agentI) = obsArr[tI];
239 for(
Index tI=0; tI < ts; tI++)
241 indivObservations[tI] );
260 Index next_joI = joIs[t];
282 Index& jaohI,
double& PjaohI,
double& ExpR_0_prevTS_thisJAOH )
288 double CPjaohI = 1.0;
298 double ExpR_0_prevTS_thisJAOH_thisT = 0.0;
299 #if USE_BeliefIteratorGeneric
305 cout <<
"R(s="<<it.
GetStateIndex()<<
",ja="<<jaI_arr[tI]<<
")="<< R_si_ja <<
"\n";
313 cout <<
"R(s="<<sI<<
",ja="<<jaI_arr[tI]<<
")="<< R_si_ja <<
"\n";
314 ExpR_0_prevTS_thisJAOH_thisT += jb->
Get(sI) * R_si_ja;
317 ExpR_0_prevTS_thisJAOH += ExpR_0_prevTS_thisJAOH_thisT;
320 cout <<
"calculating expected reward R(oaHist,a) for tI="<<tI
322 cout << endl; cout <<
"R(b,a) (exp reward for jtI=" << jtI <<
323 ", tI="<<tI<<
") is "<< ExpR_0_prevTS_thisJAOH_thisT <<endl;
326 jaoht = jaoht->
GetSuccessor( jaI_arr[tI], joI_arr[tI] );
330 PjaohI = PjaohI * CPjaohI;
336 cout <<
"expected previous reward (up to ts-1) for (jtI=" << jtI <<
339 cout <<
" is "<< ExpR_0_prevTS_thisJAOH <<endl << endl;
481 cerr <<
"Warning Initialize called without past joint policy: aborting."
487 cout <<
">>>BayesianGameForDecPOMDPStage::Initialize() called for"
488 <<
" ts=" << ts << endl;
496 vector<Index> firstOHtsI;
510 for(
Index jtI = 0; jtI < nrJOHts; jtI++)
548 vector< Index > jaI_vec(&jaI_arr[0],&jaI_arr[ts]);
549 vector< Index > joI_vec(&joI_arr[0],&joI_arr[ts]);
597 _m_immR = vector< vector <double> >(nrJT, vector<double>(nrJA, 0.0) );
599 for(
Index jtI=0; jtI< nrJT; jtI++)
600 for(
Index jaI=0; jaI< nrJA; jaI++)
617 }
while (bit.
Next() );
627 for(
Index jt = 0; jt < nrJT ; jt++)
635 double discT = pow(discount , (
double)(
_m_t) );
636 double discounted_r = discT * r;
645 ss <<
"Bayesian Game for stage t="<<
_m_t<<
" of a Dec-POMDP"<<endl;