MultiAgentDecisionProcess  Release 0.2.1
QPOMDP.cpp
Go to the documentation of this file.
1 
28 #include "QPOMDP.h"
32 #include "JointObservation.h"
33 #include "JointBeliefInterface.h"
34 #include "BeliefIteratorGeneric.h"
35 #include <float.h>
36 
37 using namespace std;
38 
39 #define DEBUG_QPOMDP 0
40 #define DEBUG_QPOMDP_COMP 0
41 #define DEBUG_QPOMDP_COMPREC 0
42 
43 #if DEBUG_QPOMDP_COMPREC
45 #endif
46 
47 //Constructor: forwards pu to the QFunctionForDecPOMDP base; the body is empty.
// NOTE(review): listing lines 48 and 51 (the signature and whatever follows the
// first initializer) are absent from this extract -- confirm against the
// original file before editing.
49  :
50  QFunctionForDecPOMDP(pu), //virtual base first
52 {
53 }
54 
55 //Destructor: empty body.
// NOTE(review): the signature (listing line 56) is absent from this extract.
57 {
58 }
59 
60 //this uses cached joint beliefs per def.:
//
// QPOMDP::ComputeRecursively -- recursively evaluates the part of the history
// tree below a given joint action-observation history and returns its value v.
//
// For every joint observation index newJOI the code obtains the successor
// history index new_jaohI and the probability Po_ba of reaching it; successors
// with Po_ba < PROB_PRECISION are skipped. For each joint action newJAI it
// then computes
//     Q = exp_imm_R + discount * exp_fut_R
// where exp_imm_R is the reward averaged over newJB and exp_fut_R is the
// result of the recursive call for time_step+1 (0 at the last stage). Q is
// stored in _m_QValues(new_jaohI, newJAI) and the largest Q is kept as maxQ.
// The returned value is the sum, over the observations that were not skipped,
// of Po_ba * maxQ.
//
// Two signatures are selected by QFunctionJAOH_useIndices: one receives the
// history as an index (jaohI); the other uses a pointer jaoht from which the
// index is obtained through GetIndex().
// NOTE(review): listing line 67, presumably the jaoht parameter declaration,
// is absent from this extract -- confirm against the original file.
61 #if QFunctionJAOH_useIndices
62 double QPOMDP::ComputeRecursively(size_t time_step,
63  LIndex jaohI,
64  Index lastJAI)
65 #else
66 double QPOMDP::ComputeRecursively(size_t time_step,
68  Index lastJAI)
69 #endif
70 // time_step is the time-step of the BG that is constructed (and solved) in
71 // this function
72 //
73 // jah joint action history at t=time_step-1
74 // joh joint obser. history at t=time_step-1
75 // (together they are denoted jaoh, the joint act-obs hist. at t-1 )
76 //
77 // jB jaoh induces a probability over states, the joint belief
78 //
79 // lastJA ja at t=time_step-1 (so the JA taken at jaoh )
80 {
// last_t is true when time_step+1 equals the horizon; in that case no
// recursive call is made below and the future reward stays 0.
81  bool last_t = ( (time_step + 1) == GetPU()->GetHorizon()) ;
82 
83 #if !QFunctionJAOH_useIndices
84  JointActionObservationHistory* jaoh = jaoht->
85  GetJointActionObservationHistory();
86  Index jaohI = jaoht->GetIndex();
87 #endif
88 
89 #if DEBUG_QPOMDP_COMPREC
90  cout << "QPOMDP::ComputeRecursively:"<< endl
91  << "time_step t="<<time_step << ", prev. jaoh index jaoh^(t-1)="<<jaohI
92  << ", prev. ja="<<lastJAI <<endl;
93 
// debug-only container: collects Po_ba and the Q-values so they can be printed
94  BayesianGameIdenticalPayoff bg_time_step(GetPU()->GetNrAgents(),
95  GetPU()->GetNrActions(),
96  GetPU()->GetNrObservations());
97 #endif
98 
// v accumulates the return value: sum over newJOI of Po_ba * maxQ
99  double v = 0.0;
100  double discount = GetPU()->GetDiscount();
101 //--------------------------------------------
102 //XXX TODO discuss how this should be changed for sparse beliefs
103 //such that the line below doesn't crash.
104 //-------------------------------------------
// newJB is allocated once here, reused for every newJOI, and deleted after the
// observation loop.
105  JointBeliefInterface* newJB = GetPU()->GetNewJointBeliefInterface();
106  //for all jointobservations newJO (jo^time_step)
107  for(Index newJOI=0; newJOI < GetPU()->GetNrJointObservations(); newJOI++)
108  {
// NOTE(review): listing line 109 is absent from this extract; the '}' that
// ends the debug print below has no visible opening brace.
110  cout << "looking for joint observationI="<<newJOI <<" (=";
111  GetPU()->GetJointObservation(newJOI)->Print();cout <<")"<<endl;}
112 
113  Index new_jaohI;
114 #if QFunctionJAOH_useIndices
115  new_jaohI = GetPU()->GetSuccessorJAOHI(jaohI, lastJAI, newJOI);
116 #else
// NOTE(review): listing line 117 is absent from this extract; it presumably
// declares new_jaoht, which is used on the following lines.
118  lastJAI,newJOI);
119  new_jaohI = new_jaoht->GetIndex();
120 #endif
121 
122  //get the new joint belief at this time-step resulting from lastJAI,
123  //newJOI...(the true prob. dist over states for the actions and obser-
124  //vations as given by the history < jaoh, lastJA, newJOI > )
125 
126  //double Po_ba = GetPU()->GetJBeliefConditionalProb(new_jaohI);
127 
128 
129 //--------------------------------------------
130 //TODO XXX
131 //why does it not crash here?!
132 //-------------------------------------------
// presumably GetJAOHProbs also writes the successor belief into newJB, since
// newJB is read further down -- TODO confirm against its declaration.
133  double Po_ba = GetPU()->GetJAOHProbs(newJB, new_jaohI, jaohI);
134 
135  // if the probability of this observation occurring is (numerically) zero,
136  // the belief is not defined, and we don't have to consider this
137  // part of the tree anymore
138  if(Po_ba<PROB_PRECISION)
139  continue;
140 
// NOTE(review): listing line 141 is absent from this extract; the '}' that
// ends the debug print below has no visible opening brace.
142  cout << "the new jaoh (for this joint observationI="<<newJOI<<")\n"
143  "new jaohI="<< new_jaohI <<
144  endl<<" new belief newJB="<< newJB->SoftPrint() << endl; }
145 #if DEBUG_QPOMDP_COMPREC
146  bg_time_step.SetProbability(newJOI, Po_ba);
147 #endif
// maxQ starts at the lowest finite double so the first Q always replaces it
148  double maxQ = -DBL_MAX;
149  for(Index newJAI=0; newJAI < GetPU()->GetNrJointActions(); newJAI++)
150  {
151  //calculate R(jaoh',newJA) - expected immediate reward for time_step
152  double exp_imm_R = 0.0;
153 
154 #if USE_BeliefIteratorGeneric
// NOTE(review): listing line 155 is absent from this extract; it presumably
// declares the iterator `it` used by the do/while loop below.
156  do exp_imm_R += it.GetProbability() *
157  GetPU()->GetReward(it.GetStateIndex(), newJAI);
158  while(it.Next());
159 #else
160  for(Index sI=0; sI < GetPU()->GetNrStates(); sI++)
161 //--------------------------------------------
162 //TODO XXX
163 //the following line crashes with sparse beliefs, because sparse beliefs
164 //are initialized with
165 // return (new JointBeliefSparse() );
166 //( rather than new JointBelief(GetNrStates()) )
167 //Therefore the Get() request here can fail:
168 //--------------------------------------------
169  exp_imm_R += newJB->Get(sI)*GetPU()->GetReward(sI, newJAI);
170 #endif
171 
172  //calculate Q(jaoh', newJA) = R(jaoh',newJA) + exp. future R
173  // and the exp. future R = ComputeRecursively(t+1, jaoh', newJA)
174  double exp_fut_R = 0.0;
175  if(!last_t)
176 #if QFunctionJAOH_useIndices
177  exp_fut_R = ComputeRecursively(time_step+1, new_jaohI, newJAI);
178 #else
179  exp_fut_R = ComputeRecursively(time_step+1, new_jaoht, newJAI);
180 #endif
181  double Q = exp_imm_R + discount * exp_fut_R;
182  if(Q > maxQ)
183  maxQ = Q;
// store the Q-value for the successor history / joint action pair
184  _m_QValues(new_jaohI,newJAI)=Q;
185 #if DEBUG_QPOMDP_COMPREC
186  bg_time_step.SetUtility(newJOI, newJAI, Q);
187 #endif
188  }//end for newJAI
189 #if DEBUG_QPOMDP_COMPREC
190  {
191  //BG used to store and then print
192  bg_time_step.PrintUtilForJointType(newJOI);
193  cout << "->max = " << maxQ<<endl;
194  }
195 #endif
196  // v = v + P(jo|b,a) * max_a Q(b'_jo,a)
197  v += Po_ba * maxQ;
198  }//end for newJOI
199 
200  delete newJB;
201 
// NOTE(review): listing line 202 is absent from this extract; it presumably
// holds the condition guarding the debug block below.
203  {
204  cout << "QPOMDP::ComputeRecursively:"<< endl << "time_step t="<<
205  time_step << ", prev. jaoh index jaoh^(t-1)="<<jaohI
206  << ", prev. ja="<<lastJAI <<endl
207  <<"FINISHED - v="<<v<<endl<<endl;
208  }
209  return( v );
210 }