MultiAgentDecisionProcess  Release 0.2.1
QBG.cpp
#include "QBG.h"
#include "JointBelief.h"
#include "JointObservation.h"
#include "JointAction.h"
#include "BeliefIteratorGeneric.h"

using namespace std;

#define DEBUG_QBG 0
#define DEBUG_QBG_COMP 0
#define DEBUG_QBG_COMPREC 0

//Default constructor
QBG::QBG(const PlanningUnitDecPOMDPDiscrete* pu) :
    QFunctionForDecPOMDP(pu), //virtual base first
    QFunctionJAOH(pu)
{
}

//Destructor
QBG::~QBG()
{
}

//In general, we want to calculate
//  Q(jaoh', newJA) = R(jaoh', newJA) + exp. future R,
//where the exp. future R = ComputeRecursively(t+1, root, newJA)
//is given by the following function.
//
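//A sketch of the backup this recursion implements (notation assumed here,
//not taken from this file): let b be the joint belief induced by jaoh' and
//gamma the discount; then
//
//  Q(jaoh', ja) = R(b, ja)
//      + gamma * max_{beta} sum_{jo} P(jo | b, ja) * Q((jaoh', ja, jo), beta(jo))
//
//where beta = <beta_1,...,beta_n> ranges over the joint policies of a
//one-stage Bayesian game in which each agent i maps its individual
//observation to an action, beta_i(o_i) = a_i. The code below fills in P and
//the Q-values as the type distribution and utilities of such a BG, and lets
//the BG solver carry out the max.
//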
//this uses cached joint beliefs by default:
#if QFunctionJAOH_useIndices
double QBG::ComputeRecursively(size_t time_step, LIndex jaohI, Index lastJAI)
#else
double QBG::ComputeRecursively(size_t time_step, JointActionObservationHistoryTree* jaoht, Index lastJAI)
#endif
// time_step is the time-step of the BG that is constructed (and solved) in
// this function
//
// jah      joint action history at t=time_step-1
// joh      joint observation history at t=time_step-1
//          (together denoted jaoh, the joint action-observation history at t-1)
//
// jB       jaoh induces a probability distribution over states, the joint belief
//
// lastJA   the ja at t=time_step-1 (i.e., the JA taken at jaoh)
{
    bool last_t = false;
    if( (time_step + 1) == GetPU()->GetHorizon())
        last_t = true;

#if !QFunctionJAOH_useIndices
    JointActionObservationHistory* jaoh = jaoht->
        GetJointActionObservationHistory();
    Index jaohI = jaoht->GetIndex();
#endif

    if(DEBUG_QBG_COMPREC){
        cout << "QBG::ComputeRecursively:"<< endl << "time_step t="
            << time_step << ", prev. jaoh index jaoh^(t-1)="<<jaohI
            << ", prev. ja="<<lastJAI<<"="<<
            GetPU()->GetJointAction(lastJAI)->SoftPrint()
            << ", now starting the computation of the future reward."
            <<endl;
    }

    //We're going to construct a Bayesian game where the types are the
    //observations o^t for t=time_step (i.e., each joint observation is a
    //joint type).
    //These observations follow the history indicated by:
    //  jaoh, lastJAI (= jaoh^(t-1), ja^(t-1))
    BayesianGameIdenticalPayoff bg_time_step(GetPU()->GetNrAgents(),
        GetPU()->GetNrActions(), GetPU()->GetNrObservations());
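    //(In this BG each agent i thus has one type per individual observation
    // and keeps its usual action set; the type distribution and utilities
    // are filled in below via SetProbability and SetUtility.)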

    double discount = GetPU()->GetDiscount();

    //double v = 0.0; - we don't need to maintain this (we do not average over o)
    //for all joint observations newJO (jo^time_step)
    for(Index newJOI=0; newJOI < GetPU()->GetNrJointObservations(); newJOI++)
    {
        if(DEBUG_QBG_COMPREC){
            cout << "looking for joint observationI="<<newJOI <<" (=";
            cout << GetPU()->GetJointObservation(newJOI)->SoftPrint();
            cout <<")"<<endl;
        }

        Index new_jaohI;
#if QFunctionJAOH_useIndices
        new_jaohI = GetPU()->GetSuccessorJAOHI(jaohI, lastJAI, newJOI);
#else
        JointActionObservationHistoryTree* new_jaoht = jaoht->GetSuccessor(
            lastJAI, newJOI);
        new_jaohI = new_jaoht->GetIndex();
#endif

        //get the new joint belief at this time-step resulting from lastJAI,
        //newJOI... (the true prob. dist. over states for the actions and
        //observations as given by the history < jaoh, lastJA, newJOI >)
        JointBeliefInterface* new_jbi = GetPU()->GetJointBeliefInterface(new_jaohI);
        JointBeliefInterface& newJB = *new_jbi;
        double Po_ba = GetPU()->GetJAOHProbGivenPred(new_jaohI);

        //If the probability of this observation occurring is zero, the
        //belief is not defined and we do not have to consider this part
        //of the tree any further.
        if(Po_ba<PROB_PRECISION)
        {
            delete new_jbi; //release the belief before skipping this branch
            continue;
        }
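        //(For reference, Po_ba is the observation likelihood
        //   P(jo | b, ja) = sum_{s'} O(ja, s', jo) * sum_s T(s, ja, s') * b(s),
        // the normalization constant of the joint belief update.)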

        if(DEBUG_QBG_COMPREC){
            cout << "new belief newJB=";
            newJB.Print();
            cout << endl;
        }
        bg_time_step.SetProbability(newJOI, Po_ba);
        //for all joint actions newJA
        for(Index newJAI=0; newJAI < GetPU()->GetNrJointActions(); newJAI++)
        {
            //calculate R(jaoh',newJA) - the expected immediate reward at time_step
            double exp_imm_R = 0.0;

#if USE_BeliefIteratorGeneric
            BeliefIteratorGeneric it = newJB.GetIterator();
            do
            {
                double r_s_ja = GetPU()->GetReward(it.GetStateIndex(), newJAI);
                double prob_s = it.GetProbability();
                exp_imm_R += r_s_ja * prob_s;
            } while(it.Next());
#else
            for(Index sI=0; sI < GetPU()->GetNrStates(); sI++)
            {
                double r_s_ja = GetPU()->GetReward(sI, newJAI);
                double prob_s = newJB.Get(sI);
                exp_imm_R += r_s_ja * prob_s;
            }
#endif
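            //(Both branches compute the same expectation,
            //   exp_imm_R = R(b', newJA) = sum_s b'(s) * R(s, newJA)
            // with b' = newJB; the iterator variant can skip
            // zero-probability states in sparse beliefs.)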
            if(DEBUG_QBG_COMPREC){
                cout << "Expected imm reward for new JA "<<
                    GetPU()->GetJointAction(newJAI)->SoftPrint()
                    << ": exp_imm_R="<< exp_imm_R << endl
                    << "about to start recursively computing future reward..."<<
                    endl;
            }

            //calculate Q(jaoh', newJA) = R(jaoh',newJA) + exp. future R,
            //where the exp. future R = ComputeRecursively(t+1, jaoh', newJA)
            double exp_fut_R = 0.0;
            if(!last_t)
#if QFunctionJAOH_useIndices
                exp_fut_R = ComputeRecursively(time_step+1, new_jaohI, newJAI);
#else
                exp_fut_R = ComputeRecursively(time_step+1, new_jaoht, newJAI);
#endif
            double Q = exp_imm_R + discount * exp_fut_R;
            if(DEBUG_QBG_COMPREC){
                cout << "Returned to QBG::ComputeRecursively(ts="<<
                    time_step << ", prev. jaohI="<<jaohI
                    << ", prev. ja="<<lastJAI <<")"<<endl
                    << "computed the future reward for "
                    << GetPU()->GetJointObservation(newJOI)->SoftPrint()
                    << " and "<< GetPU()->GetJointAction(newJAI)->SoftPrint()
                    << endl;
                cout << "Q = exp_imm_R + discount * exp_fut_R = "
                    << Q << " = "
                    << exp_imm_R << " + "
                    << discount << " * "
                    << exp_fut_R
                    << endl;
            }
            _m_QValues(new_jaohI,newJAI)=Q;
            bg_time_step.SetUtility(newJOI, newJAI, Q);
        }//end for newJAI

        //joint belief no longer needed:
        delete new_jbi;

    }//end for newJOI

    //solve this Bayesian game
    BGIP_SolverBruteForceSearch bgs(bg_time_step);
    double v = bgs.Solve();
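    //(Solve() computes v = max_{beta} sum_{jo} P(jo) * u(jo, beta(jo)) over
    // all joint BG policies beta, with P and u as set above via
    // SetProbability/SetUtility - the maximization in the QBG backup.)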
    if(DEBUG_QBG_COMPREC){
        cout << "QBG::ComputeRecursively:"<< endl << "time_step t="<<
            time_step << ", prev. jaoh index jaoh^(t-1)="<<jaohI
            << ", prev. ja="<<lastJAI <<endl
            <<"constructed BG:";
        bg_time_step.Print();
        cout << "Expected reward under best policy for sub-BG="<<v<<endl<< endl;
    }

    return( v );
}
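
/* A minimal usage sketch (hypothetical driver code, not part of this file;
   it assumes QBG inherits Compute() and GetQ(jaohI, jaI) from the
   QFunctionJAOH interface and that pu points to a planning unit for some
   Dec-POMDP problem):

       QBG qbg(pu);
       qbg.Compute();                   // fills _m_QValues top-down
       double q = qbg.GetQ(jaohI, jaI); // Q-value of joint action jaI at jaohI
*/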


void QBG::ComputeNoCache()
{
    throw ("not implemented - should be copy/pasted from QPOMDP::ComputeNoCache() and then have minor adjustments");
}

double QBG::ComputeRecursivelyNoCache(size_t time_step, Index jahI,
    Index johI, const JointBelief &JB, Index lastJAI)
// time_step is the time-step of the BG that is constructed (and solved) in
// this function
//
// jah      joint action history at t=time_step-1
// joh      joint observation history at t=time_step-1
//          (together denoted jaoh, the joint action-observation history at t-1)
//
// jB       jaoh induces a probability distribution over states, the joint belief
//
// lastJA   the ja at t=time_step-1 (i.e., the JA taken at jaoh)
{
    bool last_t = false;
    if( (time_step + 1) == GetPU()->GetHorizon())
        last_t = true;

    if(DEBUG_QBG_COMPREC){
        cout << "QBG::ComputeRecursivelyNoCache("<< endl << "time_step="<<
            time_step << ", jahI="<< jahI <<", johI="<< johI
            <<", JB, lastJAI="<<lastJAI
            <<") called, with JB=";
        JB.Print();
        cout <<endl;
    }

    //We're going to construct a Bayesian game where the types are the
    //observations o^t for t=time_step (i.e., each joint observation is a
    //joint type).
    //These observations follow the history indicated by:
    //  <jahI,johI>=jaoh, lastJAI (= jaoh^(t-1), ja^(t-1))
    BayesianGameIdenticalPayoff bg_time_step(GetPU()->GetNrAgents(),
        GetPU()->GetNrActions(), GetPU()->GetNrObservations());

    double discount = GetPU()->GetDiscount();

    //for all joint observations newJO (jo^time_step)
    for(Index newJOI=0; newJOI < GetPU()->GetNrJointObservations(); newJOI++)
    {
        if(DEBUG_QBG_COMPREC){
            cout << "looking for joint observationI="<<newJOI <<" (=";
            GetPU()->GetJointObservation(newJOI)->Print();
            cout <<")"<<endl;
        }

        JointActionHistoryTree* new_jaht;
        Index new_jahI = 0;
        JointObservationHistoryTree* new_joht;
        Index new_johI = 0;
        if(!last_t)
        {
            //jaoh' = jaoh + lastJA + newJO
            new_jaht = GetPU()->GetJointActionHistoryTree(jahI)->
                GetSuccessor(lastJAI);
            new_jahI = new_jaht->GetIndex();
            new_joht = GetPU()->GetJointObservationHistoryTree(johI)->
                GetSuccessor(newJOI);
            new_johI = new_joht->GetIndex();
        }

        //calculate the new joint belief at this time-step resulting from
        //lastJAI, newJOI... (this is the true prob. dist. over states for
        //the actions and observations as given by the history
        //< (jahI,johI), lastJA, newJOI >)
        JointBelief newJB=JB;
        double Po_ba = newJB.Update(*GetPU()->GetReferred(), lastJAI, newJOI);
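        //(Update performs the usual Bayes recursion - a sketch:
        //   b'(s') = O(lastJA, s', newJO) * sum_s T(s, lastJA, s') * b(s) / Po_ba,
        // where the returned Po_ba = P(newJO | b, lastJA) is the
        // normalization constant.)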

        if(DEBUG_QBG_COMPREC){
            cout << "new belief newJB=";
            newJB.Print();
            cout << endl;
        }

        bg_time_step.SetProbability(newJOI, Po_ba);

        //for all joint actions newJA
        for(Index newJAI=0; newJAI < GetPU()->GetNrJointActions(); newJAI++)
        {
            //calculate R(jaoh',newJA) - the expected immediate reward at time_step
            double exp_imm_R = 0.0;
            for(Index sI=0; sI < GetPU()->GetNrStates(); sI++)
                exp_imm_R += newJB[sI] * GetPU()->GetReward(sI, newJAI);

            //calculate Q(jaoh', newJA) = R(jaoh',newJA) + exp. future R,
            //where the exp. future R =
            //  ComputeRecursivelyNoCache(time_step+1, jah', joh', newJA)
            double exp_fut_R = 0.0;
            if(!last_t)
                exp_fut_R = ComputeRecursivelyNoCache(time_step+1, new_jahI,
                    new_johI, newJB, newJAI);
            double Q = exp_imm_R + discount * exp_fut_R;
            //add the Q value to the BayesianGame
            bg_time_step.SetUtility(newJOI, newJAI, Q);
        }//end for newJAI
    }//end for newJOI

    if(DEBUG_QBG_COMPREC)
    {
        cout << "QBG::ComputeRecursivelyNoCache("<<endl<<
            "time_step="<<
            time_step << ", Index jahI="<< jahI <<", Index johI="<< johI
            <<", const JointBelief &JB, Index lastJAI="<<lastJAI
            <<") called, with JB=";
        JB.Print();
        cout <<endl;
        bg_time_step.Print();
    }

    //solve this Bayesian game
    BGIP_SolverBruteForceSearch bgs(bg_time_step);
    double v = bgs.Solve();
    if(DEBUG_QBG_COMPREC)
        cout << "Expected reward under best policy for sub-BG="<<v<<endl<< endl;

    //return the expected reward under the best policy.
    return( v );
}