MultiAgentDecisionProcess  Release 0.2.1
AlphaVectorBG.cpp
Go to the documentation of this file.
1 
28 #include "AlphaVectorBG.h"
29 #include <float.h>
30 #include <sys/times.h>
31 #include "JointPolicyPureVector.h"
35 #include "BeliefValue.h"
37 #include "AlphaVector.h"
39 
40 using namespace std;
41 
42 #define DEBUG_AlphaVectorBG_BeliefBackup 0
43 #define DEBUG_AlphaVectorBG_CheckBGIP_SolverExhaustive 0
44 
45 //Default constructor
48 {
    // NOTE(review): the constructor's signature line and parts of the
    // statement below are missing from this listing. What is visible:
    // _m_bgip is assigned here, and the result of
    // pu.GetReferred()->GetNrActions() is one of the arguments involved.
    // The destructor deletes _m_bgip, so this is presumably a heap
    // allocation owned by this object -- confirm against the full source.
49  _m_bgip =
51  pu.GetReferred()->GetNrActions(),
53 }
54 
55 //Destructor
57 {
    // Frees the object _m_bgip points to (the pointer is assigned in the
    // constructor).
58  delete _m_bgip;
59 }
60 
63  Index a,
64  const GaoVectorSet &G,
66  BGBackupType type) const
67 {
    // Computes one alpha vector for belief b and joint action a by
    // dispatching on `type` to one of the three backup implementations
    // below, and returns that vector. The dispatch is timed under the
    // label "BeliefBackupBG".
    //
    // NOTE(review): the first signature line(s) and the parameter declared
    // between G and type are missing from this listing; the body uses them
    // as `b` and `V`.

    // Optional wall-clock instrumentation, compiled in only when
    // DEBUG_AlphaVectorBG_BeliefBackup is non-zero.
68 #if DEBUG_AlphaVectorBG_BeliefBackup
69  tms timeStruct;
70  clock_t ticks_before, ticks_after;
71  ticks_before = times(&timeStruct);
72 #endif
73 
74  StartTimer("BeliefBackupBG");
75 
76  AlphaVector alpha(b.Size());
    // NOTE(review): the case labels of this switch are missing from the
    // listing; only the three branch bodies are visible. Only the last
    // branch forwards `type` to the callee.
77  switch(type)
78  {
80  alpha=BeliefBackupExhaustiveOnlyKeepMax(b,a,G,V);
81  break;
83  alpha=BeliefBackupExhaustiveStoreAll(b,a,G,V);
84  break;
88  alpha=BeliefBackupBGIP_Solver(b,a,G,V,type);
89  break;
90  }
91 
92  StopTimer("BeliefBackupBG");
93 
    // Report elapsed clock ticks, and the same figure converted to seconds
    // by dividing by sysconf(_SC_CLK_TCK).
94 #if DEBUG_AlphaVectorBG_BeliefBackup
95  ticks_after = times(&timeStruct);
96  cout << "AlphaVectorBG::BeliefBackup backuptype " << type
97  << " done in "
98  << ticks_after - ticks_before << " clock ticks, "
99  << static_cast<double>((ticks_after - ticks_before))
100  / sysconf(_SC_CLK_TCK)
101  << "s" << endl;
102 #endif
103 
    // Optional self-check for the BGIP_SOLVER_EXHAUSTIVE type: abort if the
    // result differs from a reference vector alphaOK.
    // NOTE(review): the line that computes alphaOK is missing from this
    // listing.
104 #if DEBUG_AlphaVectorBG_CheckBGIP_SolverExhaustive
105  if(type==BGIP_SOLVER_EXHAUSTIVE)
106  {
108  if(!alphaOK.Equal(alpha))
109  abort();
110  }
111 #endif
112 
113  return(alpha);
114 }
115 
116 vector<vector<bool> >
118 {
    // Builds a selection mask over the vectors in V, one row per joint
    // action: mask[a1][i] is true exactly when V[i].GetAction()==a1.
    // The result has GetPU()->GetNrJointActions() rows of V.size() entries.
    //
    // NOTE(review): the signature line is missing from this listing; the
    // name GetMask and the parameter V are taken from the call sites
    // (`GetMask(V)`) and from the body.
119  vector<vector<bool> > mask;
120  size_t nrInV=V.size();
121  for(unsigned int a1=0;a1!=GetPU()->GetNrJointActions();++a1)
122  {
    // Every entry starts out false; only vectors tagged with a1 are enabled.
123  vector<bool> maskA(nrInV,false);
124  for(unsigned int i=0;i!=nrInV;++i)
125  if(V[i].GetAction()==a1)
126  maskA[i]=true;
127 
128  mask.push_back(maskA);
129  }
130 
131  return(mask);
132 }
133 
136  Index a,
137  const GaoVectorSet &G,
139  BGBackupType type) const
140 {
    // Backup that delegates the search over Bayesian-game policies to a
    // BGIP solver:
    //  1. for every (o,a1) pair, store an index in bestG_oa1 and write the
    //     accompanying `value` into _m_bgip as the utility of (o,a1);
    //  2. solve the game with a solver selected by `type`;
    //  3. sum the selected rows of G[a][o] over o, following the solver's
    //     joint policy, and add the immediate reward.
    // Returns the resulting AlphaVector, tagged with action a and the
    // policy index jpol.GetIndex().
    //
    // NOTE(review): several lines are missing from this listing: the start
    // of the signature, the parameter between G and type (used as `V`), the
    // declaration of BGIP_Solver, most case labels and the statement that
    // obtains `jpol` from the solver.

141  // Equation numbers refer to PWLC_Dec-POMDPs_b.ps of Nov 7
142 
143  unsigned int nrA=GetPU()->GetNrJointActions(),
144  nrO=GetPU()->GetNrJointObservations(),
145  nrS=GetPU()->GetNrStates();
146  double gamma=GetPU()->GetDiscount();
147  double value;
148 
149  // the mask selects which vectors to consider:
150  // mask[jaI][vI] is true <-> vector vI specifies action jaI
151  vector<vector<bool> > mask=GetMask(V);
152 
153  boost::numeric::ublas::matrix<int> bestG_oa1(nrO,nrA);
154 
155  for(unsigned int o=0;o!=nrO;o++)
156  for(unsigned int a1=0;a1!=nrA;++a1)
157  {
    // NOTE(review): the callee name and leading arguments of this call are
    // missing from the listing; only its last two arguments are visible.
    // `value` is presumably an output of that call -- confirm.
158  bestG_oa1(o,a1)=
160  mask[a1],value);
161 
    // An index of -1 is treated as fatal here, so the later `!=-1` test in
    // the summation loop cannot fail on this code path.
162  if(bestG_oa1(o,a1)==-1)
163  abort();
164 
165  _m_bgip->SetUtility(o,a1,value);
    // Same probability 1.0/nrO for every o; this call is repeated with
    // identical arguments for each a1.
166  _m_bgip->SetProbability(o,1.0/nrO);
167  }
168 
    // Select the solver implementation. Only the brute-force branch body is
    // visible in this listing; unsupported types raise E.
170  switch(type)
171  {
173  BGIP_Solver=new BGIP_SolverBruteForceSearch<JointPolicyPureVector>(*_m_bgip);
174  break;
177  break;
180  break;
181  default:
182  throw(E("AlphaVectorBG::BeliefBackupBGIP_Solver type not supported"));
183  }
184 
185  BGIP_Solver->Solve();
187 
188  delete BGIP_Solver;
189 
    // best[s] = sum over o of row bestG_oa1(o, jpol(o)) of G[a][o], at s.
190  vector<double> best(nrS);
191  Index a1;
192  for(unsigned int s=0;s!=nrS;++s)
193  {
194  best[s]=0;
195  for(unsigned int o=0;o!=nrO;o++)
196  {
197  a1=jpol.GetJointActionIndex(o);
198  if(bestG_oa1(o,a1)!=-1)
199  best[s]+=(*G[a][o])(bestG_oa1(o,a1),s);
200  }
201  }
202 
203  double x;
204  // create the vector for b
205  AlphaVector newVector(nrS);
206  newVector.SetAction(a);
207  newVector.SetBetaI(jpol.GetIndex());
208  for(unsigned int s=0;s!=nrS;s++)
209  {
210  // (19)
    // immediate reward plus the discounted sum computed above
211  x=GetPU()->GetReward(s,a)+gamma*best[s];
212  newVector.SetValue(x,s);
213  }
214 
215  return(newVector);
216 }
217 
220  Index a,
221  const GaoVectorSet &G,
222  const
224  const
225 {
    // Exhaustive backup that walks through every value of `jpol` and keeps
    // only the best candidate seen so far (the running maximum of
    // b.InnerProduct(current)). Returns an AlphaVector tagged with action a
    // and with the iteration number of the winning policy as its BetaI.
    //
    // NOTE(review): the start of the signature, the rest of the two
    // trailing `const` lines and the declaration of `jpol` are missing from
    // this listing; the function name is inferred from the call site in
    // BeliefBackup.

226  // Equation numbers refer to PWLC_Dec-POMDPs_b.ps of Nov 7
227 
228  unsigned int nrA=GetPU()->GetNrJointActions(),
229  nrO=GetPU()->GetNrJointObservations(),
230  nrS=GetPU()->GetNrStates();
231  double gamma=GetPU()->GetDiscount();
232 
233  // the mask selects which vectors to consider:
234  // mask[jaI][vI] is true <-> vector vI specifies action jaI
235  vector<vector<bool> > mask=GetMask(V);
236 
237  // given a particular \beta
238  // for all o, g*_bao\beta = arg max_g^va'_ao \sum_s g^va'_ao(s) * b(s)
239  //
240  // i.e., we select the g^va'_ao from a set consistent with \beta
241  // (which means that \beta(o) = a') that maximizes the current belief.
242  //
243  // In this code, however, we do something slightly different namely:
244  //
245  // for all o, forall a'
246  // g*_baoa' = arg max_g^va'_ao \sum_s g^va'_ao(s) * b(s)
247  //
248  // I.e., we select the maximizing vector for each possible a'.
249  // we store this in bestG_oa1[o][a'].
250  boost::numeric::ublas::matrix<int> bestG_oa1(nrO,nrA);
251 
252  // (14)
253  for(unsigned int o=0;o!=nrO;o++)
254  for(unsigned int a1=0;a1!=nrA;++a1)
255  bestG_oa1(o,a1)=BeliefValue::GetMaximizingVectorIndex(b,*G[a][o],
256  mask[a1]);
257 
258  // now we create a jpol for the induced Bayesian game
259  // (i.e. \beta mentioned above, is the policy for a Bayesian game)
260  // and use that to combine the bestG_oa1 to g_a-vectors:
261  // g_ba\beta = \sum_o bestG_oa1[o][ \beta(o) ]
263  Index a1;
264  bool round=false;
265 
266  vector<double> current(nrS),best(nrS);
    // bestValue starts at -DBL_MAX; the strict `>` below means that on ties
    // the earliest candidate is kept.
267  double v,bestValue=-DBL_MAX;
268  int betaMaxI=-1;
    // k counts loop iterations, i.e. the position of the current policy in
    // the enumeration order.
269  int k=0;
270  // (16)
271  while (!round) // i.e. forall \beta
272  {
273  // first create the current g_baBeta-vector:
274  // current g_ba\beta = \sum_o bestG_oa1[o][ \beta(o) ]
275  for(unsigned int s=0;s!=nrS;++s)
276  {
277  current[s]=0;
278  for(unsigned int o=0;o!=nrO;o++)
279  {
280  a1=jpol.GetJointActionIndex(o);
281  /* following code implements:
282  * VectorSet V=*G[a][o];
283  * int i=bestG_oa1(o,a1);
284  * current[s]+=V(i,s); // +=V[i][s]; */
    // entries equal to -1 in bestG_oa1 are skipped and contribute nothing
285  if(bestG_oa1(o,a1)!=-1)
286  current[s]+=(*G[a][o])(bestG_oa1(o,a1),s);
287  }
288  }
289  // check if it is better...
290  v=b.InnerProduct(current);
291  if(v>bestValue)
292  {
293  bestValue=v;
294  best=current;
295  betaMaxI=k;
296  }
    // Advance to the next policy; the loop stops once ++jpol yields true.
297  round = ++(jpol);
298  k++;
299  }
300 
301  double x;
302  // create the vector for b
303  AlphaVector newVector(nrS);
304  newVector.SetAction(a);
305  newVector.SetBetaI(betaMaxI);
306  for(unsigned int s=0;s!=nrS;s++)
307  {
308  // (19)
    // immediate reward plus the discounted best candidate
309  x=GetPU()->GetReward(s,a)+gamma*best[s];
310  newVector.SetValue(x,s);
311  }
312 
313  return(newVector);
314 }
315 
318  Index a,
319  const GaoVectorSet &G,
320  const
322  const
323 {
    // Exhaustive backup that stores the candidate vector of every policy
    // visited (row k of g_baBeta) and only afterwards picks the row that
    // BeliefValue::GetMaximizingVectorIndex(b,g_baBeta) returns. Returns an
    // AlphaVector tagged with action a and that row index as its BetaI.
    //
    // NOTE(review): the start of the signature, the rest of the two
    // trailing `const` lines and the declaration of `jpol` are missing from
    // this listing; the function name is inferred from the call site in
    // BeliefBackup.

324  // Equation numbers refer to PWLC_Dec-POMDPs_b.ps of Nov 7
325 
326  unsigned int nrA=GetPU()->GetNrJointActions(),
327  nrO=GetPU()->GetNrJointObservations(),
328  nrS=GetPU()->GetNrStates();
329  double gamma=GetPU()->GetDiscount();
330 
331  // the mask selects which vectors to consider:
332  // mask[jaI][vI] is true <-> vector vI specifies action jaI
333  vector<vector<bool> > mask=GetMask(V);
334 
335  // given a particular \beta
336  // for all o, g*_bao\beta = arg max_g^va'_ao \sum_s g^va'_ao(s) * b(s)
337  //
338  // i.e., we select the g^va'_ao from a set consistent with \beta
339  // (which means that \beta(o) = a') that maximizes the current belief.
340  //
341  // In this code, however, we do something slightly different namely:
342  //
343  // for all o, forall a'
344  // g*_baoa' = arg max_g^va'_ao \sum_s g^va'_ao(s) * b(s)
345  //
346  // I.e., we select the maximizing vector for each possible a'.
347  // we store this in bestG_oa1[o][a'].
348  boost::numeric::ublas::matrix<int> bestG_oa1(nrO,nrA);
349 
350  // (14)
351  for(unsigned int o=0;o!=nrO;o++)
352  for(unsigned int a1=0;a1!=nrA;++a1)
353  bestG_oa1(o,a1)=BeliefValue::GetMaximizingVectorIndex(b,*G[a][o],
354  mask[a1]);
355 
356  // now we create a jpol for the induced Bayesian game
357  // (i.e. \beta mentioned above, is the policy for a Bayesian game)
358  // and use that to combine the bestG_oa1 to g_a-vectors:
359  // g_ba\beta = \sum_o bestG_oa1[o][ \beta(o) ]
361  Index a1;
362  bool round=false;
363 
    // One row per joint BG policy, one column per state.
    // NOTE(review): the `+=` accumulation below relies on clear() leaving
    // every entry at zero -- confirm for the VectorSet type.
364  VectorSet g_baBeta(_m_bgip->GetNrJointPolicies(),nrS);
365  g_baBeta.clear();
    // k is incremented at the top of the loop, so the first policy is row 0.
366  int k=-1;
367 
368  // (16)
369  while (!round) // i.e. forall \beta
370  {
371  k++;
372  for(unsigned int o=0;o!=nrO;o++)
373  for(unsigned int s=0;s!=nrS;++s)
374  {
375  a1=jpol.GetJointActionIndex(o);
376  /* following code implements:
377  * VectorSet V=*G[a][o];
378  * int i=bestG_oa1(o,a1);
379  * g_baBeta(k,s)+=V(i,s); // +=V[i][s]; */
    // entries equal to -1 in bestG_oa1 are skipped and contribute nothing
380  if(bestG_oa1(o,a1)!=-1)
381  g_baBeta(k,s)+=(*G[a][o])(bestG_oa1(o,a1),s);
382  }
    // Advance to the next policy; the loop stops once ++jpol yields true.
383  round = ++(jpol);
384  }
385 
386  int betaMaxI=BeliefValue::GetMaximizingVectorIndex(b,g_baBeta);
387 
388  double x;
389  // create the alpha-vector for b (i.e. add immediate reward)
390  AlphaVector newVector(nrS);
391  newVector.SetAction(a);
392  newVector.SetBetaI(betaMaxI);
393  for(unsigned int s=0;s!=nrS;s++)
394  {
395  // (19)
396  x=GetPU()->GetReward(s,a)+gamma*g_baBeta(betaMaxI,s);
397  newVector.SetValue(x,s);
398  }
399 
400  return(newVector);
401 }
402 
404 {
    // Maps bgBackupType to a printable name and returns it as a string.
    //
    // NOTE(review): the signature line and every case label except
    // EXHAUSTIVE_STOREALL are missing from this listing, so the function's
    // real name is not visible here.
405  string str;
    // The value -1 is handled before the switch and reported as "POMDP".
406  if(bgBackupType==-1)
407  return("POMDP");
408 
    // The switch has no default branch: a value matching none of the cases
    // leaves str empty, and the empty string is returned.
409  switch(bgBackupType)
410  {
412  str="EXHAUSTIVE_ONLYKEEPMAX";
413  break;
414  case EXHAUSTIVE_STOREALL:
415  str="EXHAUSTIVE_STOREALL";
416  break;
418  str="BGIP_SOLVER_EXHAUSTIVE";
419  break;
421  str="BGIP_SOLVER_ALTERNATINGMAXIMIZATION";
422  break;
424  str="BGIP_SOLVER_ALTERNATINGMAXIMIZATION_100STARTS";
425  break;
426  }
427  return(str);
428 }