MultiAgentDecisionProcess  Release 0.2.1
AlphaVectorPOMDP.cpp
Go to the documentation of this file.
1 
28 #include "AlphaVectorPOMDP.h"
29 #include "BeliefValue.h"
30 #include <float.h>
31 #include <fstream>
33 #include "AlphaVector.h"
34 
35 using namespace std;
36 
37 //Default constructor
// NOTE(review): the declarator line (and any initializer list) is not visible
// in this listing; only the empty body below is. Confirm against the original file.
40 {
41 }
42 
43 //Destructor
// The body is empty: no explicit cleanup is performed here.
// NOTE(review): the declarator line is not visible in this listing.
45 {
46 }
47 
// Belief backup over all joint actions: builds one candidate vector per joint
// action and returns the candidate selected by `maxA` as a new AlphaVector.
//
// Only the tail of the signature is visible here; the return type, the function
// name and the declaration of `b` are missing from this listing.
//
// G is indexed as G[a][o]; every entry is used as a pointer to a VectorSet.
//
// For each joint action a in [0,nrA):
//   - Gab(a,s) starts as the immediate reward GetReward(s,a);
//   - for each o in [0,nrO), the vector of G[a][o] picked by
//     BeliefValue::GetMaximizingVectorIndex(b,...) is multiplied by
//     GetDiscount() and added to Gab(a,.).
// The row Gab(maxA,.) is then copied into the returned AlphaVector, which is
// tagged with action maxA.
//
// The whole computation is bracketed by the "BeliefBackupPOMDP" timer.
//
// NOTE(review): `nrO` and `maxA` are used below but their definitions are not
// visible (the embedded numbering skips 55 and 85). Presumably nrO is
// GetPU()->GetNrJointObservations() and maxA is the index of the Gab row that
// maximizes b's value -- confirm against the original file.
52  const GaoVectorSet &G) const
53 {
54  int nrA=GetPU()->GetNrJointActions(),
56  nrS=GetPU()->GetNrStates();
// VS points at the vector set G[a][o] currently being processed
57  VectorSet *VS;
58  // stores the index of the vector that maximizes b's value
59  int maximizingVectorI;
60 
61  StartTimer("BeliefBackupPOMDP");
62 
// one row per joint action, one column per state
63  VectorSet Gab(nrA,nrS);
64 
65  // compute the Gab vectors
66  for(GaoVectorSetIndex a=0;a!=nrA;a++)
67  {
68  // initialize each Gab vector with the immediate reward
69  for(int s=0;s!=nrS;s++)
70  Gab(a,s)=GetPU()->GetReward(s,a);
71 
72  for(GaoVectorSetIndex o=0;o!=nrO;o++)
73  {
74  VS=G[a][o];
75  maximizingVectorI=BeliefValue::GetMaximizingVectorIndex(b,*VS);
76 
77  // add discounted maximizing vector (3.16)
78  for(int s=0;s!=nrS;s++)
79  Gab(a,s)+=GetPU()->GetDiscount()*
80  (*VS)(maximizingVectorI,s);
81  }
82  }
83 
84  // find the maximizing Gab vector
// NOTE(review): the statement that defines maxA is missing at this point.
86 
87  // create the vector for b
88  AlphaVector newVector(nrS);
89  newVector.SetAction(maxA);
90  for(int s=0;s!=nrS;s++)
91  newVector.SetValue(Gab(maxA,s),s);
92 
93  StopTimer("BeliefBackupPOMDP");
94 
95  return(newVector);
96 }
97 
// Belief backup restricted to one given joint action `a`: builds the single
// candidate vector for that action and returns it as a new AlphaVector.
//
// Only the tail of the signature is visible here; the return type, the function
// name and the declaration of `b` are missing from this listing.
//
// a: the joint action to back up; it indexes G and GetReward and is stored on
//    the returned vector via SetAction.
// G: indexed as G[a][o]; every entry is used as a pointer to a VectorSet.
//
// Gab[s] starts as the immediate reward GetReward(s,a). For each joint
// observation o, the vector of G[a][o] picked by
// BeliefValue::GetMaximizingVectorIndex(b,...) is multiplied by GetDiscount()
// and added to Gab.
//
// The whole computation is bracketed by the "BeliefBackupPOMDP" timer.
99  Index a,
100  const GaoVectorSet &G) const
101 {
102  int nrO=GetPU()->GetNrJointObservations(),
103  nrS=GetPU()->GetNrStates();
// VS points at the vector set G[a][o] currently being processed
104  VectorSet *VS;
105  // stores the index of the vector that maximizes b's value
106  int maximizingVectorI;
107 
108  StartTimer("BeliefBackupPOMDP");
109 
// one entry per state
110  vector<double> Gab(nrS);
111 
112  // initialize Gab vector with the immediate reward
113  for(int s=0;s!=nrS;s++)
114  Gab[s]=GetPU()->GetReward(s,a);
115 
116  for(GaoVectorSetIndex o=0;o!=nrO;o++)
117  {
118  VS=G[a][o];
119  maximizingVectorI=BeliefValue::GetMaximizingVectorIndex(b,*VS);
120 
121  // add discounted maximizing vector (3.16)
122  for(int s=0;s!=nrS;s++)
123  Gab[s]+=GetPU()->GetDiscount()*
124  (*VS)(maximizingVectorI,s);
125  }
126 
127  // create the vector for b
128  AlphaVector newVector(nrS);
129  newVector.SetAction(a);
130  for(int s=0;s!=nrS;s++)
131  newVector.SetValue(Gab[s],s);
132 
133  StopTimer("BeliefBackupPOMDP");
134 
135  return(newVector);
136 }