MultiAgentDecisionProcess  Release 0.2.1
IndividualBeliefJESP.cpp
Go to the documentation of this file.
1 
28 #include "IndividualBeliefJESP.h"
29 
30 #include <float.h>
31 #include <string>
32 //Necessary as header file contains a forward declaration:
34 #include "JointPolicyPureVector.h"
35 
36 using namespace std;
37 
//Compile-time switch, currently set to 0.
//NOTE(review): presumably enables a sanity check of the belief after every
//update when non-zero; it is not referenced in the code shown here -- confirm
//where it is tested.
38 #define IndividualBeliefJESP_doSanityCheckAfterEveryUpdate 0
39 
41  const PlanningUnitMADPDiscrete& pu)
42  :
43  Belief(0),
44  _m_pumadp(&pu),
45  _m_stage(stage),
46  _m_agentI(agentI)
47 {
    //Builds a belief with one entry per pair
    //  <state, joint observation history of the other agents>,
    //where "the other agents" are all agents j != agentI and the number of
    //histories per agent is GetNrObservationHistories(j, stage).
    //All entries of _m_b start at 0.0.
    //
    //Only the address of pu is stored (_m_pumadp), so pu has to outlive this
    //object.
    //NOTE(review): Belief(0) presumably constructs an empty base belief;
    //the real size is set when _m_b is assigned below -- confirm.
48  //compute size of this belief:
49  size_t nrS = _m_pumadp->GetNrStates();
51  size_t nrJOH_others = 1;
    //NOTE(review): this loop reads _m_nrAgents, which is not in the
    //initialiser list above; it has to be assigned before this point --
    //confirm.
52  for(Index j=0; j < _m_nrAgents; j++)
53  {
54  if(j == agentI)
55  continue;
56  //else: agent j is one of the others; remember its index and the number
    //of observation histories it has at this stage.
57  _m_others.push_back(j);
58  size_t nrOH_j = _m_pumadp->GetNrObservationHistories(j, stage);
59  nrJOH_others *= nrOH_j;
60  _m_nrOH_others.push_back(nrOH_j);
61  }
    //one belief entry per <state, joint history of others> combination
62  size_t size = nrS * nrJOH_others;
63  _m_b = vector<double>(size,0.0);
    //_m_sizeVec = <nrS, nrJOH_others>: the dimensions of the augmented state
64  _m_sizeVec.push_back(nrS);
65  _m_sizeVec.push_back(nrJOH_others);
67 
68  //we will use
69  // IndexTools::JointToIndividualIndicesStepSize(Index jointI,
70  // const vector<size_t> &step_size, size_t vec_size )
71  //and
72  // IndexTools::IndividualToJointIndicesStepSize (const std::vector< Index >
73  // &indices, const std::vector< size_t > &step_size)
74  //
75  // to do the conversions between JOHI_j <-> <OHI_1,...,OHI_nrA >
76  //
77  // so we cache the step_size
    //NOTE(review): the cached step sizes are presumably _m_stepsizeSJOH and
    //_m_stepsizeJOHOH, which the index-conversion methods read and the
    //destructor frees -- confirm that both are assigned in this constructor.
79 }
80 
81 //Destructor: releases the two step-size arrays held by this object.
//NOTE(review): delete [] is only valid if both pointers were obtained from
//new[] (or are null); the allocation is not visible here -- confirm.
//NOTE(review): because the arrays are owned through raw pointers, a
//compiler-generated copy of this object would free them twice -- verify that
//copying is either disabled or performs a deep copy.
83 {
84  delete [] _m_stepsizeJOHOH;
85  delete [] _m_stepsizeSJOH;
86 }
87 
90 {
    //Assignment is not supported yet: self-assignment returns *this
    //unchanged, every other call throws E with the message below.
91  if (this == &o) return *this; // Gracefully handle self assignment
92 
93  //TODO copy elements...
94  throw E("IndividualBeliefJESP assignment operator not implemented yet");
96 
    //Not reached: the unconditional throw above always leaves the function
    //first. Kept so the function still has a return statement once the TODO
    //is implemented.
97  return(*this);
98 }
99 
100 vector<Index>
102 {
    //Returns one observation history index per other agent, in the order of
    //_m_others. Each returned index includes the stage offset
    //GetFirstObservationHistoryIndex(_m_others[j], _m_stage).
103  //get <sI, JOHI_others>
104  vector<Index> v1 =
106  //get the individual observation history indices of the others:
    //v1.at(1) is the JOHI_others component; it is split into
    //_m_nrAgents-1 per-agent indices using the step sizes _m_stepsizeJOHOH.
107  vector<Index> withinStageOHIndices_o =
109  v1.at(1), _m_stepsizeJOHOH, _m_nrAgents-1 );
110  //add the offset for the stage to the indices:
111  for(Index j=0; j < withinStageOHIndices_o.size(); j++)
112  withinStageOHIndices_o[j] += _m_pumadp->
113  GetFirstObservationHistoryIndex( _m_others[j], _m_stage);
114  return(withinStageOHIndices_o);
115 }
116 Index
118  const vector<Index>& oHist_others) const
119 {
    //Maps a state index sI and the observation history indices of the other
    //agents (oHist_others[j] belongs to agent _m_others[j]) to a single
    //index, using the step sizes _m_stepsizeJOHOH and _m_stepsizeSJOH.
    //
    //_m_others[j] is read without a bounds check for every
    //j < oHist_others.size(), so oHist_others must not have more entries
    //than _m_others.
120  //get indices without the offset for the stage:
    //NOTE(review): if Index is an unsigned type, an entry that is smaller
    //than the stage offset would wrap around instead of going negative --
    //confirm that callers only pass histories of stage _m_stage.
121  vector<Index> withinStageOHIndices_o = vector<Index>(oHist_others.size());
122  for(Index j=0; j < withinStageOHIndices_o.size(); j++)
123  withinStageOHIndices_o[j] = oHist_others[j] - _m_pumadp->
124  GetFirstObservationHistoryIndex( _m_others[j], _m_stage);
125 
    //v = <sI, joint index of the others' within-stage histories>
126  vector<Index> v;
127  v.push_back(sI);
128  v.push_back(
129  IndexTools::IndividualToJointIndicesStepSize(withinStageOHIndices_o,
130  _m_stepsizeJOHOH ) );
132  _m_stepsizeSJOH) );
133 }
134 
136  const IndividualBeliefJESP& b_prev,
137  Index lastAI, Index newOI, const JointPolicyPureVector* jpol)
138 {
    //Overwrites this belief with the belief that follows b_prev when agent
    //_m_agentI took action lastAI and received observation newOI, while every
    //other agent acted according to jpol on its own observation history.
    //
    // b_prev  previous belief; iterated over all its entries
    // lastAI  individual action index of agent _m_agentI
    // newOI   individual observation index of agent _m_agentI
    // jpol    policy queried with GetActionIndex(agent, observation history)
    //         for each other agent; dereferenced without a null check
    //
    //Returns Po_ba, the sum of all unnormalised entries, i.e. the value the
    //belief is divided by at the end.
139  //set all probs of this belief to 0
140  _m_b = vector<double>(_m_b.size(), 0.0);
141 
142  double Po_ba = 0.0; // P(o|b,a) with o=newOI and a=lastAI
    //NOTE(review): newJB_unnorm does not appear to be used below.
143  vector<double> newJB_unnorm;
144 
    //collect, for every agent except _m_agentI, its index and its number of
    //observations; nrJO_others is the product of those numbers.
145  size_t nrJO_others = 1;
146  vector<Index> otherAgentIndices;
147  vector<size_t> nrO_others;
148  for(Index j=0; j < _m_pumadp->GetNrAgents(); j++)
149  {
150  if(j == _m_agentI)
151  continue;
152  //else
153  Index nrO_j = _m_pumadp->GetNrObservations(j);
154  nrJO_others *= nrO_j;
155  nrO_others.push_back(nrO_j);
156  otherAgentIndices.push_back(j);
157  }
158 
159  for(Index prev_eI=0; prev_eI < b_prev.Size(); prev_eI++)
160  {
161  Index prev_sI = b_prev.GetStateIndex(prev_eI);
162  vector<Index> prev_oHist_others = b_prev.
    //individual actions: ours is lastAI, the others' come from jpol
164  vector<Index> actions(_m_nrAgents);
165  actions.at(_m_agentI) = lastAI;
166  for(Index j=0; j < otherAgentIndices.size(); j++)
167  {
    //j is the position within the "others" vectors, ag_j the agent index
168  Index ag_j = otherAgentIndices[j];
169  Index prev_oHist_j = prev_oHist_others[j];//not ag_j!!!
170  Index act_j = jpol->GetActionIndex(ag_j, prev_oHist_j);
171  actions.at(ag_j) = act_j;
172  }
174 
175  for(Index next_sI=0; next_sI < _m_pumadp->GetNrStates(); next_sI++)
176  {
177  //note we do not loop over all possible next_eI, because *a lot*
178  //of transitions will be 0 ( if next_oHistJ != (prev_oHistJ, oJ) )
179  //
180  //rather we now loop over all possible oJ (observations of others)
181  double Ps_as = _m_pumadp->
182  GetTransitionProbability(prev_sI, jaI, next_sI);
183 
184  for(Index JO_o=0; JO_o < nrJO_others; JO_o++)
185  {
    //individual observations: ours is newOI, the others' are decoded
    //from the joint index JO_o
186  vector<Index> oIs(_m_nrAgents);
187  oIs.at(_m_agentI) = newOI; // `our' observation is fixed
188  vector<Index> oIs_others = IndexTools::JointToIndividualIndices(
189  JO_o, nrO_others);
190  for(Index j=0; j < otherAgentIndices.size(); j++)
191  oIs.at( otherAgentIndices.at(j) ) = oIs_others.at(j);
192 
194  //compute P(joI | jaI,s')
195  double Po_as = _m_pumadp->GetObservationProbability(
196  jaI, next_sI, joI);
197 
198  //prob of next_eI = <next_sI, (oHist_others, oIs_others)>
199  //AND prev_sI can now be computed.
200  //
201  //first, however, lets find the index, next_eI, for
202  // <next_sI, (oHist_others, oIs_others)>
203 
204  // first find the next_oHist_others indices.
205  vector<Index> next_oHist_others;
206  for(Index j=0; j < otherAgentIndices.size(); j++)
207  {
    //NOTE(review): the first argument here is j, the position within
    //otherAgentIndices, whereas GetActionIndex above is given the agent
    //index ag_j = otherAgentIndices[j]. The two differ for every other
    //agent whose index is greater than _m_agentI. If GetSuccessorOHI
    //expects an agent index this should be otherAgentIndices[j] --
    //confirm against its declaration.
208  Index next_oHist_j = _m_pumadp->GetSuccessorOHI(j,
209  prev_oHist_others[j], oIs_others[j] );
210  next_oHist_others.push_back(next_oHist_j);
211  }
212  Index next_eI = GetAugmentedStateIndex(next_sI,
213  next_oHist_others);
214 
215  //p += P(oi | ai, <s',oH'>) * P(<s',oH'>|<s,oH>,ai) * b(<s,oH>)
216  // = P(oi, <s',oH'>|<s,oH>,ai) * b(<s,oH>)
217  // = P(s', jo | s, ja) b(<s,oH>) //ja=<ai, aj>,aj=pol(oHj)
218  // = P(jo|ja,s')*P(s'|s,ja) * b(<s,oH>)
219  double Pso_sa = Po_as * Ps_as * b_prev.Get(prev_eI);
220  _m_b.at(next_eI) += Pso_sa;
221  Po_ba += Pso_sa; //running sum of P(oi|b,ai)
222  }
223  }
224  }
    //normalise so that the entries sum to 1
    //NOTE(review): there is no guard for Po_ba == 0.0. In that case every
    //entry is divided by zero and the belief no longer holds valid
    //probabilities -- callers should check the returned value, or a guard
    //should be added here.
225  for(Index eI=0; eI < this->Size(); eI++)
226  _m_b.at(eI) = _m_b.at(eI) / Po_ba;
227 
228  return(Po_ba);
229 
230 }
232 {
    //Builds and returns a text dump of this belief: one line per entry
    //index eI, containing eI, the state index GetStateIndex(eI) and the
    //stored value Get(eI). Each line is terminated with endl.
233  stringstream ss;
234  for(Index eI=0; eI < Size(); eI++)
235  {
236  ss << "eI="<<eI<<",[sI="<<GetStateIndex(eI) << ", " <<
238  Get(eI) << endl;
239  }
240  return (ss.str());
241 }