MultiAgentDecisionProcess  Release 0.2.1
GMAA_kGMAA.cpp
Go to the documentation of this file.
1 
28 #include "GMAA_kGMAA.h"
29 #include "JPPVValuePair.h"
30 #include "PartialJPDPValuePair.h"
37 
38 using namespace std;
39 
// NOTE(review): doxygen listing fragment — the constructor's opening line
// (class name and the BG-solver-creator parameter, presumably `bgsc`) is
// collapsed out of this view.
// Constructor: stores the Bayesian-game solver creator and the number of BG
// solutions to expand per stage (the "k" in kGMAA).
43  size_t horizon,
45  size_t nrPoliciesToProcess) :
47  _m_newBGIP_Solver(bgsc)
48 {
49  _m_nrPoliciesToProcess=nrPoliciesToProcess;
50 }
51 
52 //This function will construct and solve a BG for the next timestep ts
// NOTE(review): doxygen listing fragment — the function's return type and
// name line (and several other source lines) are collapsed out of this view.
// Presumably this is ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies):
// it builds a Bayesian game for stage ts from the partial policy in ppi,
// solves it, and inserts up to _m_nrPoliciesToProcess extended joint
// policies into poolOfNextPolicies. Returns true iff ts is the last stage.
55  PartialPolicyPoolInterface* poolOfNextPolicies
56  )
57 {
58  PartialJointPolicyDiscretePure* jpolPrevTs = ppi->GetJPol();//jpol^ts-1
59  size_t ts = jpolPrevTs->GetDepth(); // = depth = ts(jpolPrevTs) + 1
60  bool is_last_ts = (ts == GetHorizon() - 1);
61 
// First observation-history index per agent at stage ts; used below to map
// BG types back to observation histories when extending the joint policy.
62  vector<Index> firstOHtsI(GetNrAgents());
63  for(Index agI=0; agI < GetNrAgents(); agI++)
64  firstOHtsI.at(agI) = GetFirstObservationHistoryIndex(agI, ts);
65  // Construct the bayesian game for this timestep -
// NOTE(review): the BG construction call itself is on collapsed lines; only
// two of its arguments (this, jpolPrevTs) are visible. The constructed game
// is referred to as bg_ts below — confirm against the full source.
67  this,
69  jpolPrevTs
70  );
71 
// Optionally dump the constructed BG to disk for debugging/inspection
// (filename built on a collapsed line from _m_bgBaseFilename and the count).
72  _m_bgCounter++;
73  if(_m_bgBaseFilename!="")
74  {
75  stringstream ss;
77  BayesianGameIdenticalPayoff::Save(*bg_ts,ss.str());
78  }
79 
80  double prevPastReward = jpolPrevTs->GetPastReward();
81  const vector<size_t>& nrOHts = bg_ts->GetNrTypes();
82  //size_t nrJOHts = bg_ts->GetNrJointTypes();
83 
84  //The set of Indices of the policies added to poolOfNextPolicies
85  //(to avoid adding duplicates).
86  set<Index> poolOfNextPoliciesIndices;
87 
88  //the policy for the Bayesian game
91 
// Time how long solving the stage-ts BG takes (timer name includes ts).
92  stringstream ss;
93  ss << "GMAA::kGMAA_ts" << ts;
94  StartTimer(ss.str());
95 
96  //solve the Bayesian game
97  BayesianGameIdenticalPayoffSolver<JointPolicyPureVector> * bgips = (*_m_newBGIP_Solver)(*bg_ts);
98  //TODO bgips->SetAnyTimeResults(true, FILES...);
99  bgips->Solve();
100  BGIPSolution & solution = bgips->GetSolution();
101 
102  //for each solution in BGIPSolution
// Cache immediate rewards once; reused for each of the k returned policies.
103  bg_ts->ComputeAllImmediateRewards();
104  for(Index solI=0; solI < _m_nrPoliciesToProcess; solI++)
105  {
// The solver may return fewer than _m_nrPoliciesToProcess usable policies;
// warn and stop when its solution queue runs dry.
106  if(solution.IsEmptyJPPV())
107  {
108  cerr << "Warning, BGIP_Solver only returned "<<solI<<
109  " usable joint policies"<<endl;
110  break;
111  }
112  JPPVValuePair * jpvp = solution.GetNextSolutionJPPV();
113  JointPolicyPureVector* bgpol = jpvp->GetJPPV();
114  double val = jpvp->GetValue();
115 
116  solution.PopNextSolutionJPPV();
// Extend jpolPrevTs with the BG policy into a ts+1 stage partial policy.
// NOTE(review): the assignment target (jpolTs) is on a collapsed line.
118  ConstructExtendedJointPolicy(*jpolPrevTs,
119  *bgpol, nrOHts, firstOHtsI);
120  //compute expected immediate reward for this stage
121  double immR = bg_ts->ComputeDiscountedImmediateRewardForJPol(bgpol);
122  double newPastreward = prevPastReward + immR;
123  jpolTs->SetPastReward(newPastreward);
124  //push this policy and value on the priority queue
// Last stage: accumulated past reward is the exact value. Earlier stages:
// rank by the BG solution's (heuristic) value plus the past reward.
127  if(is_last_ts)
128  poolOfNextPolicies->Insert( NewPPI(jpolTs,newPastreward) );
129  else
130  poolOfNextPolicies->Insert( NewPPI(jpolTs,
131  val + prevPastReward) );
132  delete jpvp;
133  }
134  //empty the imm reward cache
135  bg_ts->ClearAllImmediateRewards();
136  delete bg_ts;
137  delete bgips;
138  StopTimer(ss.str());
139  //if we created a BG for the last time step t=h-1 - we have a lowerbound
140  return(is_last_ts);
141 }
142 
// NOTE(review): fragment — the signature's opening line (return type, name,
// and first-parameter type) is collapsed; presumably this is
// SelectPoliciesToProcessFurther. kGMAA's selection policy: keep only the
// k (= _m_nrPoliciesToProcess) best policies from the pool.
144  poolOfNextPolicies, bool are_LBs, double bestLB)
145 {
146  SelectKBestPoliciesToProcessFurther(poolOfNextPolicies, are_LBs,
147  bestLB, _m_nrPoliciesToProcess);
148 }
149 
// NOTE(review): fragment — signature collapsed; presumably the NewPP()
// factory. Returns a fresh policy pool backed by (policy,value) pairs.
// Caller takes ownership of the returned pointer.
152 {return (new PolicyPoolPartialJPolValPair);}
153 
// NOTE(review): fragment — signature collapsed; presumably NewPPI(jp, v),
// which packages a partial joint policy and its value as a pool item.
// The item construction is on a collapsed line (159); the earlier
// JPPVValuePair variant is kept here as a commented-out alternative.
156 {
157  //return (new JPPVValuePair(jp,v));
159  return (ppi);
160 }
161 
162 
// NOTE(review): fragment — signature collapsed; presumably NewJPol().
// Creates an empty partial joint policy indexed by observation-history
// (OHIST_INDEX) with past reward initialized to 0.0; caller owns it.
164 {
165  return new PartialJointPolicyPureVector(*this, OHIST_INDEX, 0.0);
166 }
167 
168