MultiAgentDecisionProcess  Release 0.2.1
SimulationDecPOMDPDiscrete.cpp

#include "SimulationDecPOMDPDiscrete.h"
#include "JointPolicyDiscrete.h"
#include "AgentLocalObservations.h"
#include "AgentSharedObservations.h"
#include "AgentDelayedSharedObservations.h"
#include "AgentFullyObservable.h"

#include "JointObservation.h"
#include "JointAction.h"

using namespace std;

SimulationDecPOMDPDiscrete::
SimulationDecPOMDPDiscrete(const PlanningUnitDecPOMDPDiscrete &pu,
                           int nrRuns, int seed) :
    Simulation(nrRuns, seed),
    _m_pu(&pu),
    _m_saveIntermediateResults(false)
{
    Initialize();
}

SimulationDecPOMDPDiscrete::
SimulationDecPOMDPDiscrete(const PlanningUnitDecPOMDPDiscrete &pu,
                           const ArgumentHandlers::Arguments &args) :
    Simulation(args.nrRuns, args.randomSeed),
    _m_pu(&pu),
    _m_saveIntermediateResults(false)
{
    if(args.verbose >= 4)
        SetVerbose(true);
    Initialize();
}

//Destructor
SimulationDecPOMDPDiscrete::~SimulationDecPOMDPDiscrete()
{
}

void SimulationDecPOMDPDiscrete::Initialize()
{
    if(_m_pu->GetHorizon()==MAXHORIZON)
    {
        // figure out until what horizon we should sample to get a
        // maximum error smaller than 1e-6
        double maxAbsReward=0;
        for(Index s=0;s!=_m_pu->GetNrStates();++s)
            for(Index ja=0;ja!=_m_pu->GetNrJointActions();++ja)
                if(abs(_m_pu->GetReward(s,ja))>maxAbsReward)
                    maxAbsReward=abs(_m_pu->GetReward(s,ja));

        _m_horizon=lrint(ceil((log(1e-6/maxAbsReward)/
                               log(_m_pu->GetDiscount()))));
        if(GetVerbose())
            cout << "Set horizon to " << _m_horizon << " (g "
                 << _m_pu->GetDiscount() << " max|R| " << maxAbsReward
                 << ")" << endl;
    }
    else
        _m_horizon=_m_pu->GetHorizon();

    if(GetVerbose())
        cout << "Simulation::RunSimulations horizon " << _m_horizon
             << " nrRuns " << GetNrRuns() << " seed "
             << GetRandomSeed() << endl;

    if(GetRandomSeed()!=illegalRandomSeed)
    {
        // Seed the random number generator
        srand(GetRandomSeed());
    }
}

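// A worked instance of the effective-horizon computation above: the reward
// collected at stage t is bounded by g^t * max|R|, so requiring
// g^h * max|R| <= 1e-6 and solving for h yields
//   h = ceil( log(1e-6 / max|R|) / log(g) ),
// which is the expression passed to lrint() above. For example, g = 0.9 and
// max|R| = 10 give h = ceil( log(1e-7) / log(0.9) ) = ceil(152.98) = 153.
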
void SimulationDecPOMDPDiscrete::SaveIntermediateResults(string filename)
{
    _m_saveIntermediateResults=true;
    _m_intermediateResultsFilename=filename;
}

SimulationResult
SimulationDecPOMDPDiscrete::RunSimulations(const JointPolicyDiscrete *jp) const
{
    SimulationResult result(_m_horizon,GetRandomSeed(),GetNrRuns());
#if 0
    if(GetVerbose())
        jp->Print();
#endif
    // Run the simulations
    int i;
    for(i=0;i<GetNrRuns();i++)
    {
        double res = RunSimulation(jp);
        if(GetVerbose())
            cout << "Run ended r=" << res << endl;
        result.AddReward(res);
    }

    return(result);
}

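// A minimal usage sketch (hypothetical, following the constructors and
// RunSimulations() above; `pu` and `jpol` stand for an existing
// PlanningUnitDecPOMDPDiscrete and a solved JointPolicyDiscrete, and
// GetAvgReward() is assumed to be a member of SimulationResult):
#if 0
    SimulationDecPOMDPDiscrete sim(pu, 1000, 42); // 1000 runs, RNG seed 42
    SimulationResult result = sim.RunSimulations(&jpol);
    cout << "average reward " << result.GetAvgReward() << endl;
#endif
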
double
SimulationDecPOMDPDiscrete::RunSimulation(const JointPolicyDiscrete *jp) const
{
    Index jaI,sI,joI;
    double r,sumR=0;
    Index johI = INITIAL_JOHI;

    sI = _m_pu->GetReferred()->SampleInitialState();

    if(GetVerbose())
        cout << "Simulation::RunSimulation " << endl
             << "Simulation::RunSimulation set initial state to "
             << sI << endl;

    for(unsigned int t=0;t<_m_horizon;t++)
    {
        jaI = jp->SampleJointAction(johI);

        Step(jaI, t, sI, joI, r, sumR);

        /* action taken at ts=0,...,hor-1 - therefore only observation
         * histories at ts=0,...,hor-2 have successors.*/
        if(t < _m_horizon-1)
            johI = _m_pu->GetSuccessorJOHI(johI, joI);
    }

    return(sumR);
}

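// Note that johI indexes joint observation histories as enumerated by the
// planning unit: INITIAL_JOHI is the empty history at t=0, and
// GetSuccessorJOHI(johI, joI) returns the index of that history extended
// with joint observation joI. This is what lets jp->SampleJointAction(johI)
// condition the joint action on the observation history so far.
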
void SimulationDecPOMDPDiscrete::Step(Index jaI, unsigned int t, Index &sI,
                                      Index &joI, double &r,
                                      double &sumR) const
{
    Index sI_suc=_m_pu->GetReferred()->SampleSuccessorState(sI,jaI);

    joI=_m_pu->GetReferred()->SampleJointObservation(jaI,sI_suc);
    r = _m_pu->GetReferred()->GetReward(sI,jaI);

    // calc. the discounted reward
    sumR+=r*pow(_m_pu->GetDiscount(),static_cast<double>(t));

    if(GetVerbose())
        cout << "Simulation::RunSimulation ("
             << sI << "," << jaI << "," << sI_suc << ") ("
             << (_m_pu->GetReferred()->GetState(sI)->SoftPrintBrief())
             << ","
             << (_m_pu->GetJointAction(jaI))->SoftPrintBrief()
             << ","
             << (_m_pu->GetReferred()->GetState(sI_suc)->SoftPrintBrief())
             << ") (p "
             << _m_pu->GetReferred()->GetTransitionProbability(sI,jaI,sI_suc)
             << ") jo " << joI << " "
             << (_m_pu->GetJointObservation(joI))->SoftPrintBrief()
             << " (p "
             << _m_pu->GetReferred()->GetObservationProbability(jaI,sI_suc,joI)
             << ") r " << r << " sumR " << sumR << endl;

    sI = sI_suc;
}

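// Across an episode the update above accumulates the discounted return
//   sumR = sum_{t=0}^{h-1} g^t * r_t,
// each immediate reward weighted by the discount factor raised to the stage
// at which it was received; RunSimulation() returns this sum.
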
Index SimulationDecPOMDPDiscrete::
GetAction(const vector<AgentLocalObservations*> &agents, Index i,
          Index jaI, Index joI, double r, Index prevJoI, Index sI) const
{
    if(joI==INT_MAX) //first stage: there is no joI
        return(agents[i]->Act(INT_MAX));
    else
    {
        vector<Index> oIs=_m_pu->JointToIndividualObservationIndices(joI);
        return(agents[i]->Act(oIs[i]));
    }
}

Index SimulationDecPOMDPDiscrete::
GetAction(const vector<AgentSharedObservations*> &agents, Index i,
          Index jaI, Index joI, double r, Index prevJoI, Index sI) const
{
    return(agents[i]->Act(joI));
}

Index SimulationDecPOMDPDiscrete::
GetAction(const vector<AgentDelayedSharedObservations*> &agents,
          Index i, Index jaI, Index joI, double r, Index prevJoI,
          Index sI) const
{
    if(joI==INT_MAX) //first stage: there is no joI
        return(agents[i]->Act(INT_MAX,prevJoI));
    else
    {
        vector<Index> oIs=_m_pu->JointToIndividualObservationIndices(joI);
        return(agents[i]->Act(oIs[i],prevJoI));
    }
}

Index SimulationDecPOMDPDiscrete::
GetAction(const vector<AgentFullyObservable*> &agents,
          Index i, Index jaI, Index joI, double r, Index prevJoI,
          Index sI) const
{
    return(agents[i]->Act(sI,joI,r));
}
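
// The four GetAction() overloads above differ only in the information passed
// to each agent per stage: the agent's own observation component
// (AgentLocalObservations), the full joint observation
// (AgentSharedObservations), additionally the previous joint observation
// (AgentDelayedSharedObservations), or state, joint observation and reward
// (AgentFullyObservable). A minimal sketch of an agent matching the first
// overload (hypothetical class; Act(Index) is the call made above, while the
// constructor and the GetPU()/GetIndex() accessors are assumptions):
#if 0
class UniformRandomAgent : public AgentLocalObservations
{
public:
    UniformRandomAgent(const PlanningUnitDecPOMDPDiscrete *pu, Index id) :
        AgentLocalObservations(pu, id) {}

    // Receives INT_MAX at the first stage, when no observation exists yet.
    Index Act(Index oI)
    { return(rand() % GetPU()->GetNrActions(GetIndex())); }
};
#endif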