MultiAgentDecisionProcess  Release 0.2.1
Perseus.cpp
Go to the documentation of this file.
1 
28 #include "Perseus.h"
29 #include "AlphaVectorPlanning.h"
30 #include "PerseusBackupType.h"
31 #include <fstream>
32 #include <float.h>
33 
34 using namespace std;
35 
36 //Default constructor
39  _m_pu(&pu),
40  _m_verbose(0),
41  _m_initializeWithImmediateReward(false),
42  _m_initializeWithZero(false),
43  _m_bestValue(-DBL_MAX),
44  _m_beliefsInitialized(false),
45  _m_identification("Perseus"),
46  _m_storeIntermediateValueFunctions(false),
47  _m_storeTimings(false),
48  _m_computeVectorForEachBelief(false),
49  _m_dryrun(false)
50 {
54 }
55 
56 //Destructor
58 {
59 }
60 
62 {
63 }
64 
66 {
// Body of the routine that builds and returns the initial alpha-vector set V0.
// The signature and the declaration of V0 are not part of this excerpt; the
// error string below refers to it as Perseus::GetInitialValueFunction().
67  int nrS=_m_pu->GetNrStates(),
68  nrA=_m_pu->GetNrJointActions();
69 
71 
// NOTE(review): the condition guarding this throw is not present in this
// excerpt. Judging by the message it presumably rejects more than one
// initialization mode being requested at once -- confirm against the full file.
73  throw(E("Perseus::GetInitialValueFunction() can only initialize in one way"));
74 
// NOTE(review): the condition selecting the first branch is not present in
// this excerpt.
// First branch: for each joint action a, fill one vector with the reward
// GetReward(s,a) of every state s and append a copy to V0. No SetAction()
// call is visible in this branch.
76  {
77  AlphaVector alpha(nrS);
78  for(int a=0;a<nrA;a++)
79  {
80  for(int s=0;s!=nrS;s++)
81  alpha.SetValue(_m_pu->GetReward(s,a),s);
82  V0.push_back(alpha);
83  }
84  }
85  else
86  {
// Second branch: V0 receives a single vector holding the same value
// (initialValue) in every state.
87  AlphaVector alpha(nrS);
88  double initialValue;
89 
// NOTE(review): the condition that selects the zero initialization is not
// present in this excerpt.
91  initialValue=0;
92  else
93  {
// Find the smallest reward over all (state, joint action) pairs.
94  double minReward=DBL_MAX;
95  for(int a=0;a<nrA;a++)
96  for(int s=0;s<nrS;s++)
97  if(_m_pu->GetReward(s,a)<minReward)
98  minReward=_m_pu->GetReward(s,a);
99 
100  // check if the problem is finite or infinite horizon
// Finite horizon: minimum reward multiplied by the horizon.
// Otherwise: minimum reward divided by (1 - discount).
// NOTE(review): a discount of exactly 1 would divide by zero in the second
// formula -- verify that this cannot occur when the horizon is MAXHORIZON.
101  if(_m_pu->GetHorizon()!=MAXHORIZON)
102  initialValue=minReward*_m_pu->GetHorizon();
103  else
104  initialValue=minReward/(1-_m_pu->GetDiscount());
105  }
106 
107  for(int s=0;s!=nrS;s++)
108  alpha.SetValue(initialValue,s);
109 
110  alpha.SetAction(INT_MAX); // set action to an illegal value
111  V0.push_back(alpha);
112  }
113 
114  return(V0);
115 }
116 
118 {
// Body of the routine that builds and returns Q0, a collection holding one
// alpha-vector set per joint action. The signature and the declarations of
// V0 and Q0 are not part of this excerpt; the error string below refers to
// the routine as Perseus::GetInitialQFunctions().
120 
121  unsigned int nrS=_m_pu->GetNrStates(),
122  nrA=_m_pu->GetNrJointActions();
123 
// NOTE(review): the condition guarding this throw is not present in this
// excerpt -- confirm against the full file.
125  throw(E("Perseus::GetInitialQFunctions() can only initialize in one way"));
126 
// NOTE(review): the condition selecting the first branch is not present in
// this excerpt.
// First branch: for each joint action a, Q0 gets a set containing exactly one
// vector, filled with GetReward(s,a) for every state s and tagged with a.
128  {
129  AlphaVector alpha(nrS);
131  for(unsigned int a=0;a!=nrA;++a)
132  {
// V0 is reused as scratch storage, so it is emptied before each action.
133  V0.clear();
134  for(unsigned int s=0;s!=nrS;++s)
135  alpha.SetValue(_m_pu->GetReward(s,a),s);
136  alpha.SetAction(a);
137  V0.push_back(alpha);
138  Q0.push_back(V0);
139  }
140  }
141  else
142  {
// Second branch: every vector currently in V0 is re-tagged with action a and
// a copy of the whole set is appended to Q0, once per joint action.
// NOTE(review): how V0 is populated before this loop is not visible in this
// excerpt -- confirm against the full file.
144  for(unsigned int a=0;a!=_m_pu->GetNrJointActions();++a)
145  {
146  for(unsigned int i=0;i!=V0.size();++i)
147  V0[i].SetAction(a);
148  Q0.push_back(V0);
149  }
150  }
151 
152  return(Q0);
153 }
154 
157 {
160  for(Index t=0;t!=GetPU()->GetHorizon();++t)
161  Q0.push_back(Qt0);
162  return(Q0);
163 }
164 
166 {
// Body of the routine that reports the largest entry returned by
// GetImmediateRewardBeliefSet(). The signature is not part of this excerpt;
// the error string below refers to it as Perseus::PrintMaxRewardInBeliefSet.
// NOTE(review): the condition guarding this throw is not present in this
// excerpt; the message indicates it fires when the belief set has not been
// initialized -- confirm against the full file.
168  throw(E("Perseus::PrintMaxRewardInBeliefSet belief set not initialized"));
169 
170  vector<double> beliefValues=GetImmediateRewardBeliefSet();
171 
// Linear scan for the maximum; stays at -DBL_MAX when beliefValues is empty.
172  double maxBeliefValue=-DBL_MAX;
173  for(unsigned int i=0;i!=beliefValues.size();i++)
174  {
175  if(beliefValues[i]>maxBeliefValue)
176  maxBeliefValue=beliefValues[i];
177  }
// The result is only written to cout when the verbosity level is at least 0.
178  if(GetVerbose() >= 0)
179  cout << GetIdentification() << ": max reward in beliefset is "
180  << maxBeliefValue << endl;
181 }
182 
183 int Perseus::SampleNotImprovedBeliefIndex(vector<bool> stillNeedToBeImproved,
184  int nrNotImproved) const
185 {
186  int beliefI,l,k;
187 
188  // sample a belief index from the number of not improved beliefs
189  beliefI=static_cast<int>(nrNotImproved*(rand() / (RAND_MAX + 1.0)));
190 
191  // figure out the index k of this belief in S
192  l=0;
193  k=-1;
194  for(unsigned int j=0;j!=stillNeedToBeImproved.size();j++)
195  {
196  if(stillNeedToBeImproved[j])
197  {
198  if(beliefI==l)
199  {
200  k=j;
201  break;
202  }
203  l++;
204  }
205  }
206 
207  if(k==-1)
208  {
209  PrintVectorCout(stillNeedToBeImproved);
210  cout << "nrNotImproved " << nrNotImproved << " beliefI "
211  << beliefI << endl;
212  throw(E("Perseus::SampleNotImprovedBeliefIndex did not sample valid k"));
213  }
214  return(k);
215 }
216 
// Decides whether the iteration should stop, based on how much the value of
// the beliefs changed and on the iteration count.
//
// VB, VBnew: values compared element by element over VB.size() entries
//            (VBnew is indexed with the same indices -- assumes it is at
//            least as long as VB; TODO confirm with callers).
// iter:      current iteration number, compared against the thresholds below.
// Returns true when one of the stopping rules below holds, false otherwise.
217 bool Perseus::CheckConvergence(const vector<double> &VB,
218  const vector<double> &VBnew,
219  int iter) const
220 {
221  bool converged;
222 
// NOTE(review): the unqualified abs() must resolve to a floating-point
// overload for the comparison to be meaningful -- confirm the headers that
// provide it are reachable from this file.
223  double maxDiff=0;
224  for(unsigned int i=0;i!=VB.size();i++) // for all beliefs,
225  if(abs(VB[i]-VBnew[i])>maxDiff) // get the difference in val
226  maxDiff=abs(VB[i]-VBnew[i]); // and store the maximum...
227 
228  if(GetVerbose() >= 1)
229  cout << GetIdentification() << ":CheckConvergence maxDiff is "
230  << maxDiff << endl;
// NOTE(review): the `if(...)` that pairs with the following assignment and
// with the `else if` below is not present in this excerpt -- confirm against
// the full file.
232  converged=true;
// Rule: the largest change is below 1e-4 and iter exceeds both the configured
// minimum number of iterations and five times the horizon.
233  else if(maxDiff<1e-4 &&
234  (static_cast<size_t>(iter) >
235  max(static_cast<size_t>(_m_minimumNumberOfIterations),
236  5*_m_pu->GetHorizon())))
237  converged=true;
238  else
239  {
// Fallback: stop regardless of maxDiff once iter reaches the larger of the
// configured minimum and 1000.
240  if(iter>=max(_m_minimumNumberOfIterations,1000))
241  converged=true;
242  else
243  converged=false;
244  }
245 
246  return(converged);
247 }
248 
250 {
252 
254  {
255  int nrB=1000;
256  // set the random seed and sample beliefs
257  srand(42);
258  if(GetVerbose() >= 0)
259  cout << GetIdentification() << ": sampling " << nrB
260  << " beliefs"; cout.flush();
261  InitializeBeliefs(nrB,true);
262  if(GetVerbose() >= 0)
263  cout << "." << endl;
264  }
265 
266  // just a manual check to figure out if the belief set has potential
268 }
269 
271 {
272  if(_m_storeTimings)
273  {
274  stringstream ss;
275  ss << directories::MADPGetResultsDir("POMDP",GetPU())
276  << "/intermediate/" << GetIdentification() << "Timings_h"
277  << GetPU()->GetHorizon();
278  SaveTimers(ss.str());
279  }
280 #if 0 // reduce verbosity
282 #endif
283 }
284 
286 {
288  if(_m_storeTimings)
289  {
290  stringstream ss;
291  ss << directories::MADPGetResultsDir("POMDP",GetPU()) << "/"
292  << GetIdentification() << "Timings_h" << GetPU()->GetHorizon();
293  SaveTimers(ss.str());
294  if(GetVerbose() >= 1)
295  cout << "Saved timing results to " << ss.str() << endl;
296  }
297 }
298 
301 {
302  StartTimer(GetIdentification() + "BackupStage");
303 
304  return(BackProject(V));
305 }
306 
308 {
309  // release the memory of the back-projected vectors
310  for(unsigned int a=0;a!=GetPU()->GetNrJointActions();a++)
311  for(unsigned int o=0;o!=GetPU()->GetNrJointObservations();o++)
312  delete(Gao[a][o]);
313 
314  StopTimer(GetIdentification() + "BackupStage");
315 }
316 
318 {
319  stringstream ss;
320  switch(params.backup)
321  {
322  case POMDP:
323  ss << "POMDP";
324  break;
325  case BG:
326  ss << "BG" << params.bgBackupType;
327  break;
328  default:
329  ss << "PerseusBackupType " << params.backup << " is unknown";
330  throw(E(ss));
331  }
332  return(ss.str());
333 }
334 
336 {
337  QAVParameters qavParams;
339 
340  qavParams.backup=args.backup;
341  switch(args.backup)
342  {
343  case POMDP:
344  break;
345  case BG:
346  qavParams.bgBackupType=args.bgBackup;
347  break;
348  }
349 
350  return(qavParams);
351 }
352 
// Stores the given string in _m_identification.
353 void Perseus::SetIdentification(string identification)
354 {
355  _m_identification=identification;
// NOTE(review): the listing this text was extracted from skips a line number
// between the assignment and the closing brace, so a statement may be missing
// here -- confirm against the full file.
357 }
358 
// Stores the given string in _m_resultsFilename.
359 void Perseus::SetResultsFilename(string filename)
360 {
361  _m_resultsFilename=filename;
// NOTE(review): the listing this text was extracted from skips a line number
// between the assignment and the closing brace, so a statement may be missing
// here -- confirm against the full file.
363 }
364 
366 {
367  stringstream valueFunctionFilename;
368  valueFunctionFilename << _m_resultsFilename
369  << GetIdentification()
370  << "ValueFunction_h" << _m_pu->GetHorizon();
371  _m_valueFunctionFilename=valueFunctionFilename.str();
372 
373  if(GetVerbose() >= 1)
374  cout << "Set value function filename to " << _m_valueFunctionFilename
375  << endl;
376 }
377 
379 {
380  throw(E("Perseus::StoreValueFunction should be implemented by deriving class"));
381 }
382 
384 {
385  throw(E("Perseus::StoreValueFunction should be implemented by deriving class"));
386 }
387 
389 {
390  throw(E("Perseus::StoreValueFunction should be implemented by deriving class"));
391 }
392 
393