MultiAgentDecisionProcess  Release 0.2.1
AlphaVectorPlanning.cpp
Go to the documentation of this file.
1 
28 #include "AlphaVectorPlanning.h"
29 #include <float.h>
30 #include <fstream>
31 #include <sys/times.h>
32 
33 #include <boost/numeric/ublas/matrix.hpp>
34 #include <boost/numeric/ublas/matrix_proxy.hpp>
35 #include <boost/numeric/ublas/io.hpp>
36 
37 #include "AlphaVector.h"
40 #include "JointBeliefSparse.h"
41 #include "JointObservation.h"
42 #include "JointAction.h"
43 #include "State.h"
44 #include "argumentHandlers.h"
45 #include "QMDP.h"
46 
47 #define DEBUG_AlphaVectorPlanning_BeliefSampling 0
48 #define DEBUG_AlphaVectorPlanning_BackProject 0
49 #define DEBUG_AlphaVectorPlanning_BackProjectFullPrintout 0
50 #define DEBUG_AlphaVectorPlanning_BackProjectFullSanityCheck 0
51 #define DEBUG_AlphaVectorPlanning_Prune 0
52 #define DEBUG_AlphaVectorPlanning_CrossSum 0
53 #define DEBUG_AlphaVectorPlanning_ValueFunctionToQ 0
54 #define DEBUG_AlphaVectorPlanning_ImportValueFunction 0
55 
56 #define AlphaVectorPlanning_CheckForDuplicates 1
57 #define AlphaVectorPlanning_UseUBLASinBackProject 1
58 #define AlphaVectorPlanning_VerifyUBLASinBackProject 0
59 #define AlphaVectorPlanning_UseFastSparseBackup 1
60 
61 using namespace std;
62 
65  _m_pu(&pu),
66  _m_TsForBackup(0),
67  _m_OsForBackup(0),
68  _m_TsOsForBackup(0)
69 {
    // Constructor body (the exception text below names it
    // "AlphaVectorPlanning::Ctor()"). It decides once, from the dynamic
    // type of the transition model `td`, whether the sparse or the dense
    // code path is used by the rest of the class.
    // NOTE(review): the declarations of `td` and `tms` are on lines that
    // are absent from this listing.
71  const TransitionModelMapping *tm;
73 
74  if((tms=dynamic_cast<const TransitionModelMappingSparse *>(td)))
75  _m_useSparse=true;
76  else if((tm=dynamic_cast<const TransitionModelMapping *>(td)))
77  _m_useSparse=false;
78  else
    // neither of the two supported matrix representations: give up
79  throw(E("AlphaVectorPlanning::Ctor() TransitionModelDiscretePtr not handled"));
80 
    // nothing has been cached yet; the model matrices are collected later
81  _m_initialized=false;
82 }
83 
85 {
    // Presumably the destructor (signature line not in this listing):
    // all cleanup is delegated to DeInitialize().
86  DeInitialize();
87 }
88 
89 
91 {
    // Caches pointers to the per-joint-action transition and observation
    // matrices and, on the sparse path with
    // AlphaVectorPlanning_UseFastSparseBackup enabled, precomputes the
    // sparse vectors used by BackProjectSparse().
    // NOTE(review): the declarations of `td`, `od` and `oms` are on lines
    // that are absent from this listing.

    // a second call first throws away whatever was cached before
92  if(_m_initialized)
93  DeInitialize();
94 
97 
98  if(_m_useSparse)
99  {
100  const TransitionModelMappingSparse *tms;
101  tms=dynamic_cast<const TransitionModelMappingSparse *>(td);
103  oms=dynamic_cast<const ObservationModelMappingSparse *>(od);
104 
105 #if AlphaVectorPlanning_UseFastSparseBackup
106  for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
107  {
108  _m_Ts.push_back(tms->GetMatrixPtr(a));
109  _m_Os.push_back(oms->GetMatrixPtr(a));
110 
    // _m_TsForBackup[a][s]: row s of the transition matrix of action a,
    // copied into a freshly allocated SparseVector (only non-zero entries
    // are written)
111  _m_TsForBackup.push_back(vector<SparseVector *>());
112  for(unsigned int s=0;s!=GetPU()->GetNrStates();s++)
113  {
114  SparseVector *temp=new SparseVector(GetPU()->GetNrStates());
115  for(unsigned int s1=0;s1!=GetPU()->GetNrStates();s1++)
116  {
117  if((*_m_Ts[a])(s,s1)!=0)
118  (*temp)(s1)=(*_m_Ts[a])(s,s1);
119  }
120  _m_TsForBackup.at(a).push_back(temp);
121  }
122 
    // _m_OsForBackup[a][o]: column o of the observation matrix of action
    // a, indexed by successor state s1
123  _m_OsForBackup.push_back(vector<SparseVector *>());
124  for(unsigned o=0;o!=GetPU()->GetNrJointObservations();++o)
125  {
126  SparseVector *temp=new SparseVector(GetPU()->GetNrStates());
127  for(unsigned int s1=0;s1!=GetPU()->GetNrStates();s1++)
128  {
129  if((*_m_Os[a])(s1,o)!=0)
130  (*temp)(s1)=(*_m_Os[a])(s1,o);
131  }
132  _m_OsForBackup.at(a).push_back(temp);
133  }
134 
    // _m_TsOsForBackup[a][s][o]: element-wise product of the two vectors
    // above, so a backup reduces to a single inner product
135  _m_TsOsForBackup.push_back(vector< vector<SparseVector *> >());
136  for(unsigned int s=0;s!=GetPU()->GetNrStates();s++)
137  {
138  _m_TsOsForBackup.at(a).push_back(vector<SparseVector *>());
139  for(unsigned o=0;o!=GetPU()->GetNrJointObservations();++o)
140  {
141  SparseVector *temp=
142  new SparseVector(element_prod(*_m_TsForBackup[a][s],
143  *_m_OsForBackup[a][o]));
144  _m_TsOsForBackup.at(a).at(s).push_back(temp);
145  }
146  }
147  }
148 #endif
149  }
150  else
151  {
    // dense path: only the matrix pointers are cached, nothing is copied
152  const TransitionModelMapping *tm;
153  tm=dynamic_cast<const TransitionModelMapping *>(td);
154  const ObservationModelMapping *om;
155  om=dynamic_cast<const ObservationModelMapping *>(od);
156 
157  for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
158  {
159  _m_T.push_back(tm->GetMatrixPtr(a));
160  _m_O.push_back(om->GetMatrixPtr(a));
161  }
162  }
163  _m_initialized=true;
164 }
165 
167 {
    // Undoes the caching done at initialization time.
    // The four pointer vectors below are only cleared: the matrices they
    // point to were obtained via GetMatrixPtr() and are not deleted here.
168  _m_T.clear();
169  _m_O.clear();
170 
171  _m_Ts.clear();
172  _m_Os.clear();
173 
    // The *ForBackup containers hold SparseVectors allocated with new,
    // so every element is deleted before the container is emptied.
174  if(_m_TsForBackup.size())
175  {
176  for(unsigned i=0;i!=_m_TsForBackup.size();++i)
177  for(unsigned j=0;j!=_m_TsForBackup[i].size();++j)
178  delete _m_TsForBackup[i][j];
179  }
180  _m_TsForBackup.clear();
181 
182  if(_m_OsForBackup.size())
183  {
184  for(unsigned i=0;i!=_m_OsForBackup.size();++i)
185  for(unsigned j=0;j!=_m_OsForBackup[i].size();++j)
186  delete _m_OsForBackup[i][j];
187  }
188  _m_OsForBackup.clear();
189 
190  if(_m_TsOsForBackup.size())
191  {
192  for(unsigned i=0;i!=_m_TsOsForBackup.size();++i)
193  for(unsigned j=0;j!=_m_TsOsForBackup[i].size();++j)
194  for(unsigned k=0;k!=_m_TsOsForBackup[i][j].size();++k)
195  delete _m_TsOsForBackup[i][j][k];
196  }
197  _m_TsOsForBackup.clear();
198 }
199 
202 {
    // Back-projects a value function given as a collection of AlphaVectors
    // `v`. The values are copied into a VectorSet (one row per vector, one
    // column per state) and handed to the sparse or dense implementation.
    // An empty `v` yields a default-constructed GaoVectorSet.
203  // convert the valuefunction (a set of AlphaVectors) to a
204  // VectorSet, basically removing the action information
205  int nrInV=v.size();
206  if(nrInV>0)
207  {
    // the number of states is taken from the first vector
208  int nrS=(v[0].GetValues()).size();
209  VectorSet v1(nrInV,nrS);
210  for(int k=0;k!=nrInV;k++)
211  for(int s=0;s!=nrS;s++)
212  v1(k,s)=v[k].GetValue(s);
213 
214  if(_m_useSparse)
215  return(BackProjectSparse(v1));
216  else
217  return(BackProjectFull(v1));
218  }
219  else
220  {
221  // v is empty, so return empty G
222  GaoVectorSet G;
223  return(G);
224  }
225 }
226 
231 {
    // Dispatches to the sparse or the dense back-projection, depending on
    // the representation chosen in the constructor (_m_useSparse).
232  if(_m_useSparse)
233  return(BackProjectSparse(v));
234  else
235  return(BackProjectFull(v));
236 }
237 
242 {
    // Dense back-projection. For every joint action a and joint
    // observation o a new VectorSet is allocated in G[a][o] whose entry
    // (k,s) is  sum_s1 O[a](s1,o) * T[a](s,s1) * v(k,s1)
    // (see the plain-loop variant further down).
    // The VectorSets are allocated with new and returned by pointer inside
    // G; this function does not free them.
    // Throws E when `v` contains no vectors.
243  unsigned int nrA=GetPU()->GetNrJointActions(),
244  nrO=GetPU()->GetNrJointObservations(),
245  nrS=GetPU()->GetNrStates(),
246  nrInV=v.size1();
247 
248  if(nrInV==0)
249  throw(E("AlphaVectorPlanning::BackProjectFull attempting to backproject empty value function"));
250 
251 #if DEBUG_AlphaVectorPlanning_BackProject
252  tms timeStruct;
253  clock_t ticks_before, ticks_after;
254  ticks_before = times(&timeStruct);
255 #endif
256 
257  StartTimer("BackProjectFull");
258 
259  GaoVectorSet G(boost::extents[nrA][nrO]);
    // v1 is a scratch buffer that is refilled for every (a,o) pair and
    // copied into G[a][o]
260  VectorSet v1(nrInV,nrS);
261 #if AlphaVectorPlanning_UseUBLASinBackProject
    // non-const copy of v, needed to build matrix_row<VectorSet> proxies
262  VectorSet vv=v;
263 #endif
264  double x;
265 
    // duplicates[k] is -1, or the index of an earlier row equal to row k
    // (see GetDuplicateIndices()); such rows are copied, not recomputed
266 #if AlphaVectorPlanning_CheckForDuplicates
267  vector<int> duplicates=GetDuplicateIndices(v);
268 #else
269  vector<int> duplicates(nrInV,-1);
270 #endif
271  int dup;
272 
273  using namespace boost::numeric::ublas;
274 
275  for(unsigned int a=0;a!=nrA;a++)
276  for(unsigned int o=0;o!=nrO;o++)
277  {
278 #if AlphaVectorPlanning_UseUBLASinBackProject
    // column o of the observation matrix: P(o | a, s1) for all s1
279  matrix_column<const ObservationModelMapping::Matrix> mO(*_m_O[a],o);
280 #endif
281  for(unsigned int k=0;k!=nrInV;k++)
282  {
283  if(duplicates[k]==-1)
284  {
285 #if AlphaVectorPlanning_UseUBLASinBackProject
286  const matrix_row<VectorSet> mV(vv,k);
287 #endif
288  for(unsigned int s=0;s!=nrS;s++)
289  {
290 #if AlphaVectorPlanning_UseUBLASinBackProject
    // row s of the transition matrix: P(s1 | s, a) for all s1
291  matrix_row<const TransitionModelMapping::Matrix>
292  mT(*_m_T[a],s);
293  x=inner_prod(element_prod(mT,mO),mV);
294 #if AlphaVectorPlanning_VerifyUBLASinBackProject
    // cross-check the uBLAS result against the explicit sum
295  double x1=0;
296  for(unsigned int s1=0;s1!=nrS;s1++)
297  x1+=(*_m_O[a])(s1,o)*(*_m_T[a])(s,s1)*v(k,s1);
298  if(abs(x-x1)>1e-14)
299  {
300  cerr << x << " " << x1 << " " << x-x1 << endl;
301  abort();
302  }
303 #endif
304 #else // AlphaVectorPlanning_UseUBLASinBackProject
305  x=0;
306  for(unsigned int s1=0;s1!=nrS;s1++)
307  x+=(*_m_O[a])(s1,o)*(*_m_T[a])(s,s1)*v(k,s1);
308 #endif
309  v1(k,s)=x;
310  }
311  }
312  else
313  {
    // row `dup` precedes row k, so it was already computed for this (a,o)
314  dup=duplicates[k];
315  for(unsigned int s=0;s!=nrS;s++)
316  v1(k,s)=v1(dup,s);
317  }
318  }
319  G[a][o]=new VectorSet(v1);
320  }
321 
322  StopTimer("BackProjectFull");
323 
324 #if DEBUG_AlphaVectorPlanning_BackProjectFullPrintout
325  cout << "BackProjectFull of:" << endl;
326  for(unsigned int k=0;k!=nrInV;k++)
327  {
328  for(unsigned int s=0;s!=nrS;s++)
329  cout << v(k,s) << " ";
330  cout << endl;
331  }
332 
333  VectorSet *VS;
334  for(unsigned int a=0;a!=nrA;a++)
335  for(unsigned int o=0;o!=nrO;o++)
336  {
337  cout << "Gao a " << a << " o " << o << endl;
338 
339  VS=G[a][o];
340  for(unsigned int k=0;k!=VS->size1();k++)
341  {
342  for(unsigned int s=0;s!=VS->size2();s++)
343  cout << (*VS)(k,s) << " ";
344  cout << endl;
345  }
346  }
347 #endif
348 
349 #if DEBUG_AlphaVectorPlanning_BackProjectFullSanityCheck
    // optional check: aborts when the largest back-projected value exceeds
    // the largest input value
350  double maxInV=-DBL_MAX;
351  for(unsigned int k=0;k!=nrInV;k++)
352  for(unsigned int s=0;s!=nrS;s++)
353  maxInV=max(maxInV,v(k,s));
354 
355  double maxInGao=-DBL_MAX;
356  for(unsigned int a=0;a!=nrA;a++)
357  for(unsigned int o=0;o!=nrO;o++)
358  for(unsigned int k=0;k!=nrInV;k++)
359  for(unsigned int s=0;s!=nrS;s++)
360  maxInGao=max(maxInGao,(*G[a][o])(k,s));
361 
362  if(maxInGao>maxInV)
363  {
364  cout << "Max value in V is " << maxInV << ", max in Gao is "
365  << maxInGao << endl;
366  abort();
367  }
368 #endif
369 
370 #if DEBUG_AlphaVectorPlanning_BackProject
371  ticks_after = times(&timeStruct);
372  cout << "AlphaVectorPlanning::BackProject done in "
373  << ticks_after - ticks_before << " clock ticks, "
374  << static_cast<double>((ticks_after - ticks_before))
375  / sysconf(_SC_CLK_TCK)
376  << "s" << endl;
377  // test results on 22-12-2006
378  // using dense Matrix is about 2 faster than calling the Get*Prob()
379  // using sparse Matrix is almost 3 times slower then dense Matrix
380 #endif
381  return(G);
382 }
383 
388 {
    // Sparse counterpart of BackProjectFull(): fills G[a][o] with a newly
    // allocated VectorSet for every joint action a and joint observation o.
    // With AlphaVectorPlanning_UseFastSparseBackup each entry is a single
    // inner product with the precomputed _m_TsOsForBackup[a][s][o];
    // otherwise the transition row and observation column are multiplied
    // on the fly. Throws E when `v` contains no vectors.
389  unsigned int nrA=GetPU()->GetNrJointActions(),
390  nrO=GetPU()->GetNrJointObservations(),
391  nrS=GetPU()->GetNrStates(),
392  nrInV=v.size1();
393 
394  if(nrInV==0)
395  throw(E("AlphaVectorPlanning::BackProjectSparse attempting to backproject empty value function"));
396 
397  StartTimer("BackProjectSparse");
398 
399  GaoVectorSet G(boost::extents[nrA][nrO]);
    // scratch buffer, refilled for every (a,o) pair
400  VectorSet v1(nrInV,nrS);
    // non-const copy of v, needed to build matrix_row<VectorSet> proxies
401  VectorSet vv=v;
402  double x;
403 
    // duplicates[k] is -1, or the index of an earlier row equal to row k
404 #if AlphaVectorPlanning_CheckForDuplicates
405  vector<int> duplicates=GetDuplicateIndices(v);
406 #else
407  vector<int> duplicates(nrInV,-1);
408 #endif
409  int dup;
410 
411  using namespace boost::numeric::ublas;
412 
413  for(unsigned int a=0;a!=nrA;a++)
414  for(unsigned int o=0;o!=nrO;o++)
415  {
416 #if !AlphaVectorPlanning_UseFastSparseBackup
417  matrix_column<const ObservationModelMappingSparse::SparseMatrix>
418  mO(*_m_Os[a],o);
419 #endif
420  for(unsigned int k=0;k!=nrInV;k++)
421  {
422  if(duplicates[k]==-1)
423  {
424  const matrix_row<VectorSet> mV(vv,k);
425 
426  for(unsigned int s=0;s!=nrS;s++)
427  {
428 #if AlphaVectorPlanning_UseFastSparseBackup
429  x=inner_prod(*_m_TsOsForBackup[a][s][o],mV);
430 #else
431  matrix_row<const TransitionModelMappingSparse::
432  SparseMatrix> mT(*_m_Ts[a],s);
433  x=inner_prod(element_prod(mT,mO),mV);
434 #endif
435  v1(k,s)=x;
436  }
437  }
438  else
439  {
    // reuse the row already computed for the earlier, equal vector
440  dup=duplicates[k];
441  for(unsigned int s=0;s!=nrS;s++)
442  v1(k,s)=v1(dup,s);
443  }
444  }
445  G[a][o]=new VectorSet(v1);
446  }
447 
448  StopTimer("BackProjectSparse");
449 
450  return(G);
451 }
452 
454  ArgumentHandlers::Arguments args) const
455 {
    // Collects up to args.nrBeliefs beliefs by repeatedly simulating the
    // problem: start from the initial state distribution, pick a joint
    // action (greedy w.r.t. QMDP or uniformly at random), update the
    // belief, and store it. With args.uniqueBeliefs set, beliefs already
    // stored are not added again.
    // NOTE(review): several lines of this function (484, 502, 528-529,
    // 577 in the listing's numbering) are absent from this listing; the
    // comments below only describe what is visible.
456  StartTimer("SampleBeliefs");
457 
458  int h=_m_pu->GetHorizon(),
459  nrS=_m_pu->GetNrStates(),
460  nrA=_m_pu->GetNrJointActions(),
461  i,d=0,s;
462  Index a;
463  Index o;
464  Index s0=0,s1;
    // b0: belief before the current step, b1: belief after it
465  JointBeliefInterface *b0p, *b1p;
466  b0p = _m_pu->GetNewJointBeliefInterface(nrS);
467  b1p = _m_pu->GetNewJointBeliefInterface(nrS);
468  JointBeliefInterface& b0 = *b0p;
469  JointBeliefInterface& b1 = *b1p;
470  BeliefSet S(args.nrBeliefs);
471  bool foundEqualBelief,equal,addBelief;
472  int nrEqualFound=0;
473 
474  // we don't want to artificially reset the problem
475  if(args.resetAfter==0)
476  {
477  if(h==static_cast<int>(MAXHORIZON))
478  cout << "Warning: sampling beliefs for an infinite horizon "
479  << "without reset." << endl;
480  args.resetAfter=h;
481  }
482 
    // NOTE(review): the condition guarding the block below is on a line
    // missing from the listing; qmdp stays 0 when the block is skipped
483  QMDP *qmdp=0;
485  {
486  qmdp=new QMDP(*GetPU());
487  qmdp->Compute();
488  }
489 
    // i counts the beliefs stored so far, d the depth within an episode
490  i=0;
491  // make sure we don't try to keep on sampling beliefs if there are
492  // not enough unique beliefs (<args.nrBeliefs)
493  while(i<args.nrBeliefs && nrEqualFound<(args.nrBeliefs*2))
494  {
495  // reset the problem if either we exceeded the horizon, or the
496  // user-supplied parameter (used in the infinite-horizon case)
497  if(d>h || d>args.resetAfter)
498  d=0;
499 
500  if(d==0)
501  {
    // start of an episode: the belief is the initial state distribution
503  b1.Set(* _m_pu->GetProblem()->GetISD());
504  }
505  else
506  {
    // rand()/(RAND_MAX+1.0) lies in [0,1): with probability
    // QMDPexploreProb the random branch below is taken instead
507  if(args.useQMDPforSamplingBeliefs &&
508  (rand() / (RAND_MAX + 1.0)) > args.QMDPexploreProb)
509  {
    // greedy joint action w.r.t. the QMDP Q-values at belief b0
510  double valMax=-DBL_MAX;
511  a=INT_MAX;
512  for(int aQMDP=0;aQMDP!=nrA;++aQMDP)
513  {
514  double qQMDP=qmdp->GetQ(b0,aQMDP);
515  if(qQMDP>valMax)
516  {
517  valMax=qQMDP;
518  a=aQMDP;
519  }
520  }
521  }
522  else
523  {
524  // sample an action uniformly at random
525  a=static_cast<int>(nrA*(rand() / (RAND_MAX + 1.0)));
526  }
527 
    // NOTE(review): `o` (and presumably `s1`) are assigned on lines
    // missing from the listing - confirm against the real source
530 
531  b1=b0;
532  b1.Update(*_m_pu->GetReferred(),a,o);
533  }
534 
535  if(args.uniqueBeliefs)
536  {
537  foundEqualBelief=false;
538  // loop over all beliefs already in S
539  for(int j=0;j!=i;j++)
540  {
541  equal=true;
542  for(s=0;s!=nrS;s++)
543  {
544  // if one number differs we can move on to the next j
545  if(S[j]->Get(s)!=b1.Get(s))
546  {
547  equal=false;
548  break;
549  }
550  }
551  // if we found an equal belief we can stop
552  if(equal)
553  {
554  foundEqualBelief=true;
555  break;
556  }
557  }
558  if(foundEqualBelief)
559  {
    // counted so the outer loop can give up on problems with too few
    // distinct beliefs
560  nrEqualFound++;
561  addBelief=false;
562  }
563  else
564  addBelief=true;
565  }
566  else
567  addBelief=true;
568 
569  if(addBelief)
570  {
571  if(!b1.SanityCheck())
572  throw(E("AlphaVectorPlanning::BeliefSampling belief fails sanity check"));
573 
    // the stored copy is allocated with new; ownership goes to the
    // returned BeliefSet's user
574  S[i]=new JointBeliefSparse(b1); // HMMM, always sparse?
575  i++;
576 
578  {
579  cout << "AlphaVectorPlanning::SampleBeliefs sampled belief nr "
580  << i << "/" << args.nrBeliefs << " (nrEqualFound "
581  << nrEqualFound << ")" << endl;
582  }
583  }
584 
    // advance one step: the new belief becomes the current one
585  d++;
586  s0=s1;
587  b0=b1;
588  }
589 
590  delete b0p;
591  delete b1p;
592  delete qmdp;
593 
594  StopTimer("SampleBeliefs");
595 
596  // we did not manage to sample args.nrBeliefs unique beliefs
597  if(i<args.nrBeliefs)
598  {
599  cout << "AlphaVectorPlanning::SampleBeliefs: warning, only "
600  << "managed to sample " << i << " unique beliefs instead of "
601  << args.nrBeliefs << endl;
    // return a set that is exactly as long as the number of beliefs found
602  BeliefSet S1(i);
603  for(int j=0;j!=i;j++)
604  S1[j]=S[j];
605  return(S1);
606  }
607  else
608  return(S);
609 }
610 
612  const VectorSet &B) const
613 {
614  int nrInA=A.size1(),
615  nrInB=B.size1(),
616  nrS=A.size1();
617 
618 #if DEBUG_AlphaVectorPlanning_CrossSum
619  cout << "AlphaVectorPlanning::CrossSum of " << nrInA
620  << " times " << nrInB << endl;
621 #endif
622 
623  VectorSet C(nrInA*nrInB,nrS);
624 
625  int k=-1;
626  for(int i=0;i!=nrInA;i++)
627  for(int j=0;j!=nrInB;j++)
628  {
629  k++;
630  for(int s=0;s!=nrS;s++)
631  C(k,s)=A(i,s)+B(j,s);
632  }
633 
634  return(C);
635 }
636 
639  Index a,
640  bool doIncPrune) const
641 {
    // Cross-sums the vector sets G[a][o] over all joint observations o of
    // joint action `a` and appends every resulting vector to V, tagged
    // with action `a`. With doIncPrune set, the intermediate result is
    // pruned after every cross-sum.
642  int nrS=GetPU()->GetNrStates(),
643  nrO=GetPU()->GetNrJointObservations();
644  AlphaVector alpha(nrS);
645 
646 #if DEBUG_AlphaVectorPlanning_CrossSum
647  cout << "AlphaVectorPlanning::MonahanCrossSum for action " << a << endl;
648 #endif
649 
650  // Do the cross-sums, creates G_a of (3.25)
    // the running result starts as a copy of the set for observation 0
651  VectorSet Ga=*G[a][0];
652  for(GaoVectorSetIndex o=1;o!=nrO;o++)
653  {
654  if(doIncPrune)
655  {
656  VectorSet Ga2=CrossSum(Ga,*G[a][o]);
657  VectorSet Ga1=Prune(Ga2);
658  Ga.resize(Ga1.size1(),Ga1.size2());
659  Ga=Ga1;
660  }
661  else
662  {
663  VectorSet Ga1=CrossSum(Ga,*G[a][o]);
664  Ga.resize(Ga1.size1(),Ga1.size2());
665  Ga=Ga1;
666  }
667  }
668 
669  // Add the resulting vectors to V (HV_n of (3.25))
670  for(unsigned int k=0;k!=Ga.size1();k++)
671  {
672  alpha.SetAction(a);
673  for(int s=0;s!=nrS;s++)
674  alpha.SetValue(Ga(k,s),s);
    // push_back stores a copy, so `alpha` can be reused for the next row
675  V.push_back(alpha);
676  }
677 }
678 
682 {
684 }
685 
686 
690 {
691  int nrS=GetPU()->GetNrStates(),
692  nrA=GetPU()->GetNrJointActions(),
693  nrO=GetPU()->GetNrJointObservations(),
694  nrInV=V.size();
695  double gamma=GetPU()->GetDiscount();
696 
697  GaoVectorSet Gao=BackProject(V);
698 
699  GaoVectorSet G(boost::extents[nrA][nrO]);
700  VectorSet *vGao;
701  VectorSet v1(nrInV,nrS);
702 
703  // Create G^o_a vectors of (3.25) of PhD Matthijs
704  for(GaoVectorSetIndex a=0;a!=nrA;a++)
705  for(GaoVectorSetIndex o=0;o!=nrO;o++)
706  {
707  vGao=Gao[a][o];
708  for(int k=0;k!=nrInV;k++)
709  for(int s=0;s!=nrS;s++)
710  v1(k,s)=(GetPU()->GetReward(s,a))/nrO+gamma*(*vGao)(k,s);
711  G[a][o]=new VectorSet(v1);
712  }
713 
714  return(G);
715 }
716 
719 {
    // Removes pointwise-dominated vectors: a vector is dropped when some
    // other vector with the same action is at least as large in every
    // state. The surviving vectors are returned in V1.
    // NOTE(review): the declaration of V1 is on a line absent from this
    // listing.
    // NOTE(review): the test below uses ">" and considers every other
    // vector, including ones already dropped, so two identical vectors
    // with the same action dominate each other and are BOTH dropped -
    // confirm whether that is intended.
721  int nrS=GetPU()->GetNrStates();
722  bool dominated,valuesDominated;
723  vector<bool> dominatedVectors(V.size(),false);
724  int i;
725 
726 #if DEBUG_AlphaVectorPlanning_Prune
727  cout << "AlphaVectorPlanning::Prune " << V.size() << " vectors" << endl;
728 #endif
729 
730  vector<AlphaVector>::const_iterator it=V.begin();
731  vector<AlphaVector>::const_iterator it1;
732  i=0;
733  while(it!=V.end())
734  {
735  it1=V.begin();
736  dominated=false;
737  // check whether "it" is dominated by any it1
    // NOTE(review): dominatedVectors[i] is only written after this loop
    // has finished for index i, so it is always false when tested here
738  while(!dominatedVectors[i] && it1!=V.end() && !dominated)
739  {
740  valuesDominated=true;
741  for(int s=0;s!=nrS;s++)
742  if(it->GetValue(s) > it1->GetValue(s))
743  valuesDominated=false;
744 
    // only a different vector for the same action can dominate
745  if(valuesDominated && it1->GetAction()==it->GetAction() && it1!=it)
746  dominated=true;
747  else
748  dominated=false;
749 
750  it1++;
751  }
752  if(!dominated)
753  {
754  V1.push_back(*it);
755 #if DEBUG_AlphaVectorPlanning_Prune
756  cout << "AlphaVectorPlanning::Prune added vector " << i << endl;
757 #endif
758  }
759  else
760  dominatedVectors[i]=true;
761 
762  it++;
763  i++;
764  }
765 
766 #if DEBUG_AlphaVectorPlanning_Prune
767  cout << "AlphaVectorPlanning::Prune reduced " << V.size() << " to "
768  << V1.size() << endl;
769 #endif
770 
771  return(V1);
772 }
773 
774 VectorSet
776 {
777  int nrInV=V.size1(),nrS=V.size2();
778  bool dominated,valuesDominated;
779  vector<bool> dominatedVectors(nrInV,false);
780  int it,it1;
781  vector<int> vectorsToKeep;
782 
783 #if DEBUG_AlphaVectorPlanning_Prune
784  cout << "AlphaVectorPlanning::Prune " << nrInV << " vectors" << endl;
785 #endif
786 
787  for(it=0;it!=nrInV;it++)
788  {
789  it1=0;
790  dominated=false;
791  // check whether "it" is dominated by any it1
792  while(!dominatedVectors[it] && it1!=nrInV && !dominated)
793  {
794  valuesDominated=true;
795  for(int s=0;s!=nrS;s++)
796  if(V(it,s) > V(it1,s))
797  valuesDominated=false;
798 
799  if(valuesDominated && it1!=it)
800  dominated=true;
801  else
802  dominated=false;
803 
804  it1++;
805  }
806  if(!dominated)
807  {
808  vectorsToKeep.push_back(it);
809 #if DEBUG_AlphaVectorPlanning_Prune
810  cout << "AlphaVectorPlanning::Prune added vector " << it << endl;
811 #endif
812  }
813  else
814  dominatedVectors[it]=true;
815 
816  }
817 
818  int newNrInV=vectorsToKeep.size();
819  VectorSet V1(newNrInV,nrS);
820  for(int i=0;i!=newNrInV;i++)
821  for(int s=0;s!=nrS;s++)
822  V1(i,s)=V(vectorsToKeep[i],s);
823 
824 #if DEBUG_AlphaVectorPlanning_Prune
825  cout << "AlphaVectorPlanning::Prune reduced " << nrInV << " to "
826  << newNrInV << endl;
827 #endif
828 
829  return(V1);
830 }
831 
832 
833 void AlphaVectorPlanning::ExportValueFunction(const string & filename,
834  const QFunctionsDiscrete &Q)
835 {
837 }
838 
839 void
841 ExportValueFunction(const string & filename,
843 {
    // Writes one file per element of Q: element t goes to the file named
    // "<filename>_t<t>", using the single-set ExportValueFunction().
844  for(Index t=0;t!=Q.size();++t)
845  {
846  stringstream filenameT;
847  filenameT << filename << "_t" << t;
848  ExportValueFunction(filenameT.str(),Q[t]);
849  }
850 }
851 
853 AlphaVectorPlanning::ImportValueFunction(const string & filename, size_t nr,
854  size_t nrA, size_t nrS)
855 {
    // Reads `nr` value functions from the files "<filename>_t0" ...
    // "<filename>_t<nr-1>", converts each one with ValueFunctionToQ() and
    // returns them in order.
    // NOTE(review): the declaration of Q is on a line absent from this
    // listing.
857  for(Index t=0;t!=nr;++t)
858  {
859  stringstream filenameT;
860  filenameT << filename << "_t" << t;
861  Q.push_back(ValueFunctionToQ(
862  ImportValueFunction(filenameT.str()),nrA,nrS));
863  }
864  return(Q);
865 }
866 
876 void
879 {
    // Writes every vector of V to `filename` as a three-line record:
    //   "<action> <betaI>"
    //   the values, separated by single spaces
    //   an empty line
    // which is the layout ImportValueFunction() parses.
880  vector<double> values;
881 
882  ofstream fp(filename.c_str());
883  if(!fp)
884  {
885  cerr << "AlphaVectorPlanning::ExportValueFunction: failed to "
886  << "open file " << filename << endl;
887  }
    // NOTE(review): after a failed open the function only reports the
    // problem and carries on; the writes below then go to a stream in a
    // failed state
888 
889  for(unsigned int i=0;i!=V.size();i++)
890  {
891  values=V[i].GetValues();
892  int nrS=values.size();
893 
894  fp << V[i].GetAction() << " " << V[i].GetBetaI() << endl;
895  for(int s=0;s!=nrS;s++)
896  {
897  fp << values[s];
    // no separator after the last value
898  if(s<(nrS-1))
899  fp << " ";
900  }
    // ends the value line and adds the empty separator line
901  fp << endl << endl;
902  }
903 }
904 
908 {
    // Parses a value function from `filename`. The file is read line by
    // line as a repeating group of three lines: an action line
    // ("<action>" or "<action> <betaI>"), a line of values, and a line
    // that is ignored. Every group becomes one AlphaVector appended to V.
    // Throws E when an action line does not hold one or two integers.
    // NOTE(review): the declaration of V is on a line absent from this
    // listing.
    // lines longer than bufsize-1 characters do not fit into the buffer
910  const int bufsize=65536;
911  char buffer[bufsize];
912 
913  int lineState=0; /* lineState=0 -> read action
914  * lineState=1 -> read values
915  * lineState=2 -> empty line, skip */
916  int nrStates=-1;
917  bool first=true;
918  Index action=0;
919  int betaI=-1;
920  double value;
921  vector<double> values;
922  vector<int> actionBetaI;
923  long long int actionOrBetaI;
924 
925  ifstream fp(filename.c_str());
926  if(!fp)
927  {
928  cerr << "AlphaVectorPlanning::ImportValueFunction: failed to "
929  << "open file " << filename << endl;
930  }
    // NOTE(review): a failed open is only reported; execution continues
    // into the read loop below with a stream in a failed state - confirm
    // the intended behaviour
931 
    // NOTE(review): the loop stops as soon as a read hits end-of-file, so
    // a final line without a terminating newline is not processed
932  while(!fp.getline(buffer,bufsize).eof())
933  {
934  switch(lineState)
935  {
936  case 0:
937  // read action
938 // action=strtol(buffer,NULL,10);
939  actionBetaI.clear();
940  {
941  istringstream is(buffer);
942  while(is >> actionOrBetaI)
943  actionBetaI.push_back(actionOrBetaI);
944  }
945 
    // one number: action only (betaI defaults to -1);
    // two numbers: action followed by betaI
946  switch(actionBetaI.size())
947  {
948  case 1:
949  action=actionBetaI[0];
950  betaI=-1;
951  break;
952  case 2:
953  action=actionBetaI[0];
954  betaI=actionBetaI[1];
955  break;
956  default:
957  throw(E("AlphaVectorPlanning::ImportValueFunction parse error"));
958  }
959 
960  lineState++;
961  break;
962  case 1:
963  // read values
964  values.clear();
965 
966  {
967  istringstream is(buffer);
968  while(is >> value)
969  values.push_back(value);
970  }
971 
    // the first value line fixes the size used to construct all vectors
972  if(first)
973  {
974  nrStates=values.size();
975  first=false;
976  }
977 
978  // create new alpha vector and store it
979  {
980  AlphaVector alpha(nrStates);
981  alpha.SetAction(action);
982  alpha.SetValues(values);
983  alpha.SetBetaI(betaI);
984 
985 #if DEBUG_AlphaVectorPlanning_ImportValueFunction
986  cout << "AlphaVectorPlanning::ImportValueFunction "
987  << "added vector " << V.size() << " for action "
988  << action << " betaI " << betaI << endl;
989 #endif
990 
991  V.push_back(alpha);
992  }
993 
994  lineState++;
995  break;
996  case 2:
997  // do nothing, line is empty
998  lineState=0;
999  break;
1000  }
1001  }
1002 
1003  return(V);
1004 }
1005 
1009 {
1011 }
1012 
1016 {
    // Builds a value function with one vector per joint action a whose
    // entry for state s is the immediate reward pu->GetReward(s,a).
    // NOTE(review): the declaration of V0 is on a line absent from this
    // listing.
1017  size_t nrA=pu->GetNrJointActions(),
1018  nrS=pu->GetNrStates();
1020  AlphaVector alpha(nrS);
1021 
1022  for(Index a=0;a<nrA;a++)
1023  {
1024  alpha.SetAction(a);
1025  for(Index s=0;s<nrS;s++)
1026  alpha.SetValue(pu->GetReward(s,a),s);
    // push_back stores a copy, so `alpha` is reused for the next action
1027  V0.push_back(alpha);
1028  }
1029 
1030  return(V0);
1031 }
1032 
1035  QFunctionsDiscrete &Q)
1036 {
    // Flattens Q: the elements of every entry of Q are appended to V in
    // iteration order, and V is returned.
    // NOTE(review): the function name and the declaration of V are on
    // lines absent from this listing.
1038  for(QFDcit i=Q.begin();i!=Q.end();++i)
1039  for(VFPDcit j=i->begin();j!=i->end();++j)
1040  V.push_back(*j);
1041 
1042  return(V);
1043 }
1044 
1045 void AlphaVectorPlanning::ExportPOMDPFile(const string & filename) const
1046 {
1047  ExportPOMDPFile(filename,GetPU());
1048 }
1049 
1051 void AlphaVectorPlanning::ExportPOMDPFile(const string & filename,
1053  *pu)
1054 {
    // Writes the problem behind `pu` to `filename` as text: a header
    // (discount, reward type, state/action/observation names, start
    // distribution) followed by one "T:", "O:" or "R:" line for every
    // non-zero transition probability, observation probability and reward.
    // NOTE(review): the type of `pu` and the declaration of `isd` are on
    // lines absent from this listing.
1055  int nrA=pu->GetNrJointActions(),
1056  nrO=pu->GetNrJointObservations(),
1057  nrS=pu->GetNrStates();
1058  ofstream fp(filename.c_str());
1059  if(!fp)
1060  {
1061  cerr << "AlphaVectorPOMDP::ExportPOMDPFile: failed to open file "
1062  << filename << endl;
1063  }
    // NOTE(review): a failed open is only reported; the writes below then
    // go to a stream in a failed state
1064 
1065  fp << "discount: " << pu->GetDiscount() << endl;
1066  switch(pu->GetReferred()->GetRewardType())
1067  {
1068  case REWARD:
1069  fp << "values: reward" << endl;
1070  break;
1071  case COST:
1072  fp << "values: cost" << endl;
1073  }
1074 
1075  fp << "states:";
1076  for(int s=0;s<nrS;s++)
1077  fp << " " << pu->GetReferred()->GetState(s)->SoftPrintBrief();
1078  fp << endl;
1079 
1080  fp << "actions:";
1081  for(int a=0;a<nrA;a++)
1082  fp << " " << pu->GetReferred()->GetJointAction(a)->SoftPrintBrief();
1083  fp << endl;
1084 
1085  fp << "observations:";
1086  for(int o=0;o<nrO;o++)
1087  fp << " " << pu->GetReferred()->GetJointObservation(o)->
1088  SoftPrintBrief();
1089  fp << endl;
1090 
1091 
    // start distribution: one probability per state, read from `isd`
1093  fp << "start: ";
1094  for(int s0=0;s0<nrS;s0++)
1095  {
1096  double bs = isd->Get(s0);
1097  fp << bs << " ";
1098  }
1099  fp << endl;
1100 
    // `isd` is deleted here, so this function treats it as owned
1101  delete isd;
1102 
    // `p` is reused for probabilities and, in the last loop, for rewards;
    // zero entries are not written
1103  double p;
1104  for(int a=0;a<nrA;a++)
1105  for(int s0=0;s0<nrS;s0++)
1106  for(int s1=0;s1<nrS;s1++)
1107  {
1108  p=pu->GetTransitionProbability(s0,a,s1);
1109  if(p!=0)
1110  fp << "T: " << a << " : " << s0 << " : " << s1 << " "
1111  << p << endl;
1112  }
1113 
1114  for(int a=0;a<nrA;a++)
1115  for(int o=0;o<nrO;o++)
1116  for(int s1=0;s1<nrS;s1++)
1117  {
1118  p=pu->GetObservationProbability(a,s1,o);
1119  if(p!=0)
1120  fp << "O: " << a << " : " << s1 << " : " << o << " "
1121  << p << endl;
1122  }
1123 
    // rewards depend only on (state, action): the remaining two fields of
    // the "R:" line are written as wildcards
1124  for(int a=0;a<nrA;a++)
1125  for(int s0=0;s0<nrS;s0++)
1126  {
1127  p=pu->GetReward(s0,a);
1128  if(p!=0)
1129  fp << "R: " << a << " : " << s0 << " : * : * "
1130  << p << endl;
1131  }
1132 
1133 }
1134 
1136  const string &filename)
1137 {
    // Writes the belief set B to `filename`, one belief per line, as the
    // probabilities B[b]->Get(s) for s = 0 .. Size()-1, each followed by
    // a space.
1138  ofstream fp(filename.c_str());
1139  if(!fp)
1140  {
1141  cerr << "AlphaVectorPlanning::ExportBeliefSet: failed to open file "
1142  << filename << endl;
1143  }
    // NOTE(review): a failed open is only reported; the writes below then
    // go to a stream in a failed state
1144 
1145  int nrB=B.size();
1146  for(int b=0;b!=nrB;b++)
1147  {
1148  for(unsigned int s=0;s!=B[b]->Size();s++)
1149  fp << B[b]->Get(s) << " ";
1150  fp << endl;
1151  }
1152 }
1153 
1156 {
    // Convenience wrapper: takes the number of joint actions and states
    // from this planner's planning unit and forwards to the
    // three-argument ValueFunctionToQ().
1157  return(ValueFunctionToQ(V,GetPU()->GetNrJointActions(),
1158  GetPU()->GetNrStates()));
1159 }
1160 
1163  size_t nrA, size_t nrS)
1164 {
    // Splits the value function V by action: entry a of the result holds
    // all vectors of V whose action is a, in their original order. An
    // action without any vector gets a single placeholder vector filled
    // with -DBL_MAX.
    // NOTE(review): the per-action declaration of Q is on a line absent
    // from this listing.
1165  QFunctionsDiscrete Qs;
1166 
1167  for(Index a=0;a!=nrA;a++)
1168  {
1170  for(Index i=0;i!=V.size();i++)
1171  {
1172  if(V[i].GetAction()==a)
1173  Q.push_back(V[i]);
1174  }
1175  // if the action has no vector, it's dominated everywhere, so
1176  // must never be chosen
1177  if(Q.size()==0)
1178  {
1179  AlphaVector dominatedVector(nrS,-DBL_MAX);
1180  dominatedVector.SetAction(a);
1181  Q.push_back(dominatedVector);
1182 #if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
1183  cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << a
1184  << " is dominated" << endl;
1185 #endif
1186  }
1187  else
1188  {
1189 #if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
1190  cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << a
1191  << " has " << Q.size()
1192  << " vector(s) " << endl;
1193 #endif
1194  }
1195  Qs.push_back(Q);
1196  }
1197 
1198  return(Qs);
1199 }
1200 
1207 {
    // For every row i of V, finds the first earlier row j that equals it
    // up to PROB_PRECISION in every column. Returns a vector with one
    // entry per row: that index j, or -1 when no earlier row matches.
    // Every non-negative entry is therefore smaller than its own position.
1208  int nrInV=V.size1(), nrS=V.size2();
1209  bool equal;
1210 
1211  vector<int> duplicates(nrInV,-1);
1212 
1213  for(int i=1;i!=nrInV;++i) // start at 1, first is never a duplicate
1214  {
1215  for(int j=0;j!=i;++j) // loop over all previous vectors
1216  {
1217  equal=true;
1218  for(int s=0;s!=nrS;++s)
1219  {
1220  if(abs(V(i,s)-V(j,s))>PROB_PRECISION)
1221  {
1222  equal=false;
1223  break; // if 1 number differs, they are not equal
1224  }
1225  }
1226  if(equal)
1227  {
1228  duplicates[i]=j;
1229  break; // we need to find only the first duplicate
1230  }
1231  }
1232  }
1233 
1234 #if 0 // reduce verbosity
1235  PrintVectorCout(duplicates); cout << endl;
1236 #endif
1237  return(duplicates);
1238 }
1239 
1242  const ValueFunctionPOMDPDiscrete &V)
1243 {
1244  bool equal;
1245 
1246  for(VFPDcit it=V.begin(); it!=V.end(); ++it)
1247  {
1248  equal=true;
1249  // if the actions differ we don't need to check the values
1250  if(alpha.GetAction()!=it->GetAction())
1251  {
1252  equal=false;
1253  continue;
1254  }
1255 
1256  for(unsigned i=0; i!=alpha.GetNrValues(); ++i)
1257  if(abs(alpha.GetValue(i)-it->GetValue(i))>PROB_PRECISION)
1258  {
1259  equal=false;
1260  continue;
1261  }
1262 
1263  if(equal)
1264  return(true);
1265  }
1266 
1267  // if we get here we did not find a match, so alpha is not in V
1268  return(false);
1269 }