MultiAgentDecisionProcess  Release 0.2.1
GeneralizedMAAStarPlanner.cpp
Go to the documentation of this file.
1 
28 #include <float.h>
29 #include <limits.h>
31 #include "JointPolicyValuePair.h"
32 #include "QFunctionJAOHInterface.h"
34 #include "PolicyPoolJPolValPair.h"
35 
37 
38 #define DEBUG_GMAA_POLS 0
39 
40 using namespace std;
41 
42 
43 
44 //Default constructor
// Stores the two arguments (verboseness -> _m_verboseness, slack -> _m_slack)
// and starts with no found policy (_m_foundPolicy is a null pointer until the
// planning routine assigns it).
46  int verboseness,
47  double slack) :
48  _m_foundPolicy(0),
49  _m_verboseness(verboseness),
50  _m_slack(slack)
51 {
    // UINT_MAX presumably means "no limit on the number of policies to
    // process" - TODO confirm against the code that reads this member.
55  _m_nrPoliciesToProcess=UINT_MAX;
    // Counter starts at zero; what it counts is not visible in this file part.
56  _m_bgCounter=0;
58 }
59 
60 //Destructor
// Frees the policy object held in _m_foundPolicy. The pointer is null after
// construction, in which case the delete is a no-op.
62 {
63  delete _m_foundPolicy;
64 }
65 
66 //Copy assignment operator
// Not implemented: self-assignment returns *this unchanged, every other
// assignment throws.
68 {
69  if (this == &o) return *this; // Gracefully handle self assignment
70  // Put the normal assignment duties here...
    // NOTE(review): this throws a string literal (const char*), not the E(...)
    // type thrown by the pool-empty check in this file, so a handler written
    // for E will not catch it.
71  throw("GeneralizedMAAStarPlanner::operator= not implemented");
72 
    // Unreachable: the throw above always leaves the function first.
73  return *this;
74 }
75 
76 
77 /* this is the high-level pseudo-code for what happens:
78  start with a horizon 0 joint policy - i.e. specifying 0 actions
79  JPolValPool.push( <jpol,val=0.0> )
80  do
81  ppi = <pol,val> = JPolValPool.GetNext()
82 
83  //poolOfNextPolicies = {<pol,vals>}
84  //isLowerBound = bool - whether the vals are lower bounds to the
85  // optimal value (i.e. value for the optimal policy)
86  <poolOfNextPolicies, isLowerBound> = ConstructAndValuateNextPolicies(ppi)
87  if(isLowerBound)
88  Prune( JPolValPool, max(lowerBound) )
89 
90  poolOfNextPolicies = SelectPoliciesToProcessFurther(poolOfNextPolicies);
91  JPolValPool.insert(poolOfNextPolicies)
92 
93  while !empty JPolValPool
94 */
96 {
    // Runs the search loop sketched in the pseudo-code comment above:
    // repeatedly Select() an item from the policy pool pp_p, expand it via
    // ConstructAndValuateNextPolicies, track the best lower bound (maxLB) and
    // its policy (bestJPol), and merge the surviving expansions back into the
    // pool. On normal completion bestJPol is stored in _m_foundPolicy.
    //
    // Drop the result of a previous run.
    // NOTE(review): the pointer is not reset after the delete. If the loop
    // below throws (see the pool-empty check), _m_foundPolicy still points at
    // the freed object and the destructor would delete it a second time.
97  if( _m_foundPolicy != 0)
98  delete _m_foundPolicy;
99 
100  StartTimer("GMAA::Plan");
101 
102  //stuff for timing (if used)
103  tms ts_start; //the time struct
104  clock_t tck_start; //ticks
105  tck_start = times(&ts_start);
106  //the intermediate timing stream
    // NOTE(review): _m_intermediateResultFile is dereferenced unconditionally
    // here. `its` is written to further down inside a braced block whose
    // guarding line is not visible in this listing - confirm the pointer is
    // always valid at this point.
107  ofstream & its = *_m_intermediateResultFile;
108 
109  //this counter maintains the maximum policy pool size.
110  _m_maxJPolPoolSize = 0;
    // Best lower bound found so far; starts below every possible value.
111  double maxLB = -DBL_MAX;
    // (The statement that creates bestJPol is not present in this listing.)
113  bestJPol->SetPastReward(-DBL_MAX);
114 #if DEBUG_GMAA_POLS
115  cout << "GMAA initialized with empty policy:"<<endl;
116  bestJPol->Print();
117 #endif
118  PartialPolicyPoolInterface * pp_p = NewPP();
119  pp_p->Init( GetThisFromMostDerivedPU() ); //initialize with empty joint policy
120  do
121  {
122  StartTimer("GMAA::Plan::iteration");
125 
126  if(_m_verboseness >= 2) {
127  cout << "\n---------------------------------------------------\n";
128  cout << "-->>Start of new GMAA iteration, polpool size="<<
129  pp_p->Size()<<"<<--"<<endl;
130  }
131 
    // Select() only returns the item; it stays in the pool until the Pop()
    // in the clean-up step below.
132  PartialPolicyPoolItemInterface* ppi = pp_p->Select();
133  PartialJointPolicyDiscretePure * jpol_sel = ppi->GetJPol();
134  double v_sel = ppi->GetValue();
135  size_t depth_sel = jpol_sel->GetDepth();
136  if(_m_verboseness >= 3) {
137  cout << "Select returned the following policy to expand:\n";
138  ppi->GetJPol()->Print();
139  cout << "of depth="<< depth_sel << " and heur. val="<<v_sel<<endl;
140  }
141 
    // Early stop: even with the allowed slack, the selected item's value is
    // below the best lower bound already found.
    // NOTE(review): this break leaves the loop without the matching
    // StopTimer("GMAA::Plan::iteration") for the StartTimer above.
142  if( (v_sel + _m_slack) < maxLB) //the highest upperbound < the best lower
143  {
144  //TODO:
145  // 1)if JPolValPool is no priority queue, this should be changed.
146  // 2)this is only necessary, because PRUNE (see todo below) is nyi
147  if(_m_verboseness >= 0)
148  cout<<"highest upper < best found lower bound, stopping\n";
149  break;
150  }
151 
152  //poolOfNextPolicies = {<pol,vals>}
153  //isLowerBound = bool - whether the vals are lower bounds to the
154  // optimal value (i.e. value for the optimal policy)
155  //<poolOfNextPolicies,isLowerBound>=ConstructAndValuateNextPolicies(ppi)
156 
157  PartialPolicyPoolInterface * poolOfNextPolicies = NewPP();
158  bool are_LBs = ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies);
159 
160  //Clean up ppi
    // Pop() removes the selected item from the pool without freeing it, so
    // it is deleted explicitly here.
161  if(pp_p->Size() > 0) //should always be true
162  {
163  pp_p->Pop();
164  delete ppi;
165  }
166  else //should not happen
167  throw E("GeneralizedMAAStarPlanner.cpp:policy pool empty? - should not happen?");
168 
    // NOTE(review): DEBUG_GMAA4 is not defined in the visible part of this
    // file (only DEBUG_GMAA_POLS is). When enabled, pp_copy is emptied by the
    // loop but never deleted.
169 #if DEBUG_GMAA4
170  if(DEBUG_GMAA4){
171  cout << "\n>>>The next policies found, poolOfNextPolicies:"<<endl;
172  PartialPolicyPoolInterface* pp_copy = NewPP();
173  *pp_copy = *poolOfNextPolicies;
174  while(! pp_copy->Empty())
175  {
176  PartialPolicyPoolItemInterface* it = pp_copy->Select();
177  it->Print();
178  cout << endl;
179  pp_copy->Pop();
180  }
181  }
182 #endif
183 
184  //if(isLowerBound)
185  // Prune( JPolValPool, max(lowerBound) )
    // The new values are lower bounds: take the best-ranked item out of the
    // pool of next policies and compare it with the best bound so far.
186  if(are_LBs && poolOfNextPolicies->Size() > 0)
187  {
188  PartialPolicyPoolItemInterface* bestRanked_ppi = poolOfNextPolicies->
189  GetBestRanked();
190  poolOfNextPolicies->PopBestRanked();
191  double bestNextVal = bestRanked_ppi->GetValue();
192  if(bestNextVal > maxLB) //new best lowerbound (and policy) found
193  {
194  maxLB = bestNextVal;
    // Copies the policy by value, so bestJPol stays valid after
    // bestRanked_ppi is deleted below.
195  *bestJPol = *(bestRanked_ppi->GetJPol());
196  if(_m_verboseness >= 2) {
197  cout << "new bestJPol (and max. lowerbound) found!" << endl;
198  cout << "Its value v="
199  << bestNextVal <<" - "
200  << bestRanked_ppi->GetJPol()->SoftPrintBrief() << endl;
201  }
202  if(_m_verboseness >= 3)
203  cout << "new bestJPol->SoftPrint():"<<bestJPol->SoftPrint();
204 
205  //if we maintain the internal timings...
    // Writes "<elapsed ticks since start>\t<maxLB>" to the intermediate
    // stream. (The condition guarding this block is not in this listing.)
207  {
208  tms ts_cur;
209  clock_t tck_cur;
210  tck_cur = times(&ts_cur);
211  clock_t diff = tck_cur - tck_start;
212  its << diff << "\t" << maxLB << endl;
213  }
214  // prune JPolValPool
215  pp_p->Prune(maxLB - _m_slack );
216  }
217  delete bestRanked_ppi;
218 
219  }
    // Filter the expansions, then merge what is left into the main pool.
220  SelectPoliciesToProcessFurther(poolOfNextPolicies, are_LBs, maxLB - _m_slack);
221  pp_p->Union(poolOfNextPolicies);
222 
223  delete poolOfNextPolicies;
224 
225  if( _m_maxJPolPoolSize < pp_p->Size())
226  _m_maxJPolPoolSize = pp_p->Size();
227 
228  StopTimer("GMAA::Plan::iteration");
229  if(_m_verboseness >= 2) {
230  cout << "\nGMAA::Plan::iteration ending, best policy found so far:";
231  cout << endl << bestJPol->SoftPrintBrief() <<endl;
232  if(_m_verboseness >= 3)
233  cout << endl << bestJPol->SoftPrint() <<endl;
234  }
235 
236  }
237  while(! pp_p->Empty() ); //<- end do...while
238  //we don't want to do any conversions here... takes (sometimes too much)
239  //time...
    // The planner now holds bestJPol; the destructor (or the next run of this
    // function) deletes it.
240  _m_foundPolicy=bestJPol; //->ToJointPolicyPureVector());
241 
242 
244  if(_m_verboseness >= 1) {
245  cout << "\nGMAA::Plan ending, best policy found: ";
246  cout << bestJPol->SoftPrintBrief() << " = " <<endl;
247  if(_m_verboseness >= 3)
248  cout << _m_foundPolicy->SoftPrint() << endl;
249  cout << endl;
250 #if 0
252  jppv->Print();
253 #endif
254  }
255  if(_m_verboseness >= 2)
256  cout << "\n\n ";
257  if(_m_verboseness >= 0)
258  cout << "GMAA::Plan GMAA ENDED"<<endl;
259  if(_m_verboseness >= 2)
260  cout << "\n\n ";
261 
262  delete pp_p;
263 
264  StopTimer("GMAA::Plan");
265 }
266 
267 void
269  PartialPolicyPoolInterface* poolOfNextPolicies, bool are_LBs,
270  double bestLB, size_t k)
271 {
272  if(are_LBs)
273  {
274  //if all policies are full policies, we don't return any
275  //of them (these need not be expanded further)
276  while(!poolOfNextPolicies->Empty())
277  poolOfNextPolicies->Pop();
278  return;
279  }
280  PartialPolicyPoolInterface * pp_new = NewPP();
281 
282  size_t nr_done = 0;
283  while(poolOfNextPolicies->Size() > 0 ) // && nr_done < k)
284  {
285  PartialPolicyPoolItemInterface* best_ppi = poolOfNextPolicies->GetBestRanked();
286  poolOfNextPolicies->PopBestRanked();
287  if(nr_done >= k || best_ppi->GetValue() < bestLB)
288  {
289  //we do not want this policy, so discard it:
290  delete best_ppi;
291  }
292  else
293  {
294  //we do want this policy, so store it:
295  pp_new->Insert(best_ppi);
296  }
297  nr_done++;
298  }
299  //done: - free the memory of the policies in poolOfNextPolicies
300  //that we will not consider further!
301 
302  //copy by value: //TODO check if this is what we want?
303  // separte input/output arguments might be better?
304  *poolOfNextPolicies = *pp_new;
305  //remove elements from pp_new before delete! (otherwise they will be deleted //with them) and we get a segfault later on...
306  while(pp_new->Size() > 0)
307  pp_new->Pop();
308 
309  delete pp_new;
310 
311 }
312 
314 {
319  if(pp.Size()<=k)
320  return;
321 
322  PartialPolicyPoolInterface * ppPruned = NewPP();
323 
324  while(ppPruned->Size()<k)
325  {
326  ppPruned->Insert(pp.GetBestRanked());
327  pp.PopBestRanked();
328  }
329 
330  //delete the rest which we will not use further...
331  while(pp.Size()>0)
332  {
333  delete pp.Select();
334  }
335  pp=*ppPruned;
336 }
337 
339 {
342 }
343 
345 {
    // Stores the `verbose` argument in _m_verboseness, the level that the
    // planning loop compares against before printing progress output.
346  _m_verboseness = verbose;
347 }
348 
// Three accessor bodies (their signatures are not shown in this listing).
// Each returns the stored _m_foundPolicy pointer as-is; the constructor
// initializes it to 0 and the planning routine assigns it when it finishes.
350 { return(_m_foundPolicy); }
352 { return(_m_foundPolicy); }
354 { return(_m_foundPolicy); }