MultiAgentDecisionProcess  Release 0.2.1
ValueFunctionDecPOMDPDiscrete.cpp
#include "ValueFunctionDecPOMDPDiscrete.h"
#include "JointPolicyDiscretePure.h"

#define DEBUG_CALCV 0
#define DEBUG_CALCV_CACHE 0

using namespace std;

ValueFunctionDecPOMDPDiscrete::ValueFunctionDecPOMDPDiscrete(
    PlanningUnitDecPOMDPDiscrete& p, JointPolicyDiscretePure& jp)
{
    _m_V_initialized = false;
    _m_p_V = 0;
    _m_pu = &p;
    _m_jpol = &jp;
    _m_nrJOH = _m_pu->GetNrJointObservationHistories();
    _m_nrJO = _m_pu->GetNrJointObservations();
    _m_nrS = _m_pu->GetNrStates();
    _m_h = _m_pu->GetHorizon();
}

ValueFunctionDecPOMDPDiscrete::ValueFunctionDecPOMDPDiscrete(
    PlanningUnitDecPOMDPDiscrete* p, JointPolicyDiscretePure* jp)
{
    _m_V_initialized = false;
    _m_p_V = 0;
    _m_pu = p;
    _m_jpol = jp;
    _m_nrJOH = _m_pu->GetNrJointObservationHistories();
    _m_nrJO = _m_pu->GetNrJointObservations();
    _m_nrS = _m_pu->GetNrStates();
    _m_h = _m_pu->GetHorizon();
}

//Copy constructor.
ValueFunctionDecPOMDPDiscrete::ValueFunctionDecPOMDPDiscrete(
    const ValueFunctionDecPOMDPDiscrete& o)
{
    _m_V_initialized = o._m_V_initialized;
    //deep copy of the value matrix - but only if the source actually has one:
    _m_p_V = (o._m_p_V != 0) ? new Matrix(*o._m_p_V) : 0;
    _m_pu = o._m_pu;
    _m_jpol = o._m_jpol;
    _m_nrJOH = o._m_nrJOH;
    _m_nrJO = o._m_nrJO;
    _m_nrS = o._m_nrS;
    _m_h = o._m_h;
    _m_cached = o._m_cached;
}

//Destructor
ValueFunctionDecPOMDPDiscrete::~ValueFunctionDecPOMDPDiscrete()
{
    DeleteV();
}

void ValueFunctionDecPOMDPDiscrete::DeleteV()
{
    if(_m_V_initialized)
    {
        delete(_m_p_V);
        _m_p_V = 0; //avoid a dangling pointer
        _m_V_initialized = false;
    }
    _m_cached.clear();
}

void ValueFunctionDecPOMDPDiscrete::CreateV()
{
    if(_m_V_initialized)
        DeleteV();

    _m_p_V = new Matrix(_m_nrS, _m_nrJOH);
        //i.e., Matrix(GetPU()->GetNrStates(), GetPU()->GetNrJointObservationHistories());
    _m_p_V->clear(); //set all entries to 0
    _m_V_initialized = true;
}

void ValueFunctionDecPOMDPDiscrete::SetCached(Index sI, Index JOHI)
{
    pair<Index, bool> p(GetCombinedIndex(sI, JOHI), true);
    pair<map<Index,bool>::iterator, bool> result = _m_cached.insert( p );
    if(result.second != true)
    {
        // insert() does not overwrite: if this key ( GetCombinedIndex(sI, JOHI) )
        // is already present, we have to set its value to true ourselves.
        // *(result.first) is a pair<const Index, bool>, so we do:
        (*(result.first)).second = true;
    }
}

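/* Note on the cache key (added for exposition): GetCombinedIndex(sI, JOHI)
 * flattens the (state, joint observation history) pair into a single Index,
 * so that _m_cached can be an ordinary map<Index,bool>. The exact formula is
 * defined in the header, not in this file; row-major indexing matching the
 * Matrix dimensions, sI * _m_nrJOH + JOHI, would be the obvious choice, but
 * that is an assumption here.
 */
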
double ValueFunctionDecPOMDPDiscrete::CalculateV(bool cache)
{
    if(cache)
        return(CalculateV0RecursivelyCached());
    else
        return(CalculateV0RecursivelyNotCached());
}

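/* Usage sketch (added for illustration, not part of the original file; how
 * the planning unit and joint policy are obtained is problem specific and
 * elided here):
 *
 *   PlanningUnitDecPOMDPDiscrete* pu = ...;  // planning unit for the problem
 *   JointPolicyDiscretePure* jpol = ...;     // the joint policy to evaluate
 *   ValueFunctionDecPOMDPDiscrete vf(pu, jpol);
 *   double V = vf.CalculateV(true); // true -> cache intermediate (s,johI) values
 */
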
double ValueFunctionDecPOMDPDiscrete::CalculateV0RecursivelyCached()
{
    CreateV();
#if DEBUG_CALCV
    if(DEBUG_CALCV)
        cout << "CalculateV0Recursively::creating a new value function"
             << endl;
    if(DEBUG_CALCV) {cout << "evaluating joint policy:\n"; GetJPol()->Print();}
#endif

    double val = 0;
    for(Index sI = 0; sI < _m_nrS; sI++)
    {
        double v_sI = CalculateVsjohRecursivelyCached(sI,
            Globals::INITIAL_JOHI, 0);
#if DEBUG_CALCV
        if(DEBUG_CALCV){
            cout << ">>>ValueFunctionDecPOMDPDiscrete::CalculateV() -"
                 << " CalculateVsjohRecursively(sI=" << sI
                 << ", INITIAL_JOHI) = " << v_sI << endl; }
#endif
        val += _m_pu->GetInitialStateProbability(sI) * v_sI;
    }
#if DEBUG_CALCV
    if(DEBUG_CALCV) { cout << "This policy's V=" << val <<endl;}
#endif
    return val;
}
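
/* In other words, the loop above computes (notation added for exposition)
 *
 *   V(jpol) = sum_{sI} b0(sI) * V(sI, INITIAL_JOHI, 0),
 *
 * the expectation, over the initial state distribution b0, of the value of
 * starting in sI with the empty joint observation history at stage 0.
 */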
double ValueFunctionDecPOMDPDiscrete::CalculateV0RecursivelyNotCached()
{
#if DEBUG_CALCV
    if(DEBUG_CALCV) {cout << "evaluating joint policy:\n"; GetJPol()->Print();}
#endif

    double val = 0;
    for(Index sI = 0; sI < _m_nrS; sI++)
    {
        double v_sI = CalculateVsjohRecursivelyNotCached(sI,
            Globals::INITIAL_JOHI, 0);
#if DEBUG_CALCV
        if(DEBUG_CALCV){
            cout << ">>>ValueFunctionDecPOMDPDiscrete::CalculateV() -"
                 << " CalculateVsjohRecursively(sI=" << sI
                 << ", INITIAL_JOHI) = " << v_sI << endl; }
#endif
        val += _m_pu->GetInitialStateProbability(sI) * v_sI;
    }
#if DEBUG_CALCV
    if(DEBUG_CALCV) { cout << "This policy's V=" << val <<endl;}
#endif
    return val;
}
double ValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursivelyCached(Index sI,
    Index johI, Index stage)
{
#if DEBUG_CALCV
    {cout<< "\nValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursively("
        << sI << ", " << johI << ") called"<<endl;}
#endif

#if 0
    //moved this check to before the call of this function, saving a function
    //call:
    if( IsCached(sI, johI) )
    {
#if DEBUG_CALCV
        if(DEBUG_CALCV_CACHE) cout << "returning cached result"<<endl;
#endif
        return( (*_m_p_V)(sI, johI) );
    }
#endif
    Index jaI = GetJPol()->GetJointActionIndex(johI);

    double R = _m_pu->GetReward(sI, jaI);
    double ExpFutureR = 0.0;
    // A horizon-h policy makes decisions based on observation histories of
    // length at most h-1:
    // t=0   - ()              - length 0
    // ...
    // t=h-1 - (o1,...,o{h-1}) - length h-1
    if(stage >= _m_h - 1 )
    {
#if DEBUG_CALCV
        { cout << "ValueFunctionDecPOMDPDiscrete::CalculateVsjoh"
            << "Recursively("<< sI <<", " << johI << ") - V="<<R<<endl;}
#endif
        return(R);
    }

#if DEBUG_CALCV
    if(DEBUG_CALCV){ cout << "Calculating future reward"<<endl;}
#endif
    for(Index sucSI = 0; sucSI < _m_nrS; sucSI++)
    {
        double probSucSI = _m_pu->GetTransitionProbability(sI, jaI, sucSI);
#if DEBUG_CALCV
        if(DEBUG_CALCV){ cout << "P(s"<<sucSI<<"|s"<<sI<<",ja"<<jaI<<")="<<
            probSucSI<<endl;}
#endif

        for(Index joI = 0; joI < _m_nrJO; joI++)
        {
            double probJOI = _m_pu->GetObservationProbability(jaI, sucSI, joI);
#if DEBUG_CALCV
            if(DEBUG_CALCV){ cout << "P(jo"<<joI<<"|ja"<<jaI<<",s"<<sucSI<<")="
                <<probJOI<<endl;}
#endif
            Index sucJohI = _m_pu->GetSuccessorJOHI(johI, joI);
            double thisSucV;
            if(IsCached(sucSI, sucJohI))
                thisSucV = (*_m_p_V)(sucSI, sucJohI);
            else
                thisSucV = CalculateVsjohRecursivelyCached(sucSI, sucJohI,
                    stage+1);
            ExpFutureR += probSucSI * probJOI * thisSucV;
        }//end for each observation
    }//end for each potential successor state
    double val = R + ExpFutureR;
    (*_m_p_V)(sI, johI) = val;
    SetCached(sI, johI);
#if DEBUG_CALCV
    if(DEBUG_CALCV){
        cout << "ValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursively("
            << sI <<", " << johI << ") \n->immediate R="<<R<<
            " \n->exp. future reward="<<ExpFutureR<<"\n->V="<<val<<endl;}
#endif
    return(val);
}
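
/* The recursion above is policy evaluation for a fixed joint policy
 * (notation added for exposition): with jaI = GetJPol()->GetJointActionIndex(johI)
 * the joint action taken at history johI,
 *
 *   V(sI, johI, t) = R(sI, jaI)                                 if t = h-1
 *   V(sI, johI, t) = R(sI, jaI)
 *     + sum_{s'} sum_{jo} P(s'|sI,jaI) * P(jo|jaI,s')
 *         * V(s', GetSuccessorJOHI(johI, jo), t+1)              otherwise
 *
 * and the cache ensures each (state, history) pair is evaluated only once.
 */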
double ValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursivelyNotCached(
    Index sI, Index johI, Index stage)
{
#if DEBUG_CALCV
    {cout<< "\nValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursively("
        << sI << ", " << johI << ") called"<<endl;}
#endif

    Index jaI = GetJPol()->GetJointActionIndex(johI);
    double R = GetPU()->GetReward(sI, jaI);
    double ExpFutureR = 0.0;
    // A horizon-h policy makes decisions based on observation histories of
    // length at most h-1:
    // t=0   - ()              - length 0
    // ...
    // t=h-1 - (o1,...,o{h-1}) - length h-1
    if(_m_pu->GetTimeStepForJOHI(johI) >= _m_h - 1 )
    {
#if DEBUG_CALCV
        { cout << "ValueFunctionDecPOMDPDiscrete::CalculateVsjoh"
            << "Recursively("<< sI <<", " << johI << ") - V="<<R<<endl;}
#endif
        return(R);
    }

#if DEBUG_CALCV
    if(DEBUG_CALCV){ cout << "Calculating future reward"<<endl;}
#endif
    for(Index sucSI = 0; sucSI < _m_nrS; sucSI++)
    {
        double probSucSI = _m_pu->GetTransitionProbability(sI, jaI, sucSI);
#if DEBUG_CALCV
        if(DEBUG_CALCV){ cout << "P(s"<<sucSI<<"|s"<<sI<<",ja"<<jaI<<")="<<
            probSucSI<<endl;}
#endif

        for(Index joI = 0; joI < _m_nrJO; joI++)
        {
            double probJOI = _m_pu->GetObservationProbability(jaI, sucSI, joI);
#if DEBUG_CALCV
            if(DEBUG_CALCV){ cout << "P(jo"<<joI<<"|ja"<<jaI<<",s"<<sucSI<<")="
                <<probJOI<<endl;}
#endif
            Index sucJohI = _m_pu->GetSuccessorJOHI(johI, joI);
            double thisSucV = CalculateVsjohRecursivelyNotCached(sucSI,
                sucJohI, stage+1);
            ExpFutureR += probSucSI * probJOI * thisSucV;
        }//end for each observation
    }//end for each potential successor state
    double val = R + ExpFutureR;
#if DEBUG_CALCV
    if(DEBUG_CALCV){
        cout << "ValueFunctionDecPOMDPDiscrete::CalculateVsjohRecursively("
            << sI <<", " << johI << ") \n->immediate R="<<R<<
            " \n->exp. future reward="<<ExpFutureR<<"\n->V="<<val<<endl;}
#endif
    return(val);
}
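
/* Design note (added for exposition): the cached and non-cached variants
 * trade memory for time. The cached version stores a value for every
 * (state, joint observation history) pair - a Matrix of _m_nrS * _m_nrJOH
 * doubles plus the _m_cached map - and computes each pair at most once. The
 * non-cached version needs no such storage, but recomputes the value of a
 * (sucSI, sucJohI) pair once for every predecessor state that reaches it,
 * a cost that compounds multiplicatively with the number of stages.
 */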