1 /*
  2  * =====================================================================================
  3  *
  4  *       Filename:  stats.h
  5  *
  6  *    Description:  simple distribution exercise
  7  *
  8  *        Version:  1.0
  9  *        Created:  04/15/2011 06:12:42 PM
 10  *       Revision:  none
 11  *       Compiler:  gcc
 12  *
 13  *         Author:  Ruben Safir,
 14  *        Company:  
 15  *
 16  * =====================================================================================
 17  */
 18 #ifndef STATS_H
 19 #define STATS_H
 20 #include        <iostream>
 21 #include        <fstream>
 22 #include        <climits>
 23 #include        <cmath>
 24 #include        <stdlib.h>
 25 #include        <iomanip>
 26 #include        "linklist.h"
 27
 28 /*
 29  * =====================================================================================
 30  *        Class:  Distribution
 31  *  Description:  Keeps track of the distribution of 6's (or anything else) in a series of Lists
 32  * =====================================================================================
 33  */
 34
 35
 36 namespace stats{
 37
 38
 39    template<class T>
 40       class Distribution {
 41          template<T> friend std::ostream & operator<<(std::ostream &, const Distribution<T>&);
 42          public:
 43          /* ====================  LIFECYCLE     ======================================= */
 44          Distribution (T descr, int occurances = 0);
 45          //Distribution():freq(NULL), occurances(0){};
 46          Distribution(){};
 47          //    Distribution ( const Distribution &other );   /* copy constructor */
 48          //            ~Distribution ();                            /* destructor       */
 49          /* ====================  ACCESSORS     ======================================= */
 50          T description()const{ return freq;}
 51          int population()const { return occurances; }  
 52          /* ====================  MUTATORS      ======================================= */
 53          void increase_occ(){ ++occurances; std::cout << "description " << freq << " occurances " << occurances << std::endl; }
 54          void descrease_occ(){ --occurances; }
 55          /* ====================  OPERATORS     ======================================= */
 56          //Distribution& operator = ( const Distribution &other ); /* assignment operator */
 57          T operator()(){
 58             return freq;
 59          }
 60
 61
 62          bool operator==(Distribution &tmp){
 63             if(this->freq == tmp.freq)
 64                return true;
 65             return false;
 66          }
 67
 68
 69          bool operator<(Distribution &tmp){
 70             if(freq < tmp.freq)
 71                return true;
 72             return false;
 73          }
 74          chainlist::List< stats::Distribution<T> > * tally;  //a list of distribution talleys
 75
 76          float stddev(chainlist::List<stats::Distribution<T> > *);
 77
 78
 79
 80
 81          protected:
 82          /* ====================  DATA MEMBERS  ======================================= */
 83          private:
 84          /* ====================  DATA MEMBERS  ======================================= */
 85          T freq;  //description of unique identifier of a sample point in a List
 86          int occurances; //description of how many times a frequency was found in a list
 87       }; /* -----  end of class Distribution  ----- */
 88
 89
 90    template<typename T>
 91       std::ostream & operator << ( std::ostream & os, const Distribution<T> & obj )
 92       {
 93          T desc = obj.description();
 94          int pop  = obj.population();
 95          os << "The Identification of " << desc << " was seen " << pop ;
 96          return os;
 97       }         /* -----  end of function operator <<  ----- */
 98
 99    template<typename T>
100       Distribution<T>::Distribution(T descr, int occ): occurances(occ){
101          freq = descr;
102       }
103
104   
105    // routinines not part of the distribution class
106
107    /* Routine to determin the mean value of populations in a list of distribribution members  */
108    template <typename T>
109       float mean_list(chainlist::List< Distribution<T> > * tally);
110
111    /*  Routine to go though a single list and add it to an existing distribution table */
112    template<typename T>
113       void mount_individual_data_point(chainlist::List<T> * tabulate, chainlist::List<stats::Distribution<T> > * table);
114
115    /* Routine to find all the occurances of a type in a list of lists */
116    template<typename T>
117       void take_tally(chainlist::List<T> *,chainlist::List<stats::Distribution<T> > *);
118
119
120    //calculation standard deviation of distribution list
121    //
122    template<typename T>
123       float stddev(chainlist::List<stats::Distribution<T> > * tally){
124          float dev, vari = 0;
125          //walk through the list
126          if( tally->endd() == 0 ){
127             std::cout << "Empty List"  << std::endl;
128             exit(0);
129          }
130          float mean = mean_list(tally);
131          tally->cursor() = tally->front();
132
133          while(tally->cursor() != tally->endd() ){
134             vari += pow (tally->cursor()->value()->population() -  mean, 2 );
135             tally->cursor() = tally->cursor()->next();
136          }
137          vari += pow (tally->cursor()->value()->population() -  mean, 2 );
138          dev = sqrt( (vari / tally->size()) );
139
140
141
142
143
144          return dev;
145       }
146
147
148
149    template<typename T>
150       float stddev(chainlist::List< chainlist::List<stats::Distribution<T> > * > *  tallies, T search_val){
151          float dev, vari = 0;
152          //walk through the list
153          chainlist::List<stats::Distribution<T> > * dump;
154          if( tallies->endd() == 0 ){
155             std::cout << "Empty List"  << std::endl;
156             exit(0);
157          }
158          float mean = mean_list(tallies, search_val);
159          tallies->cursor() = tallies->front();
160          while(tallies->cursor() != tallies->endd() ){
161             dump = *(tallies->cursor()->value());
162             dump->cursor() = dump->front(); //set cursor to the front
163             if(dump->endd() == 0){
164                std::cout << "Empty Distribution List"  << std::endl;
165                exit(0);
166             }
167             //we don't need a second while loop here because each Distribution object in the list is unique
168             dump->find_value(search_val);
169             if(dump->cursor() != 0)
170                vari += pow(dump->cursor()->value()->population() - mean, 2);
171             else
172                vari += pow(0 - mean, 2);
173
174             tallies->cursor() = tallies->cursor()->next();
175          }
176          dump->find_value(search_val);
177          if(dump->cursor() != 0)
178             vari += pow(dump->cursor()->value()->population() - mean, 2);
179          else
180             vari += pow(0 - mean, 2);
181          dev = sqrt( (vari / tallies->size()) );
182          return dev;
183       
184       
185       }
186
187
188
189
190    template<typename T>
191       void take_tally(chainlist::List<T> * tabulate, chainlist::List<stats::Distribution<T> > * table){
192          for(tabulate->cursor()=tabulate->front();tabulate->cursor() != tabulate->endd(); tabulate->cursor( tabulate->cursor()->next() ) ){ //build distribution list
193             mount_individual_data_point(tabulate, table);
194          }
195          //we are at the end of tabulate
196          mount_individual_data_point(tabulate, table);
197          table->sort(*table);
198       }
199
200    template<typename T>
201       void mount_individual_data_point(chainlist::List<T> * tabulate, chainlist::List<stats::Distribution<T> > * table){
202          T val;
203          stats::Distribution<T> * j;
204          val = *(tabulate->cursor()->value()); //get a value
205          table->cursor()= table->front(); //check to see if the distribution list exists
206          if(!table->cursor()){ // if not add a distribution table to the List of distributions
207             j = new stats::Distribution<T> (val);
208             table->insert(*j ); //now we have at least one
209             delete j;
210             j=table->cursor()->value();//and increased its population
211             j->increase_occ();
212          }else{
213             //otherwise search for a distribution node described as value
214             table->find_value(val);
215             if( table->cursor() ){
216                j=table->cursor()->value();//and increase its population
217                j->increase_occ();
218             }else{//otherwise add a new node
219                j = new stats::Distribution<T> (val);
220                table->insert( *j ); //now we have one for that value
221                delete j;
222                j=table->cursor()->value();//and increased its population
223                j->increase_occ();
224             }
225          }
226       }
227
228
229
230    template<typename T>
231       float mean_list(chainlist::List< chainlist::List<stats::Distribution<T> >*  > *  tallies, T search_val){
232          if(tallies->endd() == 0){
233             std::cout << "Empty List" << std::endl;
234             return 0.0;
235          }
236          int sum = 0;
237          chainlist::List<stats::Distribution<T> > * dump;
238          tallies->cursor() = tallies->front();
239          while(tallies->cursor() != tallies->endd() ){
240                  //tallies->cursor()->value()->find_value(search_val);
241                  dump = *(tallies->cursor()->value());
242                  //dump->cursor() = dump->endd();
243                  //std::cout << "Testing\n" << *(dump->cursor()->value()) << std::endl;
244                  dump->find_value(search_val);
245                  if(dump->cursor() != NULL)
246                     sum +=  dump->cursor()->value()->population();
247                  tallies->cursor(tallies->cursor()->next());
248          }
249          dump = *(tallies->cursor()->value());
250          dump->find_value(search_val);
251          if(dump->cursor() != 0)
252             sum +=  dump->cursor()->value()->population();
253          float tot = (float) sum/(float)(tallies->size());
254          std::cout << "Mean " << tot << " sum " << sum << " size " << tallies->size() << std::endl;
255          return tot;
256
257       }
258
259    template <typename T>
260       float mean_list(chainlist::List< Distribution<T> > * tally){
261          if(tally->endd() == 0){
262             std::cout << "Empty List" << std::endl;
263             return 0.0;
264          }
265
266          int sum = 0;
267
268          tally->cursor() = tally->front();
269          while(tally->cursor() != tally->endd() ){
270             sum +=  tally->cursor()->value()->population() ;
271             tally->cursor(tally->cursor()->next());
272          }
273          sum +=  tally->cursor()->value()->population() ;
274
275         float tot = (float)sum/(float)(tally->size());
276         std::cout << "Mean " << tot << " sum " << sum << " size " << tally->size() << std::endl;
277
278         return tot;
279       }
280
281
282
283
284
285 }
286 #endif /* STATS_H */