**************************;
* set parameters the first time the subroutine is set up (through %include x/pinmacros.sas). 
* The value of these parameters can be changed outside the macro, if desired. 
********************;
%let minlikelihood_allowed = log(1E-15);


%MACRO BesselKstarJmhlf;
   argB2=0.5*argB ;

   if JJB>2 then do ;
      k1B= 1 ; 
      k2B= 1 + argB ;
      do jB=3 to JJB ; 
          k3B = k2B + argB2 * argB2 * k1B/((jB-1.5)*(jB-2.5)) ;
          k1B = k2B ; 
          k2B = k3B ;
          end ;
      KstarJmhlf=k3B ;  
      end ;
   else if JJB=2 then KstarJmhlf =  1+argB ;
   else if JJB=1 then KstarJmhlf =  1 ;
   else  KstarJmhlf = -1/argB ;
   %MEND BesselKstarJmhlf;



%macro lkstar_nz;
    if trades gt 2 then do ;
      lks_m1  =  log(1) ;
      lks_cur = log(1+z) ;
      do jc = 3 to trades;
        lks_p1 = log(  exp(lks_cur - lks_m1) + z*z /((2*jc-3)*(2*jc-5)) )  +lks_m1;
        lks_m1 = lks_cur;
        lks_cur= lks_p1 ;
        end;
      lkstarnz = lks_cur;  
      end;
    
    else if trades eq 2 then lkstarnz = log(1+ z) ;
    else if trades eq 1 then lkstarnz = 0 ; 
    else lkstarnz =log(1/z);

   %mend lkstar_nz;


%macro pincalcvdj(_infile, _parest, _likedat,  _buyvar, _selvar, _firmidvar ) ;

      ****************************************                             ;
      * _infile is the input data set                                      ;
      * _parest is the output data set to contain the parameter estimates  ;
      * _likedat is the output dataset that will contain the tlikelihood   ;
      *      estimates for nonews, goodnews, badnews etc                   ;
      * _buyvar is the name of the BUYS variable                           ;
      * _selvar is the name of the SELLS variable                          ;
      * _firmidvar is the name of the variable(s) that identifies the      ;
      *      firm-period  (typically PERMCO YEAR)                          ;
      *                                                                    ;

    proc sort data = &_infile  ; 
      by &_firmidvar;
      run;
 
    ****************
    * compute mean levels of buys and sells for each firmidvar          ;
    * to use as initial parameter estimates                             ;
   
    proc means data = &_infile    noprint;
      by &_firmidvar;
      var &_buyvar &_selvar;
      output out = _mnbuysel  mean = _mbuys _msells;
      run;

    proc print u data  = _mnbuysel ; 
      title "_mnbuysel at 91 ";
      run; 
  
    ******
    * the variable__id is used to identify separate firm-periods
    * if we knew there would always be a single by variable on the way in, it 
    * would not be required. 
    * However, explicitly setting this variable means that we can use string variables 
    * as firm identifiers and/or multiple variables (e.g. gvkey yyyyq etc.) 
    * This datastep also adds initial parameter estimates for each firm-perid.
    ********;

    data _mnbuysel _llikeli (keep = &_firmidvar  __id); 
      set _mnbuysel(drop = _type_);
      by &_firmidvar;
      __id = _N_; 
      length  _type_ $8;
      alpha = 0.5;
      delta = 0.5;
      invpsi = 0.5;
      epsi = (_mbuys+ _msells) * 0.4;
      mu = epsi ;
      _type_ = "PARMS" ;
      run;




proc print u data = _mnbuysel (obs = 10); 
  title "_mnbuysel ";
  run;
  
proc print u data = _llikeli (obs = 10); 
  title "_llikeli ";
  run;
  
********************                                                             ;
* _llikeli                                                                       ;
* Obs      PERMNO    year    __id                                                ;
* 1        64151    1993      1                                                  ;
* 2        75188    1993      2                                                  ;
********************                                                             ;

********************                                                                                                           ;
* _mnbuysel  looks like this:                                                                                                  ;
*                                                                                                                              ;
* Obs      PERMNO    year    _FREQ_     _mbuys    _msells    __id    _type_    alpha    delta    invpsi      epsi        mu    ; 
*                                                                                                                              ;
*  1        64151    1993      252     11.8810    12.9960      1     PARMS      0.5      0.5       0.5      9.9508     9.9508  ;
*  2        75188    1993       43      9.9302    11.8605      2     PARMS      0.5      0.5       0.5      8.7163     8.7163  ;
*  3        75325    1993      252      8.2857     8.4127      3     PARMS      0.5      0.5       0.5      6.6794     6.6794  ;
*  4        77002    1993      252     18.9841    15.9206      4     PARMS      0.5      0.5       0.5     13.9619    13.9619  ;
*                                                                                                                              ;
********************                                                                                                           ;
 

   
********************                                                             ;
* now merge the numerical firm-period identifier into the original input file    ;
************************                                                         ;
 
    data &_infile;
      merge &_infile
            _llikeli;
      by    &_firmidvar;
      run;       
    
    data _tempbs;
      set &_infile;
      by  &_firmidvar;
      trades=&_buyvar+&_selvar; 
      LBFAC=lgamma(&_buyvar+1) ; 
      LSFAC=lgamma(&_selvar+1) ; 
      LTRADFAC=lgamma(trades+1) ; 
      if trades>0 then LTRADFAC2=lgamma(trades-0.5) ; 
             else LTRADFAC2 = log(3.54490770181103) ;
      run;
        
                
      %let hlogpi = 0.5 * log(3.1415926536) ; 

      title 'PIG fit using B,S data giving MLE of alpha, delta, epsi, mu, invpsi ' ; 
      run;

       proc nlp data=_tempbs  inest = _mnbuysel outest = _pars maxiter=1000 maxfunc=1000       
               out=_llikeli (keep = &_firmidvar loglik ll &_buyvar &_selvar )  
               gtol = 1e-9    noprint
	       tech = congra  ;
         by &_firmidvar;
         max loglik;
         parms alpha delta epsi mu invpsi ;
         bounds alpha > 0 ,  alpha < 1 , delta > 0 , delta < 1 , epsi > 0 , mu > 0, invpsi > 0 ;
         psi = 1/invpsi;
         psi2=psi*psi ; 
         psiepsi=sqrt(psi2+4*epsi) ; 
         psimu=sqrt(psi2+4*epsi+2*mu) ;
         trades = &_buyvar + &_selvar;
         n= trades-0.5;

         z = psi * psiepsi;
         %lkstar_nz;
         lKstarnonews=lkstarnz;

         z =psi*psimu ;
         %lkstar_nz;
         lKstarnews=lkstarnz;

         znonews = psiepsi;
         znews   = psimu;

         loglik =  - &hlogpi + trades*log(epsi) - LBFAC - LSFAC + LTRADFAC2 +  log(psi) + (trades-1)*log(2) +
                  - 2*(n)*log(znonews) + psi2 - psi*znonews  + lKstarnonews +  log( (1 - alpha)          
                  + alpha*delta    *exp(&_selvar*log(1+mu/epsi)  - 2*(n)*log(znews/znonews) 
                  - psi*znews + psi*znonews + lKstarnews - lKstarnonews )
                  + alpha*(1-delta)*exp(&_buyvar*log(1+mu/epsi)  - 2*(n)*log(znews/znonews) 
                  - psi*znews + psi*znonews + lKstarnews - lKstarnonews ) ) ;
         ll= exp(loglik);
         run;

         
****************
* the next section adds other variables to the summary data  
********************** ;

        proc means data = _llikeli noprint ; 
          by &_firmidvar;
          var loglik;
              output out = __f1 min = minlik n= numdays;
              run;
           
        data  __f2 (keep=  &_firmidvar  maxgrd terminat ) f3;  
          set _pars ;
          by &_firmidvar;             
              format maxgrd E9.3;
              if _type_ not in ("GRAD",  "PARMS", "UPPERBD" , "LOWERBD" , "TERMINAT" ) then delete;
              if _iter_ eq 0 then delete; 
              retain para pard pare parmu parps grada gradd grade gradmu gradps
                     uba ubd ube ubmu ubps lba lbd lbe lbmu lbps  terminat;
              if _type_ eq "TERMINAT" then terminat = _name_;
              if _type_ eq "PARMS" then do;
                 para = alpha;
                 pard = delta;
                 pare = epsi;
                 parmu = mu;
                 parips = invpsi;
                 end;
              else if _type_ eq "GRAD" then do;
                 grada = alpha;
                 gradd = delta;
                 grade = epsi;
                 gradmu = mu;
                 gradips = invpsi;
                 end;
              else if _type_ eq "UPPERBD" then do;
                 uba = alpha;
                 ubd = delta;
                 ube = epsi;
                 ubmu = mu;
                 ubips = invpsi;
                 end;
              else if _type_ eq "LOWERBD" then do;
                 lba = alpha;
                 lbd = delta;
                 lbe = epsi;
                 lbmu = mu;
                 lbips = invpsi;
                 end;
              if _type_ eq "LOWERBD" then do; 
                 * do maximums set max gradient etc;
                 maxgrd = 0 ;
                 if  lba < para< uba then  maxgrd = grada;
                 if  lbd < pard< ubd then  maxgrd = max(maxgrd, gradd) ;
                 if  lbe < pare< ube then  maxgrd = max(maxgrd, grade) ;
                 if  lbmu < parmu< ubmu then  maxgrd = max(maxgrd, gradmu) ;
                 if  lbps < parps< ubps then  maxgrd = max(maxgrd, gradps) ;
                 
                 output;
                 end;
                 
	         
   %let _nextbit = 0 ;    * set to 1 to print interim calculations of likelihoods ; 
   %if _nextbit = 1 %then %do ;
    
     proc print u data = __f1;
      title "__f1" ; 
      run;
  
          
    proc print u data = __f2; 
      title "__f2" ; 
      run;
      
    proc print u data = f3; 
      title "f3" ; 
      run;
    
    %end;
    
            
        data _pars; 
          merge _pars 
                __f1 (keep  =  &_firmidvar  minlik numdays)
                __f2 (keep  =  &_firmidvar  maxgrd terminat);
          by &_firmidvar;
          drop _TECH_ _TYPE_ _NAME_ _RHS_  _ITER_  ;
          if _type_ eq "PARMS" then output;
          run;
            
            
          data &_likedat;
             set _llikeli  ;
             run;
                 
              
          data &_parest;
             set _pars ;
             run;
         
    
%mend pincalcvdj ;







%macro pincalceko(_infile, _parest, _likedat,  _buyvar, _selvar, _firmidvar ) ;

      ****************************************                             ;
      * _infile is the input data set                                      ;
      * _parest is the output data set to contain the parameter estimates  ;
      * _likedat is the output dataset that will contain the tlikelihood   ;
      *      estimates for nonews, goodnews, badnews etc                   ;
      * _buyvar is the name of the BUYS variable                           ;
      * _selvar is the name of the SELLS variable                          ;
      * _firmidvar is the name of the variable that identifies the         ;
      *      firm-period  (typically PERMCO)                               ;
      * _method used is basic eko model                                    ;
      *                                                                    ;
    
   proc sort data = &_infile  ; 
      by &_firmidvar;
      run;
 
   proc means data = &_infile    noprint;
      by &_firmidvar;
      var &_buyvar &_selvar;
      output out = _mnbuysel  mean = _mbuys _msells;
      run;

    data _mnbuysel _llikeli (keep = &_firmidvar  __id); 
      set _mnbuysel(drop = _type_);
      by &_firmidvar;
          ******
          * the variable__id is used to identify separate firm-periods
          * if we knew there would always be a single 'by' variable being used, it 
          * would not be required. 
          * However, explicitly setting this variable means that we can use string variables 
          * as firm-period identifiers and/or multiple variables (e.g. gvkey yyyyq etc.) 
          ********;
          
          *******************                                                         ;
          * the next part of the program sets up initial parameter estimates to seed  ;
          * the loglikelihood estimations. One set of initial parameters for every    ;
          * firm period of data.                                                      ;
          *******************                                                         ;
          
      __id = _N_; 
      length  _type_ $8;
      alpha = 0.5;
      delta = 0.5;
      epsi = (_mbuys+ _msells) * 0.4;
      mu = epsi ;
      _type_ = "PARMS" ;
      run;


********************                                ;
* _llikeli looks like this:                         ;  
* Obs      PERMNO    year    __id                   ;
* 1        64151    1993      1                     ;
* 2        75188    1993      2                     ;
********************                                ;
  
********************                                                                                                           ;
* _mnbuysel  looks like this:                                                                                                  ;
*                                                                                                                              ;
* Obs      PERMNO    year    _FREQ_     _mbuys    _msells    __id    _type_    alpha    delta    invpsi      epsi        mu    ; 
*                                                                                                                              ;
*  1        64151    1993      252     11.8810    12.9960      1     PARMS      0.5      0.5       0.5      9.9508     9.9508  ;
*  2        75188    1993       43      9.9302    11.8605      2     PARMS      0.5      0.5       0.5      8.7163     8.7163  ;
*  3        75325    1993      252      8.2857     8.4127      3     PARMS      0.5      0.5       0.5      6.6794     6.6794  ;
*  4        77002    1993      252     18.9841    15.9206      4     PARMS      0.5      0.5       0.5     13.9619    13.9619  ;
*                                                                                                                              ;
********************                                                                                                           ;
 
 
*************;
* Copy data into new file called _tempbs for consistency with macro pincalcvdj   ;
*************;
 
    
    data &_infile ;
      merge &_infile
            _llikeli;
      by    &_firmidvar;
      run;       

         data _tempbs;
           set &_infile; 
           by  &_firmidvar;
           run;

*--------------------------------------------------------------------;
*    This is the procedure used to maximize the log likelihood       ;
*    function specified below.                                       ;
*                                                                    ;
*    This particular factorization of the log-likelihood function    ;
*    appears in the paper _Time-Varying Arrival Rates of Informed    ;
*    and Uninformed Trades_ by David Easley, Robert F. Engle,        ;
*    Maureen OHara, and Liuren Wu (paper presented at the 2002       ;
*    AFA meetings).                                                  ;
*                                                                    ;
*--------------------------------------------------------------------;

	 proc nlp data=_tempbs   inest = _mnbuysel outest = _pars maxiter=1000 maxfunc=1000  
                  out=_llikeli (keep = &_firmidvar loglik ll &_buyvar &_selvar )   noprint  ;
           * where &c1 le __id  lt &c2;
           by &_firmidvar;
           max loglik;
           * decvar alpha , delta , epsi , mu ;
           parms epsi = 100 , mu = 100, alpha = 0.5, delta = 0.5;
	   M = min(&_buyvar,&_selvar) + max(&_buyvar,&_selvar) / 2.0 ;
           x = epsi / (mu + epsi) ;
           bounds   1e-6 < epsi mu, 0.0 <= alpha delta <= 1.0 ;
           loglik = -2  * epsi + M * log(x) + (&_buyvar + &_selvar) * log(mu + epsi) 
                     + log(   alpha * (1  - delta) * exp(-1.0 * mu) * (x ** (&_selvar - M )) 
                            + alpha * delta        * exp(-1.0 * mu) * (x ** (&_buyvar - M ))  
                            + (1  - alpha)               * (x ** (&_buyvar + &_selvar - M ))   ) 
		    - lgamma(&_buyvar+1) - lgamma(&_selvar+1); 
           if loglik < &minlikelihood_allowed  |  loglik eq . then  loglik = &minlikelihood_allowed  ;
	   ll= exp(loglik);
           run;
           
           
         
         proc means data = _llikeli noprint ; 
           by &_firmidvar;
           var loglik;
               output out = __f1 min = minlik;
               run;
           
         data   _pars; 
           merge  _pars 
                 __f1 (keep  =  &_firmidvar  minlik );
           by &_firmidvar;
           drop _TECH_ _TYPE_ _NAME_ _RHS_  _ITER_  ;
           if _type_ eq "PARMS" then output;
           run;

              data &_likedat;
	   	set _llikeli  ;
	   	run;
	 

           data &_parest;
             set _pars ;
             run;

	  
	 proc print u data = &_parest (obs = 5) ;    
	   run;

   
   
%mend pincalceko ;