**************************; * set parameters the first time the subroutine is set up (through %include x/pinmacros.sas). * The value of these parameters can be changed outside the macro, if desired. ********************; %let minlikelihood_allowed = log(1E-15); %MACRO BesselKstarJmhlf; argB2=0.5*argB ; if JJB>2 then do ; k1B= 1 ; k2B= 1 + argB ; do jB=3 to JJB ; k3B = k2B + argB2 * argB2 * k1B/((jB-1.5)*(jB-2.5)) ; k1B = k2B ; k2B = k3B ; end ; KstarJmhlf=k3B ; end ; else if JJB=2 then KstarJmhlf = 1+argB ; else if JJB=1 then KstarJmhlf = 1 ; else KstarJmhlf = -1/argB ; %MEND BesselKstarJmhlf; %macro lkstar_nz; if trades gt 2 then do ; lks_m1 = log(1) ; lks_cur = log(1+z) ; do jc = 3 to trades; lks_p1 = log( exp(lks_cur - lks_m1) + z*z /((2*jc-3)*(2*jc-5)) ) +lks_m1; lks_m1 = lks_cur; lks_cur= lks_p1 ; end; lkstarnz = lks_cur; end; else if trades eq 2 then lkstarnz = log(1+ z) ; else if trades eq 1 then lkstarnz = 0 ; else lkstarnz =log(1/z); %mend lkstar_nz; %macro pincalcvdj(_infile, _parest, _likedat, _buyvar, _selvar, _firmidvar ) ; **************************************** ; * _infile is the input data set ; * _parest is the output data set to contain the parameter estimates ; * _likedat is the output dataset that will contain the tlikelihood ; * estimates for nonews, goodnews, badnews etc ; * _buyvar is the name of the BUYS variable ; * _selvar is the name of the SELLS variable ; * _firmidvar is the name of the variable(s) that identifies the ; * firm-period (typically PERMCO YEAR) ; * ; proc sort data = &_infile ; by &_firmidvar; run; **************** * compute mean levels of buys and sells for each firmidvar ; * to use as initial parameter estimates ; proc means data = &_infile noprint; by &_firmidvar; var &_buyvar &_selvar; output out = _mnbuysel mean = _mbuys _msells; run; proc print u data = _mnbuysel ; title "_mnbuysel at 91 "; run; ****** * the variable__id is used to identify separate firm-periods * if we knew there would always be a single by variable on the way in, it * would not be required. * However, explicitly setting this variable means that we can use string variables * as firm identifiers and/or multiple variables (e.g. gvkey yyyyq etc.) * This datastep also adds initial parameter estimates for each firm-perid. ********; data _mnbuysel _llikeli (keep = &_firmidvar __id); set _mnbuysel(drop = _type_); by &_firmidvar; __id = _N_; length _type_ $8; alpha = 0.5; delta = 0.5; invpsi = 0.5; epsi = (_mbuys+ _msells) * 0.4; mu = epsi ; _type_ = "PARMS" ; run; proc print u data = _mnbuysel (obs = 10); title "_mnbuysel "; run; proc print u data = _llikeli (obs = 10); title "_llikeli "; run; ******************** ; * _llikeli ; * Obs PERMNO year __id ; * 1 64151 1993 1 ; * 2 75188 1993 2 ; ******************** ; ******************** ; * _mnbuysel looks like this: ; * ; * Obs PERMNO year _FREQ_ _mbuys _msells __id _type_ alpha delta invpsi epsi mu ; * ; * 1 64151 1993 252 11.8810 12.9960 1 PARMS 0.5 0.5 0.5 9.9508 9.9508 ; * 2 75188 1993 43 9.9302 11.8605 2 PARMS 0.5 0.5 0.5 8.7163 8.7163 ; * 3 75325 1993 252 8.2857 8.4127 3 PARMS 0.5 0.5 0.5 6.6794 6.6794 ; * 4 77002 1993 252 18.9841 15.9206 4 PARMS 0.5 0.5 0.5 13.9619 13.9619 ; * ; ******************** ; ******************** ; * now merge the numerical firm-period identifier into the original input file ; ************************ ; data &_infile; merge &_infile _llikeli; by &_firmidvar; run; data _tempbs; set &_infile; by &_firmidvar; trades=&_buyvar+&_selvar; LBFAC=lgamma(&_buyvar+1) ; LSFAC=lgamma(&_selvar+1) ; LTRADFAC=lgamma(trades+1) ; if trades>0 then LTRADFAC2=lgamma(trades-0.5) ; else LTRADFAC2 = log(3.54490770181103) ; run; %let hlogpi = 0.5 * log(3.1415926536) ; title 'PIG fit using B,S data giving MLE of alpha, delta, epsi, mu, invpsi ' ; run; proc nlp data=_tempbs inest = _mnbuysel outest = _pars maxiter=1000 maxfunc=1000 out=_llikeli (keep = &_firmidvar loglik ll &_buyvar &_selvar ) gtol = 1e-9 noprint tech = congra ; by &_firmidvar; max loglik; parms alpha delta epsi mu invpsi ; bounds alpha > 0 , alpha < 1 , delta > 0 , delta < 1 , epsi > 0 , mu > 0, invpsi > 0 ; psi = 1/invpsi; psi2=psi*psi ; psiepsi=sqrt(psi2+4*epsi) ; psimu=sqrt(psi2+4*epsi+2*mu) ; trades = &_buyvar + &_selvar; n= trades-0.5; z = psi * psiepsi; %lkstar_nz; lKstarnonews=lkstarnz; z =psi*psimu ; %lkstar_nz; lKstarnews=lkstarnz; znonews = psiepsi; znews = psimu; loglik = - &hlogpi + trades*log(epsi) - LBFAC - LSFAC + LTRADFAC2 + log(psi) + (trades-1)*log(2) + - 2*(n)*log(znonews) + psi2 - psi*znonews + lKstarnonews + log( (1 - alpha) + alpha*delta *exp(&_selvar*log(1+mu/epsi) - 2*(n)*log(znews/znonews) - psi*znews + psi*znonews + lKstarnews - lKstarnonews ) + alpha*(1-delta)*exp(&_buyvar*log(1+mu/epsi) - 2*(n)*log(znews/znonews) - psi*znews + psi*znonews + lKstarnews - lKstarnonews ) ) ; ll= exp(loglik); run; **************** * the next section adds other variables to the summary data ********************** ; proc means data = _llikeli noprint ; by &_firmidvar; var loglik; output out = __f1 min = minlik n= numdays; run; data __f2 (keep= &_firmidvar maxgrd terminat ) f3; set _pars ; by &_firmidvar; format maxgrd E9.3; if _type_ not in ("GRAD", "PARMS", "UPPERBD" , "LOWERBD" , "TERMINAT" ) then delete; if _iter_ eq 0 then delete; retain para pard pare parmu parps grada gradd grade gradmu gradps uba ubd ube ubmu ubps lba lbd lbe lbmu lbps terminat; if _type_ eq "TERMINAT" then terminat = _name_; if _type_ eq "PARMS" then do; para = alpha; pard = delta; pare = epsi; parmu = mu; parips = invpsi; end; else if _type_ eq "GRAD" then do; grada = alpha; gradd = delta; grade = epsi; gradmu = mu; gradips = invpsi; end; else if _type_ eq "UPPERBD" then do; uba = alpha; ubd = delta; ube = epsi; ubmu = mu; ubips = invpsi; end; else if _type_ eq "LOWERBD" then do; lba = alpha; lbd = delta; lbe = epsi; lbmu = mu; lbips = invpsi; end; if _type_ eq "LOWERBD" then do; * do maximums set max gradient etc; maxgrd = 0 ; if lba < para< uba then maxgrd = grada; if lbd < pard< ubd then maxgrd = max(maxgrd, gradd) ; if lbe < pare< ube then maxgrd = max(maxgrd, grade) ; if lbmu < parmu< ubmu then maxgrd = max(maxgrd, gradmu) ; if lbps < parps< ubps then maxgrd = max(maxgrd, gradps) ; output; end; %let _nextbit = 0 ; * set to 1 to print interim calculations of likelihoods ; %if _nextbit = 1 %then %do ; proc print u data = __f1; title "__f1" ; run; proc print u data = __f2; title "__f2" ; run; proc print u data = f3; title "f3" ; run; %end; data _pars; merge _pars __f1 (keep = &_firmidvar minlik numdays) __f2 (keep = &_firmidvar maxgrd terminat); by &_firmidvar; drop _TECH_ _TYPE_ _NAME_ _RHS_ _ITER_ ; if _type_ eq "PARMS" then output; run; data &_likedat; set _llikeli ; run; data &_parest; set _pars ; run; %mend pincalcvdj ; %macro pincalceko(_infile, _parest, _likedat, _buyvar, _selvar, _firmidvar ) ; **************************************** ; * _infile is the input data set ; * _parest is the output data set to contain the parameter estimates ; * _likedat is the output dataset that will contain the tlikelihood ; * estimates for nonews, goodnews, badnews etc ; * _buyvar is the name of the BUYS variable ; * _selvar is the name of the SELLS variable ; * _firmidvar is the name of the variable that identifies the ; * firm-period (typically PERMCO) ; * _method used is basic eko model ; * ; proc sort data = &_infile ; by &_firmidvar; run; proc means data = &_infile noprint; by &_firmidvar; var &_buyvar &_selvar; output out = _mnbuysel mean = _mbuys _msells; run; data _mnbuysel _llikeli (keep = &_firmidvar __id); set _mnbuysel(drop = _type_); by &_firmidvar; ****** * the variable__id is used to identify separate firm-periods * if we knew there would always be a single 'by' variable being used, it * would not be required. * However, explicitly setting this variable means that we can use string variables * as firm-period identifiers and/or multiple variables (e.g. gvkey yyyyq etc.) ********; ******************* ; * the next part of the program sets up initial parameter estimates to seed ; * the loglikelihood estimations. One set of initial parameters for every ; * firm period of data. ; ******************* ; __id = _N_; length _type_ $8; alpha = 0.5; delta = 0.5; epsi = (_mbuys+ _msells) * 0.4; mu = epsi ; _type_ = "PARMS" ; run; ******************** ; * _llikeli looks like this: ; * Obs PERMNO year __id ; * 1 64151 1993 1 ; * 2 75188 1993 2 ; ******************** ; ******************** ; * _mnbuysel looks like this: ; * ; * Obs PERMNO year _FREQ_ _mbuys _msells __id _type_ alpha delta invpsi epsi mu ; * ; * 1 64151 1993 252 11.8810 12.9960 1 PARMS 0.5 0.5 0.5 9.9508 9.9508 ; * 2 75188 1993 43 9.9302 11.8605 2 PARMS 0.5 0.5 0.5 8.7163 8.7163 ; * 3 75325 1993 252 8.2857 8.4127 3 PARMS 0.5 0.5 0.5 6.6794 6.6794 ; * 4 77002 1993 252 18.9841 15.9206 4 PARMS 0.5 0.5 0.5 13.9619 13.9619 ; * ; ******************** ; *************; * Copy data into new file called _tempbs for consistency with macro pincalcvdj ; *************; data &_infile ; merge &_infile _llikeli; by &_firmidvar; run; data _tempbs; set &_infile; by &_firmidvar; run; *--------------------------------------------------------------------; * This is the procedure used to maximize the log likelihood ; * function specified below. ; * ; * This particular factorization of the log-likelihood function ; * appears in the paper _Time-Varying Arrival Rates of Informed ; * and Uninformed Trades_ by David Easley, Robert F. Engle, ; * Maureen OHara, and Liuren Wu (paper presented at the 2002 ; * AFA meetings). ; * ; *--------------------------------------------------------------------; proc nlp data=_tempbs inest = _mnbuysel outest = _pars maxiter=1000 maxfunc=1000 out=_llikeli (keep = &_firmidvar loglik ll &_buyvar &_selvar ) noprint ; * where &c1 le __id lt &c2; by &_firmidvar; max loglik; * decvar alpha , delta , epsi , mu ; parms epsi = 100 , mu = 100, alpha = 0.5, delta = 0.5; M = min(&_buyvar,&_selvar) + max(&_buyvar,&_selvar) / 2.0 ; x = epsi / (mu + epsi) ; bounds 1e-6 < epsi mu, 0.0 <= alpha delta <= 1.0 ; loglik = -2 * epsi + M * log(x) + (&_buyvar + &_selvar) * log(mu + epsi) + log( alpha * (1 - delta) * exp(-1.0 * mu) * (x ** (&_selvar - M )) + alpha * delta * exp(-1.0 * mu) * (x ** (&_buyvar - M )) + (1 - alpha) * (x ** (&_buyvar + &_selvar - M )) ) - lgamma(&_buyvar+1) - lgamma(&_selvar+1); if loglik < &minlikelihood_allowed | loglik eq . then loglik = &minlikelihood_allowed ; ll= exp(loglik); run; proc means data = _llikeli noprint ; by &_firmidvar; var loglik; output out = __f1 min = minlik; run; data _pars; merge _pars __f1 (keep = &_firmidvar minlik ); by &_firmidvar; drop _TECH_ _TYPE_ _NAME_ _RHS_ _ITER_ ; if _type_ eq "PARMS" then output; run; data &_likedat; set _llikeli ; run; data &_parest; set _pars ; run; proc print u data = &_parest (obs = 5) ; run; %mend pincalceko ;