/* Tab11_prediction_specialty.prg */ // Usses the iter_num=1 data output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\Tab11_prediction_specialty.out reset; "Case with speciality"; internalChina=1; print "internalChina=" internalChina; /* Loading data */ load dat[83721,13] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_CM97.asc; load dat07[83721,13] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_cbp07.asc; load loc_dat[177,5] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop_long_lat97.asc; load loc_dat07[177,4] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop07.asc; load a_param_mat[466,6] = D:\a_data\proj\size_fun\exports\revision\wrap\public\stage1_estimatesNAICS97_for_gauss.asc; //file 'd:\a_data\proj\size_fun\exports\revision\prg\model_final\parameters_r.asc'; //put naicsindex ',' eta1 ',' eta2 ',' LogLike ',' eta_lnln ',' LogLike_lnln ','; load ea_dist_within_tract[177,5]=D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_dist_within_tract.asc; // Pick a distance measure 2-5: (1) ea_index ',' (2) dist_wgtavg ',' (3) lndist_wgtavg ',' (4) dist_wgtavg_delete ',' (5) lndist_wgtavg_delete ','; // the _delete throughs out Census tracts, but let's ignore this and just uas either (2) or (3) eta_semi=a_param_mat[., 2 3]; eta_lnln=a_param_mat[., 5]; LogLike_semi=a_param_mat[., 4]; LogLike_lnln=a_param_mat[., 6]; use_lnln=LogLike_semi.>LogLike_lnln; //Use the best fit, which is smallest in absolute value. "Use the best fit of semi and lnln"; "Total industries used is" rows(use_lnln); "Number for which lnln is the best fit is" sumc(use_lnln); naicsindex_select_vec=a_param_mat[.,1]; population=loc_dat[.,3]; pop07=loc_dat07[.,3]; long=loc_dat[.,4]; lat=loc_dat[.,5]; x = population./sumc(population); x07 = pop07./sumc(pop07); popgrowfactor=299331865/265941883; //If instead hold population fixed then set equal to one; L = rows(x); t1=hsec; numsim=10000; // Now bring in new things for the mean reversion exercise; load naicslist[465,6]=D:\a_data\proj\size_fun\exports\revision\wrap\public\naicslist_iter10.asc; // naicsindex ',' naics ',' nuN ',' nuT ',' newChina_shr07 ',' newChinacat0 ','; load gamorig_and_fit[82305,11]=D:\a_data\proj\size_fun\exports\revision\wrap\public\gamorig_and_fit_iter10.asc; // put naicsindex ',' ea_index ',' gam97_orig ',' gam07_orig ',' gam97_new ',' gam07_new ',' gam97fit ',' cat97 ',' cat07 ',' gamC97_orig ',' gamC97_new ','; load pmat[11,11]=D:\a_data\proj\size_fun\exports\revision\wrap\public\pmat6_iter10.asc; //put prob1 ',' prob2 ',' prob3 ',' prob4 ',' prob5 ',' prob6 ',' prob7 ',' prob8 ',' prob9 ',' prob10 ',' ; load mean_lngam[10,1]=D:\a_data\proj\size_fun\exports\revision\wrap\public\mean_lngam97_iter1.asc; gamappox_vec=0|exp(mean_lngam); // this vector is the approximate value of gam, given state. // test, suppose no change // convert to pmatcum numgrid=rows(pmat); pmatcum=zeros(numgrid,numgrid);pmatcum[.,1]=pmat[.,1]; i=2; do while i<=numgrid; pmatcum[.,i]=pmatcum[.,i-1]+pmat[.,i]; i=i+1; endo; /* Construct distance matrix */ dist_mat = zeros(L,L); for i (1, L, 1); lat0=lat[i]; long0=long[i]; hold=sin(lat./57.2958).*sin(lat0/57.2958) + cos(lat./57.2958).*cos(lat0/57.2958).*cos(long0/57.2958 - long./57.2958); distvec= 3958.75.*arccos(hold); dist_mat[i,.]=distvec'; endfor; //******edit 2 load in ea_dist_within dist_mat = diagrv(dist_mat,ea_dist_within_tract[.,2]); lndist_mat=ln(dist_mat); lndist_mat = diagrv(lndist_mat,ea_dist_within_tract[.,3]); // below is a matrix where we put the output.. results_means=zeros(6,7); results_medians=zeros(6,7); LQdata_means=zeros(6,2); LQdata_medians=zeros(6,2); chinanewcat=1; do while chinanewcat<=6; "**********************************"; "Chinanewcat=" chinanewcat; //pick naics to cycle through for this job naicslist_this_job=selif(naicslist,naicslist[.,6].==chinanewcat); /* Now loop through the different NAICS */ indnew=1; do while indnew<=rows(naicslist_this_job); naicsindex=naicslist_this_job[indnew,1]; // use the new index to get started naics=naicslist_this_job[indnew,2]; nuN=naicslist_this_job[indnew,3]; nuT=naicslist_this_job[indnew,4]; ind=selif(seqa(1,1,466),naicsindex_select_vec.==naicsindex); //this is the index we used before, something between 1 and 466 sal_dat=selif(dat[.,6],dat[.,3].==naicsindex); est97_dat=selif(dat[.,10],dat[.,3].==naicsindex); est07_dat=selif(dat07[.,10],dat[.,3].==naicsindex); S = sal_dat./sumc(sal_dat); LQ97dat=(est97_dat./sumc(est97_dat))./x; LQ07dat=(est07_dat./sumc(est07_dat))./x; // Note use 1997 population in the denominator /* now set a parameters */ /* global parameters */ if use_lnln[ind]==0; alowerbar=0; eta=eta_semi[ind,.]'; xzi=0; /* shut down missclassification */ A = exp(-(.001*eta[1]).*dist_mat - (.000001*eta[2]).*dist_mat.^2); A = A.*( A.=ones(L,L)); /* now cap at 1. */ else; A = exp(-eta_lnln[ind,1].*lndist_mat); endif; if internalChina==1; A_China=A; endif; //This sets transportation costs for China imports; // not read in the various gam vectors gamdat=selif(gamorig_and_fit[.,3:11],gamorig_and_fit[.,1].==naicsindex); gam97_orig=gamdat[.,1]; gam07_orig=gamdat[.,2]; gam97_new=gamdat[.,3]; gam07_new=gamdat[.,4]; gam97fit=gamdat[.,5]; gam_cat97=gamdat[.,6]; gam_cat07=gamdat[.,7]; gamC97_orig=gamdat[.,8]; gamC97_new=gamdat[.,9]; proc (3) = LQest_calc(gam,A); local hold, LQest, pi_mat, est, denom; hold=gam[.,ones(L,1)]'; hold=A.*hold; denom=sumc(hold'); denom=denom[.,ones(L,1)]; pi_mat=hold./denom; if (nuT>0) and (nuN>0); est = nuN.*x + nuT.*diag(pi_mat); elseif (nuT<=0) and (nuN>0); est=nuN.*x; elseif (nuT>0) and (nuN<0); est=nuT.*diag(pi_mat); endif; LQest=(est./sumc(est))./x; retp(LQest,pi_mat,est); endp; proc (4) = LQest_calc_China(gam,gamC,A); local hold, LQest_China, est_China, denom, denomC, denom_combine,pi_mat_china, LQest_China_newpop, est_China_newpop; //first do domestic hold=gam[.,ones(L,1)]'; hold=A.*hold; denom=sumc(hold'); denom=denom[.,ones(L,1)]; // now China hold=gamC[.,ones(L,1)]'; hold=A_China.*hold; denomC=sumc(hold'); denomC=denomC[.,ones(L,1)]; denom_combine=denom+denomC; //now calculate probability domestic location, numerator is same as before. hold=gam[.,ones(L,1)]'; hold=A.*hold; pi_mat_china=hold./denom_combine; // this is the probability of buying from a domestic locaion ea, condition on the China experiment (i.e. not probability of buying from China. // first case: keep popolation fixed if (nuT>0) and (nuN>0); est_China = nuN.*x + nuT.*diag(pi_mat_china); elseif (nuT<=0) and (nuN>0); est_China=nuN.*x; elseif (nuT>0) and (nuN<0); est_China=nuT.*diag(pi_mat_china); endif; LQest_China=(est_China./sumc(est_China))./x; // Next calculate measure that uses the new population. if (nuT>0) and (nuN>0); est_China_newpop = nuN.*x07.*popgrowfactor + nuT.*diag(pi_mat_china); elseif (nuT<=0) and (nuN>0); est_China_newpop=nuN.*x07.*popgrowfactor; elseif (nuT>0) and (nuN<0); est_China_newpop=nuT.*diag(pi_mat_china); endif; LQest_China_newpop=(est_China_newpop./sumc(est_China_newpop))./x; retp(LQest_China,pi_mat_China,est_China,LQest_China_newpop); endp; {LQest_base2,pi_mat,est} = LQest_calc(gam97_new,A); {LQest_China2,pi_mat_China1,est_China1,LQestChina_newpop1} = LQest_calc_China(gam97_new,gamC97_new,A); {LQest_base2_aprox,pi_mat_aprox,est_aprox} = LQest_calc(gam97fit,A); {LQest_China2_aprox,pi_mat_China_aprox,est_China_aprox,LQestChina1_newpop_aprox} = LQest_calc_China(gam97fit,gamC97_new,A); LQsal97=s./x; // output vec is naicsindex, ea_index, LQest_base2, LQest_China2, LQest_base2_aprox, LQest_China2_aprox, s, x, LQsal97, LQ..China_newpop, ...newpop_approx // chinanewcat naics output_ind=(naicsindex.*ones(L,1))~seqa(1,1,L)~LQest_base2~LQest_China2~LQest_base2_aprox~LQest_China2_aprox~s~x~LQsal97~LQestChina_newpop1 ~LQestChina1_newpop_aprox~(chinanewcat.*ones(L,1))~(naics.*ones(L,1)); output_ind=real(output_ind); dataLQ_ind=LQ97dat~LQ07dat; //Part 2. Simulate the reversion to the mean. proc (1)=gam_simulate(gam_cat,pmatcum); local simdraw,hold,hold2,newstate,gam_sim; simdraw=rndu(L,1); hold=gam_cat+1; //this goes from 0 to 10, convert to 1 to 11 hold2=pmatcum[hold,.]; newstate=sumc((hold2.<=(simdraw.*ones(L,numgrid)))')+1; gam_sim=gamappox_vec[newstate]; gam_sim=gam_sim./sumc(gam_sim); //renormalize to sum to one. retp(gam_sim); endp; simhold=zeros(L,numsim); simholdChina=zeros(L,numsim); simholdChina_newpop=zeros(L,numsim); simindex=1; do while simindex<=numsim; gam_sim=gam_simulate(gam_cat97,pmatcum); {LQest_base2sim,pi_matsim,estsim} = LQest_calc(gam_sim,A); simhold[.,simindex]=LQest_base2sim; {LQest_China2sim,pi_mat_China1sim,est_China1sim,LQest_China2sim_newpop} = LQest_calc_China(gam_sim,gamC97_new,A); simholdChina[.,simindex]=LQest_China2sim; simholdChina_newpop[.,simindex]=LQest_China2sim_newpop; simindex=simindex+1; endo; if indnew==1; output_all=real(output_ind); output_sim=real(simhold); output_simChina=real(simholdChina); output_sim_China_newpop=real(simholdChina_newpop); output_dataLQ=dataLQ_ind; else; output_all=output_all|real(output_ind); output_sim=output_sim|real(simhold); output_simChina=output_simChina|real(simholdChina); output_sim_China_newpop=output_sim_China_newpop|real(simholdChina_newpop); output_dataLQ=output_dataLQ|dataLQ_ind; endif; indnew=indnew+1; endo; dataLQ=selif(output_dataLQ,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); meanLQdata=meanc(dataLQ)'; medianLQdata=median(dataLQ)'; LQdata_means[chinanewcat,.]=meanLQdata; LQdata_medians[chinanewcat,.]=medianLQdata; output_select=selif(output_all,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); //conditiong on given s>=.05 and LQsal97>=2" meanLQ=meanc(output_select[.,3 4 5 6])'; medianLQ=median(output_select[.,3 4 5 6])'; // above selecting LQest_China2~LQest_base2_aprox~LQest_China2_aprox results_means[chinanewcat, 1 2 3 4]=meanLQ; results_medians[chinanewcat, 1 2 3 4]=medianLQ; output_sim_select=selif(output_sim,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); output_simChina_select=selif(output_simChina,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); output_simChina_newpop_select=selif(output_sim_China_newpop,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); meanLQ_regress_to_mean=meanc(meanc(output_sim_select)); hold=meanc(output_sim_select'); medianLQ_regress_to_mean=median(hold); meanLQ_regress_to_meanChina=meanc(meanc(output_simChina_select)); hold=meanc(output_simChina_select'); medianLQ_regress_to_meanChina=median(hold); meanLQ_regress_China_newpop=meanc(meanc(output_simChina_newpop_select)); hold=meanc(output_simChina_newpop_select'); medianLQ_regress_China_newpop=median(hold); results_means[chinanewcat, 5]=meanLQ_regress_to_mean; results_medians[chinanewcat, 5]=medianLQ_regress_to_mean; results_means[chinanewcat, 6]=meanLQ_regress_to_meanChina; results_medians[chinanewcat, 6]=medianLQ_regress_to_meanChina; results_means[chinanewcat, 7]=meanLQ_regress_China_newpop; results_medians[chinanewcat, 7]=medianLQ_regress_China_newpop; //Now aggregate to take back to SAS for regressions if chinanewcat==1; output_all_2=output_all; output_sim_2=meanc(output_sim'); output_simChina_2=meanc(output_simChina'); output_sim_China_newpop_2=meanc(output_sim_China_newpop'); output_dataLQ_2=output_dataLQ; else; output_all_2=output_all_2|output_all; output_sim_2=output_sim_2|meanc(output_sim'); output_simChina_2=output_simChina_2|meanc(output_simChina'); output_sim_China_newpop_2=output_sim_China_newpop_2|meanc(output_sim_China_newpop'); output_dataLQ_2=output_dataLQ_2|output_dataLQ; endif; chinanewcat=chinanewcat+1; endo; "means" "base china base(approx) China(approx) RTMonly RTM+China RTM+CHINA+newpop"; format /rd 7,2; results_means; " "; "medians" "base china base(approx) China(approx) RTMonly RTM+China RTM+CHINA+NEWpop"; format /rd 7,2; results_medians; " "; mean_ratios=results_means[.,4 5 6 7]./results_means[.,3]; median_ratios=results_medians[.,4 5 6 7]./results_medians[.,3]; "ratio of means"; "Chinaonly RTMonly China+RTM China+RTM+newpop"; mean_ratios; ""; "ratio of medians"; "Chinaonly RTMonly China+RTM China+RTM+newpop"; median_ratios; "data" "means" LQdata_means; " "; "medians" LQdata_medians; "LQ97, LQ07/LQ97"; "means"; LQdata_means[.,1]~(LQdata_means[.,2]./LQdata_means[.,1]); " "; "LQ97, LQ07/LQ97"; "medians"; LQdata_medians[.,1]~(LQdata_medians[.,2]./LQdata_medians[.,1]); " "; " "; print "numsim=" numsim; print ((hsec-t1)/100)/60 "minutes"; outwidth 180; format /rd 20,10; screen off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_all_2.asc reset; output_all_2; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_sim_2.asc reset; output_sim_2; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_simChina_2.asc reset; output_simChina_2; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\output_sim_China_newpop_2.asc reset; output_sim_China_newpop_2; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\output_dataLQ_2.asc reset; output_dataLQ_2; output off; screen on;