/*
Table_5_7_process_model_results.sas
Revised May 29, 2012

Run the gauss program base97.prg first.

This program processes the output of the gauss program which simulates the
model, both for 1997 and 2007 (with the China_surge experiment).

Note: T segment corresponds to "Primary" segment in paper (T for "tradable")
      N segment corresponds to "Speciality" segment in paper (N for "nontradable")

Note: model 1 is the pure BEJK model
      model 2 is the general model with speciality segment.

Hence in the notation, for example:
  est_fitted_base1          fitted values of establishment counts at location,
                            under pure BEJK model (model 1)
  est_fitted_base2          fitted values of establishment counts at location,
                            under general model (model 2)
  est_fitted_china1         predicted with China, using model 1
  est_fitted_china2         predicted with China, using model 2 and original pop
  est_fitted_China2_newpop  with China and model 2, using 2007 population

NOTE(review): this header names the program Table_5_7_..., but the ODS HTML
output file below is named Table_6_7_... - confirm which table numbering is
intended.
*/

/* Library holding the permanent input datasets read below
   (public.mandat_naics2, public.ea_cbp07). */
libname public 'd:\a_data\proj\size_fun\exports\revision\wrap\public';
run;

/* All printed output from here until "ods html close" goes to this HTML file. */
ods html file="d:\a_data\proj\size_fun\exports\revision\wrap\public\Table_6_7_process_model_results.html" style=minimal;

* %%%%%%% Step 1 set up naics_level and loc_level data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%;

*Note S_TNboth is the sales share, with T and N section together;
* S is just the T sector shares;

/* Filerefs for the three ASCII files read by the INFILE statements below. */
filename nu_iter 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_nu_bsize.asc';
filename n_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_naics_level.asc';
filename l_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_loc_level.asc';

*Note beta1 is the constant. It is not used here, and in any case the regression is rerun below, so there is a second chance to bring it in;

/* Read one record per (naicsindex, iter_num) and copy beta2/beta3 into the
   names used later: nuN = beta2, nuT = beta3. */
DATA nu_iter;
    INFILE nu_iter;
    input naicsindex naics iter_num boutiquesize beta1 beta2 beta3;
    nuN=beta2;
    nuT=beta3;

/* Drop the naics value read from the file so that the naics from
   public.mandat_naics2 can be merged in by naicsindex below. */
data nu_iter;
    set nu_iter;
    drop naics; *not character, so merge it in;

*naics_level has two observations for each industry, one for iter=1, the other for iter=10;
DATA naics_level;
    INFILE n_lev;
    input naicsindex naics iter_num sal_growth_chinaUS est_modelUS est_chinaUS alowerbar eta1 eta2 gam1-gam177;

data naics_level;
    set naics_level;
    drop naics; *not character, so merge it in;

/* Lookup table: naicsindex is the row position in public.mandat_naics2.
   NOTE(review): this assumes the row order of mandat_naics2 matches the
   naicsindex numbering written by the gauss program - confirm. */
data naicstext;
    set public.mandat_naics2;
    naicsindex=_n_;
    keep naicsindex naicstext naics;

/* Attach naics / naicstext, keeping only rows present in the ASCII input.
   NOTE(review): MERGE ... BY requires both inputs to be in naicsindex order.
   The ASCII files are not sorted here, so they are presumably written in that
   order - confirm. */
data nu_iter;
    merge nu_iter(in=in1) naicstext;
    by naicsindex;
    if in1;

data naics_level;
    merge naics_level(in=in1) naicstext;
    by naicsindex;
    if in1;

/* Location-level model output. After this step S holds the combined T+N
   share (S_TNboth) and S_justT holds the share as originally read into S. */
DATA loc_level;
    INFILE l_lev;
    input naicsindex iter_num ea_index sal_dat salT salN est_model estNhat S_TNboth S S_China est_China;
    S_justT=S;
    S=S_TNboth;

/* NOTE(review): the comment statement below appears to have lost text in the
   middle - it runs from a remark about S_justT straight into
   "0 and nuN>0 then do". As it stands it swallows the opening IF ... THEN DO,
   so the first END below has no visible matching DO. The statements that
   create FITTED (read by PROC MEANS below) and that bring nuN, nuT, pop_share,
   pop97, pop97_US, pop2007, pop2007_US, est_fitted_base1 and
   est_fitted_china1 into this step are also not visible here. Restore the
   missing statements from the original program before running. */
    *For iter1 S_justT and S_TNboth are the same, now even if have S_justT0 and nuN>0 then do;
        /* Both segments contribute: speciality count is pop_share*nuN,
           primary count is est_model*nuT. */
        est_speciality=pop_share*nuN;
        est_primary=est_model*nuT;
        est_total=est_speciality+est_primary;
        est_fitted_base2=est_model*nuT + pop_share*nuN; *Just use explained, leave out constant. OK, because sum up and use totals;
        est_fitted_china2=est_china*nuT + (pop97/pop97_US)*nuN;
        est_fitted_china2_newpop=est_china*nuT + (pop2007_US/pop97_US)*(pop2007/pop2007_US)*nuN;
    end;

    if nuT<=0 and nuN>0 then do; *set nuT to zero if negative, 0 cases;
        est_speciality=pop_share*nuN;
        est_primary=0;
        est_total=est_speciality+est_primary;
        est_fitted_base2=pop_share*nuN;
        est_fitted_china2=(pop97/pop97_US)*nuN;
        est_fitted_china2_newpop=(pop2007_US/pop97_US)*(pop2007/pop2007_US)*nuN;
    end;

    if nuT>0 and nuN<=0 then do; *set nuN to zero if negative, 6 cases;
        /* With no speciality segment the model-2 values are set equal to the
           model-1 values. */
        est_speciality=0;
        est_primary=est_model*nuT;
        est_total=est_speciality+est_primary;
        est_fitted_base2=est_fitted_base1;
        est_fitted_china2=est_fitted_china1;
        est_fitted_china2_newpop=est_fitted_china1;
    end;
    /* NOTE(review): no branch handles nuT<=0 and nuN<=0 together; the
       variables assigned in the branches above are not set in that case. */

*note above start with popsum07 analog, then rescale by the population growth;
*Note simpop07 is set at the start of the program, depending on the samepop option (which must be set manually);

/* Sum the location-level values within each (naics, iter_num) group to get
   the *US totals. BY processing requires FITTED to already be in
   naics, iter_num order. */
proc means data=fitted noprint;
    by naics iter_num;
    var est_fitted_base1 est_fitted_base2 est_fitted_china1 est_fitted_china2 est_fitted_china2_newpop s_china est_speciality est_primary est_total;
    output out=tsum sum=est_fitted_baseUS1 est_fitted_baseUS2 est_fitted_chinaUS1 est_fitted_chinaUS2 est_fitted_china_newpopUS2 s_china_US est_specialityUS est_primaryUS est_totalUS;

/* Remove the bookkeeping variables PROC MEANS adds to its output dataset. */
data tsum;
    set tsum;
    drop _freq_ _type_;

/* Segment totals by industry and iteration, kept as a separate dataset. */
data est_totalUS;
    set tsum;
    keep naics iter_num est_specialityUS est_primaryUS est_totalUS;

/* Attach the totals to every location row, then build the size quotients
   (LQsize*), shares of the US total (s_est_*), and the LQ* variables, each of
   which divides a share by the 1997 population share pop97/pop97_US. */
data fitted;
    merge fitted tsum;
    by naics iter_num;

    /* (s / location count) / (1 / US count), computed only when the location
       count is positive. */
    if est_fitted_base1>0 then LQsize1=(s/est_fitted_base1)/(1/est_fitted_baseUS1);
    if est_fitted_base2>0 then LQsize2=(s/est_fitted_base2)/(1/est_fitted_baseUS2);
    if est_LM>0 then LQsize97=(s/est_LM)/(1/estUS_LM);

    s_est_base1=est_fitted_base1/est_fitted_baseUS1;
    s_est_base2=est_fitted_base2/est_fitted_baseUS2;
    s_est_China1=est_fitted_China1/est_fitted_ChinaUS1;
    s_est_China2=est_fitted_China2/est_fitted_ChinaUS2;
    s_est_China2_newpop=est_fitted_China2_newpop/est_fitted_China_newpopUS2;

    LQest_base1=s_est_base1/(pop97/pop97_US);
    LQest_base2=s_est_base2/(pop97/pop97_US);
    LQest_China1=s_est_China1/(pop97/pop97_US);
    LQest_China2=s_est_China2/(pop97/pop97_US);
    /* NOTE(review): the newpop variant is also divided by the 1997 population
       share, not a 2007 share - confirm this is intended. */
    LQest_China2_newpop=s_est_China2_newpop/(pop97/pop97_US);

    s_sal_china1=s_china/s_china_US;
    LQsal_china1=s_sal_china1/(pop97/pop97_US);

    s_est97= est_LM/estUS_LM;
    LQest97=(est_LM/estUS_LM)/(pop97/pop97_US);
    LQsal97=s/(pop97/pop97_US);

    /* China experiment minus baseline, for each model. */
    difLQ1=LQest_china1-LQest_base1;
    difLQ2=LQest_china2-LQest_base2;

    format est_fitted_base1 est_fitted_base2 est_fitted_baseUS1 est_fitted_baseUS2 7.1;
    format LQsize1 LQsize2 LQest97 LQsal97 LQsal_china1 7.3;

*Now merge in 2007 information;
data info07;
    set public.ea_cbp07;
    s07=snorm_cbp/snormUS_cbp;
    s_est07=est_cbp/estUS_cbp;
    keep naics ea estUS_cbp emphatUS_cbp s07 s_est07 est_CBP;

proc sort data=info07;
    by naics ea;

proc sort data=fitted;
    by naics ea;

/* Keep every FITTED row and add the 2007 values where a (naics, ea) match
   exists. The 2007 shares are divided by the 1997 population share, the same
   denominator used for the 1997 LQ variables above. */
data fitted;
    merge fitted(in=in1) info07;
    by naics ea;
    if in1;
    LQest07=s_est07/(pop97/pop97_US);
    LQsal07=s07/(pop97/pop97_US);
    difLQest_data=LQest07-LQest97;
    /* Percent change in the US totals between the LM and CBP values. */
    g_est=100*(estUS_cbp-estUS_LM)/estUS_LM;
    g_emp=100*(emphatUS_cbp-emphatUS_LM)/emphatUS_LM;
/* ------------------------------------------------------------------------
   Reporting section: builds the ranked location dataset T, attaches the
   segment totals to the industry-level dataset K2, and prints the tables.
   Inputs expected to exist already: FITTED, K2, EST_TOTALUS, DIFFUSE.
   ------------------------------------------------------------------------ */

/* Locations in industries flagged reclass_dum=1, ordered within
   (iter_num, naics) from highest to lowest LQsal97. */
data t;
    set fitted;
    if reclass_dum=1;

proc sort data=t;
    by iter_num naics descending LQsal97;
run;

/* rank = position of the location within its (iter_num, naics) group in the
   sort order above. The sum statement retains rank across rows. */
data t;
    set t;
    by iter_num naics;
    if first.naics then rank=0;
    rank+1;

*Now print out industry level information;

/* countNshr = speciality segment count as a percent of the total count.
   NOTE(review): MERGE ... BY requires K2 to be in naics, iter_num order at
   this point - confirm against the step that creates K2. */
data k2;
    merge k2 est_totalUS;
    by naics iter_num;
    countNshr=100*est_specialityUS/est_totalUS;

/*
Construct Second Stage Estimates of the Plant Count Parameters and Related Model and Data Statistics
*/
proc print data=k2;
    where reclass_dum=1 and iter_num=10;
    var iter_num naics naicstext intercept se_intercept nuN se_nuN nuT se_nuT _RSQ_ countNshr ;
    format intercept se_intercept nuN se_nuN nuT se_nuT 7.1 salN_share 7.2;
    title 'iter=10, regression results ';

/* This sort fixes the row order of K2 for the PROC PRINT of nuN<=0 cases
   further down. The tabulations themselves use CLASS, not BY. */
proc sort data=k2;
    by iter_num reclass_dum;

proc tabulate data=k2;
    where iter_num=10;
    class diffuse reclass_dum iter_num;
    var intercept se_intercept nuN se_nuN nuT se_nuT _RSQ_ countNshr est13_shr ;
    table iter_num, all reclass_dum,
          N*f=7.0
          mean*(intercept se_intercept nuN se_nuN nuT se_nuT)*f=7.1
          mean*(_RSQ_ countNshr est13_shr )*f=7.2;
    title 'proc tab';

proc tabulate data=k2;
    where iter_num=10;
    class diffuse iter_num;
    var intercept se_intercept nuN se_nuN nuT se_nuT _RSQ_ countNshr est13_shr ;
    table iter_num*(all diffuse),N mean min p10 p25 median p75 p90 max,
          (intercept se_intercept nuN se_nuN nuT se_nuT)*f=7.1
          (_RSQ_ countNshr est13_shr )*f=7.2;
    title 'proc tab';

/*
Construct Table Estimated Specialty Count Share By Quartiles of all Industries
*/

/* K2 is still in iter_num, reclass_dum order from the sort above and has not
   been modified since, so it is not re-sorted here. */

/* Assign each iter_num=10 industry to a quartile of countNshr using fixed
   cutpoints. SAS orders a missing value below every number, so without the
   explicit check a missing countNshr would satisfy "countNshr < 54.0164" and
   be counted in quartile 1. Such rows get a missing quartile instead, which
   PROC TABULATE leaves out of the CLASS levels by default. */
data k3;
    set k2;
    if iter_num=10;
    if missing(countNshr) then quartile=.;
    else if countNshr<54.0164 then quartile=1;
    else if countNshr<68.0395 then quartile=2;
    else if countNshr<78.9763 then quartile=3;
    else quartile=4;

proc tabulate data=k3;
    class quartile;
    var countNshr est13_shr;
    table quartile,
          N*f=comma12.0
          (min max mean)*countNshr*f=7.1
          mean*est13_shr*f=7.1;
    title 'break down into quartile by estimates share of speciality';

proc reg data=k3;
    model countNshr=est13_shr;
    title 'regression of countNshr onr est13_shr, unweighted';

/* Industries whose estimated nuN is not positive. */
proc print data=k2;
    where iter_num=10 and nuN<=0;
    var iter_num naics naicstext intercept se_intercept nuN se_nuN nuT se_nuT _RSQ_ countNshr est13_shr ;
    format intercept se_intercept nuN se_nuN nuT se_nuT 7.1 salN_share 7.2;
    title 'iter=10, regression results, where nuN<=0 ';

/*
Construct Table: Sales, Count and Size Quotients in Data,
Size Quotients for Both Models In High Concentration Industry Locations
*/

/* iter_num=1 rows: data quotients plus the model-1 size quotient. */
proc print data=t;
    by iter_num;
    where s>=.05 and LQsal97>=2 and iter_num=1;
    var naicstext eatext s LQsal97 LQest97 LQsize97 LQsize1 ;
    format s 7.2 LQsal97 LQest97 LQsize97 LQsize1 7.1;
    title 'proc print reclass_dum=1 industries, s>=.05, LQsal97>=2, use iter=1 for primary only model';

/* iter_num=10 rows: the model-2 size quotient for the same selection. */
proc print data=t;
    by iter_num;
    where s>=.05 and LQsal97>=2 and iter_num=10;
    var naicstext eatext LQsize2 ;
    format s 7.2 LQsize2 7.1;
    title 'proc print reclass_dum=1 industries, s>=.05, LQsal97>=2, use iter=10 for general model with speciality segment';

/*
Now add summary statistics
*/

/* Attach the DIFFUSE dataset to FITTED by industry, keeping every FITTED row.
   NOTE(review): DIFFUSE must already be in naics order for this merge -
   confirm against the step that creates it. */
proc sort data=fitted;
    by naics;

data fitted;
    merge fitted(in=in1) diffuse;
    by naics;
    if in1;

proc tabulate data=fitted;
    where s>=.05 and LQsal97>=2 and iter_num=1;
    class iter_num reclass_dum diffuse;
    var s LQsal97 LQest97 LQsize97 LQsize1 ;
    table iter_num, (all reclass_dum diffuse), N*f=7.0 mean*(s*f=7.2 (LQsal97 LQest97 LQsize97 LQsize1 )*f=7.1);
    table iter_num, (all reclass_dum diffuse), N*f=7.0 median*(s*f=7.2 (LQsal97 LQest97 LQsize97 LQsize1 )*f=7.1);
    title 'proc tabulate s>=.05, LQsal97>=2, use iter_num=1 for constrained model with primary only';

proc tabulate data=fitted;
    where s>=.05 and LQsal97>=2 and iter_num=10;
    class iter_num reclass_dum diffuse;
    var LQsize2 ;
    table iter_num, (all reclass_dum diffuse), N*f=7.0 mean*((LQsize2 )*f=7.1);
    table iter_num, (all reclass_dum diffuse), N*f=7.0 median*( (LQsize2 )*f=7.1);
    title 'proc tabulate s>=.05, LQsal97>=2, use iter_num=10 for general model with speciality segment';
run;

ods html close;
run;