/* setup_mean_reversion_iter10.sas is just like setup_mean_reversion_iter1.sas,
   except use iter_num=10, consistent with way do things for full model.

   Overview of what this script does:
     1. Reads the 1997 and 2007 ASCII exports (nu/boutique-size file, naics_level
        file, and for 1997 the naics_levelC file), keeping only iter_num=10.
     2. Reshapes the 177 per-location gam columns to one row per
        (industry, ea_index) and merges in trade, employment and population data.
     3. Normalises gam by its industry total, takes logs, and assigns each
        (industry, location) to a size category 0-10 in 1997 and in 2007.
     4. Builds the employment-weighted 1997->2007 category transition matrix for
        industries with newChinacat0=6, and the mean log share per 1997 category.
     5. Writes four comma-delimited .asc files for downstream use.

   NOTE(review): every MERGE ... BY below relies on the inputs already being in
   BY order (in particular that naicsindex order equals naics order and that the
   public.* data sets are sorted by naics / ea_index) -- confirm against the
   upstream data.

   Missing-value handling: SAS orders numeric missing below every number, so an
   unguarded "lngam < cutoff" is TRUE for a missing lngam. Observations whose
   normalised share is missing now get a missing category (and missing
   dum07var*) instead of being counted in category 1, and missing categories
   are excluded from the exported tables. */

libname public 'd:\a_data\proj\size_fun\exports\revision\wrap\public';
run;

ods html file="d:\a_data\proj\size_fun\exports\revision\wrap\public\setup_mean_reversion_iter10.html" style=minimal;

/* ------------------------------------------------------------------ */
/* First do 1997                                                      */
/* ------------------------------------------------------------------ */
filename nu_iter 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_nu_bsize.asc';
filename n_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_naics_level.asc';
filename n_levC 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_naics_levelC.asc';

/* Note beta1 is constant, will not use it here, and in any case rerun
   regression below, so get a second crack at bringing it in. */
data nu_iter97;
    infile nu_iter;
    input naicsindex naics iter_num boutiquesize beta1 beta2 beta3;
    nuN=beta2;
    nuT=beta3;
    if iter_num=10;
run;

data nu_iter97;
    set nu_iter97;
    drop naics; /* not character, so merge it in */
run;

/* naics_level has two observations for each industry, one for iter=1,
   the other for iter=10 */
data naics_level97;
    infile n_lev;
    input naicsindex naics iter_num sal_growth_chinaUS est_modelUS est_chinaUS
          alowerbar eta1 eta2 gam97_orig1-gam97_orig177;
run;

data naics_level97;
    set naics_level97;
    drop naics; /* not character, so merge it in */
    if iter_num=10;
run;

data naics_levelC97;
    infile n_levC;
    input naicsindex naics iter_num gamC97_orig1-gamC97_orig177;
run;

data naics_levelC97;
    set naics_levelC97;
    drop naics; /* not character, so merge it in */
    if iter_num=10;
run;

/* ------------------------------------------------------------------ */
/* Next do 2007 (the nu_iter and n_lev filerefs are re-pointed)       */
/* ------------------------------------------------------------------ */
filename nu_iter 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base07_nu_bsize.asc';
filename n_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base07_naics_level.asc';

/* Note beta1 is constant, will not use it here, and in any case rerun
   regression below, so get a second crack at bringing it in. */
data nu_iter07;
    infile nu_iter;
    input naicsindex naics iter_num boutiquesize nuN nuT beta1 beta2 beta3;
    if iter_num=10;
run;

data nu_iter07;
    set nu_iter07;
    drop naics; /* not character, so merge it in */
run;

/* naics_level has two observations for each industry, one for iter=1,
   the other for iter=10.
   2007 naics_level=naicsindex~naics~bni~sumc(est)~alowerbar~(eta')~(gam')
   1997 naics_level=naicsindex~naics~bni~sal_growth_chinaUS~sumc(est)~sumc(est_china)~alowerbar~(eta')~(gam') */
data naics_level07;
    infile n_lev;
    input naicsindex naics iter_num est_modelUS alowerbar eta1 eta2
          gam07_orig1-gam07_orig177;
run;

data naics_level07;
    set naics_level07;
    drop naics; /* not character, so merge it in */
    if iter_num=10;
run;

/* ------------------------------------------------------------------ */
/* Attach the character naics code and text by row position           */
/* ------------------------------------------------------------------ */
data naicstext;
    set public.mandat_naics2;
    naicsindex=_n_;
    keep naicsindex naicstext naics;
run;

data naics_level97;
    merge naics_level97(in=in1) naicstext;
    by naicsindex;
    if in1;
    if naics='339111' then delete; /* No valid observations for 2007 */
run;

data naics_level07;
    merge naics_level07(in=in1) naicstext;
    by naicsindex;
    if in1;
    if naics='339111' then delete; /* No valid observations for 2007 */
run;

/* ------------------------------------------------------------------ */
/* Trade data: bucket the 2007 China share into categories 1-6        */
/* ------------------------------------------------------------------ */
data tradedat;
    set public.tradedat_forgauss;
    if newChina_shr07>=.50 then newChinacat0=1;
    else if newChina_shr07>=.25 then newChinacat0=2;
    else if newChina_shr07>=.10 then newChinacat0=3;
    else if newChina_shr07>=.05 then newChinacat0=4;
    else if newChina_shr07>0 then newChinacat0=5;
    else if newChina_shr07=0 then newChinacat0=6;
    keep naics newChina_shr07 newChinacat0;
    /* Note, newChinacat0 is a numeric version of the categorical variable */
run;

/* ------------------------------------------------------------------ */
/* Reshape wide gam columns to one row per (industry, ea_index)       */
/* ------------------------------------------------------------------ */
data t97;
    merge naics_level97(in=in1) tradedat(in=in2);
    by naics;
    if in1 and in2;
    array gam97_orig_{177} gam97_orig1-gam97_orig177;
    do ea_index=1 to 177;
        gam97_orig=gam97_orig_{ea_index};
        output;
    end;
    keep naics naicsindex naicstext ea_index gam97_orig newChina_shr07 newChinacat0;
run;

data tC97;
    set naics_levelC97;
    array gamC97_orig_{177} gamC97_orig1-gamC97_orig177;
    do ea_index=1 to 177;
        gamC97_orig=gamC97_orig_{ea_index};
        output;
    end;
    keep naicsindex ea_index gamC97_orig;
run;

data t07;
    merge naics_level07(in=in1) tradedat(in=in2);
    by naics;
    if in1 and in2;
    array gam07_orig_{177} gam07_orig1-gam07_orig177;
    do ea_index=1 to 177;
        gam07_orig=gam07_orig_{ea_index};
        output;
    end;
    keep naics ea_index gam07_orig;
run;

/* ------------------------------------------------------------------ */
/* Employment and population inputs                                   */
/* ------------------------------------------------------------------ */
data emp97;
    set public.ea_cm97;
    emp97=emphat_LM;
    keep naics ea_index emp97;
run;

/* Population is taken from the naicsindex=1 rows only, giving one row
   per ea_index */
data pop97;
    set public.ea_cm97;
    if naicsindex=1;
    keep ea_index pop97 pop97_US;
run;

data pop2007;
    set public.ea_cbp07;
    if naicsindex=1;
    keep ea_index pop2007 pop2007_US;
run;

/* ------------------------------------------------------------------ */
/* Assemble the analysis table t                                      */
/* ------------------------------------------------------------------ */
data t;
    merge t97(in=in1) t07 emp97;
    by naics ea_index;
    if in1;
run;

data t;
    merge t(in=in1) tC97;
    by naicsindex ea_index;
    if in1;
run;

proc sort data=t;
    by ea_index;
run;

data t;
    merge t pop97 pop2007;
    by ea_index;
    popshr97=pop97/pop97_US;
    popshr07=pop2007/pop2007_US;
    wgt_emp=emp97+1; /* set equal to +1 because locations with gam=0 have emp=0 */
run;

proc sort data=t;
    by naics ea_index;
run;

/* Industry totals of gam, used to normalise to shares */
proc means data=t noprint;
    by naics;
    var gam97_orig gam07_orig;
    output out=tsum sum=gam97_orig_US gam07_orig_US;
run;

data tsum;
    set tsum;
    drop _freq_ _type_;
run;

data t;
    merge t tsum;
    by naics;

    gam97_new=gam97_orig/gam97_orig_US;
    gamC97_new=gamC97_orig/gam97_orig_US; /* rescale the same way */
    gam07_new=gam07_orig/gam07_orig_US;

    cat0_97=gam97_new=0;
    cat0_07=gam07_new=0;

    if gam97_new>0 then lngam97=log(gam97_new);
    if gam07_new>0 then lngam07=log(gam07_new);

    /* use cutoffs of entire population of industries.
       The missing() guard must come before the "<" chain: a missing lngam
       compares below every cutoff and would otherwise land in category 1. */
    if gam97_new=0 then cat97=0;
    else if missing(lngam97) then cat97=.;
    else if lngam97< -10.9122151 then cat97=1; /* 1% */
    else if lngam97< -9.1504076 then cat97=2;  /* 10% */
    else if lngam97< -7.7996085 then cat97=3;  /* 25% */
    else if lngam97< -6.0779618 then cat97=4;  /* 50% */
    else if lngam97< -4.6248727 then cat97=5;  /* 75% */
    else if lngam97< -3.5306734 then cat97=6;  /* 90% */
    else if lngam97< -2.9448961 then cat97=7;  /* 95% */
    else if lngam97< -1.9757683 then cat97=8;  /* 99% */
    else if lngam97< -1.5 then cat97=9;        /* ?% */
    else cat97=10;

    if gam07_new=0 then cat07=0;
    else if missing(lngam07) then cat07=.;
    else if lngam07< -10.9122151 then cat07=1; /* 1% */
    else if lngam07< -9.1504076 then cat07=2;  /* 10% */
    else if lngam07< -7.7996085 then cat07=3;  /* 25% */
    else if lngam07< -6.0779618 then cat07=4;  /* 50% */
    else if lngam07< -4.6248727 then cat07=5;  /* 75% */
    else if lngam07< -3.5306734 then cat07=6;  /* 90% */
    else if lngam07< -2.9448961 then cat07=7;  /* 95% */
    else if lngam07< -1.9757683 then cat07=8;  /* 99% */
    else if lngam07< -1.5 then cat07=9;        /* ?% */
    else cat07=10;

    /* 0/1 indicators for each 2007 category. When cat07 is missing the
       indicators are missing too, so the observation drops out of the
       transition-probability means instead of pulling every column down. */
    array dum07var_{0:10} dum07var0-dum07var10;
    do cat_k=0 to 10;
        if missing(cat07) then dum07var_{cat_k}=.;
        else dum07var_{cat_k}=(cat07=cat_k);
    end;
    drop cat_k;
run;

/* ------------------------------------------------------------------ */
/* Diagnostics                                                        */
/* ------------------------------------------------------------------ */
proc freq data=t;
    tables cat97*cat07;
    title 'proc freq';
run;

proc univariate data=t;
    var lngam97;
    title 'proc univariate';
run;

proc tabulate data=t;
    where newChinacat0=6;
    weight wgt_emp;
    class cat97;
    var dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
        dum07var6 dum07var7 dum07var8 dum07var9 dum07var10;
    table all cat97,
          mean*(dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
                dum07var6 dum07var7 dum07var8 dum07var9 dum07var10)*f=7.4;
    table all cat97,
          N*(dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
             dum07var6 dum07var7 dum07var8 dum07var9 dum07var10)*f=7.0;
    title 'transition matrix newChinacat0=6, weighted by emp97+1';
run;

/* ------------------------------------------------------------------ */
/* Transition matrix and per-category mean log share                  */
/* ------------------------------------------------------------------ */
proc sort data=t;
    by cat97;
run;

/* Missing cat97 is excluded so it cannot add an extra BY-group row */
proc means data=t noprint;
    where newChinacat0=6 and not missing(cat97);
    weight wgt_emp;
    by cat97;
    var dum07var0-dum07var10;
    output out=pmat6 mean=prob0-prob10;
run;

/* This is the probability transition matrix, given cat6 */
data pmat6;
    set pmat6;
    drop _freq_ _type_;
run;

/* cat97>0 drops category 0 and, unlike cat97^=0, also drops missing cat97 */
proc means data=t noprint;
    where cat97>0;
    by cat97;
    var lngam97;
    output out=mean_lngam97 mean=mean_lngam97;
run;

data mean_lngam97;
    set mean_lngam97;
    drop _freq_ _type_;
run;

/* Fitted share: 0 for category 0, otherwise exp of the category mean log share */
data t;
    merge t mean_lngam97;
    by cat97;
    if cat97=0 then gam97fit=0;
    else gam97fit=exp(mean_lngam97);
run;

proc corr data=t;
    var gam97_new gam97fit;
    title 'proc corr';
run;

/* ------------------------------------------------------------------ */
/* Exports                                                            */
/* ------------------------------------------------------------------ */
proc sort data=t;
    by naics ea_index;
run;

/* 82305*11 records */
data _null_;
    set t;
    file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\gamorig_and_fit_iter10.asc';
    put naicsindex ',' ea_index ',' gam97_orig ',' gam07_orig ',' gam97_new ',' gam07_new ',' gam97fit ',' cat97 ',' cat07 ',' gamC97_orig ',' gamC97_new ',';
run;

/* 11*11 records.
   calling it pmat6 because it conditions on newchina share category 6 */
data _null_;
    set pmat6;
    file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\pmat6_iter10.asc';
    put prob0 ',' prob1 ',' prob2 ',' prob3 ',' prob4 ',' prob5 ',' prob6 ',' prob7 ',' prob8 ',' prob9 ',' prob10 ',' ;
run;

/* 10 records */
data _null_;
    set mean_lngam97;
    file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\mean_lngam97_iter10.asc';
    put mean_lngam97 ',';
run;

/* now get coefficients for count regression and other variables:
   one row per industry, with the 1997 nuN/nuT merged in */
data list;
    set t;
    by naicsindex;
    if first.naicsindex;
    keep naics naicsindex newChina_shr07 newChinacat0;
run;

data list;
    merge list(in=in1) nu_iter97;
    by naicsindex;
    if in1;
run;

/* 465*6 records */
data _null_;
    set list;
    file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\naicslist_iter10.asc';
    put naicsindex ',' naics ',' nuN ',' nuT ',' newChina_shr07 ',' newChinacat0 ',';
run;

ods html close;
run;