/* setup_mean_reversion_iter1.sas

   Uses category 6 to define pmat (these are the 0 import cases).

   What this program does, step by step:
     1. Reads the 1997 and 2007 ASCII files (nu/boutique-size coefficients,
        industry-level records carrying 177 gam values each, and the 1997
        China gam values) and keeps the iter_num=1 records.
     2. Attaches the character naics code from public.mandat_naics2 and the
        China-share category (newChinacat0) from public.tradedat_forgauss.
     3. Reshapes the 177 gam columns to one row per (industry, ea_index),
        merges 1997, 2007, China, employment and population data, and
        rescales gam so that it sums to one within each industry.
     4. Assigns each row to a size category (cat97 / cat07) using fixed
        cutoffs on log(gam), builds 2007 category dummies, and computes the
        employment-weighted transition matrix for newChinacat0=6.
     5. Writes four comma-delimited .asc files (gamorig_and_fit_iter1,
        pmat6_iter1, mean_lngam97_iter1, naicslist_iter1) plus an HTML
        listing.

   Review notes:
     - All comments use the block form, because asterisk-style comments must
       not contain unmatched quotation marks and several of the original
       notes contain apostrophes.
     - SAS orders a missing value below every number, so a missing log(gam)
       would satisfy the first "less than" cutoff and be placed in
       category 1. Rows whose rescaled gam is missing (or negative) now get
       a missing category and are left out of the transition matrix and the
       category means. When no such rows exist the output is unchanged.
     - The MERGE ... BY steps rely on their inputs already being in BY
       order; that ordering is assumed from the upstream files and is not
       enforced here (TODO confirm).
*/

libname public 'd:\a_data\proj\size_fun\exports\revision\wrap\public';
run;

ods html file="d:\a_data\proj\size_fun\exports\revision\wrap\public\setup_mean_reversion_iter1.html" style=minimal;

/* ------------------------------------------------------------------ */
/* First do 1997                                                      */
/* ------------------------------------------------------------------ */
filename nu_iter 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_nu_bsize.asc';
filename n_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_naics_level.asc';
filename n_levC 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base97_naics_levelC.asc';

/* Note beta1 is constant, will not use it here, and in any case rerun
   regression below, so get a second crack at bringing it in. */
data nu_iter97;
  infile nu_iter;
  input naicsindex naics iter_num boutiquesize beta1 beta2 beta3;
  nuN=beta2;
  nuT=beta3;
  if iter_num=1;
run;

data nu_iter97;
  set nu_iter97;
  drop naics; /* not character, so merge it in */
run;

/* naics_level has two observations for each industry, one for iter=1,
   the other for iter=10 */
data naics_level97;
  infile n_lev;
  input naicsindex naics iter_num sal_growth_chinaUS est_modelUS est_chinaUS
        alowerbar eta1 eta2 gam97_orig1-gam97_orig177;
run;

data naics_level97;
  set naics_level97;
  drop naics; /* not character, so merge it in */
  if iter_num=1;
run;

/* Next bring in the gam for China (to fit port shipments) */
data naics_levelC97;
  infile n_levC;
  input naicsindex naics iter_num gamC97_orig1-gamC97_orig177;
run;

data naics_levelC97;
  set naics_levelC97;
  drop naics; /* not character, so merge it in */
  if iter_num=1;
run;

/* ------------------------------------------------------------------ */
/* Next do 2007 (the nu_iter and n_lev filerefs are re-pointed)       */
/* ------------------------------------------------------------------ */
filename nu_iter 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base07_nu_bsize.asc';
filename n_lev 'd:\a_data\proj\size_fun\exports\revision\wrap\public\base07_naics_level.asc';

data nu_iter07;
  infile nu_iter;
  input naicsindex naics iter_num boutiquesize nuN nuT beta1 beta2 beta3;
  if iter_num=1;
run;

data nu_iter07;
  set nu_iter07;
  drop naics; /* not character, so merge it in */
run;

/* naics_level has two observations for each industry, one for iter=1,
   the other for iter=10 */
data naics_level07;
  infile n_lev;
  input naicsindex naics iter_num est_modelUS alowerbar eta1 eta2
        gam07_orig1-gam07_orig177;
  /* 2007 naics_level=naicsindex~naics~bni~sumc(est)~alowerbar~(eta')~(gam') */
  /* 1997 naics_level=naicsindex~naics~bni~sal_growth_chinaUS~sumc(est)~sumc(est_china)~alowerbar~(eta')~(gam') */
run;

data naics_level07;
  set naics_level07;
  drop naics; /* not character, so merge it in */
  if iter_num=1;
run;

/* ------------------------------------------------------------------ */
/* Attach the character naics code; naicsindex is the row number of   */
/* public.mandat_naics2                                               */
/* ------------------------------------------------------------------ */
data naicstext;
  set public.mandat_naics2;
  naicsindex=_n_;
  keep naicsindex naicstext naics;
run;

data naics_level97;
  merge naics_level97(in=in1) naicstext;
  by naicsindex;
  if in1;
  if naics='339111' then delete; /* No valid observations for 2007 */
run;

data naics_level07;
  merge naics_level07(in=in1) naicstext;
  by naicsindex;
  if in1;
  if naics='339111' then delete; /* No valid observations for 2007 */
run;

/* China share category: 1 is the highest share band, 6 is exactly zero */
data tradedat;
  set public.tradedat_forgauss;
  if newChina_shr07>=.50 then newChinacat0=1;
  else if newChina_shr07>=.25 then newChinacat0=2;
  else if newChina_shr07>=.10 then newChinacat0=3;
  else if newChina_shr07>=.05 then newChinacat0=4;
  else if newChina_shr07>0 then newChinacat0=5;
  else if newChina_shr07=0 then newChinacat0=6;
  keep naics newChina_shr07 newChinacat0;
  /* Note, newChinacat0 is a numeric version of the categorical variable */
run;

/* ------------------------------------------------------------------ */
/* Reshape the 177 gam columns to one row per (industry, ea_index)    */
/* ------------------------------------------------------------------ */
data t97;
  merge naics_level97(in=in1) tradedat(in=in2);
  by naics;
  if in1 and in2;
  array gam97_orig_{177} gam97_orig1-gam97_orig177;
  do ea_index=1 to 177;
    gam97_orig=gam97_orig_{ea_index};
    output;
  end;
  keep naics naicsindex naicstext ea_index gam97_orig newChina_shr07 newChinacat0;
run;

data tC97;
  set naics_levelC97;
  array gamC97_orig_{177} gamC97_orig1-gamC97_orig177;
  do ea_index=1 to 177;
    gamC97_orig=gamC97_orig_{ea_index};
    output;
  end;
  keep naicsindex ea_index gamC97_orig;
run;

data t07;
  merge naics_level07(in=in1) tradedat(in=in2);
  by naics;
  if in1 and in2;
  array gam07_orig_{177} gam07_orig1-gam07_orig177;
  do ea_index=1 to 177;
    gam07_orig=gam07_orig_{ea_index};
    output;
  end;
  keep naics ea_index gam07_orig;
run;

/* ------------------------------------------------------------------ */
/* Employment and population inputs                                   */
/* ------------------------------------------------------------------ */
data emp97;
  set public.ea_cm97;
  emp97=emphat_LM;
  keep naics ea_index emp97;
run;

data pop97;
  set public.ea_cm97;
  if naicsindex=1; /* one industry is enough: only ea-level columns are kept */
  keep ea_index pop97 pop97_US;
run;

data pop2007;
  set public.ea_cbp07;
  if naicsindex=1; /* one industry is enough: only ea-level columns are kept */
  keep ea_index pop2007 pop2007_US;
run;

/* ------------------------------------------------------------------ */
/* Assemble the working table t                                       */
/* ------------------------------------------------------------------ */
data t;
  merge t97(in=in1) t07 emp97;
  by naics ea_index;
  if in1;
run;

/* note break this up because tC97 has naicsindex but not naics */
data t;
  merge t(in=in1) tC97;
  by naicsindex ea_index;
  if in1;
run;

proc sort data=t;
  by ea_index;
run;

data t;
  merge t pop97 pop2007;
  by ea_index;
  popshr97=pop97/pop97_US;
  popshr07=pop2007/pop2007_US;
  wgt_emp=emp97+1; /* set equal to +1 because locations with gam=0 have emp=0 */
run;

proc sort data=t;
  by naics ea_index;
run;

/* Industry totals of gam, used as the rescaling denominators */
proc means data=t noprint;
  by naics;
  var gam97_orig gam07_orig;
  output out=tsum sum=gam97_orig_US gam07_orig_US;
run;

data tsum;
  set tsum;
  drop _freq_ _type_;
run;

data t;
  merge t tsum;
  by naics;

  gam97_new=gam97_orig/gam97_orig_US;
  gamC97_new=gamC97_orig/gam97_orig_US; /* rescale the same way */
  gam07_new=gam07_orig/gam07_orig_US;

  cat0_97=gam97_new=0;
  cat0_07=gam07_new=0;

  if gam97_new>0 then lngam97=log(gam97_new);
  if gam07_new>0 then lngam07=log(gam07_new);

  /* Note: above first rescale domestic gam for 1997 so that it sums to one.
     Next, to get gamC, use the 1997 domestic gam; that is why we call this
     gamC97. If we rescale gam, we need to rescale gamC by the same amount,
     to hold imports fixed, so we do that above. */
  /* We also scale so that domestic gam sums to 1 in 2007. If transportation
     costs are zero, this will hold fixed imports, even with the new 2007
     gam, because some of domestic gam has stayed the same. */

  /* us cutoffs of entire population of industries; the trailing comment on
     each cutoff is the percentile label given by the original author */

  /* A missing lngam97 (gam97_new missing or negative) would compare as
     smaller than every cutoff and fall into category 1, so it is mapped to
     a missing category before the cutoffs are applied. */
  if gam97_new=0 then cat97=0;
  else if lngam97=. then cat97=.;
  else if lngam97< -10.9122151 then cat97=1; /* 1% */
  else if lngam97< -9.1504076 then cat97=2;  /* 10% */
  else if lngam97< -7.7996085 then cat97=3;  /* 25% */
  else if lngam97< -6.0779618 then cat97=4;  /* 50% */
  else if lngam97< -4.6248727 then cat97=5;  /* 75% */
  else if lngam97< -3.5306734 then cat97=6;  /* 90% */
  else if lngam97< -2.9448961 then cat97=7;  /* 95% */
  else if lngam97< -1.9757683 then cat97=8;  /* 99% */
  else if lngam97< -1.5 then cat97=9;        /* ?% */
  else cat97=10;

  /* Same guard for 2007 */
  if gam07_new=0 then cat07=0;
  else if lngam07=. then cat07=.;
  else if lngam07< -10.9122151 then cat07=1; /* 1% */
  else if lngam07< -9.1504076 then cat07=2;  /* 10% */
  else if lngam07< -7.7996085 then cat07=3;  /* 25% */
  else if lngam07< -6.0779618 then cat07=4;  /* 50% */
  else if lngam07< -4.6248727 then cat07=5;  /* 75% */
  else if lngam07< -3.5306734 then cat07=6;  /* 90% */
  else if lngam07< -2.9448961 then cat07=7;  /* 95% */
  else if lngam07< -1.9757683 then cat07=8;  /* 99% */
  else if lngam07< -1.5 then cat07=9;        /* ?% */
  else cat07=10;

  /* 0/1 indicators for the 2007 category. They stay missing when cat07 is
     missing so that such rows do not pull the transition shares down. */
  if cat07 ne . then do;
    dum07var0= cat07=0;
    dum07var1= cat07=1;
    dum07var2= cat07=2;
    dum07var3= cat07=3;
    dum07var4= cat07=4;
    dum07var5= cat07=5;
    dum07var6= cat07=6;
    dum07var7= cat07=7;
    dum07var8= cat07=8;
    dum07var9= cat07=9;
    dum07var10= cat07=10;
  end;
run;

/* ------------------------------------------------------------------ */
/* Diagnostics written to the HTML listing                            */
/* ------------------------------------------------------------------ */
proc freq data=t;
  tables cat97*cat07;
  title 'proc freq';
run;

proc univariate data=t;
  var lngam97;
  title 'proc univariate';
run;

proc tabulate data=t;
  where newChinacat0=6;
  weight wgt_emp;
  class cat97;
  var dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
      dum07var6 dum07var7 dum07var8 dum07var9 dum07var10;
  table all cat97,
        mean*(dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
              dum07var6 dum07var7 dum07var8 dum07var9 dum07var10)*f=7.4;
  table all cat97,
        N*(dum07var0 dum07var1 dum07var2 dum07var3 dum07var4 dum07var5
           dum07var6 dum07var7 dum07var8 dum07var9 dum07var10)*f=7.0;
  title 'transition matrix newChinacat0=6, weighted by emp97+1';
run;

/* ------------------------------------------------------------------ */
/* Transition matrix and category means                               */
/* ------------------------------------------------------------------ */
proc sort data=t;
  by cat97;
run;

/* Rows with a missing cat97 are excluded so pmat6 has one record per
   category only. */
proc means data=t noprint;
  where newChinacat0=6 and cat97^=.;
  weight wgt_emp;
  by cat97;
  var dum07var0-dum07var10;
  output out=pmat6 mean=prob0-prob10;
run;

data pmat6;
  set pmat6;
  drop _freq_ _type_;
  /* This is the probability transition matrix, given cat6 */
run;

/* Mean log(gam) within each non-zero 1997 category; a missing cat97 is
   excluded as well, so no extra record is produced. */
proc means data=t noprint;
  where cat97^=0 and cat97^=.;
  by cat97;
  var lngam97;
  output out=mean_lngam97 mean=mean_lngam97;
run;

data mean_lngam97;
  set mean_lngam97;
  drop _freq_ _type_;
run;

data t;
  merge t mean_lngam97;
  by cat97;
  if cat97=0 then gam97fit=0;
  else gam97fit=exp(mean_lngam97);
run;

proc corr data=t;
  var gam97_new gam97fit;
  title 'proc corr';
run;

/* ------------------------------------------------------------------ */
/* Write the comma-delimited output files                             */
/* ------------------------------------------------------------------ */
proc sort data=t;
  by naics ea_index;
run;

data _null_;
  set t;
  file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\gamorig_and_fit_iter1.asc';
  put naicsindex ',' ea_index ',' gam97_orig ',' gam07_orig ',' gam97_new ','
      gam07_new ',' gam97fit ',' cat97 ',' cat07 ',' gamC97_orig ','
      gamC97_new ',';
  /* 82305*11 records */
run;

data _null_;
  set pmat6;
  file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\pmat6_iter1.asc';
  put prob0 ',' prob1 ',' prob2 ',' prob3 ',' prob4 ',' prob5 ',' prob6 ','
      prob7 ',' prob8 ',' prob9 ',' prob10 ',' ;
  /* 11*11 records */
  /* calling it pmat6 because it conditions on newchina share category 6 */
run;

data _null_;
  set mean_lngam97;
  file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\mean_lngam97_iter1.asc';
  put mean_lngam97 ',';
  /* 10 records */
run;

/* now get coefficients for count regression and other variables */
/* NOTE(review): t was last sorted by naics ea_index; the BY naicsindex below
   assumes naicsindex rises with naics - confirm against mandat_naics2. */
data list;
  set t;
  by naicsindex;
  if first.naicsindex;
  keep naics naicsindex newChina_shr07 newChinacat0 ;
run;

data list;
  merge list(in=in1) nu_iter97;
  by naicsindex;
  if in1;
run;

data _null_;
  set list;
  file 'd:\a_data\proj\size_fun\exports\revision\wrap\public\naicslist_iter1.asc';
  put naicsindex ',' naics ',' nuN ',' nuT ',' newChina_shr07 ','
      newChinacat0 ',';
  /* 465*6 records */
run;

ods html close;
run;