/* program base07.prg */ // Uses program base97.prg as template output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\base07.out reset; internalChina=1; samepop=0; sameshock=0; sameshock_level=.33; print "internalChina=" internalChina; print "samepop=" samepop; print "sameshock=" sameshock; if sameshock==1; print "sameshock_level=" sameshock_level; endif; /* Loading data */ // Use 2007 instead of 1997 load dat[83721,13] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_cbp07.asc; load loc_dat[177,5] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop_long_lat97.asc; load loc_dat07[177,4] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop07.asc; load sal97_scalefactor[473,3] = D:\a_data\proj\size_fun\exports\revision\wrap\public\sal97_scalefactor.asc; load a_param_mat[466,6] = D:\a_data\proj\size_fun\exports\revision\wrap\public\stage1_estimatesNAICS97_for_gauss.asc; //file 'd:\a_data\proj\size_fun\exports\revision\prg\model_final\parameters_r.asc'; //put naicsindex ',' eta1 ',' eta2 ',' LogLike ',' eta_lnln ',' LogLike_lnln ','; load ea_dist_within_tract[177,5]=D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_dist_within_tract.asc; // Pick a distance measure 2-5: (1) ea_index ',' (2) dist_wgtavg ',' (3) lndist_wgtavg ',' (4) dist_wgtavg_delete ',' (5) lndist_wgtavg_delete ','; // the _delete throughs out Census tracts, but let's ignore this and just uas either (2) or (3) load china_ea_con_share[177,2]=D:\a_data\proj\size_fun\exports\revision\wrap\public\china_ea_con_share.asc; weight_china_ea=china_ea_con_share[.,2]; //this is the import weight on each ea load tradedat_forgauss[473,13]=D:\a_data\proj\size_fun\exports\revision\wrap\public\tradedat_forgauss.asc; //put naics ',' sal97 ',' sal07 ',' impChina97 ',' impChina07 ',' impWorld97 ',' impWorld07 ',' expWorld97 ',' expWorld07 ',' impChina07orig ',' impChina07new ','; eta_semi=a_param_mat[., 2 3]; eta_lnln=a_param_mat[., 5]; LogLike_semi=a_param_mat[., 4]; LogLike_lnln=a_param_mat[., 6]; use_lnln=LogLike_semi.>LogLike_lnln; //Use the best fit, which is smallest in absolute value. "Use the best fit of semi and lnln"; "Total industries used is" rows(use_lnln); "Number for which lnln is the best fit is" sumc(use_lnln); naicsindex_select_vec=a_param_mat[.,1]; sal97_scalefactor_select=sal97_scalefactor[naicsindex_select_vec,3]; // Use 2007 population // population=loc_dat[.,3] (this is where population came before, but not now); pop07=loc_dat07[.,3]; population=pop07; long=loc_dat[.,4]; lat=loc_dat[.,5]; x = population./sumc(population); if samepop==0; x07 = pop07./sumc(pop07); endif; if samepop==1; x07 = x; endif; //with this option keep popultion unchanged, not relevant here because pop=2007 where or not option is selected. L = rows(x); /* Boutique size assumption */ scale_adjustment=1; //set equal to one to just alone; print "scale_adjustment=" scale_adjustment; boutique_size_assumpt=scale_adjustment.*sal97_scalefactor_select; /* Construct distance matrix */ dist_mat = zeros(L,L); for i (1, L, 1); lat0=lat[i]; long0=long[i]; hold=sin(lat./57.2958).*sin(lat0/57.2958) + cos(lat./57.2958).*cos(lat0/57.2958).*cos(long0/57.2958 - long./57.2958); distvec= 3958.75.*arccos(hold); dist_mat[i,.]=distvec'; endfor; //******edit 2 load in ea_dist_within dist_mat = diagrv(dist_mat,ea_dist_within_tract[.,2]); lndist_mat=ln(dist_mat); lndist_mat = diagrv(lndist_mat,ea_dist_within_tract[.,3]); /* setup some helpful matrices */ hold=seqa(1,1,L); dest_mat=hold[.,ones(L,1)]; /* contains the indes of destination for point i,j. */ orig_mat=dest_mat'; /* contains the index of the origin for point i,j. */ /* Now loop through the different NAICS */ ind=1; //do while ind<=rows(naicsindex_select_vec); do while ind<=5; naicsindex=naicsindex_select_vec[ind]; boutiquesize=boutique_size_assumpt[ind]; loc_index=selif(dat[.,1],dat[.,3].==naicsindex); naics=selif(dat[.,4],dat[.,3].==naicsindex); naics=naics[1]; emp_dat=selif(dat[.,5],dat[.,3].==naicsindex); sal_dat=selif(dat[.,6],dat[.,3].==naicsindex); sal1_dat=selif(dat[.,7],dat[.,3].==naicsindex); sal12_dat=selif(dat[.,8],dat[.,3].==naicsindex); sal13_dat=selif(dat[.,9],dat[.,3].==naicsindex); est_dat=selif(dat[.,10],dat[.,3].==naicsindex); est1_dat=selif(dat[.,11],dat[.,3].==naicsindex); est12_dat=selif(dat[.,12],dat[.,3].==naicsindex); est13_dat=selif(dat[.,13],dat[.,3].==naicsindex); tradedat_naics=selif(tradedat_forgauss,tradedat_forgauss[.,1].==naics); //Note use naics here, rathen then bother with index tradedat_sal97=tradedat_naics[2]; tradedat_sal07=tradedat_naics[3]; tradedat_impChina97=tradedat_naics[4]; tradedat_impChina07=tradedat_naics[5]; tradedat_impWorld97=tradedat_naics[6]; tradedat_impWorld07=tradedat_naics[7]; tradedat_expWorld97=tradedat_naics[8]; tradedat_expWorld07=tradedat_naics[9]; tradedat_impChina07orig=tradedat_naics[10]; tradedat_impChina07new=tradedat_naics[11]; tradedat_impWorld07orig=tradedat_naics[12]; tradedat_impWorld07new=tradedat_naics[13]; // put naics ',' sal97 ',' sal07 ',' impChina97 ',' impChina07 ',' impWorld97 ',' impWorld07 ',' expWorld97 ',' expWorld07 ',' impChina07orig ',' impChina07new ',' impWorld07orig ',' impWorld07new ','; // Note sales are already in, but convenient just to bring everything together for this operation /* note the sal, sal1, sal12, etc. means: (sal: all sales) (sal1, sales of est in emp category 1) (sal12, sales of est in cat 1 or 2) (sal13, sales of est in cat 1,2,3)*/ /* starting boutique parameters before iteration */ estNhat=0; /* starting estimate of boutique establishments */ bniters=10; nureport=zeros(bniters,9); for bni (1, bniters, 1); salT=sal_dat-boutiquesize*estNhat; salT=(salT.>zeros(L,1)).*salT; salN=sal_dat - salT; /* Note if one to throw out plants in small employment size classes, use sal=sal_dat-sal13_dat. */ S = salT./sumc(salT); /* sales shares (must sum to one for this to work) */ S_TNboth=sal_dat./sumc(sal_dat); nonzeroS= S.>zeros(L,1); /* vector of indicator variables equal to one if positive sales at each location */ non0sel=selif(seqa(1,1,L),nonzeroS); /* list of locations with nonzero sales */ L_pos = rows(non0sel); /* number of locations with nonzero sales */ i_maxS=maxindc(S); /* location with highest sales */ hold=seqa(1,1,rows(non0sel)); i_maxS_trun=selif(hold,non0sel.==i_maxS); /* location with highest set (indexed counting only nonzero locations) */ relsize_dat=(S./est_dat)./(sumc(S)/sumc(est_dat)); //Now in 2012 make key edit to original program /* now set a parameters */ /* global parameters */ if use_lnln[ind]==0; alowerbar=0; eta=eta_semi[ind,.]'; xzi=0; /* shut down missclassification */ A = exp(-(.001*eta[1]).*dist_mat - (.000001*eta[2]).*dist_mat.^2); A = A.*( A.=ones(L,L)); /* now cap at 1. */ else; A = exp(-eta_lnln[ind,1].*lndist_mat); endif; // Calcualte A_China, note this will be L*L, we don't do the selection on this, the weights will take care of everything. if internalChina==1; A_China=A; else; A_China=ones(L,L); endif; Atrun=A[.,non0sel]; /* select only columns with positive sales */ gam=0; maxdif=0; /* given parameters, solve for gam. */ proc solvegam(Atrun); local maxdif, dist, lam0, lam1, iter; lam0=((1./S[non0sel]).*(.0001)); lam0[i_maxS_trun]=1; maxdif=9999999; iter=1; // do while iter<=100; do while maxdif>=.000000001; //changed this, for estimation better to hold iter fixed. But for this exercise, lets stop when get close. //print "lam0=" lam0'; lam1=f(lam0); maxdif=maxc(abs(lam0-lam1)); lam0=lam1; iter=iter+1; endo; retp(1./lam0); endp; /* want to find fixed point of this function of lambda */ proc f(lam); local i, vof, A_lam, sum_A_lam; vof=zeros(L_pos,1); A_lam=Atrun./(lam[.,ones(L,1)]'); sum_A_lam=sumc(A_lam'); i=1; do while i<=L_pos; if i==i_maxS_trun; vof[i_maxS_trun]=1; else; vof[i]=(1/S[non0sel[i]])*sumc(Atrun[.,i].*x./sum_A_lam); endif; i=i+1; endo; retp(vof); endp; // Now solve for endogenous variables as function of the parameters pi_mat=0; S_disagg=0; est=0; meansize=0; meansizeUS=0; relsize=0; swigvec=0;S_cfs_disagg=0; S_cfs_local=0; denom=0; proc (2) = var_calc(alowerbar,eta); local hold; hold=solvegam(Atrun); gam=zeros(L,1); gam[non0sel]=hold;//fill in the nonzero stuff. hold=gam[.,ones(L,1)]'; hold=A.*hold; denom=sumc(hold'); denom=denom[.,ones(L,1)]; pi_mat=hold./denom; S_disagg=pi_mat.*x[.,ones(L,1)]; est=diag(pi_mat); meansize=S./est; meansizeUS=sumc(S)/sumc(est); relsize=meansize./meansizeUS; S_cfs_disagg=S_disagg; /*no distinction between S_cfs_disagg and S_disagg here (but later this will happen)*/ retp(S_cfs_disagg,relsize); endp; pi_mat_china=0; S_disagg_china=0; S_china=0; denom_combine=0; denomC=0; gamC=0; est_china=0; proc (2) = var_calc_china(gam,lamChina); local hold, denom_combine; // calculate denominator for China source, same as US source gamC=lamChina.*weight_china_ea; hold=gamC[.,ones(L,1)]'; hold=A_China.*hold; denomC=sumc(hold'); denomC=denomC[.,ones(L,1)]; denom_combine=denom+denomC; //now calculate probability, numerator is same as before. hold=gam[.,ones(L,1)]'; hold=A.*hold; pi_mat_china=hold./denom_combine; // this is the probability of buying from a domestic locaion ea, condition on the China experiment (i.e. not probability of buying from China. est_china=diag(pi_mat_china); S_disagg_china=pi_mat_china.*x07[.,ones(L,1)]; S_china=sumc(S_disagg_china); retp(S_disagg_china,S_china); endp; gamC_old=0; pi_mat_C_source=0; S_C_source=0; total_salC=0; proc (1) = lam_new(lam_old); local hold, denom_combine, vof; gamC_old=lam_old.*weight_china_ea; hold=gamC_old[.,ones(L,1)]'; hold=A_China.*hold; denomC=sumc(hold'); denomC=denomC[.,ones(L,1)]; denom_combine=denom+denomC; //now calculate probability for China, . hold=weight_china_ea[.,ones(L,1)]'; hold=A_China.*hold; pi_mat_C_source=hold./denom_combine; S_C_source=pi_mat_C_source.*x07[.,ones(L,1)]; total_salC=sumc(sumc(S_C_source)); // folloowing line put in newChina_shr07 instead of data, if go for the sameshock option if sameshock ne 1; vof=(tradedat_impChina07new/(tradedat_impChina07new+tradedat_sal07))/total_salC; else; vof=sameshock_level/total_salC; endif; retp(vof); endp; {S_cfs_disagg,relsize} = var_calc(alowerbar,eta); /* Skip china experiment. */ //lamChina0=0; //lamindex=1; do while lamindex<=20; //lamChina1=lam_new(lamChina0); //lamChina0=lamChina1; //lamindex=lamindex+1; endo; //lamChina=lamChina1; //{S_disagg_china,S_china} = var_calc_china(gam,lamChina); //sal_growth_chinaUS=(sumc(S_china)-sumc(S))/sumc(S); /* regression to find nu in model 2*/ invsqx=x^(-0.5); w=diagrv(zeros(L,L), invsqx); y=est_dat; z=ones(L,1)~x~est; beta=invpd(z'w*z)*z'w*y; nuN=beta[2,1]; nuT=beta[3,1]; estNhat=nuN*x; /* boutique establishments estimate */ // some comments about the variables to be output. // NAICS level // bni is the iteration index // boutiquiesize is the assumption we are making which gets used in the iterative process // est is BEJK model standard good s, just the diagonal of the pi matrix. // est_china is est by location in the US in standardized BEJK segment, after impose China // // Location Level // sal_dat sales by ea in $1,000 // salT and salN break it up by tradeable and nontradable in old notation // est T segment counts (diagonal of pi) // estNhat N segment counts, fitted value same model units as est // S_TNboth, in data, adding T+N together // s, this is sales shares in BEJK standard Model, it sums to one. // s_China, sales shares after China shock. // est_China, BEJK standard segment, (diag of pi) /* prepare for output */ nureport=naicsindex~naics~bni~boutiquesize~nuN~nuT~(beta'); naics_level=naicsindex~naics~bni~sumc(est)~alowerbar~(eta')~(gam'); loc_level=(naicsindex.*ones(L,1))~(bni.*ones(L,1))~loc_index~sal_dat~salT~salN~est~estNhat~S_TNboth~s; if (ind==1 and bni==1); nu_bsize_out=nureport; naics_level_out=naics_level; loc_level_out=loc_level; else; nu_bsize_out=nu_bsize_out|nureport; naics_level_out=naics_level_out|naics_level; loc_level_out=loc_level_out|loc_level; endif; endfor; print "naicsindex=" naicsindex "naics=" naics; ind=ind+1; endo; // just keep first and last iteration naics_level_out=selif(naics_level_out,(1 .==naics_level_out[.,3]).or(bniters.==naics_level_out[.,3])); loc_level_out=selif(loc_level_out,(1 .==loc_level_out[.,2]).or(bniters.==loc_level_out[.,2])); outwidth 180; format /rd 20,10; screen off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\base07_nu_bsize.asc reset; real(nu_bsize_out); output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\base07_naics_level.asc reset; real(naics_level_out); output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\base07_loc_level.asc reset; real(loc_level_out); output off; screen on;