/* Tab11_prediction_primary_only */ // Usses the iter_num=1 data output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\Tab11_prediction_primary_only.out reset; internalChina=1; print "internalChina=" internalChina; /* Loading data */ load dat[83721,13] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_CM97.asc; load loc_dat[177,5] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop_long_lat97.asc; load loc_dat07[177,4] = D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_pop07.asc; load a_param_mat[466,6] = D:\a_data\proj\size_fun\exports\revision\wrap\public\stage1_estimatesNAICS97_for_gauss.asc; //file form of a_param_mat...; //put naicsindex ',' eta1 ',' eta2 ',' LogLike ',' eta_lnln ',' LogLike_lnln ','; load ea_dist_within_tract[177,5]=D:\a_data\proj\size_fun\exports\revision\wrap\public\ea_dist_within_tract.asc; // Pick a distance measure 2-5: (1) ea_index ',' (2) dist_wgtavg ',' (3) lndist_wgtavg ',' (4) dist_wgtavg_delete ',' (5) lndist_wgtavg_delete ','; // the _delete throughs out Census tracts, but let's ignore this and just uas either (2) or (3) eta_semi=a_param_mat[., 2 3]; eta_lnln=a_param_mat[., 5]; LogLike_semi=a_param_mat[., 4]; LogLike_lnln=a_param_mat[., 6]; use_lnln=LogLike_semi.>LogLike_lnln; //Use the best fit, which is smallest in absolute value. "Use the best fit of semi and lnln"; "Total industries used is" rows(use_lnln); "Number for which lnln is the best fit is" sumc(use_lnln); naicsindex_select_vec=a_param_mat[.,1]; population=loc_dat[.,3]; pop07=loc_dat07[.,3]; long=loc_dat[.,4]; lat=loc_dat[.,5]; x = population./sumc(population); x07 = pop07./sumc(pop07); L = rows(x); t1=hsec; numsim=10000; // Now bring in new things for the mean reversion exercise; load naicslist[465,6]=D:\a_data\proj\size_fun\exports\revision\wrap\public\naicslist_iter1.asc; // naicsindex ',' naics ',' nuN ',' nuT ',' newChina_shr07 ',' newChinacat0 ','; load gamorig_and_fit[82305,11]=D:\a_data\proj\size_fun\exports\revision\wrap\public\gamorig_and_fit_iter1.asc; // put naicsindex ',' ea_index ',' gam97_orig ',' gam07_orig ',' gam97_new ',' gam07_new ',' gam97fit ',' cat97 ',' cat07 ',' gamC97_orig ',' gamC97_new ','; load pmat[11,11]=D:\a_data\proj\size_fun\exports\revision\wrap\public\pmat6_iter1.asc; //put prob1 ',' prob2 ',' prob3 ',' prob4 ',' prob5 ',' prob6 ',' prob7 ',' prob8 ',' prob9 ',' prob10 ',' ; load mean_lngam[10,1]=D:\a_data\proj\size_fun\exports\revision\wrap\public\mean_lngam97_iter1.asc; gamappox_vec=0|exp(mean_lngam); // this vector is the approximate value of gam, given state. // convert to pmatcum numgrid=rows(pmat); pmatcum=zeros(numgrid,numgrid);pmatcum[.,1]=pmat[.,1]; i=2; do while i<=numgrid; pmatcum[.,i]=pmatcum[.,i-1]+pmat[.,i]; i=i+1; endo; /* Construct distance matrix */ dist_mat = zeros(L,L); for i (1, L, 1); lat0=lat[i]; long0=long[i]; hold=sin(lat./57.2958).*sin(lat0/57.2958) + cos(lat./57.2958).*cos(lat0/57.2958).*cos(long0/57.2958 - long./57.2958); distvec= 3958.75.*arccos(hold); dist_mat[i,.]=distvec'; endfor; //******edit 2 load in ea_dist_within dist_mat = diagrv(dist_mat,ea_dist_within_tract[.,2]); lndist_mat=ln(dist_mat); lndist_mat = diagrv(lndist_mat,ea_dist_within_tract[.,3]); // below is a matrix where we put the output.. results_means=zeros(6,6); results_medians=zeros(6,6); chinanewcat=1; do while chinanewcat<=6; "**********************************"; "Chinanewcat=" chinanewcat; //pick naics to cycle through for this job naicslist_this_job=selif(naicslist,naicslist[.,6].==chinanewcat); /* Now loop through the different NAICS */ indnew=1; do while indnew<=rows(naicslist_this_job); naicsindex=naicslist_this_job[indnew,1]; // use the new index to get started naics=naicslist_this_job[indnew,2]; nuN=naicslist_this_job[indnew,3]; nuT=naicslist_this_job[indnew,4]; ind=selif(seqa(1,1,466),naicsindex_select_vec.==naicsindex); //this is the index we used before, something between 1 and 466 sal_dat=selif(dat[.,6],dat[.,3].==naicsindex); S = sal_dat./sumc(sal_dat); /* now set a parameters */ /* global parameters */ if use_lnln[ind]==0; alowerbar=0; eta=eta_semi[ind,.]'; xzi=0; /* shut down missclassification */ A = exp(-(.001*eta[1]).*dist_mat - (.000001*eta[2]).*dist_mat.^2); A = A.*( A.=ones(L,L)); /* now cap at 1. */ else; A = exp(-eta_lnln[ind,1].*lndist_mat); endif; if internalChina==1; A_China=A; endif; //This sets transportation costs for China imports; // not read in the various gam vectors gamdat=selif(gamorig_and_fit[.,3:11],gamorig_and_fit[.,1].==naicsindex); gam97_orig=gamdat[.,1]; gam07_orig=gamdat[.,2]; gam97_new=gamdat[.,3]; gam07_new=gamdat[.,4]; gam97fit=gamdat[.,5]; gam_cat97=gamdat[.,6]; gam_cat07=gamdat[.,7]; gamC97_orig=gamdat[.,8]; gamC97_new=gamdat[.,9]; proc (3) = LQest_calc(gam,A); local hold, LQest, pi_mat, est, denom; hold=gam[.,ones(L,1)]'; hold=A.*hold; denom=sumc(hold'); denom=denom[.,ones(L,1)]; pi_mat=hold./denom; est=diag(pi_mat); LQest=(est./sumc(est))./x; retp(LQest,pi_mat,est); endp; proc (3) = LQest_calc_China(gam,gamC,A); local hold, LQest_China, est_China, denom, denomC, denom_combine,pi_mat_china; //first do domestic hold=gam[.,ones(L,1)]'; hold=A.*hold; denom=sumc(hold'); denom=denom[.,ones(L,1)]; // now China hold=gamC[.,ones(L,1)]'; hold=A_China.*hold; denomC=sumc(hold'); denomC=denomC[.,ones(L,1)]; denom_combine=denom+denomC; //now calculate probability domestic location, numerator is same as before. hold=gam[.,ones(L,1)]'; hold=A.*hold; pi_mat_china=hold./denom_combine; // this is the probability of buying from a domestic locaion ea, condition on the China experiment (i.e. not probability of buying from China. est_China=diag(pi_mat_China); LQest_China=(est_China./sumc(est_China))./x; retp(LQest_China,pi_mat_China,est_China); endp; {LQest_base1,pi_mat,est} = LQest_calc(gam97_new,A); {LQest_China1,pi_mat_China1,est_China1} = LQest_calc_China(gam97_new,gamC97_new,A); {LQest_base1_aprox,pi_mat_aprox,est_aprox} = LQest_calc(gam97fit,A); {LQest_China1_aprox,pi_mat_China_aprox,est_China_aprox} = LQest_calc_China(gam97fit,gamC97_new,A); LQsal97=s./x; // output vec is naicsindex, ea_index, LQest_base1, LQest_China1, LQest_base1_aprox, LQest_China1_aprox, s, x, LQsal97 output_ind=(naicsindex.*ones(L,1))~seqa(1,1,L)~LQest_base1~LQest_China1~LQest_base1_aprox~LQest_China1_aprox~s~x~LQsal97; output_ind=real(output_ind); //Part 2. Simulate the reversion to the mean. proc (1)=gam_simulate(gam_cat,pmatcum); local simdraw,hold,hold2,newstate,gam_sim; simdraw=rndu(L,1); hold=gam_cat+1; //this goes from 0 to 10, convert to 1 to 11 hold2=pmatcum[hold,.]; newstate=sumc((hold2.<=(simdraw.*ones(L,numgrid)))')+1; gam_sim=gamappox_vec[newstate]; gam_sim=gam_sim./sumc(gam_sim); //renormalize to sum to one. retp(gam_sim); endp; simhold=zeros(L,numsim); simholdChina=zeros(L,numsim); simindex=1; do while simindex<=numsim; gam_sim=gam_simulate(gam_cat97,pmatcum); {LQest_base1sim,pi_matsim,estsim} = LQest_calc(gam_sim,A); simhold[.,simindex]=LQest_base1sim; {LQest_China1sim,pi_mat_China1sim,est_China1sim} = LQest_calc_China(gam_sim,gamC97_new,A); simholdChina[.,simindex]=LQest_China1sim; simindex=simindex+1; endo; if indnew==1; output_all=real(output_ind); output_sim=real(simhold); output_simChina=real(simholdChina); else; output_all=output_all|real(output_ind); output_sim=output_sim|real(simhold); output_simChina=output_simChina|real(simholdChina); endif; indnew=indnew+1; endo; output_select=selif(output_all,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); meanLQ=meanc(output_select[.,3 4 5 6])'; medianLQ=median(output_select[.,3 4 5 6])'; results_means[chinanewcat, 1 2 3 4]=meanLQ; results_medians[chinanewcat, 1 2 3 4]=medianLQ; output_sim_select=selif(output_sim,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); output_simChina_select=selif(output_simChina,(output_all[.,7].>=(.05)).and(output_all[.,9].>=2)); meanLQ_regress_to_mean=meanc(meanc(output_sim_select)); hold=meanc(output_sim_select'); //first take means within industry before calculating median. medianLQ_regress_to_mean=median(hold); meanLQ_regress_to_meanChina=meanc(meanc(output_simChina_select)); hold=meanc(output_simChina_select'); medianLQ_regress_to_meanChina=median(hold); results_means[chinanewcat, 5]=meanLQ_regress_to_mean; results_medians[chinanewcat, 5]=medianLQ_regress_to_mean; results_means[chinanewcat, 6]=meanLQ_regress_to_meanChina; results_medians[chinanewcat, 6]=medianLQ_regress_to_meanChina; //Now aggregate to take back to SAS for regressions if chinanewcat==1; output_all_1=output_all; output_sim_1=meanc(output_sim'); output_simChina_1=meanc(output_simChina'); else; output_all_1=output_all_1|output_all; output_sim_1=output_sim_1|meanc(output_sim'); output_simChina_1=output_simChina_1|meanc(output_simChina'); endif; chinanewcat=chinanewcat+1; endo; "means" "base china base(approx) China(approx) RTMonly RTM+China"; format /rd 7,2; results_means; " "; "medians" "base china base(approx) China(approx) RTMonly RTM+China"; format /rd 7,2; results_medians; " "; mean_ratios=results_means[.,4 5 6]./results_means[.,3]; median_ratios=results_medians[.,4 5 6]./results_medians[.,3]; "ratio of means"; "Chinaonly RTMonly China+RTM"; mean_ratios; ""; "ratio of medians"; "Chinaonly RTMonly China+RTM"; median_ratios; " "; print "numsim=" numsim; print ((hsec-t1)/100)/60 "minutes"; outwidth 180; format /rd 20,10; screen off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_all_1.asc reset; output_all_1; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_sim_1.asc reset; output_sim_1; output off; output file=D:\a_data\proj\size_fun\exports\revision\wrap\public\z_output_simChina_1.asc reset; output_simChina_1; output off; screen on;