% Start from an empty workspace so stale variables cannot leak into the run.
clearvars
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This code produces Figure 1 of Hansen and Lee (forthcoming at Econometrica)
% "Inference for Iterated GMM Under Misspecification"
%
% For errors and/or comments please contact Seojeong (Jay) Lee at
% jay.lee@unsw.edu.au
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The following functions must be in the same folder
%
% arellano_bond.m        calculates the data matrices for the difference GMM
% iterated_gmm_cluster.m calculates the iterated GMM, se, and the J test
%
% The following data files must be in the same folder
%
% fiveyearpanel.mat     five year data
% tenyearpanel.mat      ten year data

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Choose the table column (specification code tc) to be replicated
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Acemoglu Johnson Robinson Yared 2008
% tc = 24: AJRY Table 2 Column 4
% tc = 28: AJRY Table 2 Column 8

% Cervellati Jung Sunde Vischer 2014
% tc = 41: CJSV Table 4 Column 1 Panel B
% tc = 42: CJSV Table 4 Column 2 Panel B
% tc = 43: CJSV Table 4 Column 3 Panel B 
% tc = 44: CJSV Table 4 Column 4 Panel B
% tc = 45: CJSV Table 4 Column 5 Panel B
% tc = 46: CJSV Table 4 Column 6 Panel B

tc = 24;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Construct Data Matrices
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Fail early on an unsupported specification code. Without this check an
% unknown tc would load the ten-year panel and then either crash on an
% undefined x or silently pick up a stray x from the data file.
supported_tc = [24 28 41 42 43 44 45 46];
if ~isscalar(tc) || ~ismember(tc,supported_tc)
    error('figure1:unsupportedTc', ...
        'Unsupported specification code tc = %s. Valid codes are %s.', ...
        mat2str(tc), mat2str(supported_tc));
end

% Codes 24, 41, 43 and 45 use the five-year panel; all others the ten-year one.
if ismember(tc,[24 41 43 45])
    load fiveyearpanel.mat
else
    load tenyearpanel.mat
end

% Variables come from the loaded panel file.
% NOTE(review): presumably a democracy index and log real GDP per capita -
% confirm against the data documentation.
d = fhpolrigaug;
y = lrgdpch;

if tc==24 || tc==28
    % AJRY specifications: full sample, no additional regressor
    x = [];
else
    % CJSV specifications: keep only observations with colony == 1
    is_colony = (colony==1);
    d = d(is_colony);
    y = y(is_colony);
    year_numeric = year_numeric(is_colony);
    country = country(is_colony);

    % Additional regressor depends on the table column
    switch tc
        case {41,42}
            x = xc_y;
        case {43,44}
            x = y_indep_fl;
        case {45,46}
            x = y_power;
    end
    x = x(is_colony);
end

[Dd,DX,Z,mem,n,G,ng] = arellano_bond(d,y,x,year_numeric,country);

% k: number of columns of DX.
% l: dimension used below for the l-by-l weight matrices built from Z.
k = size(DX,2);
l = max(ng)*(max(ng)+1)/2+k-1;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% GMM Estimation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% One-step GMM

% Cluster g occupies rows row_lo(g):row_hi(g) of the stacked matrices
% (cluster sizes are given by ng, in stacking order).
row_hi = cumsum(ng);
row_lo = row_hi - ng + 1;

% One-step weight matrix: average over clusters of Zg'*Dg*Zg, where Dg is
% tridiagonal with 2 on the diagonal and -1 on the first off-diagonals.
% The per-cluster pieces are kept in W0i because the robust variance
% below needs them.
W0i = zeros(l,l,G);
W0 = zeros(l,l);
for g = 1:G
    Tg = ng(g);
    Zg = Z(row_lo(g):row_hi(g),:);

    off_diag = -1*ones(Tg-1,1);
    Dg = diag(2*ones(Tg,1))+diag(off_diag,1)+diag(off_diag,-1);

    W0i(:,:,g) = Zg'*Dg*Zg;
    W0 = W0 + W0i(:,:,g);
end
W0 = W0/n;

% Linear GMM estimator with weight matrix W0
XZ = DX'*Z;
ZX = Z'*DX;
b1 = (XZ/W0*ZX)\XZ/W0*(Z'*Dd);

% Moment Jacobian, residuals and average moments at b1
Q = -ZX/n;
e1 = Dd - DX*b1;
mu1 = (Z'*e1)/n;

% Accumulate the cluster-level outer products:
%   Om01 - moment variance used by the conventional SE
%   Om1  - variance of the influence-type terms used by the robust SE
QW = Q'/W0;
Om01 = zeros(l,l);
Om1 = zeros(k,k);
for g = 1:G
    span = row_lo(g):row_hi(g);
    Zg = Z(span,:);
    e1g = e1(span);
    DXg = DX(span,:);

    mg = Zg'*e1g;
    Om01 = Om01 + mg*mg';

    psi1 = QW*Zg'*e1g-DXg'*Zg/W0*mu1-QW*W0i(:,:,g)/W0*mu1;
    Om1 = Om1 + psi1*psi1';
end
Om01 = Om01/n;
Om1 = Om1/n;

H0 = QW*Q;

% Conventional Heteroskedasticity-and-Cluster-robust SE
V1 = H0\(QW*Om01/W0*Q)/H0;
s1 = sqrt(diag(V1/n));

% The new fully robust SE
V1r = H0\Om1/H0;
s1r = sqrt(diag(V1r/n));


%%% Two-step GMM

% Re-estimate using the one-step moment variance Om01 as weight matrix
XZ = DX'*Z;
b2 = (XZ/Om01*(Z'*DX))\XZ/Om01*(Z'*Dd);

% Residuals and average moments at b2
e2 = Dd - DX*b2;
mu2 = (Z'*e2)/n;

% Cluster-robust moment variance at b2; cluster c occupies rows
% start_row(c):stop_row(c) of the stacked matrices.
stop_row = cumsum(ng);
start_row = stop_row - ng + 1;
Om2 = zeros(l,l);
for c = 1:G
    span = start_row(c):stop_row(c);
    mc = Z(span,:)'*e2(span);
    Om2 = Om2 + mc*mc';
end
Om2 = Om2/n;

% Conventional Heteroskedasticity-and-Cluster-robust SE
V2 = inv(Q'/Om2*Q);
s2 = sqrt(diag(V2/n));

% Overidentifying restrictions J test (upper-tail chi-square, l-k d.o.f.)
J2 = n*mu2'/Om2*mu2;
pv2 = chi2cdf(J2,l-k,'upper');

% Iterated GMM
%
% For each of five starting values (wc = 1..5): recompute the
% cluster-robust weight matrix at the current estimate bs, re-solve the
% linear GMM problem, and repeat until successive estimates differ by less
% than `tolerance` in Euclidean norm. The first two coefficients of every
% non-converged step are stored in bhist_dem (b(1)) and bhist_inc (b(2));
% rows that are never reached stay at zero and the converging step itself
% is not stored.
%
% NOTE: after the loop, b and w are those of the last starting value (wc = 5).
tolerance = 1e-5;
maxit = 1e+3;

cluster_ids = unique(mem);
G = length(cluster_ids);

bhist_dem = zeros(maxit,5);
bhist_inc = zeros(maxit,5);

% Aliases kept from the original script; they are not referenced again in
% the visible part of this file.
y = Dd;
x = DX;
z = Z;

% idx(:,g) is the logical row selector for cluster g. It does not depend on
% the starting value, so it is built once here (by implicit expansion)
% instead of once per wc.
idx = (mem(:) == cluster_ids(:).');

for wc = 1:5

    % Starting value for this pass
    switch wc
        case 1
            bs = b1;                                  % one-step GMM
        case 2
            bs = ((DX'*Z)*(Z'*DX))\(DX'*Z)*(Z'*Dd);   % identity weight matrix
        case 3
            bs = zeros(k,1);
        case 4
            bs = 0.5*ones(k,1);
        case 5
            bs = -0.5*ones(k,1);
    end

    for iter = 1:maxit
        es = Dd - DX*bs;

        % Weight matrix: cluster-robust moment variance evaluated at bs
        if n == G
            % every observation is its own cluster
            ze = Z.*repmat(es,1,l);
            w = (ze'*ze)/n;
        else
            w = zeros(l,l);
            for g = 1:G
                zg = Z(idx(:,g),:);
                eg = es(idx(:,g));
                zeg = zg'*eg;
                w = w + zeg*zeg';
            end
            w = w/n;
        end

        b = ((DX'*Z)/w*(Z'*DX))\(DX'*Z)/w*(Z'*Dd);
        db = b - bs;
        if norm(db) < tolerance
            break
        end
        bhist_dem(iter,wc) = b(1);
        bhist_inc(iter,wc) = b(2);
        bs = b;

        if iter == maxit
            % No convergence: flag all results as missing and stop the
            % script, telling the user why instead of exiting silently.
            warning('iteratedGMM:noConvergence', ...
                'Iterated GMM did not converge within %d iterations (starting value %d); stopping.', ...
                maxit, wc);
            b = NaN;
            sr = NaN;
            V = NaN;
            sw = NaN;
            Vw = NaN;
            s0 = NaN;
            V0 = NaN;
            J = NaN;
            pv = NaN;
            return
        end
    end

end
% Residuals and average moments at the final iterated-GMM estimate.
% NOTE: b and w are whatever the starting-value loop above left in the
% workspace, i.e. the estimate and weight matrix of its last pass (wc = 5).
e = Dd - DX*b;
ze = Z.*(e*ones(1,l));   % each row of Z scaled by that row's residual
mu = mean(ze)';          % column means of ze, transposed


% Overidentification J test: n*mu'*inv(w)*mu with an upper-tail chi-square
% p-value on l-k degrees of freedom. When l <= k the statistic is set to 0
% and the p-value to 1.
if l>k
  J = (mu'/w*mu)*n;
  pv = chi2cdf(J,l-k,'upper');
else
  J = 0;
  pv = 1;
end

% Build H and Psi, the two ingredients of the sandwich V = H\Om/H' below.
% NOTE(review): presumably the misspecification-robust variance of the
% paper cited in the file header - confirm the formulas against the paper.
if n == G
    % Number of distinct cluster ids equals n: every observation is its own
    % cluster, so the cluster sums are written in vectorised form.
    ezwze = e.*(Z/w*Z'*e);
    H = (1/n^2)*(DX'*Z)/w*(Z'*DX)-(2/n^3)*(DX'*Z)/w*(Z'*(DX.*repmat(ezwze,1,k)));
    Psi = -(1/n)*(Z.*repmat(e,1,l))/w*(Z'*DX)-(1/n)*repmat(((e'*Z)/w*Z')',1,k).*DX+(1/n^2)*repmat(((e'*Z)/w*Z')',1,k).*((Z.*repmat(e.^2,1,l))/w*Z'*DX);
else
    % Clustered case: accumulate the same quantities cluster by cluster.
    % idx(:,g) is the logical row selector of cluster g built before the
    % iteration loop.
    Hpart = zeros(l,k);
    Psi = zeros(G,k);   % one row per cluster
    for g = 1:G
        zg = Z(idx(:,g),:);
        eg = e(idx(:,g));
        xg = DX(idx(:,g),:);
        % The second summand multiplies zg'*xg by a scalar, since
        % (e'*Z)/w*(zg'*eg) is 1-by-1.
        Hpart = Hpart + (zg'*eg)*(e'*Z)/w*(zg'*xg) + (zg'*xg)*((e'*Z)/w*(zg'*eg));

        % Row g of Psi: the k-by-1 expression in parentheses, transposed
        Psi(g,:) = (-(1/n)*(DX'*Z)/w*(zg'*eg)-(1/n)*(xg'*zg)/w*(Z'*e)+(1/n^2)*(DX'*Z)/w*(zg'*eg)*(eg'*zg)/w*(Z'*e))';
    end
    H = (1/n^2)*(DX'*Z)/w*(Z'*DX)-(1/n^3)*(DX'*Z)/w*Hpart;

end

% Outer-product variance of the rows of Psi
Om = (Psi'*Psi)/n;

% Sandwich variance from H and Om; sr is drawn as an error bar in the
% figures (scaled by 1.96).
V = H\Om/H';
sr = sqrt(diag(V/n));

% Same form as the two-step V2, with the iterated weight matrix w in place
% of Om2 (Q is defined in the one-step section); s0 is the other error bar.
V0 = inv(Q'/w*Q);
s0 = sqrt(diag(V0/n));

% Variant that keeps H on the outside but uses Q'/w*Q in the middle;
% sw is not used again in the visible part of this script.
Vw = H\(Q'/w*Q)/H';
sw = sqrt(diag(Vw/n));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate Figures
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Decide how many iteration rows of the coefficient histories to keep.
% Number of iterations can differ across initial starting values.
% For tc = 24 the reported number of iterations in Table 3 (24) is used;
% the actual number of iterations is 27 and the results are very similar.
% For every other tc the longest recorded (non-zero) path is kept.
if tc ~= 24
    dem_rows = max(sum(bhist_dem ~= 0));
    inc_rows = max(sum(bhist_inc ~= 0));
else
    dem_rows = 24;
    inc_rows = 24;
end
bhist_dem = bhist_dem(1:dem_rows,:);
bhist_inc = bhist_inc(1:inc_rows,:);

eff_it = size(bhist_dem,1);

% Last recorded (non-zero) value of each path, one entry per starting value
bhist_dem_it = zeros(1,5);
bhist_inc_it = zeros(1,5);
for wc = 1:5
    dem_path = nonzeros(bhist_dem(:,wc));
    bhist_dem_it(wc) = dem_path(end);
    inc_path = nonzeros(bhist_inc(:,wc));
    bhist_inc_it(wc) = inc_path(end);
end

% Paths that stopped early still hold zeros in their remaining rows; fill
% those rows with the path's last recorded value so all five lines span
% the full x-range.
row_copies = ones(eff_it,1);
bhist_dem = bhist_dem + bhist_dem_it(row_copies,:).*(bhist_dem==0);
bhist_inc = bhist_inc + bhist_inc_it(row_copies,:).*(bhist_inc==0);

% Error-bar half-widths: 1.96 standard errors (normal-approximation 95%
% interval) for the first two coefficients, using sr and s0 from above.
z_crit = 1.96;
SEr_dem = sr(1)*z_crit;
SE0_dem = s0(1)*z_crit;
SEr_inc = sr(2)*z_crit;
SE0_inc = s0(2)*z_crit;

% Figure 1 shows the paths of the first coefficient (bhist_dem), figure 2
% those of the second (bhist_inc). Both use the same recipe: a dashed zero
% line, one marked line per starting value, and two error bars at the last
% plotted iteration of the first path (s0-based first, then sr-based).
path_sets = {bhist_dem, bhist_inc};
half_width_0 = [SE0_dem, SE0_inc];
half_width_r = [SEr_dem, SEr_inc];
its = 1:eff_it;

for fig_no = 1:2
    P = path_sets{fig_no};

    figure(fig_no)
    yline(0,'--')
    hold on
    plot(its,P(:,1),'-o',its,P(:,2),'-x',its,P(:,3),'-+',its,P(:,4),'-*',its,P(:,5),'-d','LineWidth',1.5)
    errorbar(eff_it,P(eff_it,1),half_width_0(fig_no),'LineWidth',1.5)
    errorbar(eff_it,P(eff_it,1),half_width_r(fig_no),'LineWidth',1.5)
    xlabel('Iteration')
end

