function [Dd,DX,Z,mem,n,G,ng] = arellano_bond(d,y,x,year,country)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Calculates data matrices for the Arellano-Bond Difference GMM estimator
% Accommodates unbalanced panels (missing values coded as NaN)
%
% Prepared for Hansen and Lee (2018) Inference for Iterated GMM Under
% Misspecification and Clustering
%
% Not tested for datasets other than Acemoglu Johnson Robinson Yared 2008 AER
% and Cervellati Jung Sunde Vischer 2014 AER so may contain errors
%
% For errors and/or comments please contact Seojeong Lee at
% jay.lee@unsw.edu.au
%
% The number of observations of the output variables (n) is smaller than the
% number of observations of the input variables (n0) because observations
% with missing values (in d, its two lags, or the two lags of y and x) are
% deleted
%
% Model: d_it = a*d_i(t-1) + b*y_i(t-1) + c*x_i(t-1) + mu_t + eta_i + u_it
% Differenced: Dd_it = a*Dd_i(t-1) + b*Dy_i(t-1) + c*Dx_i(t-1) + Dmu_t + Du_it
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Inputs:
%   d         n0x1 vector of dependent variable
%   y         n0x1 vector of main regressor
%   x         n0xm matrix of covariates (if none set x=[])
%   year      n0x1 vector of year (index j within cluster)
%   country   n0x1 vector of country code (cluster membership)
%
%   The input must be a rectangular panel: rows are stacked country by
%   country, every country block holds all T distinct years in ascending
%   order, and missing values are coded as NaN. Years need not be
%   consecutive integers; they are mapped to period indices 1..T.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Outputs:
%   Dd      nx1 vector of differenced dependent variable
%   DX      nx(2+m+p) matrix [Dd_i(t-1), Dy_i(t-1), Dx_i(t-1), differenced
%           year dummies for the last p periods], where p = max(ng)
%   Z       nxl matrix of instruments, l = p*(p+1)/2 + 1 + m + p, with rows
%           in the same order as Dd and DX
%   Z = [Z1;Z2;...ZG] where
%
%   Zi = d_i1   0       0       ...     0   0       y_i1      x_i1     1   0   0   ...
%        0      d_i1    d_i2    ...         0       y_i2      x_i2     -1  1   0   ...
%        ...    ...     ...     ...                 ...       ...      ... ..  ..  ...
%                               d_i1   ... d_i(T-2) y_i(T-2)  x_i(T-2)          -1  1
%
%   T is the largest (most recent in time) value across i (countries)
%   Rows of Zi belonging to deleted observations are dropped; missing
%   levels of d used as instruments are replaced by zeros
%
%   mem     nx1 vector of cluster membership (index 1..G into the sorted
%           unique country codes of the retained observations)
%   n       1x1 scalar of the sample size
%   G       1x1 scalar of the number of countries (clusters)
%   ng      Gx1 vector of cluster size
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

n0 = length(year);

% Work with column vectors throughout
d = d(:);
y = y(:);
year = year(:);
country = country(:);

if isempty(x)
    dimx = 0;
else
    dimx = size(x,2);
end

% Map calendar years to period indices 1..T so that spaced years
% (e.g. 1960, 1965, ...) are handled the same way as consecutive ones
[yrs,~,tidx] = unique(year);
tidx = tidx(:);
T = length(yrs);

% Two lags are needed, so fewer than three periods leave no usable rows
if T < 3
    error('arellano_bond:tooFewPeriods', ...
        'At least 3 distinct years are required (found %d).', T);
end

% The lags below are taken by shifting rows, which is only valid when the
% data are stacked in complete, year-sorted blocks of T rows per country
N = n0/T;
if N ~= floor(N) || ~isequal(tidx,repmat((1:T)',N,1))
    error('arellano_bond:inputLayout', ...
        'Input must stack complete blocks of T=%d years per country, sorted by year.', T);
end

% Within-country first and second lags (NaN where the lag does not exist).
% Shifting the stacked vector pulls values across country boundaries for the
% first one/two periods of each block, so those rows are reset to NaN.
noLag1 = tidx <= 1;
noLag2 = tidx <= 2;

d1 = [NaN; d(1:end-1)];
d1(noLag1) = NaN;
d2 = [NaN; NaN; d(1:end-2)];
d2(noLag2) = NaN;

y1 = [NaN; y(1:end-1)];
y1(noLag1) = NaN;
y2 = [NaN; NaN; y(1:end-2)];
y2(noLag2) = NaN;

if dimx > 0
    % Lag every covariate column (not only the first one)
    x1 = [NaN(1,dimx); x(1:end-1,:)];
    x1(noLag1,:) = NaN;
    x2 = [NaN(2,dimx); x(1:end-2,:)];
    x2(noLag2,:) = NaN;
else
    x1 = zeros(n0,0);
    x2 = zeros(n0,0);
end

% Year dummies and their first lag
yr = double(repmat(tidx,1,T) == repmat(1:T,n0,1));
yr1 = [NaN(1,T); yr(1:end-1,:)];
yr1(noLag1,:) = NaN;

% Keep observations with d, both lags of d, and both lags of y and x observed
keep = ~any(isnan([d d1 d2 y1 y2 x1 x2]),2);

code = country(keep);
t = tidx(keep);
[cc,~,mem] = unique(code);

G = length(cc);
n = length(code);
ng = accumarray(mem(:),1);
mxng = max(ng);

dk = d(keep);
d1 = d1(keep);
d2 = d2(keep);
y1 = y1(keep);
y2 = y2(keep);
x1 = x1(keep,:);
x2 = x2(keep,:);

% Differenced year dummies, restricted to the last mxng periods
Dyr = yr(keep,:) - yr1(keep,:);
Dyr = Dyr(:,T-mxng+1:end);

Dd = dk - d1;
DX = [d1-d2 y1-y2 x1-x2 Dyr];

% Levels of d per retained country (T x G), missing levels set to zero,
% restricted to the mxng periods ending at T-2 that serve as instruments
lev = zeros(T,G);
for g = 1:G
    lev(:,g) = d(country == cc(g));
end
lev(isnan(lev)) = 0;
lev = lev(T-1-mxng:T-2,:);

% Instrument matrix, filled cluster by cluster through the membership mask
% so that its rows always line up with Dd and DX
nlev = mxng*(mxng+1)/2;
Z = zeros(n,nlev+1+dimx+mxng);

for g = 1:G

    % Block j of row j holds the first j available levels of d
    Zg = zeros(mxng,nlev);
    for j = 1:mxng
        Zg(j,j*(j-1)/2+1:j*(j+1)/2) = lev(1:j,g)';
    end

    rows = (mem == g);
    % Period index relative to the last mxng periods selects the row of Zg
    pos = t(rows)-(T-mxng);

    Z(rows,:) = [Zg(pos,:) y2(rows) x2(rows,:) Dyr(rows,:)];
end



