Cross-validation
Cross-validation is performed by removing one radar at the time and estimating bird density at the exact same location (and time). Comparing the estimation (and uncertainty estimated) to the true value allows to assess the model.
1. Loading
clear all; load('../1-Cleaning/data/dc_corr.mat'); load('../2-Inference/data/Density_inference.mat'); load coastlines; addpath('functions\')
dclat = [dc.lat]'; dclon = [dc.lon]'; dcname={dc.name};
2. Model
2.1 Multi-night scale
Compute the full covariance matrix
Dtime=squareform(pdist(multi.T(~multi.isnan)));
Ddist=squareform(pdist([dclat(multi.R(~multi.isnan)) dclon(multi.R(~multi.isnan))],@lldistkm));
Caa = multi.cov.C(Ddist,Dtime);
[~,tmp] = meshgrid(multi.t,1:numel(multi.r));
cvmulti.i_r=tmp(~multi.isnan);
cvmulti.Mn_est=nan(size(multi.Mn));
cvmulti.Mn_sig=nan(size(multi.Mn));
For each radar, ignore all its data
% Remove also radars within 100km
%sim(Dist_km_sf(i,:)<100)=false;
lambda = Caa(neigh,neigh) \ Caa(neigh,sim);
cvmulti.Mn_est(sim) = lambda' * multi.Mn(neigh);
cvmulti.Mn_sig(sim) = sqrt(sum(multi.cov.parm(1:3)) - diag(lambda' * Caa(neigh,sim)));
Add the trend back
cvmulti.M_est=nan(size(multi.M)); cvmulti.M_sig=nan(size(multi.M));
cvmulti.M_est(~multi.isnan) = cvmulti.Mn_est+dclat(multi.R(~multi.isnan))*multi.w(2)+multi.w(1);
cvmulti.M_sig(~multi.isnan) = cvmulti.Mn_sig;
2.2 Intra-night scale
Build the covariance function from the distance
Ddist_sm = squareform(pdist([dclat dclon], @lldistkm));
id = find(data.day==i_t);
d.Ddist = intrahape(Ddist_sm(data.radar(id),data.radar(id)),[],1);
d.Dtime = intrahape(squareform(pdist(data.time(id))),[],1);
d.Did1 = repmat(id,numel(id),1);
d.Did2 = repelem(id,numel(id));
C_intra = sparse(D.Did1,D.Did2, intra.cov.C(D.Ddist, D.Dtime));
Estimate the value removing all values of the radar.
cvintra.In_est=nan(size(intra.In));
cvintra.In_sig=nan(size(intra.In));
id_t=find(i_t==data.day);
id_r = i_r==data.radar(id_t);
Lambda = C_intra(id_t(~id_r),id_t(~id_r))\C_intra(id_t(~id_r),id_t(id_r));
cvintra.In_est(id_t(id_r)) = Lambda' * intra.In(id_t(~id_r));
cvintra.In_sig(id_t(id_r)) = sqrt(sum(intra.cov.parm(1:3)) - sum(Lambda.*C_intra(id_t(~id_r),id_t(id_r))));
Add the curve trend and variance normalization back
cvintra.I_est = cvintra.In_est .* sqrt(bsxfun(@power,data.NNT,0:intra.bpo)*intra.b) + bsxfun(@power,data.NNT,0:intra.apo)*intra.a;
cvintra.I_sig = sqrt(cvintra.In_sig.^2 .* (bsxfun(@power,data.NNT,0:intra.bpo)*intra.b));
2.3 Reassemble
cv.denst_est = cvmulti.M_est(data.dayradar) + cvintra.I_est;
cv.denst_sig = sqrt(cvmulti.M_sig(data.dayradar).^2 + cvintra.I_sig.^2);
Transform back to the original space. Value below zero cannot be converted.
disp(['WARNING: Remove ' num2str(sum(cv.denst_est<0)) ' value(s) because <0'])
cv.denst_est(cv.denst_est<0)=nan;
cv.dens_est = (cv.denst_est).^(1/pow);
Visual figure
figure('position',[0 0 1000 400]); hold on;
plot(data.time(id),data.denst(id),'k');
plot(data.time(id),cv.denst_est(id),'r')
plot(data.time(id),cv.denst_est(id)-cv.denst_sig(id),'--r')
plot(data.time(id),cv.denst_est(id)+cv.denst_sig(id),'--r');
datetick('x');axis tight; xlabel('Date'); ylabel('Bird densities [bird/km^2]'); legend('Data','Estimation','Uncertainty'); title(dcname(i_d))
2.4 Assessment of the model
Root-mean-squared error
cv.rmse_denst = sqrt(nanmean( (cv.denst_est-data.denst ).^2 ));
cv.rmse_dens = sqrt(nanmean( (cv.dens_est-data.dens ).^2 ));
disp(['RMSE: ' num2str(cv.rmse_dens) ' (bird/km^2)'])
RMSE: 17.6655 (bird/km^2)
The normalized error of estimation measures the misfit of the estimation divided by the standard deviation of estimation.
cv.nee = (data.denst-cv.denst_est)./cv.denst_sig;
Histogram of normalized error of estimation. Ideally, the histogram should be a standard Gaussian (zero-mean variance of one)
figure('position',[0 0 1000 400]); histogram( cv.nee ,'Normalization','pdf');
hold on; plot(-4:.1:4,normpdf(-4:.1:4),'linewidth',2)
legend(['Normalized error of kriging: mean=' num2str(nanmean(cv.nee)) ' and var=' num2str(nanvar(cv.nee))]);
QQ-plot
figure('position',[0 0 400 400]); hold on
plot(data.denst,cv.denst_est,'.k');
% coef=data.dens(~isnan(cv.dens_est)) \ cv.dens_est(~isnan(cv.dens_est));
plot([min(data.denst) max(data.denst)],[min(data.denst) max(data.denst)],'-r','LineWidth',2);
axis([min(data.denst) max(data.denst) min(data.denst) max(data.denst)])
Normalized error per radar
cv_nee_avg=splitapply(@mean,cv.nee,data.radar);
cv_nee_std=splitapply(@std,cv.nee,data.radar);
Illustration
figure('position',[0 0 1000 400]);
subplot(1,2,1); hold on; h=worldmap([min(dclat) max(dclat)], [min(dclon) max(dclon)]);
setm(h,'frame','on','grid','off'); set(findall(h,'Tag','MLabel'),'visible','off'); set(findall(h,'Tag','PLabel'),'visible','off')
geoshow('landareas.shp', 'FaceColor', [215 215 215]./255); geoshow('worldrivers.shp','Color', 'blue')
scatterm(dclat,dclon,100,cv_nee_avg,'filled','MarkerEdgeColor','k'); title('Mean normalized error of estimation'); colorbar;
subplot(1,2,2); hold on; h=worldmap([min(dclat) max(dclat)], [min(dclon) max(dclon)]);
setm(h,'frame','on','grid','off'); set(findall(h,'Tag','MLabel'),'visible','off'); set(findall(h,'Tag','PLabel'),'visible','off')
geoshow('landareas.shp', 'FaceColor', [215 215 215]./255); geoshow('worldrivers.shp','Color', 'blue')
scatterm(dclat,dclon,100,cv_nee_std,'filled','MarkerEdgeColor','k'); title('Std dev normalized error of estimation'); colorbar;
3. Comparing to scatter interpolation
Define the ratio distance/time for the interpolation (eg. 1 day = 1degree lat/lon). This value as been optimized with the data (see Appendix below).
3.1 Estimation
interpo.nn=nan(size(data.dens)); interpo.l=nan(size(data.dens)); interpo.n=nan(size(data.dens));
Fnn = scatteredInterpolant(data.lat(~sim),data.lon(~sim),data.time(~sim).*ratio_coord_time,data.dens(~sim),'nearest');
Fl = scatteredInterpolant(data.lat(~sim),data.lon(~sim),data.time(~sim).*ratio_coord_time,data.dens(~sim),'linear');
Fn = scatteredInterpolant(data.lat(~sim),data.lon(~sim),data.time(~sim).*ratio_coord_time,data.dens(~sim),'natural');
interpo.nn(sim) = Fnn(data.lat(sim),data.lon(sim),data.time(sim).*ratio_coord_time);
interpo.l(sim) = Fl(data.lat(sim),data.lon(sim),data.time(sim).*ratio_coord_time);
interpo.n(sim) = Fn(data.lat(sim),data.lon(sim),data.time(sim).*ratio_coord_time);
Figure
Visual figure
figure('position',[0 0 1000 400]); hold on;
plot(data.time(id),data.dens(id),'k');
plot(data.time(id),cv.dens_est(id))
plot(data.time(id),interpo.nn(id));
plot(data.time(id),interpo.l(id));
plot(data.time(id),interpo.n(id));
datetick('x');axis tight; xlabel('Date'); ylabel('Bird densities [bird/km^2]'); legend('Data','Estimation','nearst','linear','natural');
3.2 Assessement
Histogram of estimation error with RMSE and abs error in legend
cv.rmse_nn =sqrt(nanmean((interpo.nn - data.dens).^2));
cv.rmse_l =sqrt(nanmean((interpo.l - data.dens).^2));
cv.rmse_n =sqrt(nanmean((interpo.n - data.dens).^2));
cv.r2_dens = 1-sum((cv.dens_est-data.dens ).^2)/sum((data.dens-mean(data.dens) ).^2);
cv.r2_nn = 1-sum((interpo.nn-data.dens).^2)/sum((data.dens-mean(data.dens) ).^2);
cv.r2_l = 1-sum((interpo.l-data.dens).^2)/sum((data.dens-mean(data.dens) ).^2);
cv.r2_n = 1-sum((interpo.n-data.dens).^2)/sum((data.dens-mean(data.dens) ).^2);
Display
cv
cv =
denst_est: [50436×1 double]
denst_sig: [50436×1 double]
dens_est: [50436×1 double]
rmse_denst: 0.1403
rmse_dens: 17.6655
nee: [50436×1 double]
rmse_nn: 23.1713
rmse_l: 18.4955
rmse_n: 17.9504
r2_dens: 0.5855
r2_nn: 0.2869
r2_l: 0.5456
r2_n: 0.5720
Histogram
figure('position',[0 0 1000 400]); hold on;
histogram(cv.dens_est - data.dens,'DisplayStyle', 'stairs','DisplayName',['EST-RMSE: ' num2str(cv.rmse_dens) '|R^2: ' num2str(cv.r2_dens)]);
histogram(interpo.nn - data.dens,'DisplayStyle', 'stairs','DisplayName',['NN-RMSE: ' num2str(cv.rmse_nn) '|R^2: ' num2str(cv.r2_nn)]);
histogram(interpo.l - data.dens,'DisplayStyle', 'stairs','DisplayName',['L-RMSE: ' num2str(cv.rmse_l) '|R^2: ' num2str(cv.r2_l)]);
histogram(interpo.n - data.dens,'DisplayStyle', 'stairs','DisplayName',['N-RMSE: ' num2str(cv.rmse_n) '|R^2: ' num2str(cv.r2_n)]);
Histogram of transoformed estimation error with RMSE and abs error in legend
figure('position',[0 0 1000 400]); hold on;
histogram(cv.denst_est - data.denst,'DisplayStyle', 'stairs','DisplayName',['EST-RMSE: ' num2str(sqrt(nanmean((cv.denst_est - data.denst).^2))) '|abs: ' num2str(nanmean(abs(cv.denst_est - data.denst)))]);
histogram(max(interpo.nn,0).^pow - data.denst,'DisplayStyle', 'stairs','DisplayName',['NN-RMSE: ' num2str(sqrt(nanmean((max(interpo.nn,0).^pow - data.denst).^2))) '|abs: ' num2str(nanmean(abs(max(interpo.nn,0).^pow - data.denst)))]);
histogram(max(interpo.l,0).^pow - data.denst,'DisplayStyle', 'stairs','DisplayName',['L-RMSE: ' num2str(sqrt(nanmean((max(interpo.l,0).^pow - data.denst).^2))) '|abs: ' num2str(nanmean(abs(max(interpo.l,0).^pow - data.denst)))]);
histogram(max(interpo.n,0).^pow - data.denst,'DisplayStyle', 'stairs','DisplayName',['N-RMSE: ' num2str(sqrt(nanmean((max(interpo.n,0).^pow - data.denst).^2))) '|abs: ' num2str(nanmean(abs(max(interpo.n,0).^pow - data.denst)))]);
figure('position',[0 0 1000 400]);
subplot(1,2,1); hold on; h=worldmap([min(dclat) max(dclat)], [min(dclon) max(dclon)]);
setm(h,'frame','on','grid','off'); set(findall(h,'Tag','MLabel'),'visible','off'); set(findall(h,'Tag','PLabel'),'visible','off')
geoshow('landareas.shp', 'FaceColor', [215 215 215]./255); geoshow('worldrivers.shp','Color', 'blue')
scatterm(dclat,dclon,100,sqrt(splitapply(@nanmean,(cv.dens_est - data.dens).^2,data.radar)),'filled','MarkerEdgeColor','k');
caxis([0 40]); title('RMSE of model'); colorbar;
subplot(1,2,2); hold on; h=worldmap([min(dclat) max(dclat)], [min(dclon) max(dclon)]);
setm(h,'frame','on','grid','off'); set(findall(h,'Tag','MLabel'),'visible','off'); set(findall(h,'Tag','PLabel'),'visible','off')
geoshow('landareas.shp', 'FaceColor', [215 215 215]./255); geoshow('worldrivers.shp','Color', 'blue')
scatterm(dclat,dclon,100,sqrt(splitapply(@nanmean,(interpo.nn - data.dens).^2,data.radar)),'filled','MarkerEdgeColor','k');
caxis([0 40]); title('RMSE of nearest-neighbor'); colorbar;
Ddist_sm(Ddist_sm==0)=Inf;
plot(min(Ddist_sm)',sqrt(splitapply(@nanmean,(cv.dens_est - data.dens).^2,data.radar))-sqrt(splitapply(@nanmean,(interpo.nn - data.dens).^2,data.radar)),'.k');
xlabel('Distance to closest radar'); ylabel('RMSE model- RMSE nn')
7. Save
save('data/Density_crossValidation.mat','cv','cvmulti','cvintra','interpo')
Appendix: Calibrate ratio_coord_time
ratio_coord_time = 1000 *[ 0.0184 0.1006 -0.0024 -0.0007 -0.0013 0.0154 0.5188 0.0236 0.3456 0.0125 0.1205 0.1871 0.0738 0.1084 0.0266 0.0090 0.0036 0.6481 -0.0017 0.0131 0.3464 0.0130 0.9928 0.0130 0.0899 0.0130 0.0130 0.2673 0.0181 0.0202 2.4713 0.0571 0.5818 0.5503 0.0130 0.1414 0.0839 0.0040 0.1486 0.0179 0.0130 0.0130 0.4111 0.0040 0.0130 0.0130 0.1771 -0.0013 0.1080 0.0108 0.0235 0.0130 0.0074 0.0148 0.0332 -0.0013 0.0130 -0.0013 0.0091 0.0039 -0.0013 0.0388 0.0130 -0.0013 0.0104 0.0052 0.0039 -0.0013 0.0123];
id1=find(D(i_d,:)>distlelim);
id2=ismember(data.i_d,id1);
interpolafx =@(x) griddata(data.lat(id2),data.lon(id2),data.time(id2).*x,data.dens(id2),data.lat(id3),data.lon(id3),data.time(id3).*x,'natural');
rmse = @(x) nansum( (interpolafx(x)-data.dens(id3)).^2 );
% options = optimset('PlotFcns',{@optimplotfval,@optimplotx},'MaxFunEvals',100);
options = optimset('MaxFunEvals',30);
ratio_coord_time(i_d) = fminsearch(rmse,ratio_coord_time(i_d),options)
% ratio_coord_time = fminbnd(rmse,0,1,options);
Figure
figure; histogram(ratio_coord_time(ratio_coord_time>0 & ratio_coord_time<100),100); xlabel('ratio_coord_time');