function [xs, ys, yp, ysig, nb]=EGO(xs,ys,xp,trPRS,nmax,range,noise)
% EGO  Efficient Global Optimization driven by a Kriging surrogate.
%      Iteratively fits a Kriging model to (xs,ys), maximizes Expected
%      Improvement (EI) to pick the next sample, evaluates EGOfn there,
%      and stops at nmax samples, a duplicate point, or negligible EI.
%
% INPUTS  xs    : ny x ndim sample points
%         ys    : ny x 1 samples
%         xp    : np x ndim prediction points
%         trPRS : polynomial order of trend function (0, 1, 2)
%         nmax  : maximum number of samples
%         range : range of design domain (row 1 = lower, row 2 = upper)
%         noise : 1 for noisy samples, 0 for no noise
% OUTPUTS (xs, ys) : sample pairs, including the added samples
%         yp    : np x 1 mean predictions
%         ysig  : np x 1 prediction standard deviations
%         nb    : # of coefficients in trend function
%
[ny,NDV]=size(xs);nvar=NDV+1;np=size(xp,1);      % Size of samples
meanx=mean(xs);stdx=std(xs);xs=(xs-meanx)./stdx; % Scaling sample locations
meany=mean(ys); stdy=std(ys);ys=(ys-meany)/stdy; % Scaling samples
xp=(xp-meanx)./stdx;                             % Scaling prediction points
range=(range-meanx)./stdx;                       % Scaling design space
%
fprintf(" ny yPBS Theta maxEI xNew yNew\n")
for loop=ny:nmax                                 % EGO loop
    nij=ny*(ny-1)/2; l=0;    % # of upper-triangular components of R
    diff=zeros(nij,NDV);     % Distance between samples
    ij=zeros(nij,2);         % Index of correlation matrix
    for i=1:ny-1
        l=l(end)+(1:ny-i);   % Running index into the packed upper triangle
        ij(l,:)=[repmat(i,ny-i,1),(i+1:ny)'];
        diff(l,:)=repmat(xs(i,:),ny-i,1)-xs(i+1:ny,:);
    end
    %
    % Estimating hyper-parameters using max. likelihood estimate
    par=struct('xs',xs,'ys',ys,'d',diff,'ij',ij,'trPRS',min(trPRS,2));
    hyp0 = [ny^(-1/4)*ones(NDV,1); min(0.1,noise)]; % Initial value (used by
                                                    % the commented fmincon /
                                                    % simulannealbnd options)
    lb=[0.01*ones(NDV,1);0];ub=[6*ones(NDV,1);noise];% Lower- & upper-bound
    % NOTE(review): opts comes from optimset; recent MATLAB releases expect
    % optimoptions('ga',...) for ga — verify against the target release.
    opts=optimset('Display','off','TolFun',1e-6,'DiffMinChange',0.001);
    %hypOpt=fmincon(@(hyp) fitKRG(hyp,par),hyp0,[],[],[],[],lb,ub,[],opts);
    hypOpt = ga(@(hyp) fitKRG(hyp,par),nvar,[],[],[],[],lb,ub,[],opts)';
    %hypOpt = simulannealbnd(@(hyp) fitKRG(hyp,par),hyp0,lb,ub,opts);
    hypmle=hypOpt(1:size(xs,2)); tau=max(0,hypOpt(end)); % Opt. hyperparam.
    %
    par.hyp=hypmle; par.yPBS=min(ys); % Add hyperparameter & PBS to database
    [~,fit]=fitKRG(hypOpt,par);       % Fitting Kriging surrogate
    %
    % Optimization using EI to find a new sample location
    xNew=ga(@(x) exImp(x,fit,par),NDV,[],[],[],[],range(1,:),range(2,:),[],opts);
    % Stopping criteria: budget reached, xNew duplicates a sample, or EI tiny
    if (loop==nmax||max(min(abs(xNew-xs)))<1E-3||exImp(xNew,fit,par)>-1E-3)
        fprintf('%4d %8.3g\n',loop,par.yPBS.*stdy+meany); break;
    end
    %
    fprintf('%4d %8.3g',loop,par.yPBS.*stdy+meany);
    fprintf(" %8.3g %8.3g",hypmle,-exImp(xNew,fit,par));
    xs=[xs;xNew]; ny=ny+1;                        % Adding a new sample
    yNew=(EGOfn(xNew.*stdx+meanx)-meany)/stdy; ys=[ys;yNew];
    fprintf(' %8.3g',xNew.*stdx+meanx); fprintf(' %8.3g\n',yNew*stdy+meany);
    %
end
% Predict at xp with the last fitted surrogate (fit, hypmle, tau from loop)
xi = trend(xp,trPRS); nb=size(xi,2);  % Trend basis vector at np points
xrep=repmat(xp',ny,1); xrep=transpose(reshape(xrep,NDV,np*ny));
d=xrep-repmat(xs,np,1);                  % Distance b/w predictions & samples
r = (1-tau)*exp(sum(-(d./hypmle').^2,2));% Correlation b/w preds & samples
r = reshape(r,ny,np);
yp=(xi*fit.beta + (fit.alpha*r)')*stdy+meany;  % Kriging predictions
%
rt=fit.C\r; q=fit.G\(xi'-fit.Ft'*rt); sigma2=fit.sigma2.*stdy.^2; % Process variance
ysig=sqrt(sigma2.*(1-sum(rt.^2,1)+sum(q.^2,1))');  % Prediction STD
xs=xs.*stdx+meanx;ys=ys*stdy+meany;                % Denormalizing samples
end
%
function EI=exImp(x,fit,par)
% EXIMP  NEGATIVE expected improvement at x (negated so it can be MINIMIZED
%        by ga); par.yPBS is the present best sample in scaled units.
xi = trend(x,par.trPRS);             % Trend basis vector at prediction points
d=x-par.xs;                          % Distance b/w predictions & samples
r = exp(sum(-(d./par.hyp').^2,2));   % Correlation b/w preds & samples
yhat=xi*fit.beta + (fit.alpha*r)';   % Kriging predictions
%
rt=fit.C\r; q=fit.G\(xi'-fit.Ft'*rt); sigma2=fit.sigma2; % Process variance
ysig=sqrt(sigma2.*(1-sum(rt.^2,1)+sum(q.^2,1))');        % Prediction STD
u = (par.yPBS-yhat)./ysig;
% Calculating negative expected improvement
EI = -(par.yPBS-yhat).*normcdf(u,0,1)-ysig.*normpdf(u,0,1);
end
%
function [negLogLK,fit]=fitKRG(xvar,par)
% FITKRG  Negative log-likelihood estimate for given hyper-parameter.
%
% INPUTS  xvar : Hyper-parameter vector [theta(1:ndim); tau]
%         par  : Model parameters structure
% OUTPUTS negLogLK : Negative log-likelihood function
%         fit      : Necessary parameters for function evaluation
%
hyp=xvar(1:size(par.xs,2)); tau=xvar(end);
H = trend(par.xs,par.trPRS);       % Design matrix
ny=size(par.ys,1); nb=size(H,2);   % ny: # of samples, nb: # of coeff.
r = (1-tau).*exp(sum(-(par.d./hyp(:)').^2, 2)); % Correlation b/w samples
idx=find(r>0); ii=(1:ny)';
mu=(10+ny)*eps;                    % Small diagonal to prevent singularity
R=sparse([par.ij(idx,1);ii],[par.ij(idx,2);ii],[r(idx);ones(ny,1)+mu]);
% NOTE(review): the second chol output (failure flag) is discarded; if R is
% not positive definite the code continues with a truncated factor — confirm
% this silent fallback is intended.
[C,~]=chol(R);                     % Cholesky decomposition R = C'*C
C=C';                              % Work with the lower-triangular factor
Ft=C\H; Yt=C\par.ys;
[Q,G]=qr(Ft,0);
b=G\(Q'*Yt);                       % Coefficients of trend function
rho=(Yt-Ft*b);                     % rho = Cinv*(y-Xb)
alpha=rho'/C;                      % alpha = (y-Xb)'*Rinv
detR=sum(log(full(diag(C)).^2));   % LOG-determinant of correlation matrix
sigma2=sum(rho.^2)/(ny-nb);        % Process variance
negLogLK=detR + (ny-nb)*log(sigma2); % Negative log-likelihood
fit = struct('sigma2',sigma2,'beta',b,'alpha',alpha,'C',C,'Ft',Ft,'G',G');
end
%
function H=trend(x,order)
% TREND  Basis vector of trend function.
%        order 0: constant; 1: linear; 2: full quadratic (with cross terms).
%
[m,n]=size(x);
if order==0, H=ones(m,1);           % Constant trend function basis
elseif order==1, H=[ones(m,1), x];  % Linear trend function basis
elseif order==2                     % Quadratic trend function basis
    nn=(n+1)*(n+2)/2; H=[ones(m,1) x zeros(m,nn-n-1)];
    j=n+1; q=n;
    for k=1:n
        % Columns for x_k .* [x_k, x_{k+1}, ..., x_n]
        H(:,j+(1:q))=repmat(x(:,k),1,q).*x(:,k:n); j=j+q; q=q-1;
    end
end
end