## totalRewards = simulate_epsg_n_bandit(n, eps, gamecnt, rollcnt)
##
## Simulate an epsilon-greedy agent on randomly generated n-armed bandits.
##
## Inputs:
##   n        number of arms of each bandit
##   eps      probability of taking an exploration (uniformly random) step
##   gamecnt  number of independent bandit games to play
##   rollcnt  number of pulls performed in every game
##
## Output:
##   totalRewards  1 x rollcnt row vector; element i is the sum, over all
##                 games, of the reward obtained at pull i (divide by
##                 gamecnt to get the average reward per step)
function totalRewards = simulate_epsg_n_bandit(n, eps, gamecnt, rollcnt)
  # accumulator of the rewards collected at every step, summed over all games
  totalRewards = zeros(1, rollcnt);

  for k = 1:gamecnt
    # generate the true action values [q_*(a)] from a normal distribution
    # of mean 0 and variance 1
    q = randn(1, n);

    # reset the per-game estimates [Q_t(a)] and their bookkeeping
    Qavg = zeros(1, n);
    Qsum = zeros(1, n);
    nPulls = zeros(1, n);

    for i = 1:rollcnt
      if (rand() <= 1 - eps)
        # exploitation step: pick uniformly at random among the arms that
        # share the highest estimate, so ties do not always favor the
        # lowest-indexed arm
        idsQstep = find(Qavg == max(Qavg));
        iQstep = idsQstep(randi(numel(idsQstep)));
      else
        # exploration step: pick any arm uniformly at random
        iQstep = randi(n);
      endif

      # draw the noisy reward and integrate it into the knowledge base
      Rk = q(iQstep) + randn();
      totalRewards(i) = totalRewards(i) + Rk;
      Qsum(iQstep) = Qsum(iQstep) + Rk;
      nPulls(iQstep) = nPulls(iQstep) + 1;
      # sample-average estimate of the pulled arm's value
      Qavg(iQstep) = Qsum(iQstep) / nPulls(iQstep);
    endfor
  endfor
endfunction