/*
 * Decompiled with CFR 0.152.
 */
package org.chocosolver.util.bandit;

import java.util.Arrays;
import org.chocosolver.util.bandit.Policy;

/**
 * UCB1 multi-armed bandit policy.
 *
 * <p>For every action the policy keeps the sum of the rewards received and a
 * play counter. Once the first {@code numActions} steps have been used to try
 * each action in index order, {@link #nextAction(int)} returns the action
 * maximising {@code payoffSums[i] / numPlays[i] + upperBound(step, numPlays[i])}.
 */
public class UCB1 implements Policy {
    /** Number of actions (arms) this policy chooses between. */
    int numActions;
    /** Sum of the rewards reported through {@link #update(int, double)}, per action. */
    double[] payoffSums;
    /** Play counter per action; {@link #init()} sets every entry to 1. */
    int[] numPlays;

    /**
     * Creates a policy over {@code numActions} actions with all statistics at zero.
     *
     * @param numActions number of actions; used as the length of the statistic arrays
     */
    public UCB1(int numActions) {
        this.numActions = numActions;
        this.payoffSums = new double[numActions];
        this.numPlays = new int[numActions];
    }

    /**
     * Resets the statistics: every play counter is set to 1 and every reward
     * sum to 0.
     *
     * <p>Starting the counters at 1 keeps the divisions in
     * {@link #nextAction(int)} well defined. The reward sums are cleared too so
     * that re-initialising a used instance does not pair stale sums with
     * freshly reset counters.
     */
    @Override
    public void init() {
        Arrays.fill(this.numPlays, 1);
        Arrays.fill(this.payoffSums, 0.0);
    }

    /**
     * Selects the action to play at the given step.
     *
     * <p>While {@code step < numActions} the step itself is returned, so each
     * action is tried once in index order. Afterwards the action with the
     * largest index value (empirical mean plus exploration bonus) is returned;
     * ties go to the lowest action index, and an action whose index value is
     * NaN is never preferred.
     *
     * @param step current step number (expected to start at 0 — TODO confirm
     *             against the {@code Policy} contract)
     * @return index of the selected action, or 0 when no action wins the comparison
     */
    @Override
    public int nextAction(int step) {
        if (step < this.numActions) {
            return step;
        }
        int best = 0;
        // Negative infinity (not Integer.MIN_VALUE) so that arbitrarily low
        // index values can still win the comparison below.
        double bestUcb = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < this.numActions; ++i) {
            double mean = this.payoffSums[i] / (double) this.numPlays[i];
            double ucbi = mean + this.upperBound(step, this.numPlays[i]);
            // Strict comparison: the first maximum encountered is kept.
            if (bestUcb < ucbi) {
                bestUcb = ucbi;
                best = i;
            }
        }
        return best;
    }

    /**
     * Exploration bonus {@code sqrt(2 * ln(t + 1) / n)}.
     *
     * @param t current step number
     * @param n play counter of the action being evaluated
     * @return the bonus added to the action's empirical mean
     */
    protected double upperBound(int t, int n) {
        return Math.sqrt(2.0 * Math.log(t + 1) / (double) n);
    }

    /**
     * Records the outcome of playing an action: increments its play counter
     * and adds the reward to its sum.
     *
     * @param action index of the action that was played
     * @param reward reward obtained for that play
     */
    @Override
    public void update(int action, double reward) {
        this.numPlays[action]++;
        this.payoffSums[action] += reward;
    }
}

