// Sampling.java
//
// 188 lines | 5.944 kB Blame History Raw Download
package br.ufrgs.inf.prosoft.tigris.sampling;

import org.apache.commons.collections4.queue.CircularFifoQueue;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.ml.neuralnet.sofm.util.ExponentialDecayFunction;
import org.apache.commons.math3.stat.inference.TestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;

/**
 * Sampling decision component.
 *
 * <p>Records every observed {@link Granularity} in a population data set, decides per call whether
 * the observation should also enter the sample, and exposes statistical checks (margin of error,
 * minimum sample size, proportion equality and a one-sample t-test) that tell whether the sample
 * is ready to be released. As a {@link Runnable}, one {@link #run()} invocation performs a single
 * adaptation step.
 *
 * <p>NOTE(review): the mutable flags and data sets are read and written without synchronization;
 * confirm how callers share an instance between the sampling path and the thread executing
 * {@link #run()}.
 */
public class Sampling implements Runnable {

    /** Significance level of the one-sample t-test (0.05 corresponds to 95% confidence). */
    private static final double T_TEST_ALPHA = 0.05;

    /** Probability that one adaptation run switches to performance-baseline collection. */
    private static final double PERFORMANCE_BASELINE_PROBABILITY = 0.1;

    private boolean samplingEnabled = true;
    // While true, samplingDecision() rejects everything and baseline items are being collected.
    private boolean performanceBaselineEnabled = false;
    private double samplingRate = 0.5; // in percentage, 0 to 1
    private FrequencyDataSet population = new FrequencyDataSet();
    private FrequencyDataSet sample = new FrequencyDataSet();
    private PerformanceBaselineDataSet performanceBaselineDataSet = new PerformanceBaselineDataSet();
    // Bounded queue of capacity 4 holding the most recently completed baselines.
    private final Queue<PerformanceBaselineDataSet> lastFourPerformanceBaselineDataSets = new CircularFifoQueue<>(4);
    private final ExponentialDecayFunction decayingPrecision;

    Logger logger = LoggerFactory.getLogger(Sampling.class);

    /**
     * z confidence value, ex: 1.96 for 95%
     * p proportion of the population, 0.5 is default
     * e margin of error, ex: 0.05 for 5%
     */
    private final double z = 1.96, p = 0.5, e = 0.05;

    /**
     * Creates a sampler.
     *
     * @param initialSamplingRate  initial probability (0 to 1) that an observation is sampled
     * @param cycleLengthInSeconds number of seconds over which the precision decay function goes
     *                             from its initial value (100) to 0.01; must be strictly positive
     */
    public Sampling(double initialSamplingRate, int cycleLengthInSeconds) {
        samplingRate = initialSamplingRate;
        //3600 seconds = 1h
        decayingPrecision = new ExponentialDecayFunction(100, 0.01, cycleLengthInSeconds);
    }

    /**
     * Records the observation in the population and decides whether it is sampled.
     *
     * <p>The observation is always added to the population, even when the decision is negative.
     * It is sampled only when sampling is enabled, no performance baseline is being collected,
     * a Bernoulli trial with the current sampling rate succeeds, and the granularity is
     * under-represented in the sample compared to the population.
     *
     * @param granularity the granularity of the observed item
     * @return {@code true} if the item was added to the sample
     */
    public boolean samplingDecision(Granularity granularity) {
        population.addItem(granularity);

        if (performanceBaselineEnabled) {
            return false;
        }

        boolean decision = samplingEnabled
                && bernoulliTrial(samplingRate) // sampling rate evaluation
                // sample has not enough items of that granularity compared to the population
                && population.getProportion(granularity) > sample.getProportion(granularity);

        if (decision) {
            sample.addItem(granularity);
        }

        return decision;
    }

    /**
     * Sets the probability used by subsequent sampling decisions.
     *
     * @param samplingRate the new sampling rate, expected to be within 0 and 1
     */
    public void setSamplingRate(double samplingRate) {
        this.samplingRate = samplingRate;
    }

    /**
     * Tells whether the current sample is statistically representative of the population.
     *
     * @return {@code true} when the margin of error is below the threshold, the sample has reached
     *         the minimum sample size, the proportions match and the t-test does not reject
     *         equality of means
     */
    public boolean isReady() {
        return
                // margin of error is lower than threshold
                getSampleSizeErrorMargin() < e
                // the sample has (at least) the min sample size based on the population
                && sample.getTotalItems() >= getMinimumSampleSize()
                // proportion test
                && isSameProportion()
                // t-test
                && tTestEvaluation();
    }

    /**
     * Evaluates the precision decay function.
     *
     * @param timeInSeconds elapsed time, in seconds, passed to the decay function
     * @return the value of the decay function at that time
     */
    private double decayingConfidence(int timeInSeconds) {
        return decayingPrecision.value(timeInSeconds);
    }

    /**
     * One-sample t-test comparing the sample against the population mean.
     *
     * <p>{@code TestUtils.tTest(mu, stats, alpha)} returns {@code true} when the null hypothesis
     * "sample mean == mu" is REJECTED, so its result is negated here: the sample passes when
     * there is no significant evidence that its mean differs from the population mean.
     *
     * @return {@code true} if equality of means is not rejected at the 95% level
     */
    private boolean tTestEvaluation() {
        boolean meansDiffer = TestUtils.tTest(
                population.getAsDescriptiveStatistics().getMean(),
                sample.getAsDescriptiveStatistics(),
                T_TEST_ALPHA);
        return !meansDiffer;
    }

    /**
     * Checks that every granularity has the same proportion in the sample as in the population.
     *
     * @return {@code true} if all proportions are equal
     */
    public boolean isSameProportion() {
        // NOTE(review): exact equality on doubles; confirm whether a tolerance was intended.
        return population.getGranularities().stream()
                .allMatch(granularity -> population.getProportion(granularity) == sample.getProportion(granularity));
    }

    /**
     * @return the minimum sample size for the current population
     */
    public long getMinimumSampleSize() {
        return getMinimumSampleSize(population.getTotalItems());
    }

    /**
     * Computes the minimum sample size for a finite population of {@code n} items:
     * {@code n0 / (1 + (n0 - 1) / n)} with {@code n0 = z^2 * p * (1 - p) / e^2}.
     *
     * <p>The computation is done in floating point (integer division would truncate the
     * correction term) and the result is rounded up.
     *
     * @param n the population size
     * @return the minimum sample size, never larger than {@code n}; {@code 0} when {@code n <= 0}
     */
    public long getMinimumSampleSize(long n) {
        if (n <= 0) {
            return 0;
        }
        double infinitePopulationSize = (z * z * p * (1 - p)) / (e * e);
        double corrected = infinitePopulationSize / (1 + (infinitePopulationSize - 1) / n);
        // Mathematically corrected <= n; the clamp only protects against rounding error.
        return Math.min(n, (long) Math.ceil(corrected));
    }

    /**
     * Computes the margin of error of the current sample, including the finite population
     * correction {@code sqrt((N - n) / (N - 1))}.
     *
     * @return the margin of error; {@link Double#POSITIVE_INFINITY} when the sample is empty and
     *         {@code 0} when the sample covers the whole population
     */
    public double getSampleSizeErrorMargin() {
        long populationSize = population.getTotalItems();
        long sampleSize = sample.getTotalItems();

        if (sampleSize <= 0) {
            // Nothing sampled yet: the error is unbounded.
            return Double.POSITIVE_INFINITY;
        }
        if (sampleSize >= populationSize) {
            // The whole population was sampled; also avoids dividing by (N - 1) == 0.
            return 0.0;
        }

        double infinitePopulationMargin = Math.sqrt((z * z * p * (1 - p)) / sampleSize);
        double finitePopulationCorrection =
                Math.sqrt((double) (populationSize - sampleSize) / (populationSize - 1));
        return infinitePopulationMargin * finitePopulationCorrection;
    }

    /** Disables sampling: subsequent sampling decisions are negative. */
    public void disable() {
        samplingEnabled = false;
    }

    /** Enables sampling. */
    public void enable() {
        samplingEnabled = true;
    }

    /**
     * @return {@code true} if sampling is enabled
     */
    public boolean isSamplingEnabled() {
        return samplingEnabled;
    }

    /**
     * @return the current sampling rate
     */
    public double getSamplingRate() {
        return samplingRate;
    }

    /** Hook for the start of a monitoring cycle; currently a no-op. */
    public void startMonitoringCycle() {

    }

    /** Hook for the end of a monitoring cycle; currently a no-op. */
    public void endMonitoringCycle() {

    }

    /**
     * Randomly decides whether a performance baseline should be collected.
     *
     * @return {@code true} with probability 0.1
     */
    public boolean shouldCollectPerformanceBaseline() {
        return bernoulliTrial(PERFORMANCE_BASELINE_PROBABILITY);
    }

    /** Adapts the sampling rate; not implemented yet. */
    public void adaptSamplingRate() {
        //TODO
    }

    /**
     * Performs one adaptation step: releases and resets the sample when it is ready, otherwise
     * either switches to performance-baseline collection or adapts the sampling rate.
     */
    @Override
    public void run() {
        logger.info("Running sampling adaptation.");

        if (isReady()) {
            logger.info("Sample is ready, releasing for analysis and resetting...");
            //TODO
            releaseForAnalysis();
            reset();
            return;
        }
        if (shouldCollectPerformanceBaseline()) {
            enablePerformanceBaseline();
            return;
        }
        adaptSamplingRate();
    }

    /** Switches to performance-baseline collection. */
    private void enablePerformanceBaseline() {
        performanceBaselineEnabled = true;
    }

    /** Releases the current sample for analysis; currently a no-op. */
    public void releaseForAnalysis() {
    }

    /** Resets the sampling state; currently a no-op. */
    public void reset() {
    }

    /**
     * @return {@code true} while a performance baseline is being collected
     */
    public boolean isPerformanceBaselineEnabled() {
        return performanceBaselineEnabled;
    }

    /**
     * Adds an execution time to the performance baseline under collection and, once the baseline
     * has enough items, stores it among the last four baselines, starts a fresh one and
     * re-enables sampling.
     *
     * @param granularity   the granularity of the measured item
     * @param executionTime the measured execution time
     */
    public void addPerformanceBaselineItem(Granularity granularity, long executionTime) {
        this.performanceBaselineDataSet.addItem(granularity, executionTime);

        // NOTE(review): the "population" passed here is the baseline's own item count, and the
        // minimum sample size of a population of N >= 1 items never exceeds N, so this condition
        // holds as soon as one item is present. Confirm the intended population size.
        if (this.performanceBaselineDataSet.getTotalItems() >=
                getMinimumSampleSize(this.performanceBaselineDataSet.getTotalItems())) {
            enable();
            this.performanceBaselineEnabled = false;
            lastFourPerformanceBaselineDataSets.add(this.performanceBaselineDataSet);
            this.performanceBaselineDataSet = new PerformanceBaselineDataSet();
        }
    }

    /**
     * Draws a single Bernoulli trial.
     *
     * @param successProbability probability of success, within 0 and 1
     * @return {@code true} if the trial succeeded
     */
    private static boolean bernoulliTrial(double successProbability) {
        return new BinomialDistribution(1, successProbability).sample() == 1;
    }
}