library(ggplot2)
library(scales)

# Input table. The rest of this script reads its `application`, `version`,
# `users`, `name` and `amount` columns.
parameters <- read.csv(file = "../applications/output/uncached-parameters.csv")

print("=== inputs discarted by APLCache among recommendations ===")
# Route every plot printed from here on into a PDF in the working directory.
pdf(file = "parameters-balance.pdf")

# Print median, mean and standard deviation of `amount` for every
# (application, version, users) combination.
#
# The loops walk the full cross product of the distinct values, so a
# combination with no rows in `parameters` is still printed; its statistics
# are computed over an empty vector.
iter_applications <- unique(parameters$application)
iter_versions <- unique(parameters$version)
iter_users <- unique(parameters$users)
print("application, version, user, median, mean, standard-deviation")
for (iter_application in iter_applications) {
	for (iter_version in iter_versions) {
		for (iter_user in iter_users) {
			# Build the row filter once and reuse the selected amounts for all
			# three statistics instead of re-filtering the table each time.
			in_group <- parameters$application == iter_application &
				parameters$version == iter_version &
				parameters$users == iter_user
			group_amounts <- parameters[in_group, "amount"]

			group_median <- median(group_amounts)
			group_mean <- mean(group_amounts)
			group_sd <- sd(group_amounts)
			print(paste(iter_application, iter_version, iter_user, group_median, group_mean, group_sd))
		}
	}
}

print("distinct number of inputs discarted per recommendation")
# Number of rows per (application, version, users, name) group.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method was renamed from `formula` to `x` in R 4.2.0, so naming it
# works on only one side of that release.
slice <- aggregate(
	amount ~ application + version + users + name,
	data = parameters,
	FUN = length
)
# Pivot to one column per `users` value. print() is explicit so the table is
# shown even when the script is run through source(), which does not
# auto-print top-level values.
print(reshape(
	slice,
	timevar = "users",
	idvar = c("application", "version", "name"),
	direction = "wide"
))
print("distinct number of inputs discarted aggregated")
# Sum the per-name counts into one count per (application, version, users).
temp <- aggregate(
	amount ~ application + version + users,
	data = slice,
	FUN = sum
)
print(reshape(
	temp,
	timevar = "users",
	idvar = c("application", "version"),
	direction = "wide"
))

print("occurrences of discarted inputs per recommendation")
# Total `amount` per (application, version, users, name) group.
# The formula is passed positionally: the first argument of aggregate()'s
# formula method was renamed from `formula` to `x` in R 4.2.0, so naming it
# works on only one side of that release.
# NOTE: `slice` is read again by the plotting code further down.
slice <- aggregate(
	amount ~ application + version + users + name,
	data = parameters,
	FUN = sum
)
# Pivot to one column per `users` value. print() is explicit so the table is
# shown even when the script is run through source(), which does not
# auto-print top-level values.
print(reshape(
	slice,
	timevar = "users",
	idvar = c("application", "version", "name"),
	direction = "wide"
))
print("occurrences of discarted inputs aggregated")
# Sum the per-name totals into one total per (application, version, users).
temp <- aggregate(
	amount ~ application + version + users,
	data = slice,
	FUN = sum
)
print(reshape(
	temp,
	timevar = "users",
	idvar = c("application", "version"),
	direction = "wide"
))

# Horizontal dodged bar chart of `amount` for each `name`, one bar per `users`
# value, laid out as a grid of panels (rows: version, columns: application).
# Draws from whatever `slice` holds at this point in the script.
plot <- ggplot(
	data = slice,
	mapping = aes(x = factor(name), y = amount, fill = factor(users))
) +
	geom_bar(stat = "identity", position = "dodge") +
	# Flip so the `name` labels run down the vertical axis.
	coord_flip() +
	facet_grid(version ~ application, scales = "free") +
	scale_colour_grey(start = 0.2, end = 0.6) +
	scale_fill_grey(start = 0.2, end = 0.6) +
	scale_y_continuous(labels = scientific) +
	theme(
		text = element_text(size = 8),
		axis.text.x = element_text(angle = 90),
		legend.position = "bottom"
	)
print(plot)

# Collapse `slice` to one total `amount` per (application, version, users).
# The formula is passed positionally: the first argument of aggregate()'s
# formula method was renamed from `formula` to `x` in R 4.2.0, so naming it
# works on only one side of that release.
slice <- aggregate(
	amount ~ application + version + users,
	data = slice,
	FUN = sum
)

# Dodged bar chart of the totals: x = users, one bar per version, one panel
# per application, with each bar labelled by its value (rotated 90 degrees).
plot <- ggplot(slice, aes(x = factor(users), y = amount, fill = version)) +
	geom_bar(stat = "identity", position = "dodge") +
	scale_colour_grey(start = 0.2, end = 0.6) +
	scale_fill_grey(start = 0.2, end = 0.6) +
	geom_text(aes(label = amount), colour = "black", stat = "identity", size = 3.0, angle = 90, position = position_dodge(width = 0.9), hjust = -0.05) +
	facet_wrap(~ application, scales = "free") +
	theme(axis.text.x = element_text(angle = 0)) +
	theme(text = element_text(size = 8)) +
	theme(legend.position = "bottom")
print(plot)

# Close the PDF device opened near the top of the script so the file is
# finalised even when the script is source()d into a long-lived session.
# invisible() suppresses the device-name echo that dev.off() returns.
invisible(dev.off())
