library(ggplot2)
library(scales)

# Load the raw measurements that every later section works on.
# The path is relative, so it is resolved against the current working directory.
print("*** loading data ***")
hits_csv <- "../applications/output/hits-distribution.csv"
hits <- read.csv(file = hits_csv)

print("*** phase 1 -- non-aggregated analysis ***")
# Sum `amount` for every (application, version, users, name, event) combination.
# The formula is passed positionally on purpose: the first parameter of the
# formula method of aggregate() is called `formula` up to R 4.1.x and `x` from
# R 4.2.0 on, so naming it makes the call fail on one side of that change.
slice <- aggregate(amount ~ application + version + users + name + event, data = hits, FUN = sum)

print("=== number of used recommendations ===")
# Formulas are passed positionally: the first parameter of aggregate()'s formula
# method is named `formula` up to R 4.1.x and `x` from R 4.2.0 on, so naming it
# is not portable across R versions.
# Step 1: collapse `slice` to one row per (application, version, users, name);
# the value is the number of event rows that name has.
temp <- aggregate(amount ~ application + version + users + name, data = slice, FUN = length)
# Step 2: count those rows, i.e. the distinct names, per (application, version, users).
temp <- aggregate(name ~ application + version + users, data = temp, FUN = length)
# One row per (application, version), one column per users value. Printed
# explicitly so the table also appears when the script is run through source(),
# where top-level values are not auto-printed.
print(reshape(temp, timevar = "users", idvar = c("application", "version"), direction = "wide"))

print("=== CSV conversion-comparison ===")
# One row per (application, version, users, name); the summed amount of each
# event ends up in its own "amount.<event>" column.
conversions <- reshape(
	slice,
	direction = "wide",
	idvar = c("application", "version", "users", "name"),
	timevar = "event"
)
# Cells the reshape left as NA are set to 1 before the ratios are taken.
conversions[is.na(conversions)] <- 1
addition_counts <- conversions[, "amount.addition"]
miss_counts <- conversions[, "amount.miss"]
hit_counts <- conversions[, "amount.hit"]
conversions$add_per_miss <- addition_counts / miss_counts
conversions$hit_per_add <- hit_counts / addition_counts
# Only the two ratios are exported, so the raw per-event totals are removed.
for (raw_column in c("amount.hit", "amount.addition", "amount.miss")) {
	conversions[[raw_column]] <- NULL
}

# Spread the ratios once more: one pair of ratio columns per users value.
conversion_comparison <- reshape(
	conversions,
	direction = "wide",
	idvar = c("application", "version", "name"),
	timevar = "users"
)
# format() turns the values into text with 5 significant digits for the CSV.
conversion_comparison <- format(conversion_comparison, digits = 5)
write.csv(conversion_comparison, "conversion-comparison.csv")

print("=== cache-balance-statistics ===")
print("application version users event median mean standard-deviation")
# Walk over the full cross product of the observed applications, versions,
# user counts and events and print one summary line for each combination.
# Combinations with no rows in `slice` are still printed; their statistics are
# computed on an empty vector.
for (current_application in unique(slice$application)) {
	for (current_version in unique(slice$version)) {
		for (current_users in unique(slice$users)) {
			for (current_event in unique(slice$event)) {
				in_group <- slice$application == current_application &
					slice$version == current_version &
					slice$users == current_users &
					slice$event == current_event
				group_amounts <- slice[in_group, "amount"]
				summary_line <- paste(
					current_application, current_version, current_users, current_event,
					median(group_amounts), mean(group_amounts), sd(group_amounts)
				)
				print(summary_line)
			}
		}
	}
}

print("=== PDF cache-balance ===")
# All pages printed between pdf() and dev.off() go into cache-balance.pdf:
# one page per application, faceted by version (rows) and users (columns).
pdf("cache-balance.pdf")
for (current_application in unique(slice$application)) {
	application_rows <- subset(slice, application == current_application)
	hit_rows <- subset(application_rows, event == "hit")
	# Misses and additions are negated so their bars extend in the opposite
	# direction from the hit bars.
	negated_rows <- subset(application_rows, event %in% c("miss", "addition"))
	negated_rows$amount <- -negated_rows$amount

	balance_plot <- ggplot(application_rows, aes(x = factor(name), y = amount, fill = event))
	balance_plot <- balance_plot +
		geom_bar(data = hit_rows, stat = "identity") +
		geom_bar(data = negated_rows, stat = "identity", position = "dodge") +
		scale_colour_grey(start = 0.2, end = 0.6) +
		scale_fill_grey(start = 0.2, end = 0.6) +
		scale_y_continuous(labels = scientific) +
		coord_flip() +
		facet_grid(version ~ users, scales = "free") +
		theme(
			text = element_text(size = 8),
			axis.text.x = element_text(angle = 45),
			legend.position = "bottom"
		) +
		ggtitle(current_application)
	# Inside a loop a ggplot object is only drawn when printed explicitly.
	print(balance_plot)
}
dev.off()

print("*** phase 2 -- aggregated analysis ***")
# Sum `amount` per (application, version, users, event); unlike phase 1, `name`
# is not part of the grouping, so all names are added together.
# The formula is passed positionally on purpose: the first parameter of the
# formula method of aggregate() is called `formula` up to R 4.1.x and `x` from
# R 4.2.0 on, so naming it makes the call fail on one side of that change.
slice <- aggregate(amount ~ application + version + users + event, data = hits, FUN = sum)

print("=== CSV conversion-comparison-aggregated ===")
# One row per (application, version, users); the summed amount of each event
# ends up in its own "amount.<event>" column.
conversions <- reshape(
	slice,
	direction = "wide",
	idvar = c("application", "version", "users"),
	timevar = "event"
)
# Cells the reshape left as NA are set to 1 before the ratios are taken.
conversions[is.na(conversions)] <- 1
addition_counts <- conversions[, "amount.addition"]
miss_counts <- conversions[, "amount.miss"]
hit_counts <- conversions[, "amount.hit"]
conversions$add_per_miss <- addition_counts / miss_counts
conversions$hit_per_add <- hit_counts / addition_counts
# Only the two ratios are exported, so the raw per-event totals are removed.
for (raw_column in c("amount.hit", "amount.addition", "amount.miss")) {
	conversions[[raw_column]] <- NULL
}

# Spread the ratios once more: one pair of ratio columns per users value.
conversion_comparison <- reshape(
	conversions,
	direction = "wide",
	idvar = c("application", "version"),
	timevar = "users"
)
# format() turns the values into text with 5 significant digits for the CSV.
conversion_comparison <- format(conversion_comparison, digits = 5)
write.csv(conversion_comparison, "conversion-comparison-aggregated.csv")

print("=== PDF cache-balance-agreggated ===")
# Add a `percentage` column: each amount divided by the largest amount among the
# rows that share its application, users and event.
# Grouping is restricted to combinations that actually occur (drop = TRUE), so
# max() is never called on an empty selection -- looping over the full cross
# product would warn and yield -Inf for every combination missing from the data.
group_key <- interaction(slice$application, slice$users, slice$event, drop = TRUE)
slice$percentage <- slice$amount / ave(slice$amount, group_key, FUN = max)

# Facet strip labels: application identifiers as they appear in the data,
# mapped to their display names.
applications_labels <- c(
	azkaban = "Azkaban",
	cloudstore = "Cloudstore",
	keycloak = "Keycloak",
	killbill = "Killbill",
	petclinic = "Petclinic",
	shopizer = "Shopizer",
	thingsboard = "Thingsboard"
)
# Order of the versions in the legend and the short labels shown for them.
approach_levels <- c("developers", "aplcache", "memoizeit")
approach_labels <- c("DEV", "APL", "MEM")

# Bar height is the normalised `percentage`; the absolute `amount` is written
# next to each bar, which is why the y axis itself is hidden and its upper
# limit is raised to 1.6 to leave room for the rotated labels.
aggregated_plot <- ggplot(
	slice,
	aes(
		x = factor(users),
		y = percentage,
		fill = factor(version, levels = approach_levels, labels = approach_labels)
	)
)
aggregated_plot <- aggregated_plot +
	geom_bar(stat = "identity", position = "dodge") +
	scale_colour_grey(start = 0.2, end = 0.6) +
	scale_fill_grey(start = 0.2, end = 0.6) +
	geom_text(
		aes(label = amount),
		colour = "black",
		stat = "identity",
		size = 3.0,
		angle = 90,
		position = position_dodge(width = 0.9),
		hjust = -0.05
	) +
	ylim(NA, 1.6) +
	facet_grid(event ~ application, scales = "free", labeller = labeller(application = applications_labels)) +
	theme(
		axis.text.x = element_text(angle = 0),
		axis.title.y = element_blank(),
		axis.text.y = element_blank(),
		axis.ticks.y = element_blank(),
		text = element_text(size = 8),
		legend.position = "bottom"
	) +
	labs(x = "Number of Users", fill = "Approach")
# Hand the plot over explicitly instead of relying on ggsave()'s last_plot() default.
ggsave("cache-balance-agreggated.pdf", plot = aggregated_plot, width = 10, height = 5.5)
