# cache-balance.R

# 85 lines | 3.774 kB Blame History Raw Download
library(ggplot2)
library(scales)

# Input table; the code below uses its columns application, version, users,
# name, event and amount.
hits <- read.csv("../applications/output/hits-distribution.csv")

print("=== cache-balance ===")
# Opens the PDF device that collects the per-application plots; it is closed
# by the dev.off() call after the plotting loop.
pdf("cache-balance.pdf")

# The formula is passed positionally on purpose: R 4.2.0 renamed the first
# argument of aggregate()'s formula method from `formula` to `x`, so the named
# form `formula = ...` fails there, while the unnamed form works on every
# R version.
#
# Total amount per application/version/users/name/event.
slice <- aggregate(
	amount ~ application + version + users + name + event,
	data = hits, FUN = sum
)
# Collapse the events: one row per application/version/users/name.
temp <- aggregate(
	amount ~ application + version + users + name,
	data = slice, FUN = length
)
# Count the names that occur per application/version/users.
temp <- aggregate(
	name ~ application + version + users,
	data = temp, FUN = length
)
print("number of used recommendations")
# Explicit print(): the table is then also shown when the script is run through
# source(), where top-level values are not auto-printed.
print(reshape(
	temp,
	timevar = "users", idvar = c("application", "version"),
	direction = "wide"
))

# One page per application in the open PDF device. Hits are drawn with their
# summed amount; misses and additions are drawn with the sign flipped so they
# extend to the opposite side of the zero line.
balance_theme <- theme(
	text = element_text(size = 8),
	axis.text.x = element_text(angle = 45),
	legend.position = "bottom"
)
for (current_application in unique(slice$application)) {
	application_rows <- subset(slice, application == current_application)
	hit_rows <- subset(application_rows, event == "hit")
	negative_rows <- subset(application_rows, event %in% c("miss", "addition"))
	negative_rows$amount <- -negative_rows$amount

	balance_plot <- ggplot(
		application_rows,
		aes(x = factor(name), y = amount, fill = event)
	) +
		geom_bar(data = hit_rows, stat = "identity") +
		geom_bar(data = negative_rows, position = "dodge", stat = "identity") +
		scale_colour_grey(start = 0.2, end = 0.6) +
		scale_fill_grey(start = 0.2, end = 0.6) +
		scale_y_continuous(labels = scientific) +
		coord_flip() +
		facet_grid(version ~ users, scales = "free") +
		balance_theme +
		ggtitle(current_application)
	# Inside a loop a ggplot object is only rendered when printed explicitly.
	print(balance_plot)
}

# Close the PDF device opened before the first aggregation.
dev.off()

print("cache-balance-statistics")

print("application version users event median mean standard-deviation")
# One output line per combination of the distinct application, version, users
# and event values found in `slice`. Combinations without any rows still
# produce a line; their statistics are computed on an empty vector.
for (current_application in unique(slice$application)) {
	for (current_version in unique(slice$version)) {
		for (current_users in unique(slice$users)) {
			for (current_event in unique(slice$event)) {
				in_group <- slice$application == current_application &
					slice$version == current_version &
					slice$users == current_users &
					slice$event == current_event
				group_amounts <- slice$amount[in_group]
				print(paste(
					current_application, current_version, current_users, current_event,
					median(group_amounts), mean(group_amounts), sd(group_amounts)
				))
			}
		}
	}
}

print("cache-balance-agreggated")

# Total amount per application/version/users/event (names are summed away).
# The formula is passed positionally: R 4.2.0 renamed the first argument of
# aggregate()'s formula method from `formula` to `x`, so `formula = ...` fails
# on current R versions.
slice <- aggregate(
	amount ~ application + version + users + event,
	data = hits, FUN = sum
)

# `percentage` is each amount divided by the largest amount among the rows that
# share the same application, users and event (i.e. across versions), so the
# largest row of every group gets 1.
# drop = TRUE restricts the grouping to combinations that actually occur, which
# avoids calling max() on empty groups (-Inf plus a warning).
group_key <- interaction(
	slice$application, slice$users, slice$event,
	drop = TRUE
)
slice$percentage <- slice$amount / ave(slice$amount, group_key, FUN = max)

# Dodged bars of `percentage` per users value, one bar per version, faceted by
# event (rows) and application (columns). Each bar is labelled with its
# absolute amount, rotated by 90 degrees.
aggregated_theme <- theme(
	axis.text.x = element_text(angle = 0),
	text = element_text(size = 8),
	legend.position = "bottom"
)
amount_labels <- geom_text(
	aes(label = amount),
	colour = "black", stat = "identity", size = 3.0, angle = 90,
	position = position_dodge(width = 0.9), hjust = -0.05
)

plot <- ggplot(slice, aes(x = factor(users), y = percentage, fill = version)) +
	geom_bar(stat = "identity", position = "dodge") +
	scale_colour_grey(start = 0.2, end = 0.6) +
	scale_fill_grey(start = 0.2, end = 0.6) +
	amount_labels +
	# Upper limit above 1 leaves headroom for the rotated labels.
	ylim(NA, 1.6) +
	facet_grid(event ~ application, scales = "free") +
	aggregated_theme

# The plot built above is also the most recent one, i.e. what ggsave() would
# pick up by default; it is passed explicitly for readability.
ggsave("cache-balance-agreggated.pdf", plot = plot, width = 10, height = 5.5)