tfcache-comparison

Improved readability; the aggregated plots are now saved with ggsave and drawn as normalised, dodged bar plots.

2/19/2020 3:31:40 PM

Details

diff --git a/analysis/cache-balance.R b/analysis/cache-balance.R
index fdaacc7..ba2bc56 100644
--- a/analysis/cache-balance.R
+++ b/analysis/cache-balance.R
@@ -1,5 +1,4 @@
 library(ggplot2)
-library(plyr)
 library(scales)
 
 hits <- read.csv("../applications/output/hits-distribution.csv")
@@ -10,24 +9,72 @@ pdf("cache-balance.pdf")
 slice <- aggregate(formula = amount~application+version+users+name+event, data = hits, FUN = sum)
 
 iter_applications = unique(slice$application)
+for (iter_application in iter_applications) {
+	slice_application <- subset(slice, application == iter_application)
+	misses <- subset(slice_application, event == "miss" | event == "addition")
+	misses[, "amount"] <- -misses[, "amount"]
+	plot <- ggplot(slice_application, aes(x = factor(name), y = amount, fill = event)) +
+		geom_bar(data = subset(slice_application, event == "hit"), stat = "identity") +
+		geom_bar(data = misses, position = "dodge", stat = "identity") +
+		scale_colour_grey(start = 0.2, end = 0.6) +
+		scale_fill_grey(start = 0.2, end = 0.6) +
+		scale_y_continuous(labels = scientific) +
+		coord_flip() +
+		facet_grid(version ~ users, scales = "free") +
+		theme(text = element_text(size = 8)) +
+		theme(axis.text.x = element_text(angle = 45)) +
+		theme(legend.position = "bottom") +
+		ggtitle(iter_application)
+	print(plot)
+}
+
+dev.off()
+
+print("cache-balance-statistics")
+
+print("application version users event median mean standard-deviation")
+iter_applications = unique(slice$application)
+iter_versions = unique(slice$version)
+iter_users = unique(slice$users)
+iter_events = unique(slice$event)
+for (iter_application in iter_applications) {
+	for (iter_version in iter_versions) {
+		for (iter_user in iter_users) {
+			for (iter_event in iter_events) {
+				group_median <- median(slice[slice$application == iter_application & slice$version == iter_version & slice$users == iter_user & slice$event == iter_event, "amount"])
+				group_mean <- mean(slice[slice$application == iter_application & slice$version == iter_version & slice$users == iter_user & slice$event == iter_event, "amount"])
+				group_sd <- sd(slice[slice$application == iter_application & slice$version == iter_version & slice$users == iter_user & slice$event == iter_event, "amount"])
+				print(paste(iter_application, iter_version, iter_user, iter_event, group_median, group_mean, group_sd))
+			}
+		}
+	}
+}
+
+print("cache-balance-agreggated")
+
+slice <- aggregate(formula = amount~application+version+users+event, data = hits, FUN = sum)
+
+iter_applications = unique(slice$application)
 iter_users = unique(slice$users)
+iter_events = unique(slice$event)
 for (iter_application in iter_applications) {
 	for (iter_user in iter_users) {
-		slice_application <- subset(slice, application == iter_application & users == iter_user)
-		misses <- subset(slice_application, event == "miss" | event == "addition")
-		misses[, "amount"] <- -misses[, "amount"]
-		plot <- ggplot(slice_application, aes(x = factor(name), y = amount, fill = event)) +
-			geom_bar(data = subset(slice_application, event == "hit"), stat = "identity") +
-			geom_bar(data = misses, position = "dodge", stat = "identity") +
-			scale_colour_grey(start = 0.1, end = 0.6) +
-			scale_fill_grey(start = 0.1, end = 0.6) +
-			scale_y_continuous(labels = scientific) +
-			coord_flip() +
-			facet_grid(~ version, scales = "free") +
-			theme(text = element_text(size = 8)) +
-			theme(axis.text.x = element_text(angle = 20)) +
-			theme(legend.position = "bottom") +
-			ggtitle(paste(iter_application, iter_user))
-		print(plot)
+		for (iter_event in iter_events) {
+			maximum <- max(slice[slice$application == iter_application & slice$users == iter_user & slice$event == iter_event, "amount"])
+			slice[slice$application == iter_application & slice$users == iter_user & slice$event == iter_event, "percentage"] <- slice[slice$application == iter_application & slice$users == iter_user & slice$event == iter_event, "amount"] / maximum
+		}
 	}
-}
\ No newline at end of file
+}
+
+plot <- ggplot(slice, aes(x = factor(users), y = percentage, fill = version)) +
+	geom_bar(stat = "identity", position = "dodge") +
+	scale_colour_grey(start = 0.2, end = 0.6) +
+	scale_fill_grey(start = 0.2, end = 0.6) +
+	geom_text(aes(label = amount), colour = "black", stat = "identity", size = 3.0, angle = 90, position = position_dodge(width = 0.9), hjust = -0.05) +
+	ylim(NA, 1.6) +
+	facet_grid(event ~ application, scales = "free") +
+	theme(axis.text.x = element_text(angle = 0)) +
+	theme(text = element_text(size = 8)) +
+	theme(legend.position = "bottom")
+
+ggsave("cache-balance-agreggated.pdf", width = 10, height = 5.5)
diff --git a/analysis/parameters-balance.R b/analysis/parameters-balance.R
new file mode 100644
index 0000000..3b1215b
--- /dev/null
+++ b/analysis/parameters-balance.R
@@ -0,0 +1,56 @@
+library(ggplot2)
+library(scales)
+
+parameters <- read.csv("../applications/output/uncached-parameters.csv")
+
+print("parameters-balance")
+pdf("parameters-balance.pdf")
+
+iter_applications = unique(parameters$application)
+iter_versions = unique(parameters$version)
+iter_users = unique(parameters$users)
+for (iter_application in iter_applications) {
+	for (iter_version in iter_versions) {
+		for (iter_user in iter_users) {
+			group_median <- median(parameters[parameters$application == iter_application & parameters$version == iter_version & parameters$users == iter_user, "amount"])
+			group_mean <- mean(parameters[parameters$application == iter_application & parameters$version == iter_version & parameters$users == iter_user, "amount"])
+			group_sd <- sd(parameters[parameters$application == iter_application & parameters$version == iter_version & parameters$users == iter_user, "amount"])
+			print(paste(iter_application, iter_version, iter_user, group_median, group_mean, group_sd))
+		}
+	}
+}
+
+slice <- aggregate(formula = amount~application+version+users+name, data = parameters, FUN = length)
+reshape(slice, timevar = "users", idvar = c("application", "version", "name"), direction = "wide")
+temp <- aggregate(formula = amount~application+version+users, data = slice, FUN = sum)
+reshape(temp, timevar = "users", idvar = c("application", "version"), direction = "wide")
+
+slice <- aggregate(formula = amount~application+version+users+name, data = parameters, FUN = sum)
+	reshape(slice, timevar = "users", idvar = c("application", "version", "name"), direction = "wide")
+temp <- aggregate(formula = amount~application+version+users, data = slice, FUN = sum)
+reshape(temp, timevar = "users", idvar = c("application", "version"), direction = "wide")
+
+plot <- ggplot(slice, aes(x = factor(name), y = amount, fill = factor(users))) +
+	geom_bar(stat = "identity", position = position_dodge()) +
+	scale_colour_grey(start = 0.2, end = 0.6) +
+	scale_fill_grey(start = 0.2, end = 0.6) +
+	scale_y_continuous(labels = scientific) +
+	coord_flip() +
+	facet_grid(version ~ application, scales = "free") +
+	theme(text = element_text(size = 8)) +
+	theme(axis.text.x = element_text(angle = 90)) +
+	theme(legend.position = "bottom")
+print(plot)
+
+slice <- aggregate(formula = amount~application+version+users, data = slice, FUN = sum)
+
+plot <- ggplot(slice, aes(x = factor(users), y = amount, fill = version)) +
+	geom_bar(stat = "identity", position = "dodge") +
+	scale_colour_grey(start = 0.2, end = 0.6) +
+	scale_fill_grey(start = 0.2, end = 0.6) +
+	geom_text(aes(label = amount), colour = "black", stat = "identity", size = 3.0, angle = 90, position = position_dodge(width = 0.9), hjust = -0.05) +
+	facet_grid(~ application, scales = "free") +
+	theme(axis.text.x = element_text(angle = 0)) +
+	theme(text = element_text(size = 8)) +
+	theme(legend.position = "bottom")
+print(plot)

analysis/throughput.R 88(+34 -54)

diff --git a/analysis/throughput.R b/analysis/throughput.R
index 74cfc89..d0f00bf 100644
--- a/analysis/throughput.R
+++ b/analysis/throughput.R
@@ -6,61 +6,41 @@ requests <- read.csv2("../applications/output/requests-handled.csv", header=TRUE
 print("median")
 
 aggregate <- aggregate(formula = requests~application+version+users+execution, data = requests, FUN = length)
-aggregate <- reshape(aggregate, idvar = c("application", "version", "users"), timevar = "execution", direction = "wide")
-
-min_aggregate <- aggregate
-min_aggregate[, "min_requests"] <- apply(min_aggregate[, 4:length(min_aggregate)], 1, FUN = which.min)
-min_aggregate[, "reference"] <- paste(min_aggregate$application, min_aggregate$version, min_aggregate$users, min_aggregate$min_requests)
-
-median_aggregate <- aggregate
-median_aggregate[, "median_requests"] <- apply(median_aggregate[, 4:length(median_aggregate)], 1, FUN = function(x) which.min(abs(x - median(x))))
-median_aggregate[, "reference"] <- paste(median_aggregate$application, median_aggregate$version, median_aggregate$users, median_aggregate$median_requests)
-
-max_aggregate <- aggregate
-max_aggregate[, "max_requests"] <- apply(max_aggregate[, 4:length(max_aggregate)], 1, FUN = which.max)
-max_aggregate[, "reference"] <- paste(max_aggregate$application, max_aggregate$version, max_aggregate$users, max_aggregate$max_requests)
-
-print("throughput")
-
-slice <- requests
-min_slice <- subset(slice, paste(slice$application, slice$version, slice$users, slice$execution) %in% min_aggregate$reference)
-median_slice <- subset(slice, paste(slice$application, slice$version, slice$users, slice$execution) %in% median_aggregate$reference)
-max_slice <- subset(slice, paste(slice$application, slice$version, slice$users, slice$execution) %in% max_aggregate$reference)
+names(aggregate)[names(aggregate) == "requests"] <- "time"
+aggregate["requests"] <- aggregate(formula = requests~application+version+users+execution, data = requests, FUN = max)["requests"]
 
-requests_slice <- aggregate(formula = requests~application+version+users, data = slice, FUN = max)
-min_slice <- aggregate(formula = time~application+version+users+execution, data = min_slice, FUN = max)
-median_slice <- aggregate(formula = time~application+version+users+execution, data = median_slice, FUN = max)
-max_slice <- aggregate(formula = time~application+version+users+execution, data = max_slice, FUN = max)
+aggregate["throughput"] <- aggregate$requests / aggregate$time
+aggregate["time"] <- NULL
+aggregate["requests"] <- NULL
 
-min_slice[, "execution"] <- NULL
-names(min_slice)[names(min_slice) == "time"] <- "min_time"
-
-median_slice[, "execution"] <- NULL
-names(median_slice)[names(median_slice) == "time"] <- "median_time"
-
-max_slice[, "execution"] <- NULL
-names(max_slice)[names(max_slice) == "time"] <- "max_time"
-
-throughput <- requests_slice
-throughput <- merge(throughput, min_slice)
-throughput <- merge(throughput, median_slice)
-throughput <- merge(throughput, max_slice)
-throughput[, "min_throughput"] <- throughput$requests / throughput$min_time
-throughput[, "median_throughput"] <- throughput$requests / throughput$median_time
-throughput[, "max_throughput"] <- throughput$requests / throughput$max_time
-
-pdf("throughput.pdf")
+aggregate <- reshape(aggregate, idvar = c("application", "version", "users"), timevar = "execution", direction = "wide")
 
-iter_applications = unique(slice$application)
-for(i in 1:ceiling(length(iter_applications) / 3)) {
-	plot <- ggplot(throughput, aes(x = version, y = median_throughput, fill = version)) +
-		geom_bar(stat = "identity") +
-		geom_errorbar(aes(ymin = min_throughput, ymax = max_throughput), width = .2, position = position_dodge(.9)) +
-		geom_text(aes(label = sprintf("%.2f", median_throughput)), vjust = 2, size = 2.5, colour = "white") +
-		scale_fill_grey(start = 0.2, end = 0.6) +
-		scale_colour_grey(start = 0.2, end = 0.6) +
-		facet_wrap_paginate(application ~ users, scales = "free", ncol = 3, nrow = 3, page = i) +
-		theme(axis.text.x = element_text(angle = 20)) +
-		theme(legend.position = "bottom")
-	print(plot)
+actual_length <- length(aggregate)
+
+aggregate["min_throughput"] <- apply(aggregate[, 4:actual_length], 1, FUN = min)
+aggregate["median_throughput"] <- apply(aggregate[, 4:actual_length], 1, FUN = median)
+aggregate["max_throughput"] <- apply(aggregate[, 4:actual_length], 1, FUN = max)
+
+iter_applications = unique(aggregate$application)
+iter_users = unique(aggregate$users)
+iter_events = unique(aggregate$event)
+for (iter_application in iter_applications) {
+	for (iter_user in iter_users) {
+		maximum <- max(aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "max_throughput"])
+		aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "median_percentage"] <- aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "median_throughput"] / maximum
+		aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "min_percentage"] <- aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "min_throughput"] / maximum
+		aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "max_percentage"] <- aggregate[aggregate$application == iter_application & aggregate$users == iter_user, "max_throughput"] / maximum
+	}
 }
+
+plot <- ggplot(aggregate, aes(x = factor(users), y = median_percentage, fill = version)) +
+	geom_bar(position = "dodge", stat = "identity") +
+	geom_errorbar(aes(ymin = min_percentage, ymax = max_percentage), width = .2, position = position_dodge(.9)) +
+	geom_text(aes(label = sprintf("%.2f", median_throughput)), colour = "white", stat = "identity", size = 3.2, angle = 90, position = position_dodge(width = 0.9), hjust = 1.2) +
+	scale_fill_grey(start = 0.2, end = 0.6) +
+	scale_colour_grey(start = 0.2, end = 0.6) +
+	facet_wrap(~ application, scales = "free", ncol = 4) +
+	theme(axis.text.x = element_text(angle = 0)) +
+	theme(legend.position = "bottom")
+
+ggsave("throughput-normalised.pdf", width = 10, height = 5)