caching-approaches-comparison

added plots, install R and the dependencies we rely on, as well as a README and fixes to the run/reduce/trace scripts

2/26/2020 2:23:43 AM

Details

diff --git a/analysis/cache-balance.pdf b/analysis/cache-balance.pdf
new file mode 100644
index 0000000..a21569f
Binary files /dev/null and b/analysis/cache-balance.pdf differ
diff --git a/analysis/cache-balance-agreggated.pdf b/analysis/cache-balance-agreggated.pdf
new file mode 100644
index 0000000..8e1da85
Binary files /dev/null and b/analysis/cache-balance-agreggated.pdf differ
diff --git a/analysis/parameters-balance.pdf b/analysis/parameters-balance.pdf
new file mode 100644
index 0000000..4201b32
Binary files /dev/null and b/analysis/parameters-balance.pdf differ

analysis/plot.sh 6(+6 -0)

diff --git a/analysis/plot.sh b/analysis/plot.sh
new file mode 100644
index 0000000..da94098
--- /dev/null
+++ b/analysis/plot.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for plot in *.R; do
+	Rscript $plot &
+done
+wait
\ No newline at end of file
diff --git a/analysis/requests-handled.pdf b/analysis/requests-handled.pdf
new file mode 100644
index 0000000..8ef2de0
Binary files /dev/null and b/analysis/requests-handled.pdf differ
diff --git a/analysis/requests-variability.pdf b/analysis/requests-variability.pdf
new file mode 100644
index 0000000..7b9024c
Binary files /dev/null and b/analysis/requests-variability.pdf differ
diff --git a/analysis/throughput-normalised.pdf b/analysis/throughput-normalised.pdf
new file mode 100644
index 0000000..898235e
Binary files /dev/null and b/analysis/throughput-normalised.pdf differ

configure 5(+4 -1)

diff --git a/configure b/configure
index 260dc16..2027828 100755
--- a/configure
+++ b/configure
@@ -27,6 +27,9 @@ if [[ $type = "standalone" ]]; then
 	apt-get -qq -y install maven
 	apt-get -qq -y install git
 	apt-get -qq -y install htop
+	DEBIAN_FRONTEND=noninteractive apt-get -qq -y install r-base
+	R -e "install.packages('ggplot2')"
+	R -e "install.packages('ggforce')"
 
 	apt-get install apt-transport-https ca-certificates curl gnupg-agent software-properties-common -y
 	curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
@@ -41,4 +44,4 @@ fi
 cd adapters; bash configure $2 $3; cd ..
 cd approaches; bash configure $2 $3; cd ..
 cd applications; bash configure $2 $3; cd ..
-wait
\ No newline at end of file
+wait
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 665d032..cc614e2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -18,6 +18,9 @@ RUN apt-get -qq -y install maven
 RUN apt-get -qq -y install git
 RUN apt-get -qq -y install curl
 RUN apt-get -qq -y install htop
+RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y install r-base
+RUN R -e "install.packages('ggplot2')"
+RUN R -e "install.packages('ggforce')"
 
 RUN apt-get -qq -y install ssh
 RUN echo "PermitRootLogin yes" >> /etc/ssh/sshd_config

README.md 271(+271 -0)

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cfd4a3a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,271 @@
+# APL-Caching Approaches Comparison
+
+This is a framework developed by the Prosoft Research Group at the Federal University of Rio Grande do Sul.
+Its purpose is to extract traces from web applications as well as to compare their throughput and caching performance.
+
+- All resulting plots are available in PDF under the folder ``/analysis``
+	- Resulting tables are generated by running the R files within this folder through ``Rscript``.
+	- To re-execute these R scripts, the compressed files under ``/applications/output`` must first be extracted.
+	- Typing ``bash configure`` extracts them automatically, although it also downloads and installs the whole experiment structure.
+- To download, extract, configure, and compile the whole experiment without affecting your environment, there are Docker machines under ``/docker`` with the proper versions of the software used.
+	- The Docker machine cannot be used to reproduce the experiment.
+	- Any file under ``/caching-approaches-comparison`` is available both inside and outside the Docker container.
+	- To execute, type:
+	- 1. ``docker-compose -f docker/docker-compose.yml up --build -d``
+	- 2. ``docker-compose -f docker/docker-compose.yml exec caching-approaches-comparison /bin/bash``
+			-  ``ssh root@localhost -p 5001`` will also work with the password ``caching-approaches-comparison``
+	- 3. ``cd /caching-approaches-comparison``
+	- 4. ``bash configure docker`` to download and extract needed files
+			- two more arguments can be given to provide a username and password for git cloning
+	- 5. ``bash compile.sh`` to compile adapters and approaches with maven
+	- 6. ``cd analysis && bash plot.sh`` to run all the R plotting scripts
+	- 7. Hit ``ctrl+d`` to leave interactive mode inside the container
+	- 8. Do not forget to shut down the Docker container by typing ``docker-compose -f docker/docker-compose.yml down -v``
+			- the ``-v`` argument also removes the associated volumes
+- Data under ``/adapters``, ``/approaches``, and ``/applications``, which are hosted in different repositories, are downloaded automatically during configuration.
+
+
+# Reproducing
+
+- The experiment can be executed either with a single-host configuration or, ideally, with two hosts: one dedicated to making the requests, while the other hosts the application to be logged/measured.
+- To execute the experiment with two hosts, it is only required that both machines are properly configured with the following steps and that the ``RemoteExecuter`` is running on the *application-machine*, while the *requester-machine* commands it through its IP address, as sketched below.
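+
+For instance, assuming the *application-machine* is reachable at the hypothetical address ``192.168.0.20`` and the ``RemoteExecuter`` is already running on it, the *requester-machine* would drive the experiment as follows (a sketch; the application name ``petclinic`` is illustrative only):
+
+```bash
+# run on the requester-machine, pointing each step at the application-machine's IP
+bash traces.sh 192.168.0.20 "petclinic"
+bash run.sh 192.168.0.20 "*" "petclinic"
+bash reduce.sh 192.168.0.20 "*" "petclinic"
+```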
+
+## Configuring
+
+1. ``git clone --depth=3 https://github.com/rmeloca/caching-approaches-comparison.git caching-approaches-comparison``
+2. ``cd caching-approaches-comparison``
+3. ``bash configure``
+4. ``bash compile.sh``
+
+## Executing
+
+1. ``bash traces.sh [<host> ["<application-list>"]]``
+2. ``bash run.sh [<host> ["<version-list>" ["<application-list>"]]]``
+3. ``bash reduce.sh [<host> ["<version-list>" ["<application-list>" [<reduce> [<overwrite>]]]]]``
+
+- ``<host> ::= localhost`` or the ``<ip>`` of the ``application-machine``
+	- If not informed, ``localhost`` is assumed
+- ``<application-list> ::= * | <application-name> | <application-name> <application-list>``
+	- Each ``<application-name>`` stands for a folder under ``/applications/uncached``
+	- ``*`` means that the default value shall be assumed
+	- If not informed, ``*`` will be assumed
+		- For ``run.sh`` the default value ``*`` means ``$(echo applications/uncached/*/)``
+		- For ``reduce.sh`` the default value ``*`` will check for applications under each version within the ``<version-list>`` provided
+- ``<version-list> ::= <version-name> | <version-name> <version-list>``
+	- Each ``<version-name>`` stands for some folder under ``/applications`` that holds the applications to be measured
+	- If not informed, ``uncached developers aplcache memoizeit`` is assumed
+- ``<reduce> ::= * | requests | cache``
+	- If not informed, ``*`` is assumed
+	- Determines whether only ``requests-handled.csv`` will be generated on the ``requests-machine`` or the ``application-machine`` will also be commanded to generate ``hits-distribution.csv`` and ``uncached-parameters.csv``
+	- Every CSV output is generated under the ``/applications/output`` folder on its respective machine (see the worked example after this list)
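+
+As a concrete illustration of the argument grammar above (the application name ``petclinic`` is hypothetical; the version names follow the defaults listed above):
+
+```bash
+# trace only the application "petclinic" on the local machine
+bash traces.sh localhost "petclinic"
+
+# run the "uncached" and "aplcache" versions of every application
+bash run.sh localhost "uncached aplcache" "*"
+
+# reduce only the request logs for those versions of "petclinic"
+bash reduce.sh localhost "uncached aplcache" "petclinic" requests
+```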
+
+# Adding or changing applications
+ 
+1. Remove all caching statements from the desired application and put it into ``/applications/uncached``
+2. [For MemoizeIt] Generate callgraphs with java-cg
+	1. Compile the desired application into the ``<compiled-application>`` folder
+	2. ``zip -r <compiled-application>.zip <compiled-application>``
+	3. ``java -jar adapters/java-callgraph/target/javacg-0.1-SNAPSHOT-static.jar <compiled-application>.zip > applications/callgraphs/<application>``
+3. Create a database dump, if needed, and put it into ``/applications/dumps``
+4. Create a workload file into ``/applications/workloads`` according to the rules described below
+5. Include ``ApplicationTracer`` as a maven or gradle dependency of the application
+	```xml
+	<dependency>
+	    <groupId>br.ufrgs.inf.prosoft.applicationtracer</groupId>
+	    <artifactId>ApplicationTracer</artifactId>
+	    <version>1.0</version>
+	</dependency>
+	```
+6. Trace with ``bash trace.sh``
+	- Pay attention to the environment variables under ``docker-compose.yml`` as described below
+7. Generate recommendations, analyse them, and apply the caching
+	- APLCache
+		- ``java -jar approaches/APLCache/target/APLCache-1.0.jar --trace=applications/traces/<application-name> --output=applications/output/aplcache-<application-name>-parameters.json``
+		- Do not forget to copy APLCache's output to the ``<application-machine>``, into the equivalent folder
+	- MemoizeIt
+		- ``java -jar approaches/MemoizeIt/target/MemoizeIt-1.0.jar --callgraph=applications/callgraphs/<application-name> --trace=applications/traces/<application-name> [--kernel=<iterative|exhaustive>]``
+	- Take a look at the caching examples section below
+8. Each cached version of the application should be placed into its respective folder ``/applications/<aplcache|memoizeit|developers>``
+	- APLCache
+		```xml
+		<dependency>
+		    <groupId>br.ufrgs.inf.prosoft.aplcache</groupId>
+		    <artifactId>APLCache</artifactId>
+		    <version>1.0</version>
+		</dependency>
+		```
+	- MemoizeIt and Developers
+		```xml
+		<dependency>
+		    <groupId>br.ufrgs.inf.prosoft.cache</groupId>
+		    <artifactId>Cache</artifactId>
+		    <version>1.0</version>
+		</dependency>
+		```
+9. Execute the application by commanding ``bash run.sh``
+10. Reduce samples into CSV outputs by commanding ``bash reduce.sh`` 
+	- Each result will be available on its respective machine: ``requests-handled.csv`` on the ``<requests-machine>``; ``hits-distribution.csv`` and ``uncached-parameters.csv`` on the ``<application-machine>`` (a consolidated sketch of steps 6-10 follows this list)
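+
+Putting steps 6-10 together for a hypothetical application called ``petclinic`` (the name and the chosen approach are illustrative only):
+
+```bash
+# 6. trace the uncached version
+bash trace.sh localhost "petclinic"
+
+# 7. generate APLCache recommendations from the collected trace
+java -jar approaches/APLCache/target/APLCache-1.0.jar \
+	--trace=applications/traces/petclinic \
+	--output=applications/output/aplcache-petclinic-parameters.json
+
+# 9. run the cached versions and 10. reduce the samples into CSV outputs
+bash run.sh localhost "uncached aplcache" "petclinic"
+bash reduce.sh localhost "uncached aplcache" "petclinic"
+```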
+
+## Caching examples
+
+```java
+import br.ufrgs.inf.prosoft.cache.*;
+
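+// Note: in the examples below, the empty lambda () -> {} is a placeholder for the original
+// method body whose result should be cached, and 60000 is kept exactly as in the project's
+// examples (presumably a bound in milliseconds; this is an assumption, not confirmed here).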
+public static GetterCache<Vet> findAllCache = new GetterCache<>("findAllCache");
+return findAllCache.computeIfAbsent(() -> {}, 60000);
+
+public static SingleCache<Parameters, PetType> singleCache = new SingleCache<>("singleCache");
+return singleCache.computeIfAbsent(new Parameters(text, locale), () -> {}, 60000);
+
+public static MultiCache<Parameters, PetType> parseCache = new MultiCache<>("parseCache");
+return parseCache.computeIfAbsent(new Parameters(text, locale), () -> {}, 60000);
+```
+
+```java
+import br.ufrgs.inf.prosoft.aplcache.caching.APLCache;
+
+public static APLCache<Type> methodCache = new APLCache<>("methodCache");
+return methodCache.computeIfAbsent(Thread.currentThread(), new Object[]{parameter}, () -> {}, 60000);
+```
+
+
+## Environment variables
+
+- ``JAVA_OPTS``
+	- For ``requester-machine``: ``JAVA_OPTS="-Xms4096m -Xmx6124m"``
+	- For ``application-machine``: ``JAVA_SERVER_OPTS=${JAVA_SERVER_OPTS:-"-Xmx30000m"}``
+- Tracing
+	- ``TRACER_ENABLE=${TRACER_ENABLE:-true}``
+		- Enables or disables tracing while running an ``uncached`` version
+	- ``TRACER_MINIMUM_EXECUTION_TIME=${TRACER_MINIMUM_EXECUTION_TIME:-1}``
+		- Sets how many milliseconds a given method must last in order to be logged
+	- ``TRACER_SERIALISE_INTERNALS=false``
+		- Sets whether classes within the Java core should be serialised
+	- ``TRACER_VERBOSE=true``
+		- If enabled, every logged method that lasts longer than 5ms will be echoed
+	- ``TRACER_BLACKLIST="$(pwd)/blacklist"``
+		- Points to the folder of methods that shall be ignored
+	- ``TRACER_TRACES="$(pwd)/traces"``
+		- Points to the file where traces shall be logged
+	- ``TRACER_IGNORED_PACKAGES="$(pwd)/ignored"``
+		- Points to the file that lists the packages that shall be ignored
+	- ``TRACER_WHITELIST="$(pwd)/whitelist"``
+		- Points to the file that lists packages that will not be echoed, but will still be serialised
+		- Useful only for development purposes
+	- ``TRACER_LOG="$(pwd)/tracer.log"``
+		- Prints logged methods in a file
+		- Useful for development purposes
+- Measuring
+	- ``CACHE_EVENTS=${CACHE_EVENTS:-"$(pwd)/cache"}``
+		- Output file where to log caching events
+	- ``CACHE_REGISTER_SIZE=false``
+		- Choose whether to log the size of the cached object
+	- APLCache
+		- ``APLCACHE_CACHEABLE_PARAMETERS="$(pwd)/aplcache-parameters.json"``
+			- Points to the file where the recommended inputs were written by APLCache
+		- ``APLCACHE_LOG="$(pwd)/aplcache-parameters.log"``
+			- Output file where to log the uncached inputs for APLCache
+		- ``TRACER_SERIALISE_INTERNALS`` and ``TRACER_IGNORED_PACKAGES`` should be provided with the same values used during the tracing phase so that APLCache behaves properly (see the sketch after this list)
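+
+A minimal sketch of how these variables might be set before a tracing run (the values mirror the defaults listed above and are illustrative; in the Docker setup they are normally defined in ``docker-compose.yml``):
+
+```bash
+# tracing configuration for an uncached run
+export TRACER_ENABLE=true
+export TRACER_MINIMUM_EXECUTION_TIME=1
+export TRACER_SERIALISE_INTERNALS=false
+export TRACER_BLACKLIST="$(pwd)/blacklist"
+export TRACER_TRACES="$(pwd)/traces"
+export TRACER_IGNORED_PACKAGES="$(pwd)/ignored"
+
+# measuring configuration
+export CACHE_EVENTS="$(pwd)/cache"
+export CACHE_REGISTER_SIZE=false
+
+bash trace.sh localhost
+```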
+
+## Requests Graph Syntax
+
+```
+<reference>                  ::= <string>
+<method>                     ::= POST
+                               | GET
+                               | PUT
+                               | DELETE
+<url>                        ::= http://<string>/<url-definition>
+<url-definition>             ::= <string>
+                               | <variable>
+                               | <random>
+                               | <optional>
+                               | <url-definition><url-definition>
+<header>                     ::= Cookie: <string>=<variable>; <string>: <variable> <optional>
+<form>                       ::= <string>=<variable>&<string>=<variable> <optional>
+<data>                       ::= <string>
+                               | <variable>
+                               | <optional>
+                               | <data><data>
+<random>                     ::= $
+                               | $[<number>]
+                               | $[<number>-<number>]
+<variable>                   ::= #{<variable-definition>}
+                               | #{<string>@<variable-definition>}
+<store-field>                ::= <store-variable-definition>
+                               | <string>@<store-variable-definition>
+<variable-definition>        ::= <object>
+                               | <optional>
+                               | <array>
+                               | <variable-definition><variable-definition>
+<store-variable-definition>  ::= <object>
+                               | <optional>
+                               | <store-array>
+                               | <store-variable-definition><store-variable-definition>
+<array>                      ::= [$]
+                               | [<number>]
+<store-array>                ::= [<random>]
+                               | [<number>]
+<object>                     ::= <string>
+                               | #<string>
+<optional>                   ::= <<optional-definition>>
+<optional-definition>        ::= <string>|
+                               | <optional-definition><optional-definition>
+<link-references>            ::= <reference>
+                               | <copy-reference>
+                               | <ignore>
+<copy-reference>             ::= *<reference>
+<ignore>                     ::= *<copy-reference>
+```
+
+```json
+{
+	"<reference>": {
+		"method": "<method>",
+		"URL": "<url>",
+		"headers": "<header>*",
+		"forms": "<form>*",
+		"data": "<data>",
+		"storeFields": [
+			"<store-field>"*
+		],
+		"requirementsReferences": [
+			"<reference>"*
+		],
+		"linksReferences": [
+			"<link-reference>"*
+		]
+	}
+}
+```
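+
+A minimal hypothetical workload entry following the template above (the reference name, application, and URL are illustrative, and whether empty headers/forms are accepted is an assumption; the file name follows the ``<application>-workload-<n>user.json`` convention used by ``run.sh``):
+
+```bash
+# write a single-request workload file for a hypothetical application "petclinic"
+cat > applications/workloads/petclinic-workload-1user.json <<'EOF'
+{
+	"listOwner": {
+		"method": "GET",
+		"URL": "http://localhost:8080/owners/$[100]",
+		"headers": "",
+		"forms": "",
+		"data": "",
+		"storeFields": [],
+		"requirementsReferences": [],
+		"linksReferences": []
+	}
+}
+EOF
+```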
+
+## RequestExecuter Lifecycle
+
+### Generating
+```
+read profile
+parallel foreach user
+	create session
+	while not timeout
+		if probability leave
+			break
+		choose request
+			generate probability
+			generate random
+			choose optionals
+		load variables
+		fire
+		store variables
+		log
+```
+### Executing
+```
+read profile
+read logs
+parallel foreach thread
+	create session
+	foreach request
+		load variables
+		fire
+			store variables
+```
\ No newline at end of file

reduce.sh 3(+2 -1)

diff --git a/reduce.sh b/reduce.sh
index 4b48150..edb9396 100755
--- a/reduce.sh
+++ b/reduce.sh
@@ -59,11 +59,12 @@ echo "$(date '+%F %T') Reducing $reduce in host $host for the applications $appl
 for version in $versions; do
 	version=${version%/}
 	if [[ $applications = "*" ]]; then
-		existing_applications=$(echo applications/$version/*/ | cut -d "/" -f 3)
+		existing_applications=$(echo applications/$version/*/)
 	else
 		existing_applications=$applications
 	fi
 	for application in $existing_applications; do
+		application=${application//applications\/$version\//}
 		application=${application%/}
 		for user in 1 5 25; do
 			if [[ "$reduce" = "*" || "$reduce" = "requests" ]]; then

run.sh 4(+4 -0)

diff --git a/run.sh b/run.sh
index 4408a26..76dbbd0 100644
--- a/run.sh
+++ b/run.sh
@@ -22,9 +22,13 @@ if [[ ! -z $1 ]]; then
 		fi
 	fi
 fi
+if [[ $applications = "*" ]]; then
+	applications=$(echo applications/uncached/*/)
+fi
 
 echo "$(date '+%F %T') Executing workloads in host $host for the applications $applications among the versions $versions"
 for application in $applications; do
+	application=${application//applications\/uncached\//}
 	application=${application%/}
 	for user in 25 5 1; do
 		if [[ ! -f applications/workloads/$application-workload-${user}user.json ]]; then

trace.sh 4(+4 -0)

diff --git a/trace.sh b/trace.sh
index 6607d08..486700b 100644
--- a/trace.sh
+++ b/trace.sh
@@ -18,9 +18,13 @@ if [[ ! -z $1 ]]; then
 		applications=$2
 	fi
 fi
+if [[ $applications = "*" ]]; then
+	applications=$(echo applications/uncached/*/)
+fi
 
 echo "$(date '+%F %T') Running in host $host for the applications: $applications"
 for application in $applications; do
+	application=${application//applications\/uncached\//}
 	application=${application%/}
 	if [[ -f applications/output/$application-uncached-requests ]]; then
 		echo "$(date '+%F %T')  Skipping application $application"