java-callgraph

Merge branch 'fix/broken_call_graph_for_nested_lambdas'

10/23/2018 5:10:20 AM

Details

pom.xml 2(+1 -1)

diff --git a/pom.xml b/pom.xml
index 3b51020..a606eca 100644
--- a/pom.xml
+++ b/pom.xml
@@ -17,7 +17,7 @@
     <dependency>
       <groupId>org.apache.bcel</groupId>
       <artifactId>bcel</artifactId>
-      <version>6.0</version>
+      <version>6.2</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
diff --git a/src/main/java/gr/gousiosg/javacg/stat/ClassVisitor.java b/src/main/java/gr/gousiosg/javacg/stat/ClassVisitor.java
index e03d72c..d5dad60 100644
--- a/src/main/java/gr/gousiosg/javacg/stat/ClassVisitor.java
+++ b/src/main/java/gr/gousiosg/javacg/stat/ClassVisitor.java
@@ -45,6 +45,7 @@ public class ClassVisitor extends EmptyVisitor {
     private JavaClass clazz;
     private ConstantPoolGen constants;
     private String classReferenceFormat;
+    private final DynamicCallManager DCManager = new DynamicCallManager();
     
     public ClassVisitor(JavaClass jc) {
         clazz = jc;
@@ -55,8 +56,13 @@ public class ClassVisitor extends EmptyVisitor {
     public void visitJavaClass(JavaClass jc) {
         jc.getConstantPool().accept(this);
         Method[] methods = jc.getMethods();
-        for (int i = 0; i < methods.length; i++)
-            methods[i].accept(this);
+        for (int i = 0; i < methods.length; i++) {
+            Method method = methods[i];
+            DCManager.retrieveCalls(method, jc);
+            DCManager.linkCalls(method);
+            method.accept(this);
+
+        }
     }
 
     public void visitConstantPool(ConstantPool constantPool) {
diff --git a/src/main/java/gr/gousiosg/javacg/stat/DynamicCallManager.java b/src/main/java/gr/gousiosg/javacg/stat/DynamicCallManager.java
new file mode 100644
index 0000000..5a3bb0a
--- /dev/null
+++ b/src/main/java/gr/gousiosg/javacg/stat/DynamicCallManager.java
@@ -0,0 +1,116 @@
+package gr.gousiosg.javacg.stat;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.bcel.classfile.Attribute;
+import org.apache.bcel.classfile.BootstrapMethod;
+import org.apache.bcel.classfile.BootstrapMethods;
+import org.apache.bcel.classfile.ConstantCP;
+import org.apache.bcel.classfile.ConstantMethodHandle;
+import org.apache.bcel.classfile.ConstantNameAndType;
+import org.apache.bcel.classfile.ConstantPool;
+import org.apache.bcel.classfile.ConstantUtf8;
+import org.apache.bcel.classfile.JavaClass;
+import org.apache.bcel.classfile.Method;
+
+/**
+ * {@link DynamicCallManager} provides facilities to retrieve information about
+ * dynamic calls statically.
+ * <p>
+ * Most of the time, call relationships are explicit, which allows to properly
+ * build the call graph statically. But in the case of dynamic linking, i.e.
+ * <code>invokedynamic</code> instructions, this relationship might be unknown
+ * until the code is actually executed. Indeed, bootstrap methods are used to
+ * dynamically link the code at first call. One can read details about the
+ * <a href=
+ * "https://docs.oracle.com/javase/8/docs/technotes/guides/vm/multiple-language-support.html#invokedynamic"><code>invokedynamic</code>
+ * instruction</a> to know more about this mechanism.
+ * <p>
+ * Nested lambdas are particularly subject to such absence of concrete caller,
+ * which lead us to produce method names like <code>lambda$null$0</code>, which
+ * breaks the call graph. This information can however be retrieved statically
+ * through the code of the bootstrap method called.
+ * <p>
+ * In {@link #retrieveCalls(Method, JavaClass)}, we retrieve the (called,
+ * caller) relationships by analyzing the code of the caller {@link Method}.
+ * This information is then used in {@link #linkCalls(Method)} to rename the
+ * called {@link Method} properly.
+ * 
+ * @author Matthieu Vergne <matthieu.vergne@gmail.com>
+ *
+ */
+public class DynamicCallManager {
+    private static final Pattern BOOTSTRAP_CALL_PATTERN = Pattern
+            .compile("invokedynamic\t(\\d+):\\S+ \\S+ \\(\\d+\\)");
+    private static final int CALL_HANDLE_INDEX_ARGUMENT = 1;
+
+    private final Map<String, String> dynamicCallers = new HashMap<>();
+
+    /**
+     * Retrieve dynamic call relationships based on the code of the provided
+     * {@link Method}.
+     * 
+     * @param method
+     *            {@link Method} to analyze the code
+     * @param jc
+     *            {@link JavaClass} info, which contains the bootstrap methods
+     * @see #linkCalls(Method)
+     */
+    public void retrieveCalls(Method method, JavaClass jc) {
+        if (method.isAbstract()) {
+            // No code to consider
+            return;
+        }
+        ConstantPool cp = method.getConstantPool();
+        BootstrapMethod[] boots = getBootstrapMethods(jc);
+        String code = method.getCode().toString();
+        Matcher matcher = BOOTSTRAP_CALL_PATTERN.matcher(code);
+        while (matcher.find()) {
+            int bootIndex = Integer.parseInt(matcher.group(1));
+            BootstrapMethod bootMethod = boots[bootIndex];
+            int calledIndex = bootMethod.getBootstrapArguments()[CALL_HANDLE_INDEX_ARGUMENT];
+            String calledName = getMethodNameFromHandleIndex(cp, calledIndex);
+            String callerName = method.getName();
+            dynamicCallers.put(calledName, callerName);
+        }
+    }
+
+    private String getMethodNameFromHandleIndex(ConstantPool cp, int callIndex) {
+        ConstantMethodHandle handle = (ConstantMethodHandle) cp.getConstant(callIndex);
+        ConstantCP ref = (ConstantCP) cp.getConstant(handle.getReferenceIndex());
+        ConstantNameAndType nameAndType = (ConstantNameAndType) cp.getConstant(ref.getNameAndTypeIndex());
+        return nameAndType.getName(cp);
+    }
+
+    /**
+     * Link the {@link Method}'s name to its concrete caller if required.
+     * 
+     * @param method
+     *            {@link Method} to analyze
+     * @see #retrieveCalls(Method, JavaClass)
+     */
+    public void linkCalls(Method method) {
+        int nameIndex = method.getNameIndex();
+        ConstantPool cp = method.getConstantPool();
+        String methodName = ((ConstantUtf8) cp.getConstant(nameIndex)).getBytes();
+        String linkedName = methodName;
+        String callerName = methodName;
+        while (linkedName.matches("(lambda\\$)+null(\\$\\d+)+")) {
+            callerName = dynamicCallers.get(callerName);
+            linkedName = linkedName.replace("null", callerName);
+        }
+        cp.setConstant(nameIndex, new ConstantUtf8(linkedName));
+    }
+
+    private BootstrapMethod[] getBootstrapMethods(JavaClass jc) {
+        for (Attribute attribute : jc.getAttributes()) {
+            if (attribute instanceof BootstrapMethods) {
+                return ((BootstrapMethods) attribute).getBootstrapMethods();
+            }
+        }
+        return new BootstrapMethod[] {};
+    }
+}
diff --git a/src/test/resources/gr/gousiosg/javacg/lambda.feature b/src/test/resources/gr/gousiosg/javacg/lambda.feature
index 14d8f3f..c651e95 100644
--- a/src/test/resources/gr/gousiosg/javacg/lambda.feature
+++ b/src/test/resources/gr/gousiosg/javacg/lambda.feature
@@ -35,3 +35,43 @@ Feature: Lambda
       """
       M:LambdaTest:lambda$methodA$0() (M)LambdaTest:methodB()
       """
+
+  Scenario: Retrieve nested lambdas
+    Given I have the class "NestedLambdaTest" with code:
+      """
+      public class NestedLambdaTest {
+       public void methodA() {
+        Runner r = () -> {
+         Runner r2 = () -> {
+          Runner r3 = () -> methodB();
+          r3.run();
+         };
+         r2.run();
+        };
+        r.run();
+       }
+       
+       public void methodB() {}
+      }
+      """
+    When I run the analyze
+    # Creation of r in methodA
+    Then the result should contain:
+      """
+      M:NestedLambdaTest:methodA() (D)Runner:run(NestedLambdaTest)
+      """
+    # Creation of r2 in r
+    And the result should contain:
+      """
+      M:NestedLambdaTest:lambda$methodA$2() (D)Runner:run(NestedLambdaTest)
+      """
+    # Creation of r3 in r2
+    And the result should contain:
+      """
+      M:NestedLambdaTest:lambda$lambda$methodA$2$1() (D)Runner:run(NestedLambdaTest)
+      """
+    # Call of methodB in r3
+    And the result should contain:
+      """
+      M:NestedLambdaTest:lambda$lambda$lambda$methodA$2$1$0() (M)NestedLambdaTest:methodB()
+      """