Kotlin の apply{…} の速度
次のような単純なコードがあったとする。
// Something like HashMap<String,String> class Holder { @Volatile private var value :String = "" operator fun set(@Suppress("UNUSED_PARAMETER") key:String, value:String){ this.value = value } } class PrefKey(private val key: String) { fun put(holder: Holder, value: String) { holder[key] = value } } fun Holder.putA(pref: PrefKey, value: String): Holder { pref.put(this, value) return this } fun Holder.putB(pref: PrefKey, value: String): Holder = apply { pref.put(this, value) }
さてputAとputB、速いのはどちらだろうか?
putA
// access flags 0x19 public final static putA(Lbench/Holder;Lbench/PrefKey;Ljava/lang/String;)Lbench/Holder; @Lorg/jetbrains/annotations/NotNull;() // invisible // annotable parameter count: 3 (visible) // annotable parameter count: 3 (invisible) @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 0 @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 1 @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 2 L0 ALOAD 0 LDC "$this$putA" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V ALOAD 1 LDC "pref" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V ALOAD 2 LDC "value" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V L1 LINENUMBER 16 L1 ALOAD 1 ALOAD 0 ALOAD 2 INVOKEVIRTUAL bench/PrefKey.put (Lbench/Holder;Ljava/lang/String;)V L2 LINENUMBER 17 L2 ALOAD 0 ARETURN L3 LOCALVARIABLE $this$putA Lbench/Holder; L0 L3 0 LOCALVARIABLE pref Lbench/PrefKey; L0 L3 1 LOCALVARIABLE value Ljava/lang/String; L0 L3 2 MAXSTACK = 3 MAXLOCALS = 3
putB
// access flags 0x19 public final static putB(Lbench/Holder;Lbench/PrefKey;Ljava/lang/String;)Lbench/Holder; @Lorg/jetbrains/annotations/NotNull;() // invisible // annotable parameter count: 3 (visible) // annotable parameter count: 3 (invisible) @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 0 @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 1 @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 2 L0 ALOAD 0 LDC "$this$putB" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V ALOAD 1 LDC "pref" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V ALOAD 2 LDC "value" INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V L1 LINENUMBER 21 L1 ALOAD 0 ASTORE 3 L2 ICONST_0 ISTORE 4 L3 ICONST_0 ISTORE 5 L4 ALOAD 3 ASTORE 6 L5 ICONST_0 ISTORE 7 L6 LINENUMBER 22 L6 ALOAD 1 ALOAD 6 ALOAD 2 INVOKEVIRTUAL bench/PrefKey.put (Lbench/Holder;Ljava/lang/String;)V L7 LINENUMBER 23 L7 L8 NOP L9 LINENUMBER 21 L9 L10 ALOAD 3 L11 LINENUMBER 23 L11 ARETURN L12 LOCALVARIABLE $this$apply Lbench/Holder; L5 L8 6 LOCALVARIABLE $i$a$-apply-Test1Kt$putB$1 I L6 L8 7 LOCALVARIABLE $this$putB Lbench/Holder; L0 L12 0 LOCALVARIABLE pref Lbench/PrefKey; L0 L12 1 LOCALVARIABLE value Ljava/lang/String; L0 L12 2 MAXSTACK = 3 MAXLOCALS = 8
逆コンパイル
putA
@NotNull public static final Holder putA(@NotNull Holder $this$putA, @NotNull PrefKey pref, @NotNull String value) { Intrinsics.checkParameterIsNotNull($this$putA, "$this$putA"); Intrinsics.checkParameterIsNotNull(pref, "pref"); Intrinsics.checkParameterIsNotNull(value, "value"); pref.put($this$putA, value); return $this$putA; }
putB
@NotNull public static final Holder putB(@NotNull Holder $this$putB, @NotNull PrefKey pref, @NotNull String value) { Intrinsics.checkParameterIsNotNull($this$putB, "$this$putB"); Intrinsics.checkParameterIsNotNull(pref, "pref"); Intrinsics.checkParameterIsNotNull(value, "value"); boolean var4 = false; boolean var5 = false; int var7 = false; pref.put($this$putB, value); return $this$putB; }
putBの方には無駄なローカル変数と無駄な代入、そしてなぜかNOPが含まれている。これはinline lambdaのオーバーヘッドという奴だろう。
間違ったベンチマーク
https://gist.github.com/tateisu/be6cdaee0a4e28186090fb3834899788 のようなコードで比較してみたら3倍の差がでて驚いたが、これは間違った測定だった。JVMではいくつかの理由で、ベンチマーク用のライブラリを使わないとコードの速度を正しく評価できない。たとえば呼び出し回数が一定以上になったメソッドに対してJITが働くのでウォームアップが必要になる。理由は他にもあるかもしれない。
JMHを使った計測
JMH https://openjdk.java.net/projects/code-tools/jmh/ で計測した結果がこちら。
6:58:09: Executing task 'jmh --stacktrace'... > Task :compileKotlin NO-SOURCE > Task :compileJava NO-SOURCE > Task :processResources NO-SOURCE > Task :classes UP-TO-DATE > Task :compileTestKotlin NO-SOURCE > Task :compileTestJava NO-SOURCE > Task :processTestResources NO-SOURCE > Task :testClasses UP-TO-DATE > Task :compileJmhKotlin UP-TO-DATE > Task :compileJmhJava NO-SOURCE > Task :processJmhResources NO-SOURCE > Task :jmhClasses UP-TO-DATE > Task :jmhRunBytecodeGenerator UP-TO-DATE > Task :jmhCompileGeneratedClasses UP-TO-DATE > Task :jmhJar UP-TO-DATE # Warmup Iteration 1: 197644398.040 ops/s # Warmup Iteration 2: > Task :jmh # JMH version: 1.22 # VM version: JDK 1.8.0_121, Java HotSpot(TM) 64-Bit Server VM, 25.121-b13 # VM invoker: C:\Java\jdk-x64-1.8\jre\bin\java.exe # VM options: <none> # Warmup: 15 iterations, 1 s each # Measurement: 5 iterations, 1 s each # Timeout: 10 min per iteration # Threads: 1 thread, will synchronize iterations # Benchmark mode: Throughput, ops/time # Benchmark: bench.Test1.usePutA # Run progress: 0.00% complete, ETA 00:00:40 # Fork: 1 of 1 198603045.551 ops/s # Warmup Iteration 3: 198549682.052 ops/s # Warmup Iteration 4: 194557723.411 ops/s # Warmup Iteration 5: 198470775.716 ops/s # Warmup Iteration 6: 196711343.247 ops/s # Warmup Iteration 7: 198641168.014 ops/s # Warmup Iteration 8: 199108411.601 ops/s # Warmup Iteration 9: 198914004.326 ops/s # Warmup Iteration 10: 198531682.261 ops/s # Warmup Iteration 11: 198777787.476 ops/s # Warmup Iteration 12: 198825547.011 ops/s # Warmup Iteration 13: 197627450.659 ops/s # Warmup Iteration 14: 195803620.165 ops/s # Warmup Iteration 15: 198606598.039 ops/s Iteration 1: 196452099.108 ops/s Iteration 2: 198454517.763 ops/s Iteration 3: 198752988.922 ops/s Iteration 4: 199905451.165 ops/s Iteration 5: 199674003.652 ops/s > Task :jmh Result "bench.Test1.usePutA": 198647812.122 �}(99.9%) 5274224.588 ops/s [Average] (min, avg, max) = (196452099.108, 198647812.122, 199905451.165), stdev = 
1369699.377 CI (99.9%): [193373587.534, 203922036.710] (assumes normal distribution) # JMH version: 1.22 # VM version: JDK 1.8.0_121, Java HotSpot(TM) 64-Bit Server VM, 25.121-b13 # VM invoker: C:\Java\jdk-x64-1.8\jre\bin\java.exe # VM options: <none> # Warmup: 15 iterations, 1 s each # Measurement: 5 iterations, 1 s each # Timeout: 10 min per iteration # Threads: 1 thread, will synchronize iterations # Benchmark mode: Throughput, ops/time # Benchmark: bench.Test1.usePutB # Run progress: 50.00% complete, ETA 00:00:20 # Fork: 1 of 1 # Warmup Iteration 1: 198942094.378 ops/s # Warmup Iteration 2: 194860962.483 ops/s # Warmup Iteration 3: 199178184.960 ops/s # Warmup Iteration 4: 195436293.151 ops/s # Warmup Iteration 5: 199607202.705 ops/s # Warmup Iteration 6: 197020019.891 ops/s # Warmup Iteration 7: 198460528.526 ops/s # Warmup Iteration 8: 198985887.154 ops/s # Warmup Iteration 9: 199731334.489 ops/s # Warmup Iteration 10: 199309376.472 ops/s # Warmup Iteration 11: 200102008.778 ops/s # Warmup Iteration 12: 199003797.389 ops/s # Warmup Iteration 13: 199735482.462 ops/s # Warmup Iteration 14: 195999469.589 ops/s # Warmup Iteration 15: 199425721.275 ops/s Iteration 1: 198251071.528 ops/s Iteration 2: 198490905.407 ops/s Iteration 3: 199717039.707 ops/s Iteration 4: 199911875.985 ops/s Iteration 5: 199829906.354 ops/s > Task :jmh Result "bench.Test1.usePutB": 199240159.796 �}(99.9%) 3084180.600 ops/s [Average] (min, avg, max) = (198251071.528, 199240159.796, 199911875.985), stdev = 800951.908 CI (99.9%): [196155979.196, 202324340.396] (assumes normal distribution) # Run complete. Total time: 00:00:40 REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on why the numbers are the way they are. 
Use profilers (see -prof, -lprof), design factorial experiments, perform baseline and negative tests that provide experimental control, make sure the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. Do not assume the numbers tell you what you want them to tell. Benchmark Mode Cnt Score Error Units Test1.usePutA thrpt 5 198647812.122 �} 5274224.588 ops/s Test1.usePutB thrpt 5 199240159.796 �} 3084180.600 ops/s Benchmark result is saved to C:\kotlin\BenchmarkInlineFunction\build\reports\jmh\results.txt BUILD SUCCESSFUL in 40s 5 actionable tasks: 1 executed, 4 up-to-date 6:58:50: Task execution finished 'jmh --stacktrace'.
数値上はなぜかputBの方が速い。ただし差は0.3%程度で、JMHが報告している誤差(99.9%信頼区間でusePutAが約±2.7%、usePutBが約±1.5%)の範囲内に十分収まっている。つまり有意な差とは言えず、ほぼ無視して構わない量だ。
なお、出力中でスコアと誤差の間にある文字化けした記号は、本来は"±"らしい。
おまけ。 melix/jmh-gradle-plugin を使ってベンチマークをとる
melix/jmh-gradle-plugin https://github.com/melix/jmh-gradle-plugin を使うとGradle から比較的簡単にJMHを利用できる。
jmh-gradle-plugin 0.5.0 は Gradle 5.5 以上を要求する。IntelliJ IDEA のプロジェクトの gradle/wrapper/gradle-wrapper.properties ファイルを編集して、使用する Gradle のバージョンを指定する。
(略) distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.4-bin.zip (略)
次にbuild.gradleを編集する。 kotlinx.coroutines のベンチマーク https://github.com/Kotlin/kotlinx.coroutines/tree/master/benchmarks が参考になるだろう。
plugins { id 'java' id 'org.jetbrains.kotlin.jvm' version '1.3.41' id "me.champeau.gradle.jmh" version "0.5.0" } group 'jp.juggler.BenchmarkInlineFunction' version '1.0-SNAPSHOT' sourceCompatibility = 1.8 targetCompatibility = 1.8 repositories { mavenCentral() } compileKotlin { kotlinOptions { jvmTarget = "1.8" // https://discuss.kotlinlang.org/t/run-time-null-checks-and-performance/2086/17 freeCompilerArgs = [ '-Xno-param-assertions', '-Xno-call-assertions', '-Xno-receiver-assertions', '-Xjvm-default=enable' ] } } compileTestKotlin { kotlinOptions { jvmTarget = "1.8" // https://discuss.kotlinlang.org/t/run-time-null-checks-and-performance/2086/17 freeCompilerArgs +=[ '-Xno-param-assertions', '-Xno-call-assertions', '-Xno-receiver-assertions' ] } } compileJmhKotlin { kotlinOptions { jvmTarget = "1.8" freeCompilerArgs += [ '-Xjvm-default=enable', '-Xno-param-assertions', '-Xno-call-assertions', '-Xno-receiver-assertions' ] } } jmh { jmhVersion = '1.22' failOnError = true duplicateClassesStrategy DuplicatesStrategy.INCLUDE } dependencies { testCompile group: 'junit', name: 'junit', version: '4.12' implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8" implementation "org.openjdk.jmh:jmh-core:1.22" jmh 'org.apache.commons:commons-lang3:3.6' jmh 'com.google.guava:guava:22.0' }
ソースコードは src/jmh/java 以下に置く。また、パッケージ指定なしだとJMHがエラーを出すので、必ず何かしらのパッケージ名が必要になる。
src/jmh/java/bench/Test1.kt
package bench import org.openjdk.jmh.annotations.* class Holder { @Volatile private var value :String = "" operator fun set(@Suppress("UNUSED_PARAMETER") key:String, value:String){ this.value = value } } class PrefKey(private val key: String) { fun put(holder: Holder, value: String) { holder[key] = value } } fun Holder.putA(pref: PrefKey, value: String): Holder { pref.put(this, value) return this } fun Holder.putB(pref: PrefKey, value: String): Holder = apply { pref.put(this, value) } // Benchmark classes should not be final. // [jmh.bench.Test1] @State(Scope.Thread) @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 15, time = 1) @Measurement(iterations = 5, time = 1) @Fork(1) @Suppress("unused") open class Test1 { private var holder= Holder() private var pref= PrefKey("foo") @Benchmark fun usePutA(){ holder.putA(pref, "zap") } @Benchmark fun usePutB(){ holder.putB(pref, "zap") } }
IntelliJ IDEA のGradleペインから Tasks/jmh/jmh を実行するとベンチマークが行われる。
ただしWindows環境だと build/libs/に生成されるjarファイルをjava.exeプロセスが握ったままなので、再試行する度にIntelliJ IDEA を終了してjarファイルを削除しなければならない。でないと Error reading benchmark list や MANIFEST中にbenchmarkListがないなどのエラーを出す。