Kotlin の apply{…} の速度

次のような単純なコードがあったとする。

/**
 * Minimal stand-in for a map-like container, something like HashMap<String,String>.
 *
 * It keeps only the most recently written value in a single volatile field;
 * the key passed to [set] is ignored.
 */
class Holder {
    @Volatile
    private var stored: String = ""

    /** Enables `holder[key] = value`; [key] is accepted but unused, [value] replaces the stored string. */
    operator fun set(@Suppress("UNUSED_PARAMETER") key: String, value: String) {
        stored = value
    }
}

/** A preference key that knows how to write a value for itself into a [Holder]. */
class PrefKey(private val key: String) {
    /** Writes [value] into [holder] under this object's key. */
    fun put(holder: Holder, value: String) = holder.set(key, value)
}

/**
 * Stores [value] through [pref] into this holder and returns the same holder.
 *
 * Written as a plain statement body with an explicit `return this`,
 * i.e. without any scope function.
 */
fun Holder.putA(pref: PrefKey, value: String): Holder {
    pref.put(this, value)
    return this
}

/**
 * Stores [value] through [pref] into this holder and returns the same holder.
 *
 * Has the same effect as [putA], but is expressed with the `apply { ... }` scope function.
 */
fun Holder.putB(pref: PrefKey, value: String): Holder =
    apply {
        // Inside apply, `this` is the receiver Holder.
        pref.put(this, value)
    }

さてputAとputB、速いのはどちらだろうか?

バイトコード

IntelliJ IDEAで Search Everywhere から「Show Kotlin Bytecode」を開いてバイトコードを確認すると次のようになる。

putA

// access flags 0x19
public final static putA(Lbench/Holder;Lbench/PrefKey;Ljava/lang/String;)Lbench/Holder;
    @Lorg/jetbrains/annotations/NotNull;() // invisible
    // annotable parameter count: 3 (visible)
    // annotable parameter count: 3 (invisible)
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 0
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 1
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 2
L0
    ALOAD 0
    LDC "$this$putA"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
    ALOAD 1
    LDC "pref"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
    ALOAD 2
    LDC "value"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
L1
    LINENUMBER 16 L1
    ALOAD 1
    ALOAD 0
    ALOAD 2
    INVOKEVIRTUAL bench/PrefKey.put (Lbench/Holder;Ljava/lang/String;)V
L2
    LINENUMBER 17 L2
    ALOAD 0
    ARETURN
L3
    LOCALVARIABLE $this$putA Lbench/Holder; L0 L3 0
    LOCALVARIABLE pref Lbench/PrefKey; L0 L3 1
    LOCALVARIABLE value Ljava/lang/String; L0 L3 2
    MAXSTACK = 3
    MAXLOCALS = 3
putB
// access flags 0x19
public final static putB(Lbench/Holder;Lbench/PrefKey;Ljava/lang/String;)Lbench/Holder;
    @Lorg/jetbrains/annotations/NotNull;() // invisible
    // annotable parameter count: 3 (visible)
    // annotable parameter count: 3 (invisible)
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 0
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 1
    @Lorg/jetbrains/annotations/NotNull;() // invisible, parameter 2
L0
    ALOAD 0
    LDC "$this$putB"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
    ALOAD 1
    LDC "pref"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
    ALOAD 2
    LDC "value"
    INVOKESTATIC kotlin/jvm/internal/Intrinsics.checkParameterIsNotNull (Ljava/lang/Object;Ljava/lang/String;)V
L1
    LINENUMBER 21 L1
    ALOAD 0
    ASTORE 3
L2
    ICONST_0
    ISTORE 4
L3
    ICONST_0
    ISTORE 5
L4
    ALOAD 3
    ASTORE 6
L5
    ICONST_0
    ISTORE 7
L6
    LINENUMBER 22 L6
    ALOAD 1
    ALOAD 6
    ALOAD 2
    INVOKEVIRTUAL bench/PrefKey.put (Lbench/Holder;Ljava/lang/String;)V
L7
    LINENUMBER 23 L7
L8
    NOP
L9
    LINENUMBER 21 L9
L10
    ALOAD 3
L11
    LINENUMBER 23 L11
    ARETURN
L12
    LOCALVARIABLE $this$apply Lbench/Holder; L5 L8 6
    LOCALVARIABLE $i$a$-apply-Test1Kt$putB$1 I L6 L8 7
    LOCALVARIABLE $this$putB Lbench/Holder; L0 L12 0
    LOCALVARIABLE pref Lbench/PrefKey; L0 L12 1
    LOCALVARIABLE value Ljava/lang/String; L0 L12 2
    MAXSTACK = 3
    MAXLOCALS = 8

デコンパイル

putA
@NotNull
public static final Holder putA(@NotNull Holder $this$putA, @NotNull PrefKey pref, @NotNull String value) {
      Intrinsics.checkParameterIsNotNull($this$putA, "$this$putA");
      Intrinsics.checkParameterIsNotNull(pref, "pref");
      Intrinsics.checkParameterIsNotNull(value, "value");
      pref.put($this$putA, value);
      return $this$putA;
}
putB
@NotNull
public static final Holder putB(@NotNull Holder $this$putB, @NotNull PrefKey pref, @NotNull String value) {
      Intrinsics.checkParameterIsNotNull($this$putB, "$this$putB");
      Intrinsics.checkParameterIsNotNull(pref, "pref");
      Intrinsics.checkParameterIsNotNull(value, "value");
      boolean var4 = false;
      boolean var5 = false;
      int var7 = false;
      pref.put($this$putB, value);
      return $this$putB;
}

putBの方が無駄なローカル変数と無駄な代入、そしてなぜかNOPが含まれている。これはinline lambdaのオーバーヘッドという奴だろう。

間違ったベンチマーク

https://gist.github.com/tateisu/be6cdaee0a4e28186090fb3834899788 のようなコードで比較してみたら3倍の差がでて驚いたが、これは間違った測定だった。JVMではいくつかの理由で、ベンチマーク用のライブラリを使わないとコードの速度を正しく評価できない。たとえば呼び出し回数が一定以上になったメソッドに対してJITが働くのでウォームアップが必要になる。ほかにも、結果が使われない処理がJITの最適化で丸ごと削除される(デッドコード削除)といった落とし穴がある。

JMHを使った計測

JMH https://openjdk.java.net/projects/code-tools/jmh/ で計測した結果がこちら。

6:58:09: Executing task 'jmh --stacktrace'...

> Task :compileKotlin NO-SOURCE
> Task :compileJava NO-SOURCE
> Task :processResources NO-SOURCE
> Task :classes UP-TO-DATE
> Task :compileTestKotlin NO-SOURCE
> Task :compileTestJava NO-SOURCE
> Task :processTestResources NO-SOURCE
> Task :testClasses UP-TO-DATE
> Task :compileJmhKotlin UP-TO-DATE
> Task :compileJmhJava NO-SOURCE
> Task :processJmhResources NO-SOURCE
> Task :jmhClasses UP-TO-DATE
> Task :jmhRunBytecodeGenerator UP-TO-DATE
> Task :jmhCompileGeneratedClasses UP-TO-DATE
> Task :jmhJar UP-TO-DATE
# Warmup Iteration   1: 197644398.040 ops/s
# Warmup Iteration   2: 
> Task :jmh
# JMH version: 1.22
# VM version: JDK 1.8.0_121, Java HotSpot(TM) 64-Bit Server VM, 25.121-b13
# VM invoker: C:\Java\jdk-x64-1.8\jre\bin\java.exe
# VM options: <none>
# Warmup: 15 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Throughput, ops/time
# Benchmark: bench.Test1.usePutA

# Run progress: 0.00% complete, ETA 00:00:40
# Fork: 1 of 1

198603045.551 ops/s
# Warmup Iteration   3: 198549682.052 ops/s
# Warmup Iteration   4: 194557723.411 ops/s
# Warmup Iteration   5: 198470775.716 ops/s
# Warmup Iteration   6: 196711343.247 ops/s
# Warmup Iteration   7: 198641168.014 ops/s
# Warmup Iteration   8: 199108411.601 ops/s
# Warmup Iteration   9: 198914004.326 ops/s
# Warmup Iteration  10: 198531682.261 ops/s
# Warmup Iteration  11: 198777787.476 ops/s
# Warmup Iteration  12: 198825547.011 ops/s
# Warmup Iteration  13: 197627450.659 ops/s
# Warmup Iteration  14: 195803620.165 ops/s
# Warmup Iteration  15: 198606598.039 ops/s
Iteration   1: 196452099.108 ops/s
Iteration   2: 198454517.763 ops/s
Iteration   3: 198752988.922 ops/s
Iteration   4: 199905451.165 ops/s
Iteration   5: 199674003.652 ops/s

> Task :jmh


Result "bench.Test1.usePutA":
  198647812.122 �}(99.9%) 5274224.588 ops/s [Average]
  (min, avg, max) = (196452099.108, 198647812.122, 199905451.165), stdev = 1369699.377
  CI (99.9%): [193373587.534, 203922036.710] (assumes normal distribution)


# JMH version: 1.22
# VM version: JDK 1.8.0_121, Java HotSpot(TM) 64-Bit Server VM, 25.121-b13
# VM invoker: C:\Java\jdk-x64-1.8\jre\bin\java.exe
# VM options: <none>
# Warmup: 15 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Throughput, ops/time
# Benchmark: bench.Test1.usePutB

# Run progress: 50.00% complete, ETA 00:00:20
# Fork: 1 of 1

# Warmup Iteration   1: 198942094.378 ops/s
# Warmup Iteration   2: 194860962.483 ops/s
# Warmup Iteration   3: 199178184.960 ops/s
# Warmup Iteration   4: 195436293.151 ops/s
# Warmup Iteration   5: 199607202.705 ops/s
# Warmup Iteration   6: 197020019.891 ops/s
# Warmup Iteration   7: 198460528.526 ops/s
# Warmup Iteration   8: 198985887.154 ops/s
# Warmup Iteration   9: 199731334.489 ops/s
# Warmup Iteration  10: 199309376.472 ops/s
# Warmup Iteration  11: 200102008.778 ops/s
# Warmup Iteration  12: 199003797.389 ops/s
# Warmup Iteration  13: 199735482.462 ops/s
# Warmup Iteration  14: 195999469.589 ops/s
# Warmup Iteration  15: 199425721.275 ops/s
Iteration   1: 198251071.528 ops/s
Iteration   2: 198490905.407 ops/s
Iteration   3: 199717039.707 ops/s
Iteration   4: 199911875.985 ops/s
Iteration   5: 199829906.354 ops/s

> Task :jmh


Result "bench.Test1.usePutB":
  199240159.796 �}(99.9%) 3084180.600 ops/s [Average]
  (min, avg, max) = (198251071.528, 199240159.796, 199911875.985), stdev = 800951.908
  CI (99.9%): [196155979.196, 202324340.396] (assumes normal distribution)


# Run complete. Total time: 00:00:40

REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on
why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial
experiments, perform baseline and negative tests that provide experimental control, make sure
the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts.
Do not assume the numbers tell you what you want them to tell.

Benchmark       Mode  Cnt          Score         Error  Units
Test1.usePutA  thrpt    5  198647812.122 �} 5274224.588  ops/s
Test1.usePutB  thrpt    5  199240159.796 �} 3084180.600  ops/s

Benchmark result is saved to C:\kotlin\BenchmarkInlineFunction\build\reports\jmh\results.txt

BUILD SUCCESSFUL in 40s
5 actionable tasks: 1 executed, 4 up-to-date
6:58:50: Task execution finished 'jmh --stacktrace'.

数値上はputBの方がわずかに速い。ただし差は0.3%程度で、どちらの測定誤差(±約1.5〜2.7%)よりも小さく、有意な差とは言えない。

なお文字化けしてるのは"±"らしい。

おまけ。 melix/jmh-gradle-plugin を使ってベンチマークをとる

melix/jmh-gradle-plugin https://github.com/melix/jmh-gradle-plugin を使うとGradle から比較的簡単にJMHを利用できる。

jmh-gradle-plugin 0.5.0 は Gradle 5.5 以上を要求する。IntelliJ IDEA のプロジェクトの gradle/wrapper/gradle-wrapper.properties ファイルを編集して、それ以上のバージョンを指定する。

(略)
distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.4-bin.zip
(略)

次にbuild.gradleを編集する。 kotlinx.coroutines のベンチマーク https://github.com/Kotlin/kotlinx.coroutines/tree/master/benchmarks が参考になるだろう。

// Build script for the JMH benchmark project (Kotlin/JVM targeting Java 1.8).
plugins {
    id 'java'
    id 'org.jetbrains.kotlin.jvm' version '1.3.41'
    // Provides the `jmh` source set (src/jmh), the `jmh` dependency configuration and the `jmh` task.
    id "me.champeau.gradle.jmh" version "0.5.0"
}

group 'jp.juggler.BenchmarkInlineFunction'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8
targetCompatibility = 1.8

repositories {
    mavenCentral()
}

// The three Kotlin compile tasks (main, test, jmh) share the same assertion-related flags.
compileKotlin {
    kotlinOptions {
        jvmTarget = "1.8"

        // Turn off the runtime null-check assertions the Kotlin compiler generates.
        // https://discuss.kotlinlang.org/t/run-time-null-checks-and-performance/2086/17
        freeCompilerArgs += [
                '-Xno-param-assertions',
                '-Xno-call-assertions',
                '-Xno-receiver-assertions',
                '-Xjvm-default=enable'
        ]
    }
}
compileTestKotlin {
    kotlinOptions {
        jvmTarget = "1.8"

        // Turn off the runtime null-check assertions the Kotlin compiler generates.
        // https://discuss.kotlinlang.org/t/run-time-null-checks-and-performance/2086/17
        freeCompilerArgs += [
                '-Xno-param-assertions',
                '-Xno-call-assertions',
                '-Xno-receiver-assertions'
        ]
    }
}

// Compiles the benchmark sources under src/jmh.
compileJmhKotlin {
    kotlinOptions {
        jvmTarget = "1.8"
        freeCompilerArgs += [
                '-Xjvm-default=enable',
                '-Xno-param-assertions',
                '-Xno-call-assertions',
                '-Xno-receiver-assertions'
        ]
    }
}

jmh {
    jmhVersion = '1.22'
    // Fail the build if a benchmark run reports an error.
    failOnError = true

    // Strategy used when duplicate classes are met while assembling the JMH jar.
    duplicateClassesStrategy DuplicatesStrategy.INCLUDE
}

dependencies {
    // `testCompile` is deprecated; `testImplementation` is its replacement.
    testImplementation group: 'junit', name: 'junit', version: '4.12'
    implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8"
    implementation "org.openjdk.jmh:jmh-core:1.22"

    jmh 'org.apache.commons:commons-lang3:3.6'
    jmh 'com.google.guava:guava:22.0'
}

ソースコードは src/jmh/java 以下に置く。また、パッケージ指定なしだとJMHがエラーを出すので、必ず何かしらのパッケージ名が必要になる。

src/jmh/java/bench/Test1.kt

package bench

import org.openjdk.jmh.annotations.*

/**
 * Minimal map-like container used as the benchmark target.
 *
 * It keeps only the most recently written value in a single volatile field;
 * the key passed to [set] is ignored.
 */
class Holder {
    @Volatile
    private var stored: String = ""

    /** Enables `holder[key] = value`; [key] is accepted but unused, [value] replaces the stored string. */
    operator fun set(@Suppress("UNUSED_PARAMETER") key: String, value: String) {
        stored = value
    }
}

/** A preference key that knows how to write a value for itself into a [Holder]. */
class PrefKey(private val key: String) {
    /** Writes [value] into [holder] under this object's key. */
    fun put(holder: Holder, value: String) = holder.set(key, value)
}

/**
 * Stores [value] through [pref] into this holder and returns the same holder.
 *
 * Written as a plain statement body with an explicit `return this`,
 * i.e. without any scope function.
 */
fun Holder.putA(pref: PrefKey, value: String): Holder {
    pref.put(this, value)
    return this
}

/**
 * Stores [value] through [pref] into this holder and returns the same holder.
 *
 * Has the same effect as [putA], but is expressed with the `apply { ... }` scope function.
 */
fun Holder.putB(pref: PrefKey, value: String): Holder =
    apply {
        // Inside apply, `this` is the receiver Holder.
        pref.put(this, value)
    }

// JMH reports "Benchmark classes should not be final." for [jmh.bench.Test1]
// unless the class is declared `open`, because Kotlin classes are final by default.

/**
 * JMH benchmark comparing [putA] (explicit `return this`) with [putB] (`apply { ... }`).
 *
 * Configured through the annotations below: throughput mode, 15 warmup iterations and
 * 5 measurement iterations of 1 second each, run in a single fork.
 */
@State(Scope.Thread)
@BenchmarkMode(Mode.Throughput)
@Warmup(iterations = 15, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(1)
@Suppress("unused")
open class Test1 {

    // Benchmark state shared by both benchmark methods.
    // NOTE(review): these are `var` although never reassigned — presumably kept non-final
    // on purpose so the JIT cannot treat them as constants; confirm before changing to `val`.
    private var holder= Holder()
    private var pref= PrefKey("foo")

    /** Measures one call to [putA]; the returned holder is discarded. */
    @Benchmark
    fun usePutA(){
        holder.putA(pref, "zap")
    }

    /** Measures one call to [putB]; the returned holder is discarded. */
    @Benchmark
    fun usePutB(){
        holder.putB(pref, "zap")
    }
}

IntelliJ IDEA のGradleペインから Tasks/jmh/jmh を実行するとベンチマークが行われる。

ただしWindows環境だと build/libs/に生成されるjarファイルをjava.exeプロセスが握ったままなので、再試行する度にIntelliJ IDEA を終了してjarファイルを削除しなければならない。でないと Error reading benchmark list や MANIFEST中にbenchmarkListがないなどのエラーを出す。