Int VS FP performance

Now that we know how FPU instructions differ from ALU instructions, let’s look at the performance implications.

Benchmarks

Let’s cut to the chase and look at the numbers first

Integer VS single-precision float

Addition:
Benchmark                   Mode  Cnt     Score   Error  Units
Int32VsFloat.sumFloat       avgt   10   666.424 ± 1.149  us/op
Int32VsFloat.sumInt         avgt   10   224.425 ± 0.483  us/op

Subtraction:
Benchmark                   Mode  Cnt     Score   Error  Units
Int32VsFloat.subtractFloat  avgt   10   662.229 ± 0.802  us/op
Int32VsFloat.subtractInt    avgt   10   225.117 ± 0.234  us/op

Multiplication:
Benchmark                   Mode  Cnt     Score   Error  Units
Int32VsFloat.multiplyFloat  avgt   10   665.593 ± 1.243  us/op
Int32VsFloat.multiplyInt    avgt   10   667.031 ± 0.795  us/op

Division:
Benchmark                   Mode  Cnt     Score   Error  Units
Int32VsFloat.divideFloat    avgt   10  2313.469 ± 7.297  us/op
Int32VsFloat.divideInt      avgt   10  2003.448 ± 0.842  us/op

Long VS double-precision floats

Addition:
Benchmark                     Mode  Cnt     Score    Error  Units
Int64VsDouble.sumDouble       avgt   10   672.665 ±  0.862  us/op
Int64VsDouble.sumLong         avgt   10   242.405 ±  0.956  us/op

Subtraction:
Benchmark                     Mode  Cnt     Score    Error  Units
Int64VsDouble.subtractDouble  avgt   10   673.708 ±  0.974  us/op
Int64VsDouble.subtractLong    avgt   10   241.814 ±  1.054  us/op

Multiplication:
Benchmark                     Mode  Cnt     Score    Error  Units
Int64VsDouble.multiplyDouble  avgt   10   673.595 ±  7.287  us/op
Int64VsDouble.multiplyLong    avgt   10   674.941 ±  1.456  us/op

Division:
Benchmark                     Mode  Cnt     Score    Error  Units
Int64VsDouble.divideDouble    avgt   10  2972.955 ±  2.767  us/op
Int64VsDouble.divideLong      avgt   10  2007.159 ± 11.749  us/op

Benchmarks made with JMH. Code below.
CPU: AMD Ryzen 9 5900x
OS: Ubuntu 21.10 64-bit
JVM: OpenJDK 17 64-bit
JMH: version 1.34
Score: average time (in microseconds) it takes to apply the operation across 1 million values

Conclusions

As we can see, multiplication has similar performance for integers and floats. Division works slightly faster for integer types compared to fp types. Addition and subtraction operations are almost 3 times slower when performed on floating point numbers. The same trend continues when comparing long with double-precision fp performance.

This result is expected. In case you are surprised by it, you may want to check out the floating point arithmetic article.

Code

To run the same tests on your CPU you can use the following code:

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.VerboseMode;

import java.util.Random;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that compares 32-bit integer arithmetic with single-precision
 * floating point arithmetic.
 *
 * <p>Every benchmark folds one operator over an array of {@code len} random
 * values into a single accumulator and returns that accumulator. All benchmarks
 * report the average time per invocation in microseconds; the mode and the time
 * unit are declared once on the class and apply to every {@code @Benchmark}
 * method in it.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public class Int32VsFloat {

    /**
     * Input data for the benchmarks. Declared with {@link Scope#Thread}, so each
     * benchmark thread works on its own instance.
     */
    @State(Scope.Thread)
    public static class ExecutionPlan {

        /** Number of elements in each input array (and iterations per invocation). */
        int len = 1_000_000;

        /** Operands for the float benchmarks. */
        float[] floats = new float[len];
        /** Operands for the int benchmarks. */
        int[] ints = new int[len];

        /** Unseeded generator: the inputs differ from run to run. */
        Random rnd = new Random();

        /**
         * Fills both arrays with random values once per trial. A generated zero
         * is replaced with one so that no benchmark ever divides by zero.
         */
        @Setup(Level.Trial)
        public void setUp() {
            int idx = 0;
            while (idx < len) {
                // One int draw followed by one float draw per element.
                final int drawnInt = rnd.nextInt();
                final float drawnFloat = rnd.nextFloat();
                ints[idx] = (drawnInt == 0) ? 1 : drawnInt;
                floats[idx] = (drawnFloat == 0) ? 1f : drawnFloat;
                idx++;
            }
        }
    }

    //<editor-fold desc="Division">
    /**
     * Divides {@link Float#MAX_VALUE} successively by every float in the plan.
     *
     * @param plan input data
     * @return the final quotient, returned so that JMH consumes the result
     */
    @Benchmark
    public float divideFloat(ExecutionPlan plan) {
        final float[] divisors = plan.floats;
        final int count = plan.len;
        float quotient = Float.MAX_VALUE;
        for (int idx = 0; idx < count; idx++) {
            quotient /= divisors[idx];
        }
        return quotient;
    }

    /**
     * Divides {@link Integer#MAX_VALUE} successively by every int in the plan.
     *
     * @param plan input data
     * @return the final quotient, returned so that JMH consumes the result
     */
    @Benchmark
    public int divideInt(ExecutionPlan plan) {
        final int[] divisors = plan.ints;
        final int count = plan.len;
        int quotient = Integer.MAX_VALUE;
        for (int idx = 0; idx < count; idx++) {
            quotient /= divisors[idx];
        }
        return quotient;
    }
    //</editor-fold>

    //<editor-fold desc="Multiply">
    /**
     * Multiplies together every float in the plan, starting from one.
     *
     * @param plan input data
     * @return the final product, returned so that JMH consumes the result
     */
    @Benchmark
    public float multiplyFloat(ExecutionPlan plan) {
        final float[] factors = plan.floats;
        final int count = plan.len;
        float product = 1;
        for (int idx = 0; idx < count; idx++) {
            product *= factors[idx];
        }
        return product;
    }

    /**
     * Multiplies together every int in the plan, starting from one.
     *
     * @param plan input data
     * @return the final product, returned so that JMH consumes the result
     */
    @Benchmark
    public int multiplyInt(ExecutionPlan plan) {
        final int[] factors = plan.ints;
        final int count = plan.len;
        int product = 1;
        for (int idx = 0; idx < count; idx++) {
            product *= factors[idx];
        }
        return product;
    }
    //</editor-fold>

    //<editor-fold desc="Subtract">
    /**
     * Subtracts every float in the plan from an accumulator that starts at zero.
     *
     * @param plan input data
     * @return the final difference, returned so that JMH consumes the result
     */
    @Benchmark
    public float subtractFloat(ExecutionPlan plan) {
        final float[] subtrahends = plan.floats;
        final int count = plan.len;
        float difference = 0;
        for (int idx = 0; idx < count; idx++) {
            difference -= subtrahends[idx];
        }
        return difference;
    }

    /**
     * Subtracts every int in the plan from an accumulator that starts at
     * {@link Integer#MAX_VALUE}.
     *
     * @param plan input data
     * @return the final difference, returned so that JMH consumes the result
     */
    @Benchmark
    public int subtractInt(ExecutionPlan plan) {
        final int[] subtrahends = plan.ints;
        final int count = plan.len;
        int difference = Integer.MAX_VALUE;
        for (int idx = 0; idx < count; idx++) {
            difference -= subtrahends[idx];
        }
        return difference;
    }
    //</editor-fold>

    //<editor-fold desc="Sum">
    /**
     * Adds up every float in the plan, starting from zero.
     *
     * @param plan input data
     * @return the final sum, returned so that JMH consumes the result
     */
    @Benchmark
    public float sumFloat(ExecutionPlan plan) {
        final float[] addends = plan.floats;
        final int count = plan.len;
        float total = 0;
        for (int idx = 0; idx < count; idx++) {
            total += addends[idx];
        }
        return total;
    }

    /**
     * Adds up every int in the plan, starting from zero.
     *
     * @param plan input data
     * @return the final sum, returned so that JMH consumes the result
     */
    @Benchmark
    public int sumInt(ExecutionPlan plan) {
        final int[] addends = plan.ints;
        final int count = plan.len;
        int total = 0;
        for (int idx = 0; idx < count; idx++) {
            total += addends[idx];
        }
        return total;
    }
    //</editor-fold>

    /**
     * Runs every benchmark of this class: 1 warmup iteration, 10 measurement
     * iterations, 6 threads, a single fork, extra-verbose output.
     *
     * @param args ignored
     * @throws RunnerException if the JMH runner fails
     */
    public static void main(String[] args) throws RunnerException {
        final Options options = new OptionsBuilder()
                .include(Int32VsFloat.class.getSimpleName())
                .warmupIterations(1)
                .measurementIterations(10)
                .threads(6)
                .forks(1)
                .verbosity(VerboseMode.EXTRA)
                .build();

        final Runner runner = new Runner(options);
        runner.run();
    }

}

01 Apr 2022 - Hasan Al-Ammori