Created
May 12, 2023 12:08
-
-
Save antimon2/9f93c700fdb16ba156e2d603e45700b5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [deps] | |
| BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" | |
| Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | |
| Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | |
| [compat] | |
| julia = "1.9" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "21c03d02-e354-4c9d-bc94-670779ea0d86", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Julia Version 1.9.0\n", | |
| "Commit 8e630552924 (2023-05-07 11:25 UTC)\n", | |
| "Platform Info:\n", | |
| " OS: macOS (arm64-apple-darwin22.4.0)\n", | |
| " CPU: 8 × Apple M1\n", | |
| " WORD_SIZE: 64\n", | |
| " LIBM: libopenlibm\n", | |
| " LLVM: libLLVM-14.0.6 (ORCJIT, apple-m1)\n", | |
| " Threads: 2 on 8 virtual cores\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "versioninfo()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "6bf27501-599e-4c9e-9c14-72ef89a8551e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "]instantiate" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "f532cab6-c874-4a21-bc96-794d296ddadc", | |
| "metadata": {}, | |
| "source": [ | |
| "Inspired by: https://github.com/minoki/test-fma" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "4985b4a7-5261-47cc-b421-00ff38222eb4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5-element Vector{Vector{Float16}}:\n", | |
| " [1.024e3, 1.024e3, -Inf, -Inf]\n", | |
| " [256.0, 256.0, -32.0, 6.55e4]\n", | |
| " [1.125, 1.52, 6.0e-8, 1.71]\n", | |
| " [64.5, 1.514e-5, 2.506, 2.506]\n", | |
| " [51.75, -3.25, 6.14e-6, -168.1]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "const cases_h = [\n", | |
| " Float16[0x1p10, 0x1p10, -Inf16, -Inf16],\n", | |
| " Float16[0x1p8, 0x1p8, -0x1p5, 0x1.ffcp15],\n", | |
| " Float16[0x1.2p0, 0x1.85p0, 0x1p-24, 0x1.b5cp0],\n", | |
| " Float16[0x1.02p6, 0x1.fcp-17, 0x1.40cp1, 0x1.40cp1],\n", | |
| " Float16[0x1.9ep5, -0x1.ap1, 0x1.9cp-18, -0x1.504p7],\n", | |
| "]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "5de944de-a0ad-4554-9939-48b927d9079c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\t\u001b[0m.section\t\u001b[0m__TEXT\u001b[0m,\u001b[0m__text\u001b[0m,\u001b[0mregular\u001b[0m,\u001b[0mpure_instructions\n", | |
| "\t\u001b[0m.build_version \u001b[0mmacos\u001b[0m, \u001b[33m11\u001b[39m\u001b[0m, \u001b[33m0\u001b[39m\n", | |
| "\t\u001b[0m.globl\t\u001b[0m_julia_muladd_1496 \u001b[90m; -- Begin function julia_muladd_1496\u001b[39m\n", | |
| "\t\u001b[0m.p2align\t\u001b[33m2\u001b[39m\n", | |
| "\u001b[91m_julia_muladd_1496:\u001b[39m \u001b[90m; @julia_muladd_1496\u001b[39m\n", | |
| "\u001b[90m; ┌ @ float.jl:413 within `muladd`\u001b[39m\n", | |
| "\t\u001b[0m.cfi_startproc\n", | |
| "\u001b[90m; %bb.0: ; %top\u001b[39m\n", | |
| "\t\u001b[96m\u001b[1mfmadd\u001b[22m\u001b[39m\t\u001b[0mh0\u001b[0m, \u001b[0mh0\u001b[0m, \u001b[0mh1\u001b[0m, \u001b[0mh2\n", | |
| "\t\u001b[96m\u001b[1mret\u001b[22m\u001b[39m\n", | |
| "\t\u001b[0m.cfi_endproc\n", | |
| "\u001b[90m; └\u001b[39m\n", | |
| " \u001b[90m; -- End function\u001b[39m\n", | |
| "\u001b[0m.subsections_via_symbols\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "let (x, y, z, expected)=cases_h[2]\n", | |
| " @code_native muladd(x, y, z)\n", | |
| " @assert muladd(x, y, z) == expected\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "dde93f9a-0d0f-4863-95af-cbdeb7a5d30e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\t\u001b[0m.section\t\u001b[0m__TEXT\u001b[0m,\u001b[0m__text\u001b[0m,\u001b[0mregular\u001b[0m,\u001b[0mpure_instructions\n", | |
| "\t\u001b[0m.build_version \u001b[0mmacos\u001b[0m, \u001b[33m11\u001b[39m\u001b[0m, \u001b[33m0\u001b[39m\n", | |
| "\t\u001b[0m.globl\t\u001b[0m_julia_fma_1529 \u001b[90m; -- Begin function julia_fma_1529\u001b[39m\n", | |
| "\t\u001b[0m.p2align\t\u001b[33m2\u001b[39m\n", | |
| "\u001b[91m_julia_fma_1529:\u001b[39m \u001b[90m; @julia_fma_1529\u001b[39m\n", | |
| "\u001b[90m; ┌ @ floatfuncs.jl:428 within `fma`\u001b[39m\n", | |
| "\t\u001b[0m.cfi_startproc\n", | |
| "\u001b[90m; %bb.0: ; %top\u001b[39m\n", | |
| "\u001b[90m; │ @ floatfuncs.jl:429 within `fma`\u001b[39m\n", | |
| "\u001b[90m; │┌ @ float.jl:260 within `Float32`\u001b[39m\n", | |
| "\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms0\u001b[0m, \u001b[0mh0\n", | |
| "\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms1\u001b[0m, \u001b[0mh1\n", | |
| "\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms2\u001b[0m, \u001b[0mh2\n", | |
| "\u001b[90m; │└\u001b[39m\n", | |
| "\u001b[90m; │┌ @ float.jl:413 within `muladd`\u001b[39m\n", | |
| "\t\u001b[96m\u001b[1mfmadd\u001b[22m\u001b[39m\t\u001b[0ms0\u001b[0m, \u001b[0ms0\u001b[0m, \u001b[0ms1\u001b[0m, \u001b[0ms2\n", | |
| "\u001b[90m; │└\u001b[39m\n", | |
| "\u001b[90m; │┌ @ float.jl:256 within `Float16`\u001b[39m\n", | |
| "\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0mh0\u001b[0m, \u001b[0ms0\n", | |
| "\u001b[90m; │└\u001b[39m\n", | |
| "\t\u001b[96m\u001b[1mret\u001b[22m\u001b[39m\n", | |
| "\t\u001b[0m.cfi_endproc\n", | |
| "\u001b[90m; └\u001b[39m\n", | |
| " \u001b[90m; -- End function\u001b[39m\n", | |
| "\u001b[0m.subsections_via_symbols\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "let (x, y, z, expected)=cases_h[2]\n", | |
| " @code_native fma(x, y, z)\n", | |
| " @assert fma(x, y, z) == expected\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "f3aee9b9-6314-4e2f-b3f0-c2281e7e70f4", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "using Test" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "c607a918-1e10-4f15-8685-b392ebd4e9f3", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n", | |
| "muladd(::Float16, ::Float16, ::Float16) | \u001b[32m 5 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.1s\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Test.DefaultTestSet(\"muladd(::Float16, ::Float16, ::Float16)\", Any[], 5, false, false, true, 1.68381347961932e9, 1.683813479675832e9, false)" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# muladd(::Float16, ::Float16, ::Float16) uses the `fmadd` instruction directly\n", | |
| "@testset \"muladd(::Float16, ::Float16, ::Float16)\" begin\n", | |
| "\n", | |
| "for (x, y, z, expected) in cases_h\n", | |
| " @test muladd(x, y, z) === expected\n", | |
| "end\n", | |
| "\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "8c508736-3e63-451e-b34d-865918516e11", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n", | |
| " Expression: fma(x, y, z) === expected\n", | |
| " Evaluated: Float16(1.709) === Float16(1.71)\n", | |
| "\n", | |
| "Stacktrace:\n", | |
| " [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [4] top-level scope\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n", | |
| "fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n", | |
| " Expression: fma(x, y, z) === expected\n", | |
| " Evaluated: Float16(2.508) === Float16(2.506)\n", | |
| "\n", | |
| "Stacktrace:\n", | |
| " [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [4] top-level scope\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n", | |
| "fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n", | |
| " Expression: fma(x, y, z) === expected\n", | |
| " Evaluated: Float16(-168.2) === Float16(-168.1)\n", | |
| "\n", | |
| "Stacktrace:\n", | |
| " [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n", | |
| " [4] top-level scope\n", | |
| "\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n", | |
| "\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[91m\u001b[1mFail \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n", | |
| "fma(::Float16, ::Float16, ::Float16) | \u001b[32m 2 \u001b[39m\u001b[91m 3 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.7s\n" | |
| ] | |
| }, | |
| { | |
| "ename": "LoadError", | |
| "evalue": "\u001b[91mSome tests did not pass: 2 passed, 3 failed, 0 errored, 0 broken.\u001b[39m", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[91mSome tests did not pass: 2 passed, 3 failed, 0 errored, 0 broken.\u001b[39m", | |
| "", | |
| "Stacktrace:", | |
| " [1] finish(ts::Test.DefaultTestSet)", | |
| " @ Test ~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/Test.jl:1151", | |
| " [2] macro expansion", | |
| " @ ~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/Test.jl:1514 [inlined]", | |
| " [3] top-level scope", | |
| " @ ./In[8]:4" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# fma(::Float16, ::Float16, ::Float16) is equal to Float16(muladd(Float32(a), Float32(b), Float32(c))), which is BUGGY (the result is rounded twice)\n", | |
| "@testset \"fma(::Float16, ::Float16, ::Float16)\" begin\n", | |
| "\n", | |
| "for (x, y, z, expected) in cases_h\n", | |
| " @test fma(x, y, z) === expected\n", | |
| "end\n", | |
| "\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "ae109254-750a-45ce-a83b-73b3b9d090b2", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "myfma (generic function with 2 methods)" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "myfma(x::T, y::T, z::T) where {T <: Number} = fma(x, y, z)\n", | |
| "\n", | |
| "_shifttiesup(x::U, n::Unsigned) where {U <: Unsigned} = (x + (one(U) << ~-n)) >> n\n", | |
| "\n", | |
| "function myfma(x::Float16, y::Float16, z::Float16)\n", | |
| " uir = reinterpret(UInt64, muladd(Float64(x), Float64(y), Float64(z)))\n", | |
| " ui16sgn = ((uir & Base.sign_mask(Float64)) >> 0x30) % UInt16\n", | |
| " expx = uir & Base.exponent_mask(Float64)\n", | |
| " if expx == Base.exponent_mask(Float64)\n", | |
| " # Inf or NaN -> convert\n", | |
| " ui16exp = Base.exponent_mask(Float16)\n", | |
| " ui16mnt = ((uir & Base.significand_mask(Float64)) >> 0x2a) % UInt16\n", | |
| " return reinterpret(Float16, ui16sgn | ui16exp | ui16mnt)\n", | |
| " end\n", | |
| " expv = Int(expx >> 0x34) - Base.exponent_bias(Float64)\n", | |
| " expv16 = Base.exponent_bias(Float16)\n", | |
| " # v- overflow -> convert to Inf16\n", | |
| " expv > expv16 && return reinterpret(Float16, ui16sgn | Base.exponent_mask(Float16))\n", | |
| " # v- underflow -> convert to subnormal\n", | |
| " if expv <= -expv16\n", | |
| " mnt_16 = _shifttiesup(uir - UInt64(0x3fe + expv) << 0x34, unsigned(0x1c - expv)) % UInt16\n", | |
| " return reinterpret(Float16, ui16sgn | mnt_16)\n", | |
| " end\n", | |
| " # v- normal\n", | |
| " exp_16 = UInt16(expv16 + expv) << 0xa\n", | |
| " mnt_16 = _shifttiesup(uir & Base.significand_mask(Float64), 0x2a) % UInt16\n", | |
| " return reinterpret(Float16, ui16sgn | exp_16 | mnt_16)\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "e40f847f-84f5-42fc-aa22-b9ad1e86de19", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n", | |
| "myfma(::Float16, ::Float16, ::Float16) | \u001b[32m 5 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.0s\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Test.DefaultTestSet(\"myfma(::Float16, ::Float16, ::Float16)\", Any[], 5, false, false, true, 1.68381348119073e9, 1.683813481190744e9, false)" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# myfma(::Float16, ::Float16, ::Float16) is a correct implementation!\n", | |
| "@testset \"myfma(::Float16, ::Float16, ::Float16)\" begin\n", | |
| "\n", | |
| "for (x, y, z, expected) in cases_h\n", | |
| " @test myfma(x, y, z) === expected\n", | |
| "end\n", | |
| "\n", | |
| "end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "bde50940-854a-4011-bd29-099750b02bde", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "using BenchmarkTools, Random" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "0ea8a5eb-7258-444c-9461-68cd80345dd5", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n", | |
| " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m1.459 ns\u001b[22m\u001b[39m … \u001b[35m18.084 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m1.583 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m1.577 ns\u001b[22m\u001b[39m ± \u001b[32m 0.171 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n", | |
| "\n", | |
| " \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▆\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\n", | |
| " \u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▇\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m \u001b[39m█\n", | |
| " 1.46 ns\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 1.62 ns \u001b[0m\u001b[1m<\u001b[22m\n", | |
| "\n", | |
| " Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m." | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Random.seed!(1234)\n", | |
| "@benchmark muladd(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "id": "6844ebf9-ee81-4f91-84ab-08751a34dd1c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n", | |
| " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m1.500 ns\u001b[22m\u001b[39m … \u001b[35m6.417 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m1.583 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m1.577 ns\u001b[22m\u001b[39m ± \u001b[32m0.054 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n", | |
| "\n", | |
| " \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", | |
| " \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▆\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▂\n", | |
| " 1.5 ns\u001b[90m Histogram: frequency by time\u001b[39m 1.67 ns \u001b[0m\u001b[1m<\u001b[22m\n", | |
| "\n", | |
| " Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m." | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Random.seed!(1234)\n", | |
| "@benchmark fma(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "40385e9a-8e74-4aaf-8230-7076bb2968db", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n", | |
| " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.083 ns\u001b[22m\u001b[39m … \u001b[35m7.000 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.208 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", | |
| " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.201 ns\u001b[22m\u001b[39m ± \u001b[32m0.076 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n", | |
| "\n", | |
| " \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", | |
| " \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▅\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▂\n", | |
| " 2.08 ns\u001b[90m Histogram: frequency by time\u001b[39m 2.29 ns \u001b[0m\u001b[1m<\u001b[22m\n", | |
| "\n", | |
| " Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m." | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Random.seed!(1234)\n", | |
| "@benchmark myfma(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "c0395f42-2d99-494a-915d-1de2639d690f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Julia 1.9.0", | |
| "language": "julia", | |
| "name": "julia-1.9" | |
| }, | |
| "language_info": { | |
| "file_extension": ".jl", | |
| "mimetype": "application/julia", | |
| "name": "julia", | |
| "version": "1.9.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment