Skip to content

Instantly share code, notes, and snippets.

@antimon2
Created May 12, 2023 12:08
Show Gist options
  • Select an option

  • Save antimon2/9f93c700fdb16ba156e2d603e45700b5 to your computer and use it in GitHub Desktop.

Select an option

Save antimon2/9f93c700fdb16ba156e2d603e45700b5 to your computer and use it in GitHub Desktop.
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[compat]
julia = "1.9"
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "21c03d02-e354-4c9d-bc94-670779ea0d86",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Julia Version 1.9.0\n",
"Commit 8e630552924 (2023-05-07 11:25 UTC)\n",
"Platform Info:\n",
" OS: macOS (arm64-apple-darwin22.4.0)\n",
" CPU: 8 × Apple M1\n",
" WORD_SIZE: 64\n",
" LIBM: libopenlibm\n",
" LLVM: libLLVM-14.0.6 (ORCJIT, apple-m1)\n",
" Threads: 2 on 8 virtual cores\n"
]
}
],
"source": [
"versioninfo()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6bf27501-599e-4c9e-9c14-72ef89a8551e",
"metadata": {},
"outputs": [],
"source": [
"]instantiate"
]
},
{
"cell_type": "markdown",
"id": "f532cab6-c874-4a21-bc96-794d296ddadc",
"metadata": {},
"source": [
"Inspired by: https://github.com/minoki/test-fma"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4985b4a7-5261-47cc-b421-00ff38222eb4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5-element Vector{Vector{Float16}}:\n",
" [1.024e3, 1.024e3, -Inf, -Inf]\n",
" [256.0, 256.0, -32.0, 6.55e4]\n",
" [1.125, 1.52, 6.0e-8, 1.71]\n",
" [64.5, 1.514e-5, 2.506, 2.506]\n",
" [51.75, -3.25, 6.14e-6, -168.1]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"const cases_h = [\n",
" Float16[0x1p10, 0x1p10, -Inf16, -Inf16],\n",
" Float16[0x1p8, 0x1p8, -0x1p5, 0x1.ffcp15],\n",
" Float16[0x1.2p0, 0x1.85p0, 0x1p-24, 0x1.b5cp0],\n",
" Float16[0x1.02p6, 0x1.fcp-17, 0x1.40cp1, 0x1.40cp1],\n",
" Float16[0x1.9ep5, -0x1.ap1, 0x1.9cp-18, -0x1.504p7],\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5de944de-a0ad-4554-9939-48b927d9079c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t\u001b[0m.section\t\u001b[0m__TEXT\u001b[0m,\u001b[0m__text\u001b[0m,\u001b[0mregular\u001b[0m,\u001b[0mpure_instructions\n",
"\t\u001b[0m.build_version \u001b[0mmacos\u001b[0m, \u001b[33m11\u001b[39m\u001b[0m, \u001b[33m0\u001b[39m\n",
"\t\u001b[0m.globl\t\u001b[0m_julia_muladd_1496 \u001b[90m; -- Begin function julia_muladd_1496\u001b[39m\n",
"\t\u001b[0m.p2align\t\u001b[33m2\u001b[39m\n",
"\u001b[91m_julia_muladd_1496:\u001b[39m \u001b[90m; @julia_muladd_1496\u001b[39m\n",
"\u001b[90m; ┌ @ float.jl:413 within `muladd`\u001b[39m\n",
"\t\u001b[0m.cfi_startproc\n",
"\u001b[90m; %bb.0: ; %top\u001b[39m\n",
"\t\u001b[96m\u001b[1mfmadd\u001b[22m\u001b[39m\t\u001b[0mh0\u001b[0m, \u001b[0mh0\u001b[0m, \u001b[0mh1\u001b[0m, \u001b[0mh2\n",
"\t\u001b[96m\u001b[1mret\u001b[22m\u001b[39m\n",
"\t\u001b[0m.cfi_endproc\n",
"\u001b[90m; └\u001b[39m\n",
" \u001b[90m; -- End function\u001b[39m\n",
"\u001b[0m.subsections_via_symbols\n"
]
}
],
"source": [
"let (x, y, z, expected)=cases_h[2]\n",
" @code_native muladd(x, y, z)\n",
" @assert muladd(x, y, z) == expected\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dde93f9a-0d0f-4863-95af-cbdeb7a5d30e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t\u001b[0m.section\t\u001b[0m__TEXT\u001b[0m,\u001b[0m__text\u001b[0m,\u001b[0mregular\u001b[0m,\u001b[0mpure_instructions\n",
"\t\u001b[0m.build_version \u001b[0mmacos\u001b[0m, \u001b[33m11\u001b[39m\u001b[0m, \u001b[33m0\u001b[39m\n",
"\t\u001b[0m.globl\t\u001b[0m_julia_fma_1529 \u001b[90m; -- Begin function julia_fma_1529\u001b[39m\n",
"\t\u001b[0m.p2align\t\u001b[33m2\u001b[39m\n",
"\u001b[91m_julia_fma_1529:\u001b[39m \u001b[90m; @julia_fma_1529\u001b[39m\n",
"\u001b[90m; ┌ @ floatfuncs.jl:428 within `fma`\u001b[39m\n",
"\t\u001b[0m.cfi_startproc\n",
"\u001b[90m; %bb.0: ; %top\u001b[39m\n",
"\u001b[90m; │ @ floatfuncs.jl:429 within `fma`\u001b[39m\n",
"\u001b[90m; │┌ @ float.jl:260 within `Float32`\u001b[39m\n",
"\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms0\u001b[0m, \u001b[0mh0\n",
"\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms1\u001b[0m, \u001b[0mh1\n",
"\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0ms2\u001b[0m, \u001b[0mh2\n",
"\u001b[90m; │└\u001b[39m\n",
"\u001b[90m; │┌ @ float.jl:413 within `muladd`\u001b[39m\n",
"\t\u001b[96m\u001b[1mfmadd\u001b[22m\u001b[39m\t\u001b[0ms0\u001b[0m, \u001b[0ms0\u001b[0m, \u001b[0ms1\u001b[0m, \u001b[0ms2\n",
"\u001b[90m; │└\u001b[39m\n",
"\u001b[90m; │┌ @ float.jl:256 within `Float16`\u001b[39m\n",
"\t\u001b[96m\u001b[1mfcvt\u001b[22m\u001b[39m\t\u001b[0mh0\u001b[0m, \u001b[0ms0\n",
"\u001b[90m; │└\u001b[39m\n",
"\t\u001b[96m\u001b[1mret\u001b[22m\u001b[39m\n",
"\t\u001b[0m.cfi_endproc\n",
"\u001b[90m; └\u001b[39m\n",
" \u001b[90m; -- End function\u001b[39m\n",
"\u001b[0m.subsections_via_symbols\n"
]
}
],
"source": [
"let (x, y, z, expected)=cases_h[2]\n",
" @code_native fma(x, y, z)\n",
" @assert fma(x, y, z) == expected\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f3aee9b9-6314-4e2f-b3f0-c2281e7e70f4",
"metadata": {},
"outputs": [],
"source": [
"using Test"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c607a918-1e10-4f15-8685-b392ebd4e9f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n",
"muladd(::Float16, ::Float16, ::Float16) | \u001b[32m 5 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.1s\n"
]
},
{
"data": {
"text/plain": [
"Test.DefaultTestSet(\"muladd(::Float16, ::Float16, ::Float16)\", Any[], 5, false, false, true, 1.68381347961932e9, 1.683813479675832e9, false)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# muladd(::Float16, ::Float16, ::Float16) uses the `fmadd` instruction directly\n",
"@testset \"muladd(::Float16, ::Float16, ::Float16)\" begin\n",
"\n",
"for (x, y, z, expected) in cases_h\n",
" @test muladd(x, y, z) === expected\n",
"end\n",
"\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8c508736-3e63-451e-b34d-865918516e11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n",
" Expression: fma(x, y, z) === expected\n",
" Evaluated: Float16(1.709) === Float16(1.71)\n",
"\n",
"Stacktrace:\n",
" [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [4] top-level scope\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n",
"fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n",
" Expression: fma(x, y, z) === expected\n",
" Evaluated: Float16(2.508) === Float16(2.506)\n",
"\n",
"Stacktrace:\n",
" [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [4] top-level scope\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n",
"fma(::Float16, ::Float16, ::Float16): \u001b[91m\u001b[1mTest Failed\u001b[22m\u001b[39m at \u001b[39m\u001b[1mIn[8]:5\u001b[22m\n",
" Expression: fma(x, y, z) === expected\n",
" Evaluated: Float16(-168.2) === Float16(-168.1)\n",
"\n",
"Stacktrace:\n",
" [1] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:478\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [2] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:5\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [3] \u001b[0m\u001b[1mmacro expansion\u001b[22m\n",
"\u001b[90m @\u001b[39m \u001b[90m~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/\u001b[39m\u001b[90m\u001b[4mTest.jl:1498\u001b[24m\u001b[39m\u001b[90m [inlined]\u001b[39m\n",
" [4] top-level scope\n",
"\u001b[90m @\u001b[39m \u001b[90m./\u001b[39m\u001b[90m\u001b[4mIn[8]:4\u001b[24m\u001b[39m\n",
"\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[91m\u001b[1mFail \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n",
"fma(::Float16, ::Float16, ::Float16) | \u001b[32m 2 \u001b[39m\u001b[91m 3 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.7s\n"
]
},
{
"ename": "LoadError",
"evalue": "\u001b[91mSome tests did not pass: 2 passed, 3 failed, 0 errored, 0 broken.\u001b[39m",
"output_type": "error",
"traceback": [
"\u001b[91mSome tests did not pass: 2 passed, 3 failed, 0 errored, 0 broken.\u001b[39m",
"",
"Stacktrace:",
" [1] finish(ts::Test.DefaultTestSet)",
" @ Test ~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/Test.jl:1151",
" [2] macro expansion",
" @ ~/.julia/juliaup/julia-1.9.0+0.aarch64.apple.darwin14/share/julia/stdlib/v1.9/Test/src/Test.jl:1514 [inlined]",
" [3] top-level scope",
" @ ./In[8]:4"
]
}
],
"source": [
"# fma(::Float16, ::Float16, ::Float16) is equivalent to Float16(muladd(Float32(a), Float32(b), Float32(c))), which is BUGGY (the result gets rounded twice)\n",
"@testset \"fma(::Float16, ::Float16, ::Float16)\" begin\n",
"\n",
"for (x, y, z, expected) in cases_h\n",
" @test fma(x, y, z) === expected\n",
"end\n",
"\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ae109254-750a-45ce-a83b-73b3b9d090b2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"myfma (generic function with 2 methods)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fallback for every other numeric type: defer to Base's `fma`.\n",
"myfma(x::T, y::T, z::T) where {T <: Number} = fma(x, y, z)\n",
"\n",
"# Shift `x` right by `n` bits, rounding to nearest with ties going up.\n",
"# Kept for backward compatibility; `myfma` rounds with `_shifttieseven` instead.\n",
"_shifttiesup(x::U, n::Unsigned) where {U <: Unsigned} = (x + (one(U) << ~-n)) >> n\n",
"\n",
"# Shift `x` right by `n` bits, rounding to nearest with ties to even\n",
"# (the IEEE 754 default rounding). `n` may exceed the bit width of `U`.\n",
"function _shifttieseven(x::U, n::Unsigned) where {U <: Unsigned}\n",
"    iszero(n) && return x\n",
"    # When n > bit width, x < 2^(n-1), i.e. strictly below one half: rounds to 0.\n",
"    n > 8 * sizeof(U) && return zero(U)\n",
"    half = one(U) << (n - one(n))\n",
"    quo = x >> n\n",
"    # `half << 1` wraps to 0 when n equals the bit width, so the mask becomes all ones.\n",
"    rest = x & ((half << 1) - one(U))\n",
"    roundup = rest > half || (rest == half && isodd(quo))\n",
"    return roundup ? quo + one(U) : quo\n",
"end\n",
"\n",
"# Fused multiply-add for Float16.\n",
"# x*y + z is evaluated in Float64 (the product of two Float16 values fits exactly),\n",
"# then the Float64 bit pattern is narrowed to Float16 by hand in a single\n",
"# round-to-nearest-even step, with no intermediate Float32 rounding.\n",
"function myfma(x::Float16, y::Float16, z::Float16)\n",
"    uir = reinterpret(UInt64, muladd(Float64(x), Float64(y), Float64(z)))\n",
"    ui16sgn = ((uir & Base.sign_mask(Float64)) >> 0x30) % UInt16\n",
"    expx = uir & Base.exponent_mask(Float64)\n",
"    mntx = uir & Base.significand_mask(Float64)\n",
"    if expx == Base.exponent_mask(Float64)\n",
"        # Inf or NaN -> keep the top 10 significand bits\n",
"        ui16exp = Base.exponent_mask(Float16)\n",
"        ui16mnt = (mntx >> 0x2a) % UInt16\n",
"        return reinterpret(Float16, ui16sgn | ui16exp | ui16mnt)\n",
"    end\n",
"    # Zero (or a Float64 subnormal, far below the Float16 range) -> signed zero.\n",
"    # Without this guard an exact-zero result raised InexactError further down.\n",
"    iszero(expx) && return reinterpret(Float16, ui16sgn)\n",
"    expv = Int(expx >> 0x34) - Base.exponent_bias(Float64)\n",
"    expv16 = Base.exponent_bias(Float16)\n",
"    # v- overflow -> convert to Inf16\n",
"    expv > expv16 && return reinterpret(Float16, ui16sgn | Base.exponent_mask(Float16))\n",
"    # v- underflow -> convert to subnormal\n",
"    if expv <= -expv16\n",
"        # Significand with its implicit leading 1 made explicit. The sign bit is\n",
"        # masked out so it cannot be shifted into the result for negative values.\n",
"        sig = mntx | (one(UInt64) << 0x34)\n",
"        mnt_16 = _shifttieseven(sig, unsigned(0x1c - expv)) % UInt16\n",
"        return reinterpret(Float16, ui16sgn | mnt_16)\n",
"    end\n",
"    # v- normal\n",
"    exp_16 = UInt16(expv16 + expv) << 0xa\n",
"    mnt_16 = _shifttieseven(mntx, 0x2a) % UInt16\n",
"    # `+` rather than `|`: rounding may carry out of the significand (mnt_16 == 0x0400)\n",
"    # and must then bump the exponent, possibly all the way up to Inf16.\n",
"    return reinterpret(Float16, ui16sgn | (exp_16 + mnt_16))\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e40f847f-84f5-42fc-aa22-b9ad1e86de19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0m\u001b[1mTest Summary: | \u001b[22m\u001b[32m\u001b[1mPass \u001b[22m\u001b[39m\u001b[36m\u001b[1mTotal \u001b[22m\u001b[39m\u001b[0m\u001b[1mTime\u001b[22m\n",
"myfma(::Float16, ::Float16, ::Float16) | \u001b[32m 5 \u001b[39m\u001b[36m 5 \u001b[39m\u001b[0m0.0s\n"
]
},
{
"data": {
"text/plain": [
"Test.DefaultTestSet(\"myfma(::Float16, ::Float16, ::Float16)\", Any[], 5, false, false, true, 1.68381348119073e9, 1.683813481190744e9, false)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# myfma(::Float16, ::Float16, ::Float16) returns the expected result for every case in cases_h!\n",
"@testset \"myfma(::Float16, ::Float16, ::Float16)\" begin\n",
"\n",
"for (x, y, z, expected) in cases_h\n",
" @test myfma(x, y, z) === expected\n",
"end\n",
"\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "bde50940-854a-4011-bd29-099750b02bde",
"metadata": {},
"outputs": [],
"source": [
"using BenchmarkTools, Random"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0ea8a5eb-7258-444c-9461-68cd80345dd5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m1.459 ns\u001b[22m\u001b[39m … \u001b[35m18.084 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m1.583 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m1.577 ns\u001b[22m\u001b[39m ± \u001b[32m 0.171 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n",
"\n",
" \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▆\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\n",
" \u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▇\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m█\u001b[39m \u001b[39m█\n",
" 1.46 ns\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 1.62 ns \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m."
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Random.seed!(1234)\n",
"@benchmark muladd(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6844ebf9-ee81-4f91-84ab-08751a34dd1c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m1.500 ns\u001b[22m\u001b[39m … \u001b[35m6.417 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m1.583 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m1.577 ns\u001b[22m\u001b[39m ± \u001b[32m0.054 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n",
"\n",
" \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▆\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▂\n",
" 1.5 ns\u001b[90m Histogram: frequency by time\u001b[39m 1.67 ns \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m."
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Random.seed!(1234)\n",
"@benchmark fma(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "40385e9a-8e74-4aaf-8230-7076bb2968db",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 10000 samples with 1000 evaluations.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.083 ns\u001b[22m\u001b[39m … \u001b[35m7.000 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.208 ns \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.201 ns\u001b[22m\u001b[39m ± \u001b[32m0.076 ns\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.00% ± 0.00%\n",
"\n",
" \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▅\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[34m█\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▂\n",
" 2.08 ns\u001b[90m Histogram: frequency by time\u001b[39m 2.29 ns \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m0 bytes\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m0\u001b[39m."
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Random.seed!(1234)\n",
"@benchmark myfma(x, y, z) setup=(x=rand(Float16)*256-128; y=rand(Float16)*256-128; z=rand(Float16)*256-128)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0395f42-2d99-494a-915d-1de2639d690f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.0",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment