goos: linux
goarch: amd64
pkg: test
cpu: AMD Ryzen 9 3900X 12-Core Processor
│ nosimd.txt │ simd.txt │ tensor.txt │
│ sec/op │ sec/op vs base │ sec/op vs base │
Add-24 316.2n ± 1% 434.1n ± 1% +37.32% (p=0.000 n=10) 184.9n ± 1% -41.52% (p=0.000 n=10)
│ nosimd.txt │ simd.txt │ tensor.txt │
│ B/op │ B/op vs base │ B/op vs base │
Add-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ 0.000 ± 0% ~ (p=1.000 n=10) ¹
¹ all samples are equal
│ nosimd.txt │ simd.txt │ tensor.txt │
│ allocs/op │ allocs/op vs base │ allocs/op vs base │
Add-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ 0.000 ± 0% ~ (p=1.000 n=10) ¹
¹ all samples are equal
Last active
February 16, 2026 21:47
-
-
Save DeedleFake/e02324020dfce6686eb6cc45e252fa08 to your computer and use it in GitHub Desktop.
Simple experimentation and benchmarking of Go 1.26's SIMD support.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module test | |
| go 1.26.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| goos: linux | |
| goarch: amd64 | |
| pkg: test | |
| cpu: AMD Ryzen 9 3900X 12-Core Processor | |
| BenchmarkAdd-24 3779433 315.9 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3815323 315.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3771746 319.1 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3776943 316.6 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3822039 315.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3776506 317.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3823285 314.4 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3817754 315.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3766824 316.4 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 3765676 318.0 ns/op 0 B/op 0 allocs/op | |
| PASS | |
| ok test 12.005s |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| goos: linux | |
| goarch: amd64 | |
| pkg: test | |
| cpu: AMD Ryzen 9 3900X 12-Core Processor | |
| BenchmarkAdd-24 2749945 435.4 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2748529 438.4 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2774643 432.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2756433 433.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2765024 432.6 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2747886 436.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2764462 433.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2778027 434.5 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2758598 436.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 2771936 433.3 ns/op 0 B/op 0 allocs/op | |
| PASS | |
| ok test 12.011s |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| goos: linux | |
| goarch: amd64 | |
| pkg: test | |
| cpu: AMD Ryzen 9 3900X 12-Core Processor | |
| BenchmarkAdd-24 6450811 185.0 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6462780 185.2 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6475417 185.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6490584 184.3 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6493693 185.0 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6478975 185.1 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6501334 184.8 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6528841 183.9 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6509636 183.9 ns/op 0 B/op 0 allocs/op | |
| BenchmarkAdd-24 6541306 183.8 ns/op 0 B/op 0 allocs/op | |
| PASS | |
| ok test 12.000s |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package test_test | |
| import ( | |
| "math/rand/v2" | |
| "simd/archsimd" | |
| "testing" | |
| ) | |
| //func BenchmarkAdd(b *testing.B) { | |
| // v1 := generateRandom() | |
| // v2 := generateRandom() | |
| // | |
| // b.ResetTimer() | |
| // for b.Loop() { | |
| // add(v1, v2) | |
| // } | |
| //} | |
| //func BenchmarkAdd(b *testing.B) { | |
| // v1 := generateRandom() | |
| // v2 := generateRandom() | |
| // | |
| // b.ResetTimer() | |
| // for b.Loop() { | |
| // addSIMD(v1, v2) | |
| // } | |
| //} | |
| func BenchmarkAdd(b *testing.B) { | |
| v1 := toTensor(generateRandom()) | |
| v2 := toTensor(generateRandom()) | |
| b.ResetTimer() | |
| for b.Loop() { | |
| addTensor(v1, v2) | |
| } | |
| } | |
| func generateRandom() []uint64 { | |
| v := make([]uint64, 1000) | |
| for i := range v { | |
| v[i] = rand.Uint64() | |
| } | |
| return v | |
| } | |
| func toTensor(v []uint64) []archsimd.Uint64x4 { | |
| s := make([]archsimd.Uint64x4, 0, (len(v)+4-1)/4) | |
| for i := 0; i < len(v); i += 4 { | |
| s = append(s, archsimd.LoadUint64x4SlicePart(v[i:])) | |
| } | |
| return s | |
| } | |
| func addTensor(v1, v2 []archsimd.Uint64x4) []archsimd.Uint64x4 { | |
| if len(v1) != len(v2) { | |
| panic("len(v1) != len(v2)") | |
| } | |
| for i, s1 := range v1 { | |
| v1[i] = s1.Add(v2[i]) | |
| } | |
| return v1 | |
| } | |
| func addSIMD(v1, v2 []uint64) []uint64 { | |
| if len(v1) != len(v2) { | |
| panic("len(v1) != len(v2)") | |
| } | |
| for i := 0; i < len(v1); i += 4 { | |
| s1 := archsimd.LoadUint64x4SlicePart(v1[i:]) | |
| s2 := archsimd.LoadUint64x4SlicePart(v2[i:]) | |
| r := s1.Add(s2) | |
| r.StoreSlicePart(v1[i:]) | |
| } | |
| return v1 | |
| } | |
// add adds every element of v2 to the element of v1 at the same index,
// modifying v1 in place, and returns v1. It panics if the two slices differ
// in length.
func add(v1, v2 []uint64) []uint64 {
	if len(v2) != len(v1) {
		panic("len(v1) != len(v2)")
	}

	for idx, addend := range v2 {
		v1[idx] += addend
	}
	return v1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment