Last active
December 4, 2025 13:01
-
-
Save sohaibiftikhar/d44d57a687c8f8d008bbd2edfd063206 to your computer and use it in GitHub Desktop.
llvm_169061_csan_logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module attributes {gpu.container_module} { | |
| // External allocator declaration: takes an i64 argument and returns an opaque pointer. | |
| // @main calls it four times, once per matrix buffer. | |
| llvm.func @malloc(i64) -> !llvm.ptr | |
| // Host driver: mallocs four f64 buffers, fills three of them, wraps each in a rank-2 | |
| // memref descriptor, host-registers the descriptors, launches @main_kernel on one | |
| // block of 32 threads and finally prints the fourth buffer. | |
| // | |
| // Buffers, with the sizes/strides recorded in their descriptors: | |
| //   %12 : 8x4, strides [4, 1], filled with 1.0   (kernel params 0-6) | |
| //   %20 : 4x8, strides [8, 1], filled with 1.0   (kernel params 8-14) | |
| //   %30 : 8x8, strides [8, 1], filled with 3.14  (kernel params 15-21) | |
| //   %38 : 8x8, strides [8, 1], not written on the host, printed after the launch | |
| //         (kernel params 22-28) | |
| // | |
| // The descriptor struct is (ptr, ptr, i64, array<2 x i64>, array<2 x i64>); this | |
| // function fills it as (buffer, buffer, 0, sizes, strides), which presumably follows | |
| // the standard MLIR memref descriptor layout (allocated, aligned, offset, sizes, | |
| // strides) - TODO confirm against the lowering that produced this IR. | |
| // | |
| // NOTE(review): none of the malloc'ed buffers is freed or unregistered before return. | |
| llvm.func @main() { | |
| %0 = llvm.mlir.constant(2 : index) : i64 // Rank = 2 | |
| %1 = llvm.mlir.poison : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %2 = llvm.mlir.zero : !llvm.ptr | |
| %3 = llvm.mlir.constant(1.000000e+00 : f64) : f64 | |
| %4 = llvm.mlir.constant(3.140000e+00 : f64) : f64 | |
| %5 = llvm.mlir.constant(0 : index) : i64 | |
| %6 = llvm.mlir.constant(8 : index) : i64 | |
| %7 = llvm.mlir.constant(4 : index) : i64 | |
| %8 = llvm.mlir.constant(1 : index) : i64 | |
| %9 = llvm.mlir.constant(32 : index) : i64 | |
| // %11 = byte size of 32 f64 elements, obtained by indexing a null pointer and | |
| // converting the resulting address to an integer. | |
| %10 = llvm.getelementptr %2[32] : (!llvm.ptr) -> !llvm.ptr, f64 | |
| %11 = llvm.ptrtoint %10 : !llvm.ptr to i64 | |
| // The malloc call cannot be changed, we rely on aligned memory allocation elsewhere. | |
| // Only a byte count is passed here; no alignment is requested at this call site. | |
| %12 = llvm.call @malloc(%11) : (i64) -> !llvm.ptr | |
| // Descriptor %19 for %12: offset 0, sizes [8, 4], strides [4, 1]. | |
| %13 = llvm.insertvalue %12, %1[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %14 = llvm.insertvalue %12, %13[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %15 = llvm.insertvalue %5, %14[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %16 = llvm.insertvalue %6, %15[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %17 = llvm.insertvalue %7, %16[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %18 = llvm.insertvalue %7, %17[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %19 = llvm.insertvalue %8, %18[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| // Second 32-element buffer; descriptor %27: offset 0, sizes [4, 8], strides [8, 1]. | |
| %20 = llvm.call @malloc(%11) : (i64) -> !llvm.ptr | |
| %21 = llvm.insertvalue %20, %1[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %22 = llvm.insertvalue %20, %21[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %23 = llvm.insertvalue %5, %22[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %24 = llvm.insertvalue %7, %23[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %25 = llvm.insertvalue %6, %24[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %26 = llvm.insertvalue %6, %25[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %27 = llvm.insertvalue %8, %26[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| // %29 = byte size of 64 f64 elements (same null-pointer indexing idiom as %11). | |
| %28 = llvm.getelementptr %2[64] : (!llvm.ptr) -> !llvm.ptr, f64 | |
| %29 = llvm.ptrtoint %28 : !llvm.ptr to i64 | |
| // First 64-element buffer; descriptor %37: offset 0, sizes [8, 8], strides [8, 1]. | |
| %30 = llvm.call @malloc(%29) : (i64) -> !llvm.ptr | |
| %31 = llvm.insertvalue %30, %1[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %32 = llvm.insertvalue %30, %31[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %33 = llvm.insertvalue %5, %32[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %34 = llvm.insertvalue %6, %33[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %35 = llvm.insertvalue %6, %34[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %36 = llvm.insertvalue %6, %35[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %37 = llvm.insertvalue %8, %36[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| // Second 64-element buffer; descriptor %45: offset 0, sizes [8, 8], strides [8, 1]. | |
| %38 = llvm.call @malloc(%29) : (i64) -> !llvm.ptr | |
| %39 = llvm.insertvalue %38, %1[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %40 = llvm.insertvalue %38, %39[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %41 = llvm.insertvalue %5, %40[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %42 = llvm.insertvalue %6, %41[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %43 = llvm.insertvalue %6, %42[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %44 = llvm.insertvalue %6, %43[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| %45 = llvm.insertvalue %8, %44[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> | |
| // Fill loop 1: for %46 in [0, 8), for %48 in [0, 4): | |
| //   %12[%46 * 4 + %48] = 1.0 and %20[%48 * 8 + %46] = 1.0. | |
| llvm.br ^bb1(%5 : i64) | |
| ^bb1(%46: i64): // 2 preds: ^bb0, ^bb4 | |
| %47 = llvm.icmp "slt" %46, %6 : i64 | |
| llvm.cond_br %47, ^bb2(%5 : i64), ^bb5(%5 : i64) | |
| ^bb2(%48: i64): // 2 preds: ^bb1, ^bb3 | |
| %49 = llvm.icmp "slt" %48, %7 : i64 | |
| llvm.cond_br %49, ^bb3, ^bb4 | |
| ^bb3: // pred: ^bb2 | |
| %50 = llvm.mul %46, %7 overflow<nsw, nuw> : i64 | |
| %51 = llvm.add %50, %48 overflow<nsw, nuw> : i64 | |
| %52 = llvm.getelementptr inbounds|nuw %12[%51] : (!llvm.ptr, i64) -> !llvm.ptr, f64 | |
| llvm.store %3, %52 : f64, !llvm.ptr | |
| %53 = llvm.mul %48, %6 overflow<nsw, nuw> : i64 | |
| %54 = llvm.add %53, %46 overflow<nsw, nuw> : i64 | |
| %55 = llvm.getelementptr inbounds|nuw %20[%54] : (!llvm.ptr, i64) -> !llvm.ptr, f64 | |
| llvm.store %3, %55 : f64, !llvm.ptr | |
| %56 = llvm.add %48, %8 : i64 | |
| llvm.br ^bb2(%56 : i64) | |
| ^bb4: // pred: ^bb2 | |
| %57 = llvm.add %46, %8 : i64 | |
| llvm.br ^bb1(%57 : i64) | |
| // Fill loop 2: for %58 in [0, 8), for %60 in [0, 8): %30[%58 * 8 + %60] = 3.14. | |
| ^bb5(%58: i64): // 2 preds: ^bb1, ^bb8 | |
| %59 = llvm.icmp "slt" %58, %6 : i64 | |
| llvm.cond_br %59, ^bb6(%5 : i64), ^bb9 | |
| ^bb6(%60: i64): // 2 preds: ^bb5, ^bb7 | |
| %61 = llvm.icmp "slt" %60, %6 : i64 | |
| llvm.cond_br %61, ^bb7, ^bb8 | |
| ^bb7: // pred: ^bb6 | |
| %62 = llvm.mul %58, %6 overflow<nsw, nuw> : i64 | |
| %63 = llvm.add %62, %60 overflow<nsw, nuw> : i64 | |
| %64 = llvm.getelementptr inbounds|nuw %30[%63] : (!llvm.ptr, i64) -> !llvm.ptr, f64 | |
| llvm.store %4, %64 : f64, !llvm.ptr | |
| %65 = llvm.add %60, %8 : i64 | |
| llvm.br ^bb6(%65 : i64) | |
| ^bb8: // pred: ^bb6 | |
| %66 = llvm.add %58, %8 : i64 | |
| llvm.br ^bb5(%66 : i64) | |
| ^bb9: // pred: ^bb5 | |
| // Store each descriptor into its own stack slot so it can be passed by pointer. | |
| %67 = llvm.alloca %8 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr | |
| llvm.store %19, %67 : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr | |
| %68 = llvm.alloca %8 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr | |
| llvm.store %27, %68 : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr | |
| %69 = llvm.alloca %8 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr | |
| llvm.store %37, %69 : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr | |
| %70 = llvm.alloca %8 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr | |
| llvm.store %45, %70 : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>, !llvm.ptr | |
| // %72 = size in bytes of a single f64 element (null-pointer indexing idiom). | |
| %71 = llvm.getelementptr %2[1] : (!llvm.ptr) -> !llvm.ptr, f64 | |
| %72 = llvm.ptrtoint %71 : !llvm.ptr to i64 | |
| // Register all four descriptors with the GPU runtime. Arguments are | |
| // (rank, pointer to descriptor, element size in bytes). The first argument must stay | |
| // %0 (Rank = 2): per the note left on an earlier revision of this file, passing | |
| // another value caused an assertion failure. | |
| llvm.call @mgpuMemHostRegisterMemRef(%0, %67, %72) : (i64, !llvm.ptr, i64) -> () | |
| llvm.call @mgpuMemHostRegisterMemRef(%0, %68, %72) : (i64, !llvm.ptr, i64) -> () | |
| llvm.call @mgpuMemHostRegisterMemRef(%0, %69, %72) : (i64, !llvm.ptr, i64) -> () | |
| llvm.call @mgpuMemHostRegisterMemRef(%0, %70, %72) : (i64, !llvm.ptr, i64) -> () | |
| // Launch a 1x1x1 grid of 32x1x1 threads. Each buffer is passed as seven scalars | |
| // (ptr, ptr, offset, size0, size1, stride0, stride1); one additional index (%5 = 0) | |
| // follows the first buffer, giving 29 arguments in total. | |
| // NOTE(review): nothing in this function requests or asserts any particular buffer | |
| // alignment, and the compute-sanitizer log kept with this module reports | |
| // out-of-bounds writes from the kernel, not misaligned accesses. The root cause of | |
| // those reports is not established by this IR alone. | |
| gpu.launch_func @main_kernel::@main_kernel blocks in (%8, %8, %8) threads in (%9, %8, %8) : i64 args(%12 : !llvm.ptr, %12 : !llvm.ptr, %5 : i64, %6 : i64, %7 : i64, %7 : i64, %8 : i64, %5 : i64, %20 : !llvm.ptr, %20 : !llvm.ptr, %5 : i64, %7 : i64, %6 : i64, %6 : i64, %8 : i64, %30 : !llvm.ptr, %30 : !llvm.ptr, %5 : i64, %6 : i64, %6 : i64, %6 : i64, %8 : i64, %38 : !llvm.ptr, %38 : !llvm.ptr, %5 : i64, %6 : i64, %6 : i64, %6 : i64, %8 : i64) | |
| // Print the buffer wrapped by descriptor %70 (i.e. %38), passed as (rank, descriptor). | |
| llvm.call @printMemrefF64(%0, %70) : (i64, !llvm.ptr) -> () | |
| llvm.return | |
| } | |
| // Serialized device code for @main_kernel: a single object for the NVVM target | |
| // chip sm_80 whose payload is NVPTX assembly text (escaped: \0A = newline, \09 = tab). | |
| // What the embedded PTX shows: | |
| //   * entry main_kernel takes 29 parameters (param_0 .. param_28); the eight pointer | |
| //     parameters are declared `.ptr .align 1`; `.maxntid 32, 1, 1`. | |
| //   * it reads param_1, param_7, param_9, param_16 and param_23 only. | |
| //   * wmma.load.a reads from param_1 + param_7 * 40 with stride operand 4; | |
| //     wmma.load.b / wmma.load.c read from param_9 / param_16 + param_7 * 72 with | |
| //     stride operand 8; one m8n8k4 f64 wmma.mma follows; wmma.store.d writes to | |
| //     param_23 + param_7 * 72 with stride operand 8. | |
| // NOTE(review): the string literal is split across two physical lines in this listing | |
| // (after `%r2, `); presumably an artifact of how the file was captured - confirm | |
| // before feeding it to a parser. | |
| gpu.binary @main_kernel [#gpu.object<#nvvm.target<chip = "sm_80">, properties = {LLVMIRToISATimeInMs = 2 : i64, O = 2 : i32}, assembly = "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 7.0\0A.target sm_80\0A.address_size 64\0A\0A\09// .globl\09main_kernel\0A\0A.visible .entry main_kernel(\0A\09.param .u64 .ptr .align 1 main_kernel_param_0,\0A\09.param .u64 .ptr .align 1 main_kernel_param_1,\0A\09.param .u64 main_kernel_param_2,\0A\09.param .u64 main_kernel_param_3,\0A\09.param .u64 main_kernel_param_4,\0A\09.param .u64 main_kernel_param_5,\0A\09.param .u64 main_kernel_param_6,\0A\09.param .u64 main_kernel_param_7,\0A\09.param .u64 .ptr .align 1 main_kernel_param_8,\0A\09.param .u64 .ptr .align 1 main_kernel_param_9,\0A\09.param .u64 main_kernel_param_10,\0A\09.param .u64 main_kernel_param_11,\0A\09.param .u64 main_kernel_param_12,\0A\09.param .u64 main_kernel_param_13,\0A\09.param .u64 main_kernel_param_14,\0A\09.param .u64 .ptr .align 1 main_kernel_param_15,\0A\09.param .u64 .ptr .align 1 main_kernel_param_16,\0A\09.param .u64 main_kernel_param_17,\0A\09.param .u64 main_kernel_param_18,\0A\09.param .u64 main_kernel_param_19,\0A\09.param .u64 main_kernel_param_20,\0A\09.param .u64 main_kernel_param_21,\0A\09.param .u64 .ptr .align 1 main_kernel_param_22,\0A\09.param .u64 .ptr .align 1 main_kernel_param_23,\0A\09.param .u64 main_kernel_param_24,\0A\09.param .u64 main_kernel_param_25,\0A\09.param .u64 main_kernel_param_26,\0A\09.param .u64 main_kernel_param_27,\0A\09.param .u64 main_kernel_param_28\0A)\0A.maxntid 32, 1, 1\0A{\0A\09.reg .b32 \09%r<3>;\0A\09.reg .b64 \09%rd<17>;\0A\0A\09ld.param.b64 \09%rd1, [main_kernel_param_1];\0A\09ld.param.b64 \09%rd2, [main_kernel_param_7];\0A\09mad.lo.s64 \09%rd3, %rd2, 40, %rd1;\0A\09mov.b32 \09%r1, 4;\0A\09wmma.load.a.sync.aligned.row.m8n8k4.f64 \09{%rd4}, [%rd3], %r1;\0A\09mul.lo.s64 \09%rd5, %rd2, 72;\0A\09ld.param.b64 \09%rd6, [main_kernel_param_9];\0A\09add.s64 \09%rd7, %rd6, %rd5;\0A\09mov.b32 \09%r2, 
8;\0A\09wmma.load.b.sync.aligned.row.m8n8k4.f64 \09{%rd8}, [%rd7], %r2;\0A\09ld.param.b64 \09%rd9, [main_kernel_param_16];\0A\09add.s64 \09%rd10, %rd9, %rd5;\0A\09wmma.load.c.sync.aligned.row.m8n8k4.f64 \09{%rd11, %rd12}, [%rd10], %r2;\0A\09wmma.mma.sync.aligned.row.row.m8n8k4.f64.f64.f64.f64\0A\09\09{%rd13, %rd14},\0A\09\09{%rd4},\0A\09\09{%rd8},\0A\09\09{%rd11, %rd12};\0A\09ld.param.b64 \09%rd15, [main_kernel_param_23];\0A\09add.s64 \09%rd16, %rd15, %rd5;\0A\09wmma.store.d.sync.aligned.row.m8n8k4.f64 \09[%rd16],{%rd13, %rd14}, %r2;\0A\09ret;\0A\0A}\0A">] | |
| // External helper declarations used by @main; neither has a body in this module. | |
| // @printMemrefF64 is called once with (rank, pointer to a memref descriptor). | |
| llvm.func @printMemrefF64(i64, !llvm.ptr) attributes {sym_visibility = "private"} | |
| // @mgpuMemHostRegisterMemRef is called once per buffer with | |
| // (rank, pointer to a memref descriptor, size of one element in bytes). | |
| llvm.func @mgpuMemHostRegisterMemRef(i64, !llvm.ptr, i64) | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ========= COMPUTE-SANITIZER | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (0,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a20 is out of bounds | |
| ========= and is 17 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (1,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a30 is out of bounds | |
| ========= and is 33 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (2,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a40 is out of bounds | |
| ========= and is 49 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (3,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a50 is out of bounds | |
| ========= and is 65 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (4,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a60 is out of bounds | |
| ========= and is 81 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (5,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a70 is out of bounds | |
| ========= and is 97 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (6,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a80 is out of bounds | |
| ========= and is 113 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (7,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6a90 is out of bounds | |
| ========= and is 129 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (8,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6aa0 is out of bounds | |
| ========= and is 145 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (9,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6ab0 is out of bounds | |
| ========= and is 161 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (10,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6ac0 is out of bounds | |
| ========= and is 177 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (11,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6ad0 is out of bounds | |
| ========= and is 193 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (12,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6ae0 is out of bounds | |
| ========= and is 209 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (13,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6af0 is out of bounds | |
| ========= and is 225 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (14,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b00 is out of bounds | |
| ========= and is 241 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (15,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b10 is out of bounds | |
| ========= and is 257 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (16,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b20 is out of bounds | |
| ========= and is 273 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (17,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b30 is out of bounds | |
| ========= and is 289 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (18,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b40 is out of bounds | |
| ========= and is 305 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (19,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b50 is out of bounds | |
| ========= and is 321 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (20,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b60 is out of bounds | |
| ========= and is 337 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (21,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b70 is out of bounds | |
| ========= and is 353 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (22,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b80 is out of bounds | |
| ========= and is 369 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (23,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6b90 is out of bounds | |
| ========= and is 385 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (24,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6ba0 is out of bounds | |
| ========= and is 401 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (25,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6bb0 is out of bounds | |
| ========= and is 417 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (26,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6bc0 is out of bounds | |
| ========= and is 433 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (27,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6bd0 is out of bounds | |
| ========= and is 449 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (28,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6be0 is out of bounds | |
| ========= and is 465 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (29,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6bf0 is out of bounds | |
| ========= and is 481 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (30,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6c00 is out of bounds | |
| ========= and is 497 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Invalid __global__ write of size 16 bytes | |
| ========= at main_kernel+0x280 | |
| ========= by thread (31,0,0) in block (0,0,0) | |
| ========= Address 0x560f5c3c6c10 is out of bounds | |
| ========= and is 513 bytes after the nearest allocation at 0x560f5c3c6910 of size 256 bytes | |
| ========= Saved host backtrace up to driver entry point at kernel launch time | |
| ========= Host Frame: [0x2f285f] in libcuda.so.1 | |
| ========= Host Frame: mgpuLaunchKernel [0x491d] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4db] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| ========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuStreamSynchronize. | |
| ========= Saved host backtrace up to driver entry point at error | |
| ========= Host Frame: [0x2f19ac] in libcuda.so.1 | |
| ========= Host Frame: mgpuStreamSynchronize [0x4d14] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4ee] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| 'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_LAUNCH_FAILED' | |
| ========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuStreamDestroy_v2. | |
| ========= Saved host backtrace up to driver entry point at error | |
| ========= Host Frame: [0x2ae66a] in libcuda.so.1 | |
| ========= Host Frame: mgpuStreamDestroy [0x4c74] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a4fd] | |
| ========= Host Frame: [0x297557a4a53c] | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602aab9] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| 'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_LAUNCH_FAILED' | |
| Unranked Memref base@ = 0x560f5c3c6a20 rank = 2 offset = 0 sizes = [8, 8] strides = [8, 1] data = | |
| [[6.92718e-310, 6.92718e-310, 0, 0, 4.16842e+199, -nan, 2.42107e-32, 7.69844e+218], | |
| [-nan, 3.94812e+180, 4.80872e+151, -nan, 1.50753e-153, 1.1426e+243, -nan, 2.33186e+232], | |
| [2.31873e-46, -nan, 2.30186e+161, 1.14615e-259, -nan, 7.33953e+223, 4.6476e+151, -nan], | |
| [7.69844e+218, 2.04741e+161, -nan, 4.80872e+151, 9.30225e+242, 4.67504e-310, 6.92718e-310, 6.07108e-320], | |
| [-nan, 4.80872e+151, 9.30225e+242, -nan, 1.15674e+214, 2.04741e+161, -nan, 4.80872e+151], | |
| [9.30225e+242, -nan, 1.1426e+243, 2.58415e+161, -nan, 2.31873e-46, 1.61458e+209, -nan], | |
| [1.1471e-259, 1.65157e-86, -nan, 4.6476e+151, 3.34587e-33, -nan, 2.04741e+161, 3.94812e+180], | |
| [-nan, 9.30225e+242, 1.50753e-153, -nan, 2.58415e+161, 2.33186e+232, -nan, 1.65356e-86]] | |
| ========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuModuleUnload. | |
| ========= Saved host backtrace up to driver entry point at error | |
| ========= Host Frame: [0x28bf26] in libcuda.so.1 | |
| ========= Host Frame: mgpuModuleUnload [0x4634] in libmlir_cuda_runtime.so | |
| ========= Host Frame: [0x297557a4a049] | |
| ========= Host Frame: [0x297557a4a57c] | |
| ========= Host Frame: (anonymous namespace)::GenericLLVMIRPlatformSupport::deinitialize(llvm::orc::JITDylib&) [0x61e3eaf] in mlir-runner | |
| ========= Host Frame: llvm::orc::LLJIT::deinitialize(llvm::orc::JITDylib&) [0x6039665] in mlir-runner | |
| ========= Host Frame: mlir::ExecutionEngine::~ExecutionEngine() [0x6035a49] in mlir-runner | |
| ========= Host Frame: std::default_delete<mlir::ExecutionEngine>::operator()(mlir::ExecutionEngine*) const [0x6032686] in mlir-runner | |
| ========= Host Frame: std::unique_ptr<mlir::ExecutionEngine, std::default_delete<mlir::ExecutionEngine> >::~unique_ptr() [0x6031b4e] in mlir-runner | |
| ========= Host Frame: compileAndExecute((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, void**, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602ab06] in mlir-runner | |
| ========= Host Frame: compileAndExecuteVoidFunction((anonymous namespace)::Options&, mlir::Operation*, llvm::StringRef, (anonymous namespace)::CompileAndExecuteConfig, std::unique_ptr<llvm::TargetMachine, std::default_delete<llvm::TargetMachine> >) [0x602a4de] in mlir-runner | |
| ========= Host Frame: mlir::JitRunnerMain(int, char**, mlir::DialectRegistry const&, mlir::JitRunnerConfig) [0x6028cd6] in mlir-runner | |
| ========= Host Frame: main [0x4718a32] in mlir-runner | |
| ========= | |
| 'cuModuleUnload(module)' failed with 'CUDA_ERROR_LAUNCH_FAILED' | |
| ========= ERROR SUMMARY: 35 errors |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| PTX for module: "main_kernel" | |
| // | |
| // Generated by LLVM NVPTX Back-End | |
| // | |
| .version 7.0 | |
| .target sm_80 | |
| .address_size 64 | |
| // .globl main_kernel | |
| // main_kernel: loads an A fragment, a B fragment and a two-register C fragment with | |
| // wmma.load (shape m8n8k4, f64, row layout), issues one wmma.mma, and writes the | |
| // two-register result fragment back with wmma.store. | |
| // Only five of the 29 parameters are read: param_1, param_9, param_16 and param_23 are | |
| // used as base addresses, and param_7 is a 64-bit value that scales every byte offset. | |
| // NOTE(review): the parameter list looks like four expanded memref descriptors | |
| // (ptr, ptr, offset, 2 sizes, 2 strides) with a single scalar at param_7 -- confirm | |
| // against the MLIR that produced this PTX. | |
| .visible .entry main_kernel( | |
| .param .u64 .ptr .align 1 main_kernel_param_0, | |
| .param .u64 .ptr .align 1 main_kernel_param_1, | |
| .param .u64 main_kernel_param_2, | |
| .param .u64 main_kernel_param_3, | |
| .param .u64 main_kernel_param_4, | |
| .param .u64 main_kernel_param_5, | |
| .param .u64 main_kernel_param_6, | |
| .param .u64 main_kernel_param_7, | |
| .param .u64 .ptr .align 1 main_kernel_param_8, | |
| .param .u64 .ptr .align 1 main_kernel_param_9, | |
| .param .u64 main_kernel_param_10, | |
| .param .u64 main_kernel_param_11, | |
| .param .u64 main_kernel_param_12, | |
| .param .u64 main_kernel_param_13, | |
| .param .u64 main_kernel_param_14, | |
| .param .u64 .ptr .align 1 main_kernel_param_15, | |
| .param .u64 .ptr .align 1 main_kernel_param_16, | |
| .param .u64 main_kernel_param_17, | |
| .param .u64 main_kernel_param_18, | |
| .param .u64 main_kernel_param_19, | |
| .param .u64 main_kernel_param_20, | |
| .param .u64 main_kernel_param_21, | |
| .param .u64 .ptr .align 1 main_kernel_param_22, | |
| .param .u64 .ptr .align 1 main_kernel_param_23, | |
| .param .u64 main_kernel_param_24, | |
| .param .u64 main_kernel_param_25, | |
| .param .u64 main_kernel_param_26, | |
| .param .u64 main_kernel_param_27, | |
| .param .u64 main_kernel_param_28 | |
| ) | |
| // Launch bound: at most 32 x 1 x 1 threads per block. | |
| .maxntid 32, 1, 1 | |
| { | |
| .reg .b32 %r<3>; | |
| .reg .b64 %rd<17>; | |
| // A operand: address %rd3 = param_1 + param_7 * 40; stride operand %r1 = 4. | |
| // NOTE(review): 40 = (4 + 1) * 8, which would be f64 element [i, i] of a matrix with | |
| // row stride 4 -- presumably a folded index computation; confirm against the MLIR. | |
| ld.param.b64 %rd1, [main_kernel_param_1]; | |
| ld.param.b64 %rd2, [main_kernel_param_7]; | |
| mad.lo.s64 %rd3, %rd2, 40, %rd1; | |
| mov.b32 %r1, 4; | |
| wmma.load.a.sync.aligned.row.m8n8k4.f64 {%rd4}, [%rd3], %r1; | |
| // %rd5 = param_7 * 72 is the byte offset reused by the B, C and D accesses below. | |
| // NOTE(review): 72 = (8 + 1) * 8, the same [i, i] pattern with row stride 8 -- confirm. | |
| mul.lo.s64 %rd5, %rd2, 72; | |
| // B operand: address %rd7 = param_9 + %rd5; stride operand %r2 = 8. | |
| ld.param.b64 %rd6, [main_kernel_param_9]; | |
| add.s64 %rd7, %rd6, %rd5; | |
| mov.b32 %r2, 8; | |
| wmma.load.b.sync.aligned.row.m8n8k4.f64 {%rd8}, [%rd7], %r2; | |
| // C operand: address %rd10 = param_16 + %rd5; stride operand %r2 = 8; two registers per thread. | |
| ld.param.b64 %rd9, [main_kernel_param_16]; | |
| add.s64 %rd10, %rd9, %rd5; | |
| wmma.load.c.sync.aligned.row.m8n8k4.f64 {%rd11, %rd12}, [%rd10], %r2; | |
| // Matrix multiply-accumulate: {%rd13, %rd14} = A {%rd4} * B {%rd8} + C {%rd11, %rd12}. | |
| wmma.mma.sync.aligned.row.row.m8n8k4.f64.f64.f64.f64 | |
| {%rd13, %rd14}, | |
| {%rd4}, | |
| {%rd8}, | |
| {%rd11, %rd12}; | |
| // D result: address %rd16 = param_23 + %rd5; stride operand %r2 = 8. | |
| // This is the only store in the kernel, and two 64-bit registers per thread is 16 bytes. | |
| // NOTE(review): that matches the "Invalid __global__ write of size 16 bytes" reports in | |
| // the sanitizer log earlier in this file; presumably they point at this instruction -- | |
| // confirm by mapping SASS offset main_kernel+0x280 back to PTX. | |
| ld.param.b64 %rd15, [main_kernel_param_23]; | |
| add.s64 %rd16, %rd15, %rd5; | |
| wmma.store.d.sync.aligned.row.m8n8k4.f64 [%rd16],{%rd13, %rd14}, %r2; | |
| ret; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment