Last active
February 10, 2026 20:06
-
-
Save julian-klode/d2c983ae69c93af43568796d836b48ef to your computer and use it in GitHub Desktop.
perf_event_open() only works for a group of up to 5 events on kernel 6.18 (Ryzen 6850U); a group of 6 is accepted without any error, but every counter in it reads 0.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| execve("./a.out", ["./a.out"], 0x7ffe6b19fd00 /* 86 vars */) = 0 | |
| brk(NULL) = 0x5bc7cb51f000 | |
| mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x724d0aa3e000 | |
| access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) | |
| openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 | |
| fstat(3, {st_mode=S_IFREG|0644, st_size=121541, ...}) = 0 | |
| mmap(NULL, 121541, PROT_READ, MAP_PRIVATE, 3, 0) = 0x724d0aa20000 | |
| close(3) = 0 | |
| openat(AT_FDCWD, "/usr/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 | |
| read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\247\2\0\0\0\0\0"..., 832) = 832 | |
| pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840 | |
| fstat(3, {st_mode=S_IFREG|0755, st_size=2186672, ...}) = 0 | |
| pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840 | |
| mmap(NULL, 2227792, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x724d0a800000 | |
| mmap(0x724d0a828000, 1667072, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x724d0a828000 | |
| mmap(0x724d0a9bf000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x724d0a9bf000 | |
| mmap(0x724d0aa0d000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x20d000) = 0x724d0aa0d000 | |
| mmap(0x724d0aa13000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x724d0aa13000 | |
| close(3) = 0 | |
| mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x724d0a7fd000 | |
| arch_prctl(ARCH_SET_FS, 0x724d0a7fd740) = 0 | |
| set_tid_address(0x724d0a7fda10) = 67344 | |
| set_robust_list(0x724d0a7fda20, 24) = 0 | |
| rseq(0x724d0a7fd680, 0x20, 0, 0x53053053) = 0 | |
| mprotect(0x724d0aa0d000, 16384, PROT_READ) = 0 | |
| mprotect(0x5bc7c17dd000, 4096, PROT_READ) = 0 | |
| mprotect(0x724d0aa82000, 8192, PROT_READ) = 0 | |
| prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 | |
| getrandom("\xed\x6a\x77\xf1\x82\x3e\xc3\xd6", 8, GRND_NONBLOCK) = 8 | |
| munmap(0x724d0aa20000, 121541) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CPU_CYCLES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, -1, 0) = 3 | |
| ioctl(3, PERF_EVENT_IOC_ID, [42545]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 4 | |
| ioctl(4, PERF_EVENT_IOC_ID, [42546]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_REFERENCES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 5 | |
| ioctl(5, PERF_EVENT_IOC_ID, [42547]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 6 | |
| ioctl(6, PERF_EVENT_IOC_ID, [42548]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 7 | |
| ioctl(7, PERF_EVENT_IOC_ID, [42549]) = 0 | |
| ioctl(3, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) = 0 | |
| ioctl(3, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) = 0 | |
| ioctl(3, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) = 0 | |
| read(3, "\5\0\0\0\0\0\0\0L\223M\216\0\0\0\0001\246\0\0\0\0\0\0\327*k\356\0\0\0\0"..., 88) = 88 | |
| fstat(1, {st_mode=S_IFCHR|0600, st_rdev=makedev(0x88, 0x6), ...}) = 0 | |
| brk(NULL) = 0x5bc7cb51f000 | |
| brk(0x5bc7cb540000) = 0x5bc7cb540000 | |
| write(1, "Num events captured: 5\n", 23) = 23 | |
| write(1, "CPU cycles: 2387448652\n", 23) = 23 | |
| write(1, "Instructions retired: 4000000727"..., 33) = 33 | |
| write(1, "cache refs 5638\n", 16) = 16 | |
| write(1, "cache msises: 258\n", 18) = 18 | |
| write(1, "branch refs: 1000000692\n", 24) = 24 | |
| close(3) = 0 | |
| close(4) = 0 | |
| close(5) = 0 | |
| close(6) = 0 | |
| close(7) = 0 | |
| exit_group(0) = ? | |
| +++ exited with 0 +++ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| execve("./a.out", ["./a.out"], 0x7ffeff680e80 /* 86 vars */) = 0 | |
| brk(NULL) = 0x5b8e10ee7000 | |
| mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7bca000 | |
| access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) | |
| openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 | |
| fstat(3, {st_mode=S_IFREG|0644, st_size=121541, ...}) = 0 | |
| mmap(NULL, 121541, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7ca4b7bac000 | |
| close(3) = 0 | |
| openat(AT_FDCWD, "/usr/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 | |
| read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\247\2\0\0\0\0\0"..., 832) = 832 | |
| pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840 | |
| fstat(3, {st_mode=S_IFREG|0755, st_size=2186672, ...}) = 0 | |
| pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840 | |
| mmap(NULL, 2227792, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7ca4b7800000 | |
| mmap(0x7ca4b7828000, 1667072, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7ca4b7828000 | |
| mmap(0x7ca4b79bf000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7ca4b79bf000 | |
| mmap(0x7ca4b7a0d000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x20d000) = 0x7ca4b7a0d000 | |
| mmap(0x7ca4b7a13000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7a13000 | |
| close(3) = 0 | |
| mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7ba9000 | |
| arch_prctl(ARCH_SET_FS, 0x7ca4b7ba9740) = 0 | |
| set_tid_address(0x7ca4b7ba9a10) = 67306 | |
| set_robust_list(0x7ca4b7ba9a20, 24) = 0 | |
| rseq(0x7ca4b7ba9680, 0x20, 0, 0x53053053) = 0 | |
| mprotect(0x7ca4b7a0d000, 16384, PROT_READ) = 0 | |
| mprotect(0x5b8deb51e000, 4096, PROT_READ) = 0 | |
| mprotect(0x7ca4b7c0e000, 8192, PROT_READ) = 0 | |
| prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 | |
| getrandom("\x2f\x5a\xae\x5c\x12\x3f\xeb\xc7", 8, GRND_NONBLOCK) = 8 | |
| munmap(0x7ca4b7bac000, 121541) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CPU_CYCLES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, -1, 0) = 3 | |
| ioctl(3, PERF_EVENT_IOC_ID, [42539]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 4 | |
| ioctl(4, PERF_EVENT_IOC_ID, [42540]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_REFERENCES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 5 | |
| ioctl(5, PERF_EVENT_IOC_ID, [42541]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 6 | |
| ioctl(6, PERF_EVENT_IOC_ID, [42542]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 7 | |
| ioctl(7, PERF_EVENT_IOC_ID, [42543]) = 0 | |
| perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 8 | |
| ioctl(8, PERF_EVENT_IOC_ID, [42544]) = 0 | |
| ioctl(3, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) = 0 | |
| ioctl(3, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) = 0 | |
| ioctl(3, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) = 0 | |
| read(3, "\6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\246\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 104) = 104 | |
| fstat(1, {st_mode=S_IFCHR|0600, st_rdev=makedev(0x88, 0x6), ...}) = 0 | |
| brk(NULL) = 0x5b8e10ee7000 | |
| brk(0x5b8e10f08000) = 0x5b8e10f08000 | |
| write(1, "Num events captured: 6\n", 23) = 23 | |
| write(1, "CPU cycles: 0\n", 14) = 14 | |
| write(1, "Instructions retired: 0\n", 24) = 24 | |
| write(1, "cache refs 0\n", 13) = 13 | |
| write(1, "cache msises: 0\n", 16) = 16 | |
| write(1, "branch refs: 0\n", 15) = 15 | |
| write(1, "branch misses: 0\n", 17) = 17 | |
| close(3) = 0 | |
| close(4) = 0 | |
| close(5) = 0 | |
| close(6) = 0 | |
| close(7) = 0 | |
| close(8) = 0 | |
| exit_group(0) = ? | |
| +++ exited with 0 +++ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
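| // Only 5 events can be counted in one group. | |
| // Putting 6 events in the group makes the kernel report 0 for all of them (kernel 6.18, Ryzen 6850U). | |
| // No errors are reported: perf_event_open(), ioctl() and read() all succeed (see the strace output). | |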
| #include <inttypes.h> | |
| #include <linux/perf_event.h> /* Definition of PERF_* constants */ | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <sys/ioctl.h> | |
| #include <sys/syscall.h> /* Definition of SYS_* constants */ | |
| #include <unistd.h> | |
| #ifndef TOTAL_EVENTS | |
| #define TOTAL_EVENTS 5 | |
| #endif | |
| // Workload being measured (called from main): a loop of 1,000,000,000 increments whose sum is never used, so an optimizing build may remove the loop entirely. | |
| void code_to_measure() | |
| { | |
| int sum = 0; | |
| for (int i = 0; i < 1000000000; ++i) | |
| { | |
| sum += 1; | |
| } | |
| } | |
// Issues the raw perf_event_open syscall; all arguments are passed through
// unchanged (see `man perf_event_open` for their meaning).
//
// Returns the new event file descriptor. On failure the reason is printed to
// stderr and the process exits with EXIT_FAILURE, so callers never see -1.
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    // syscall() returns long; keep that width instead of narrowing it to int.
    long fd = syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags);

    if (fd == -1)
    {
        // perror() appends ": <errno description>\n", so the message is
        // newline-terminated and says why the kernel rejected the event.
        perror("Error creating event");
        exit(EXIT_FAILURE);
    }
    return fd;
}
// Fills *pe with a fresh counting-event description (struct perf_event_attr;
// see `man perf_event_open`).
//   pe     - attribute structure to overwrite; every field not set below is zero
//   type   - stored in pe->type
//   config - stored in pe->config
// The event is created disabled, excludes kernel and hypervisor activity, and
// uses read_format PERF_FORMAT_GROUP | PERF_FORMAT_ID.
void configure_event(struct perf_event_attr *pe, uint32_t type, uint64_t config)
{
    const size_t attr_size = sizeof *pe;

    // Start from an all-zero structure so unused fields are well defined.
    memset(pe, 0, attr_size);

    // Identify the structure version and the event itself.
    pe->size = attr_size;
    pe->type = type;
    pe->config = config;

    // Do not count hypervisor or kernel activity.
    pe->exclude_hv = 1;
    pe->exclude_kernel = 1;

    // Created stopped; the caller enables it explicitly.
    pe->disabled = 1;

    // Reads return the whole group, each value tagged with its event id.
    pe->read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP;
}
| // Format of event data to read | |
| // Note: This format changes depending on perf_event_attr.read_format | |
| // See `man perf_event_open` to understand how this structure can be different depending on event config | |
| // This read_format structure corresponds to when PERF_FORMAT_GROUP & PERF_FORMAT_ID are set | |
| struct read_format | |
| { | |
| uint64_t nr; | |
| struct | |
| { | |
| uint64_t value; | |
| uint64_t id; | |
| } values[TOTAL_EVENTS]; | |
| }; | |
| int main() | |
| { | |
| int fd[TOTAL_EVENTS]; // fd[0] will be the group leader file descriptor | |
| uint64_t id[TOTAL_EVENTS]; // event ids for file descriptors | |
| uint64_t pe_val[TOTAL_EVENTS]; // Counter value array corresponding to fd/id array. | |
| struct perf_event_attr pe[TOTAL_EVENTS]; // Configuration structure for perf events (see man perf_event_open) | |
| struct read_format counter_results; | |
| // Configure the group of PMUs to count | |
| configure_event(&pe[0], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); | |
| configure_event(&pe[1], PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); | |
| configure_event(&pe[2], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); | |
| configure_event(&pe[3], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); | |
| #if TOTAL_EVENTS >= 5 | |
| configure_event(&pe[4], PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | |
| #endif | |
| #if TOTAL_EVENTS >= 6 | |
| configure_event(&pe[5], PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); | |
| #endif | |
| // Create event group leader | |
| fd[0] = perf_event_open(&pe[0], 0, -1, -1, 0); | |
| ioctl(fd[0], PERF_EVENT_IOC_ID, &id[0]); | |
| // Let's create the rest of the events while using fd[0] as the group leader | |
| for (size_t i = 1; i < TOTAL_EVENTS; i++) | |
| { | |
| fd[i] = perf_event_open(&pe[i], 0, -1, fd[0], 0); | |
| ioctl(fd[i], PERF_EVENT_IOC_ID, &id[i]); | |
| } | |
| // Reset counters and start counting; Since fd[0] is leader, this resets and enables all counters | |
| // PERF_IOC_FLAG_GROUP required for the ioctl to act on the group of file descriptors | |
| ioctl(fd[0], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); | |
| ioctl(fd[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); | |
| // Example code to count through | |
| code_to_measure(); | |
| // Stop all counters | |
| ioctl(fd[0], PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); | |
| // Read the group of counters and print result | |
| read(fd[0], &counter_results, sizeof(struct read_format)); | |
| printf("Num events captured: %" PRIu64 "\n", counter_results.nr); | |
| for (size_t i = 0; i < counter_results.nr; i++) | |
| { | |
| for (size_t j = 0; j < TOTAL_EVENTS; j++) | |
| { | |
| if (counter_results.values[i].id == id[j]) | |
| { | |
| pe_val[i] = counter_results.values[i].value; | |
| } | |
| } | |
| } | |
| printf("CPU cycles: %" PRIu64 "\n", pe_val[0]); | |
| printf("Instructions retired: %" PRIu64 "\n", pe_val[1]); | |
| printf("cache refs %" PRIu64 "\n", pe_val[2]); | |
| printf("cache msises: %" PRIu64 "\n", pe_val[3]); | |
| #if TOTAL_EVENTS >= 5 | |
| printf("branch refs: %" PRIu64 "\n", pe_val[4]); | |
| #endif | |
| #if TOTAL_EVENTS >= 6 | |
| printf("branch misses: %" PRIu64 "\n", pe_val[5]); | |
| #endif | |
| // Close counter file descriptors | |
| for (int i = 0; i < TOTAL_EVENTS; i++) | |
| { | |
| close(fd[i]); | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment