Skip to content

Instantly share code, notes, and snippets.

@julian-klode
Last active February 10, 2026 20:06
Show Gist options
  • Select an option

  • Save julian-klode/d2c983ae69c93af43568796d836b48ef to your computer and use it in GitHub Desktop.

Select an option

Save julian-klode/d2c983ae69c93af43568796d836b48ef to your computer and use it in GitHub Desktop.
perf_event_open() only works for groups of up to 5 events on kernel 6.18 (Ryzen 6850U); a group of 6 opens without error, but every counter reads 0.
execve("./a.out", ["./a.out"], 0x7ffe6b19fd00 /* 86 vars */) = 0
brk(NULL) = 0x5bc7cb51f000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x724d0aa3e000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=121541, ...}) = 0
mmap(NULL, 121541, PROT_READ, MAP_PRIVATE, 3, 0) = 0x724d0aa20000
close(3) = 0
openat(AT_FDCWD, "/usr/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\247\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840
fstat(3, {st_mode=S_IFREG|0755, st_size=2186672, ...}) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840
mmap(NULL, 2227792, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x724d0a800000
mmap(0x724d0a828000, 1667072, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x724d0a828000
mmap(0x724d0a9bf000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x724d0a9bf000
mmap(0x724d0aa0d000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x20d000) = 0x724d0aa0d000
mmap(0x724d0aa13000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x724d0aa13000
close(3) = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x724d0a7fd000
arch_prctl(ARCH_SET_FS, 0x724d0a7fd740) = 0
set_tid_address(0x724d0a7fda10) = 67344
set_robust_list(0x724d0a7fda20, 24) = 0
rseq(0x724d0a7fd680, 0x20, 0, 0x53053053) = 0
mprotect(0x724d0aa0d000, 16384, PROT_READ) = 0
mprotect(0x5bc7c17dd000, 4096, PROT_READ) = 0
mprotect(0x724d0aa82000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
getrandom("\xed\x6a\x77\xf1\x82\x3e\xc3\xd6", 8, GRND_NONBLOCK) = 8
munmap(0x724d0aa20000, 121541) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CPU_CYCLES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, -1, 0) = 3
ioctl(3, PERF_EVENT_IOC_ID, [42545]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 4
ioctl(4, PERF_EVENT_IOC_ID, [42546]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_REFERENCES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 5
ioctl(5, PERF_EVENT_IOC_ID, [42547]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 6
ioctl(6, PERF_EVENT_IOC_ID, [42548]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 7
ioctl(7, PERF_EVENT_IOC_ID, [42549]) = 0
ioctl(3, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) = 0
ioctl(3, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) = 0
ioctl(3, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) = 0
read(3, "\5\0\0\0\0\0\0\0L\223M\216\0\0\0\0001\246\0\0\0\0\0\0\327*k\356\0\0\0\0"..., 88) = 88
fstat(1, {st_mode=S_IFCHR|0600, st_rdev=makedev(0x88, 0x6), ...}) = 0
brk(NULL) = 0x5bc7cb51f000
brk(0x5bc7cb540000) = 0x5bc7cb540000
write(1, "Num events captured: 5\n", 23) = 23
write(1, "CPU cycles: 2387448652\n", 23) = 23
write(1, "Instructions retired: 4000000727"..., 33) = 33
write(1, "cache refs 5638\n", 16) = 16
write(1, "cache msises: 258\n", 18) = 18
write(1, "branch refs: 1000000692\n", 24) = 24
close(3) = 0
close(4) = 0
close(5) = 0
close(6) = 0
close(7) = 0
exit_group(0) = ?
+++ exited with 0 +++
execve("./a.out", ["./a.out"], 0x7ffeff680e80 /* 86 vars */) = 0
brk(NULL) = 0x5b8e10ee7000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7bca000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=121541, ...}) = 0
mmap(NULL, 121541, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7ca4b7bac000
close(3) = 0
openat(AT_FDCWD, "/usr/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000\247\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840
fstat(3, {st_mode=S_IFREG|0755, st_size=2186672, ...}) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 840, 64) = 840
mmap(NULL, 2227792, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7ca4b7800000
mmap(0x7ca4b7828000, 1667072, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7ca4b7828000
mmap(0x7ca4b79bf000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7ca4b79bf000
mmap(0x7ca4b7a0d000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x20d000) = 0x7ca4b7a0d000
mmap(0x7ca4b7a13000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7a13000
close(3) = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ca4b7ba9000
arch_prctl(ARCH_SET_FS, 0x7ca4b7ba9740) = 0
set_tid_address(0x7ca4b7ba9a10) = 67306
set_robust_list(0x7ca4b7ba9a20, 24) = 0
rseq(0x7ca4b7ba9680, 0x20, 0, 0x53053053) = 0
mprotect(0x7ca4b7a0d000, 16384, PROT_READ) = 0
mprotect(0x5b8deb51e000, 4096, PROT_READ) = 0
mprotect(0x7ca4b7c0e000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
getrandom("\x2f\x5a\xae\x5c\x12\x3f\xeb\xc7", 8, GRND_NONBLOCK) = 8
munmap(0x7ca4b7bac000, 121541) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CPU_CYCLES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, -1, 0) = 3
ioctl(3, PERF_EVENT_IOC_ID, [42539]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 4
ioctl(4, PERF_EVENT_IOC_ID, [42540]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_REFERENCES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 5
ioctl(5, PERF_EVENT_IOC_ID, [42541]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_CACHE_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 6
ioctl(6, PERF_EVENT_IOC_ID, [42542]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 7
ioctl(7, PERF_EVENT_IOC_ID, [42543]) = 0
perf_event_open({type=PERF_TYPE_HARDWARE, size=0x90 /* PERF_ATTR_SIZE_??? */, config=PERF_COUNT_HW_BRANCH_MISSES, sample_period=0, sample_type=0, read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP, disabled=1, exclude_kernel=1, exclude_hv=1, precise_ip=0 /* arbitrary skid */, ...}, 0, -1, 3, 0) = 8
ioctl(8, PERF_EVENT_IOC_ID, [42544]) = 0
ioctl(3, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) = 0
ioctl(3, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) = 0
ioctl(3, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) = 0
read(3, "\6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\246\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 104) = 104
fstat(1, {st_mode=S_IFCHR|0600, st_rdev=makedev(0x88, 0x6), ...}) = 0
brk(NULL) = 0x5b8e10ee7000
brk(0x5b8e10f08000) = 0x5b8e10f08000
write(1, "Num events captured: 6\n", 23) = 23
write(1, "CPU cycles: 0\n", 14) = 14
write(1, "Instructions retired: 0\n", 24) = 24
write(1, "cache refs 0\n", 13) = 13
write(1, "cache msises: 0\n", 16) = 16
write(1, "branch refs: 0\n", 15) = 15
write(1, "branch misses: 0\n", 17) = 17
close(3) = 0
close(4) = 0
close(5) = 0
close(6) = 0
close(7) = 0
close(8) = 0
exit_group(0) = ?
+++ exited with 0 +++
// Only 5 events can be counted in one group.
// Specifying 6 events makes the kernel report 0 for all of them (kernel 6.18, Ryzen 6850U).
// No errors are reported.
#include <inttypes.h>
#include <linux/perf_event.h> /* Definition of PERF_* constants */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h> /* Definition of SYS_* constants */
#include <unistd.h>
#ifndef TOTAL_EVENTS
#define TOTAL_EVENTS 5
#endif
// Workload whose hardware events are counted (called from main): a loop of
// one billion increments that produces no observable result.
// The accumulator is volatile so that an optimizing build cannot remove the
// loop as dead code and leave nothing to measure.
void code_to_measure(void)
{
    volatile int sum = 0;
    for (int i = 0; i < 1000000000; ++i)
    {
        sum += 1;
    }
}
// Invokes the perf_event_open syscall via syscall(SYS_perf_event_open, ...).
// Arguments are passed through unchanged.
// Returns the new event file descriptor. On failure the reason (taken from
// errno) is printed to stderr and the process exits with EXIT_FAILURE, so
// callers never receive -1.
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    // syscall() returns long; keep the full width to match the return type.
    long fd = syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags);
    if (fd == -1)
    {
        // perror appends the errno description and a trailing newline.
        perror("Error creating event");
        exit(EXIT_FAILURE);
    }
    return fd;
}
// Prepares *pe as a perf_event_attr for one event (see man perf_event_open).
// The whole structure is zeroed first; then type, size and config are filled
// in, the read format is set to PERF_FORMAT_GROUP | PERF_FORMAT_ID, and the
// disabled, exclude_kernel and exclude_hv flags are set to 1.
void configure_event(struct perf_event_attr *pe, uint32_t type, uint64_t config)
{
    const size_t attr_size = sizeof *pe;

    memset(pe, 0, attr_size);
    pe->size = attr_size;
    pe->type = type;
    pe->config = config;
    pe->read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP;
    pe->exclude_hv = 1;
    pe->exclude_kernel = 1;
    pe->disabled = 1;
}
// Layout of the buffer that main() fills by reading the group leader's file descriptor.
// Note: this layout depends on perf_event_attr.read_format; the one below corresponds to
// PERF_FORMAT_GROUP and PERF_FORMAT_ID both being set (which is what configure_event sets).
// See `man perf_event_open` for how the layout differs under other read_format settings.
struct read_format
{
uint64_t nr; // number of entries reported in values[] (main prints it as "Num events captured")
struct
{
uint64_t value; // counter value of one event
uint64_t id; // id of that event; main compares it with the ids obtained via PERF_EVENT_IOC_ID
} values[TOTAL_EVENTS]; // one slot per event in the group
};
// main() configures pe[0..3] unconditionally and at most pe[5], and prints the
// same slots, so any other TOTAL_EVENTS would index out of bounds or leave
// attributes unconfigured.
#if defined(TOTAL_EVENTS) && (TOTAL_EVENTS < 4 || TOTAL_EVENTS > 6)
#error "TOTAL_EVENTS must be between 4 and 6"
#endif

// Prints "<what>: <errno description>" to stderr and exits with EXIT_FAILURE.
static void die_perror(const char *what)
{
    perror(what);
    exit(EXIT_FAILURE);
}

// Counts TOTAL_EVENTS hardware events as a single group around
// code_to_measure() and prints every counter value.
// Returns 0 on success; a failing ioctl or read terminates the process with
// EXIT_FAILURE (perf_event_open failures already exit inside the wrapper).
int main(void)
{
    int fd[TOTAL_EVENTS];                    // fd[0] is the group leader file descriptor
    uint64_t id[TOTAL_EVENTS];               // event ids, same order as fd[]
    uint64_t pe_val[TOTAL_EVENTS] = {0};     // counter values, same order as fd[]/id[]; 0 if never reported
    struct perf_event_attr pe[TOTAL_EVENTS]; // one attribute structure per event
    struct read_format counter_results;

    memset(&counter_results, 0, sizeof counter_results);

    // Configure the group of events to count
    configure_event(&pe[0], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
    configure_event(&pe[1], PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
    configure_event(&pe[2], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES);
    configure_event(&pe[3], PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
#if TOTAL_EVENTS >= 5
    configure_event(&pe[4], PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
#endif
#if TOTAL_EVENTS >= 6
    configure_event(&pe[5], PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);
#endif

    // Open the leader first (group_fd = -1), then every other event with
    // fd[0] as its group leader, recording each event's id as we go.
    for (size_t i = 0; i < TOTAL_EVENTS; i++)
    {
        int group_fd = (i == 0) ? -1 : fd[0];

        fd[i] = (int)perf_event_open(&pe[i], 0, -1, group_fd, 0);
        if (ioctl(fd[i], PERF_EVENT_IOC_ID, &id[i]) == -1)
        {
            die_perror("PERF_EVENT_IOC_ID");
        }
    }

    // Reset and start all counters through the leader; PERF_IOC_FLAG_GROUP
    // makes each ioctl act on the whole group instead of on fd[0] only.
    if (ioctl(fd[0], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1)
    {
        die_perror("PERF_EVENT_IOC_RESET");
    }
    if (ioctl(fd[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1)
    {
        die_perror("PERF_EVENT_IOC_ENABLE");
    }

    // Example code to count through
    code_to_measure();

    // Stop all counters
    if (ioctl(fd[0], PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1)
    {
        die_perror("PERF_EVENT_IOC_DISABLE");
    }

    // Read the whole group in one call through the leader.
    ssize_t got = read(fd[0], &counter_results, sizeof counter_results);
    if (got == -1)
    {
        die_perror("read");
    }
    if ((size_t)got < sizeof counter_results.nr)
    {
        fprintf(stderr, "Short read of counter group\n");
        exit(EXIT_FAILURE);
    }

    // Only trust entries that were actually written by this read.
    size_t available = ((size_t)got - sizeof counter_results.nr) / sizeof counter_results.values[0];
    size_t reported = counter_results.nr < available ? (size_t)counter_results.nr : available;

    printf("Num events captured: %" PRIu64 "\n", counter_results.nr);

    // NOTE(review): if every value is 0 although no call failed (as observed
    // with 6 events), the group was presumably never scheduled on the PMU as
    // a whole; adding PERF_FORMAT_TOTAL_TIME_ENABLED/RUNNING to read_format
    // should confirm this - TODO verify.

    // Store each reported value in the slot of the event whose id matches, so
    // pe_val[] follows the fd[]/id[] order regardless of the order in which
    // the kernel reports the entries.
    for (size_t i = 0; i < reported; i++)
    {
        for (size_t j = 0; j < TOTAL_EVENTS; j++)
        {
            if (counter_results.values[i].id == id[j])
            {
                pe_val[j] = counter_results.values[i].value;
                break;
            }
        }
    }

    printf("CPU cycles: %" PRIu64 "\n", pe_val[0]);
    printf("Instructions retired: %" PRIu64 "\n", pe_val[1]);
    printf("cache refs %" PRIu64 "\n", pe_val[2]);
    printf("cache msises: %" PRIu64 "\n", pe_val[3]);
#if TOTAL_EVENTS >= 5
    printf("branch refs: %" PRIu64 "\n", pe_val[4]);
#endif
#if TOTAL_EVENTS >= 6
    printf("branch misses: %" PRIu64 "\n", pe_val[5]);
#endif

    // Close counter file descriptors
    for (size_t i = 0; i < TOTAL_EVENTS; i++)
    {
        close(fd[i]);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment