Created
December 24, 2025 14:29
-
-
Save farazsth98/9baa7a85cdf5764efba8f7635e3b60cc to your computer and use it in GitHub Desktop.
Profile how much CPU time a system call or any amount of code consumes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define _GNU_SOURCE | |
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <time.h> | |
| #include <sched.h> | |
| #include <err.h> | |
| #include <sys/types.h> | |
| #include <sys/syscall.h> | |
| #include <pthread.h> | |
| #include <unistd.h> | |
| #include <stdlib.h> | |
/*
 * SYSCHK(x): evaluate the expression `x` exactly once and yield its value.
 *
 * If the value equals -1 (converted to the expression's own type), the
 * process is terminated through err(1, ...), which prints the stringified
 * expression followed by the text for the current errno and exits with
 * status 1.
 *
 * Notes:
 *  - Uses the GNU statement-expression extension `({ ... })`.
 *  - `__typeof__` is the spelling that stays available when compiling in a
 *    strict ISO mode (-std=c99 / -std=c11), where plain `typeof` is not a
 *    keyword.
 *  - The stringified expression is passed as a "%s" argument rather than as
 *    the format string itself, so an expression containing '%' (for example
 *    `a % b`) cannot be misinterpreted as a conversion specification.
 *  - The temporary avoids a double-underscore name, which is reserved for
 *    the implementation.
 */
#define SYSCHK(x) ({                                  \
    __typeof__(x) syschk_res_ = (x);                  \
    if (syschk_res_ == (__typeof__(x))-1)             \
        err(1, "%s", "SYSCHK(" #x ")");               \
    syschk_res_;                                      \
})
/*
 * Restrict the calling thread to run only on CPU `i`.
 *
 * Builds an affinity mask containing just `i` and applies it with
 * sched_setaffinity(); a pid argument of 0 selects the calling thread.
 *
 * If the kernel rejects the request (for example because `i` is not a CPU
 * this thread is allowed to run on), the process exits through err() with a
 * diagnostic instead of silently continuing unpinned.
 */
void pin_on_cpu(int i) {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(i, &mask);

    if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
        err(1, "sched_setaffinity(cpu %d)", i);
    }
}
/*
 * Flatten a struct timespec into a single nanosecond count:
 * tv_sec scaled by 1e9 plus tv_nsec, computed in long long.
 */
static inline long long ts_to_ns(const struct timespec *ts) {
    const long long ns_per_sec = 1000000000LL;
    const long long whole_seconds = (long long)ts->tv_sec;
    const long long leftover_ns = (long long)ts->tv_nsec;

    return whole_seconds * ns_per_sec + leftover_ns;
}
/* Number of timestamps recorded in each measurement pass. */
#define NUM_SAMPLES 100000

/*
 * Average cost, in nanoseconds, of one back-to-back clock_gettime() syscall
 * as measured by the calibration pass below. A value of 0 means the
 * calibration has not produced a result yet.
 */
static long int clock_gettime_avg = 0;

/*
 * Sum of the gaps, in nanoseconds, between consecutive entries of
 * ts[0 .. count-1] (count - 1 gaps in total). Accumulated in long long so
 * the total does not depend on the width of `long`.
 */
static long long sum_consecutive_deltas_ns(const struct timespec *ts, int count) {
    long long total_nsec = 0;

    for (int i = 0; i < count - 1; i++) {
        total_nsec += ts_to_ns(&ts[i + 1]) - ts_to_ns(&ts[i]);
    }
    return total_nsec;
}

/*
 * Estimate the average thread CPU time, in nanoseconds, consumed by one
 * getpid() syscall.
 *
 * Method:
 *  1. Calibration (only while clock_gettime_avg is still 0): take
 *     NUM_SAMPLES CLOCK_THREAD_CPUTIME_ID timestamps back to back and store
 *     the average gap between neighbours in clock_gettime_avg.
 *  2. Measurement: take NUM_SAMPLES timestamps again, this time issuing the
 *     measured syscall after each one, so every gap covers one
 *     clock_gettime() plus one getpid().
 *  3. Subtract clock_gettime_avg from every gap and return the average of
 *     what is left.
 *
 * The timestamp buffer is heap-allocated; if the allocation fails the
 * process exits through err(). Syscall return values inside the sampling
 * loops are intentionally not inspected so that both loops stay as close as
 * possible to "timestamp + measured code" and nothing else.
 *
 * Returns the per-call average narrowed to long int, and updates
 * clock_gettime_avg as a side effect when calibration runs.
 */
long int getpid_avg_cputime_used(void) {
    const int num_gaps = NUM_SAMPLES - 1;

    struct timespec *ts = malloc(NUM_SAMPLES * sizeof *ts);
    if (ts == NULL) {
        err(1, "malloc");
    }

    if (clock_gettime_avg == 0) {
        for (int i = 0; i < NUM_SAMPLES; i++) {
            syscall(__NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &ts[i]);
        }
        clock_gettime_avg =
            (long int)(sum_consecutive_deltas_ns(ts, NUM_SAMPLES) / num_gaps);
    }

    for (int i = 0; i < NUM_SAMPLES; i++) {
        syscall(__NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &ts[i]);

        // Do whatever you're measuring here
        syscall(__NR_getpid);
    }

    /*
     * Removing the timer overhead from every gap is the same as removing
     * num_gaps * clock_gettime_avg from the total before dividing.
     */
    long long total_nsec = sum_consecutive_deltas_ns(ts, NUM_SAMPLES)
                         - (long long)clock_gettime_avg * num_gaps;

    free(ts);
    return (long int)(total_nsec / num_gaps);
}
| int main(int argc, char *argv[]) { | |
| pin_on_cpu(0); // VERY important to pin to one CPU | |
| long int timer_settime_avg = getpid_avg_cputime_used(); | |
| printf("clock_gettime avg: %ld ns\n", clock_gettime_avg); | |
| printf("getpid avg: %ld ns\n", timer_settime_avg); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment