Created
December 8, 2025 01:55
-
-
Save nmoinvaz/b56489b6643156df798ea8f04d1ceefd to your computer and use it in GitHub Desktop.
ARM fast PMULL detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* arm_cpu_id.c -- ARM CPU identification for microarchitecture detection | |
| * Copyright (C) 2025 Nathan Moinvaziri | |
| * For conditions of distribution and use, see copyright notice in zlib.h | |
| */ | |
| #include "zbuild.h" | |
| #include "arm_cpu_id.h" | |
| #if defined(__linux__) | |
| # include <stdio.h> | |
| # include <string.h> | |
| #elif defined(__APPLE__) | |
| # if !defined(_DARWIN_C_SOURCE) | |
| # define _DARWIN_C_SOURCE | |
| # endif | |
| # include <sys/sysctl.h> | |
| # include <string.h> | |
| #endif | |
| /* ARM CPU Implementer IDs */ | |
| #define ARM_IMPLEMENTER_ARM 0x41 | |
| #define ARM_IMPLEMENTER_APPLE 0x61 | |
| /* ARM CPU Part Numbers (ARM Limited - 0x41) */ | |
| /* Cortex-X series - Multiple PMULL lanes */ | |
| #define ARM_PART_CORTEX_X1 0xd44 | |
| #define ARM_PART_CORTEX_X1C 0xd4c | |
| #define ARM_PART_CORTEX_X2 0xd48 | |
| #define ARM_PART_CORTEX_X3 0xd4e | |
| #define ARM_PART_CORTEX_X4 0xd82 | |
| #define ARM_PART_CORTEX_X925 0xd85 | |
| /* Neoverse V/N2 series - Multiple PMULL lanes */ | |
| #define ARM_PART_NEOVERSE_N2 0xd49 | |
| #define ARM_PART_NEOVERSE_V1 0xd40 | |
| #define ARM_PART_NEOVERSE_V2 0xd4f | |
| #define ARM_PART_NEOVERSE_V3 0xd8e | |
| #if defined(__linux__) | |
/* Read the CPU implementer and part number from /proc/cpuinfo.
 *
 * Scans the file line by line for "CPU implementer" and "CPU part" entries
 * and stops reading as soon as both have been seen.
 *
 * implementer, part: receive the parsed values. They are written only when
 *                    both fields were found; otherwise they are left untouched.
 *
 * Returns 1 when both fields were found, 0 otherwise (including when
 * /proc/cpuinfo cannot be opened).
 *
 * NOTE(review): scanning stops at the first complete implementer/part pair,
 * so on a system whose cores differ this may not describe the core the
 * caller is running on -- confirm that this is acceptable.
 */
static int read_cpuinfo_id(uint32_t *implementer, uint32_t *part) {
    /* %x requires an unsigned int *, which uint32_t * is not guaranteed to
     * be, so parse into plain unsigned ints and convert on success. */
    unsigned int impl_val = 0, part_val = 0;
    int found = 0; /* bit 0: implementer seen, bit 1: part seen */
    char line[256];
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (f == NULL)
        return 0;

    while (found != 3 && fgets(line, sizeof(line), f) != NULL) {
        /* Whitespace in the format matches any run of blanks or tabs. */
        if (sscanf(line, "CPU implementer : 0x%x", &impl_val) == 1)
            found |= 1;
        else if (sscanf(line, "CPU part : 0x%x", &part_val) == 1)
            found |= 2;
    }
    fclose(f);

    if (found != 3)
        return 0;

    /* Commit the results only once both fields are known. */
    *implementer = (uint32_t)impl_val;
    *part = (uint32_t)part_val;
    return 1;
}
| #endif | |
| #if defined(__APPLE__) | |
/* Report whether the CPU brand string identifies an Apple M-series part.
 *
 * Queries the "machdep.cpu.brand_string" sysctl and returns 1 when the value
 * begins with "Apple M". Returns 0 when it does not, or when the query fails.
 */
static int is_apple_silicon(void) {
    static const char prefix[] = "Apple M";
    char name[64];
    size_t name_len = sizeof(name);

    if (sysctlbyname("machdep.cpu.brand_string", name, &name_len, NULL, 0) != 0)
        return 0;

    /* sizeof(prefix) - 1 drops the terminating NUL: compare 7 characters. */
    return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
}
| #endif | |
| /* Determine if CPU has fast PMULL (multiple execution units) */ | |
| Z_INTERNAL int arm_cpu_has_fast_pmull(void) { | |
| #if defined(__APPLE__) | |
| return is_apple_silicon(); | |
| #elif defined(__linux__) | |
| uint32_t implementer = 0, part = 0; | |
| if (!read_cpuinfo_id(&implementer, &part)) | |
| return 0; | |
| if (implementer == ARM_IMPLEMENTER_APPLE) | |
| return 1; | |
| if (implementer != ARM_IMPLEMENTER_ARM) | |
| return 0; | |
| /* Only Cortex-X and Neoverse V/N2 series have multi-lane PMULL */ | |
| switch (part) { | |
| case ARM_PART_CORTEX_X1: | |
| case ARM_PART_CORTEX_X1C: | |
| case ARM_PART_CORTEX_X2: | |
| case ARM_PART_CORTEX_X3: | |
| case ARM_PART_CORTEX_X4: | |
| case ARM_PART_CORTEX_X925: | |
| case ARM_PART_NEOVERSE_N2: | |
| case ARM_PART_NEOVERSE_V1: | |
| case ARM_PART_NEOVERSE_V2: | |
| case ARM_PART_NEOVERSE_V3: | |
| return 1; | |
| default: | |
| return 0; | |
| } | |
| #else | |
| return 0; | |
| #endif | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment