Created
February 5, 2026 01:44
-
-
Save SuperPauly/6d66b96d422a0bebf7d5d82451939bc7 to your computer and use it in GitHub Desktop.
CLI tool that scans common config/secret file formats (JSON, YAML/YML, INI, CONF, .env/.secret) for likely hardcoded credentials. It detects keyword-named assignments (supports =, :, dict["k"]=, .set("k", ...), and obj.k=), ignores UUID v1–v8 values by default, and uses normalised Shannon entropy plus simple heuristics (length and character-class checks) to decide whether a value looks like a real secret.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| from __future__ import annotations | |
| import argparse | |
| import math | |
| import os | |
| import re | |
| import sys | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Iterable, Iterator, Optional, Tuple | |
# Substrings/phrases that mark a key name as credential-like (see key_matches_keywords).
KEYWORDS = [
    "key", "token", "pass", "password", "passwd", "pwd", "secret", "client_secret",
    "api_key", "apikey", "access_key", "access_token", "refresh_token",
    "private_key", "public_key", "bearer", "jwt", "oauth", "auth", "signature",
    "session", "cookie", "salt", "pepper", "webhook", "dsn", "connection_string",
]
# Exact (lowercased) key names that always count as credential keys.
KEY_PHRASE_ALLOWLIST = {
    "node_auth_token",
    "npm_token",
    "github_token",
    "gitlab_token",
    "auth_token",
    "access_token",
    "refresh_token",
    "api_key",
    "private_key",
    "client_secret",
    "webhook_secret",
}
# Exact (lowercased) key names that never count, despite containing "auth"/"author"-like stems.
KEY_DENYLIST = {
    "author",
    "authorship",
    "authentication",
    "authenticated",
    "authorize",
    "authorised",
    "authorizer",
    "authorisation",
    "authorization",
}
# Default cap on scanned file size: 300 KiB.
DEFAULT_MAX_BYTES = 300 * 1024
# Extensions scanned when --only-ext is not given.
DEFAULT_ONLY_EXTS = {"json", "yaml", "yml", "conf", "ini", "env", "secret"}
# Extension-less basenames scanned when --only-name is not given.
DEFAULT_ONLY_BASENAMES = {".env", ".secret", ".config"}
# --- Regex building blocks ---
# A double-, single-, or backtick-quoted string (backslash escapes allowed inside).
_QUOTED_STR = r'(?P<qval>"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'|`(?:\\.|[^`\\])*`)'
# An unquoted value: everything up to a comment character or end of line.
_BARE_VAL = r"(?P<bval>[^#;\r\n]+)"
# A key: bare identifier (dots/dashes allowed) or a quoted string.
_KEY_TOKEN = r'(?P<key>[A-Za-z_][A-Za-z0-9_.-]*|"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\')'
# key = "value" / key: "value" (quoted value).
_RX_ASSIGN_QUOTED = re.compile(rf"{_KEY_TOKEN}\s*(?:=|:)\s*{_QUOTED_STR}")
# key = value / key: value (bare, unquoted value).
_RX_ASSIGN_BARE = re.compile(rf"{_KEY_TOKEN}\s*(?:=|:)\s*{_BARE_VAL}")
# dict["key"] = "value" style.
_RX_BRACKET_ASSIGN = re.compile(rf"\[\s*{_KEY_TOKEN}\s*\]\s*=\s*{_QUOTED_STR}")
# obj.set("key", "value") style.
_RX_DOT_SET = re.compile(rf"\.\s*set\s*\(\s*{_KEY_TOKEN}\s*,\s*{_QUOTED_STR}")
# obj.prop = "value" style.
_RX_PROP_ASSIGN = re.compile(rf"\.\s*(?P<prop>[A-Za-z_][A-Za-z0-9_]*)\s*=\s*{_QUOTED_STR}")
# RFC 4122-shaped UUID, versions 1-8, variant nibble 8/9/a/b, case-insensitive.
_UUID_V1_TO_V8_RE = re.compile(
    r"(?i)^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"
)
# Key tokenisation: split on _, -, . separators and on lower->UPPER camelCase boundaries.
_TOKEN_SPLIT_RE = re.compile(r"[_\-.]+")
_CAMEL_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
@dataclass(frozen=True)
class Hit:
    """A single suspected hardcoded secret discovered during a scan."""
    # Key name that matched the credential keywords.
    name: str
    # The (unquoted, unescaped) value flagged as a likely secret.
    value: str
    # File the hit was found in.
    path: Path
    # 1-based line number within the file.
    line_no: int
    # Normalised entropy score of the value (0.0..1.0, from entropy_score).
    entropy: float
class Ansi:
    """Tiny ANSI colour helper; returns text unmodified when disabled."""

    def __init__(self, enabled: bool) -> None:
        self.enabled = enabled

    def wrap(self, s: str, code: str) -> str:
        """Surround *s* with the SGR escape for *code*, if colours are on."""
        if not self.enabled:
            return s
        return "\x1b[" + code + "m" + s + "\x1b[0m"

    def red(self, s: str) -> str:
        return self.wrap(s, "31")

    def green(self, s: str) -> str:
        return self.wrap(s, "32")

    def yellow(self, s: str) -> str:
        return self.wrap(s, "33")

    def orange(self, s: str) -> str:
        # 256-colour orange (widely supported by modern terminals).
        return self.wrap(s, "38;5;208")
def is_uuid_v1_to_v8(s: str) -> bool:
    """True when *s* (ignoring surrounding whitespace) is a v1-v8 UUID."""
    return _UUID_V1_TO_V8_RE.match(s.strip()) is not None
def shannon_entropy_bits_per_char(s: str) -> float:
    """Shannon entropy of *s* in bits per character (0.0 for the empty string)."""
    length = len(s)
    if length == 0:
        return 0.0
    total = 0.0
    # Sum -p*log2(p) over each distinct character's frequency.
    for distinct in set(s):
        p = s.count(distinct) / length
        total -= p * math.log2(p)
    return total
def entropy_score(s: str) -> float:
    """Entropy normalised to [0, 1] by the maximum possible for s's alphabet size."""
    if not s:
        return 0.0
    distinct = len(set(s))
    if distinct < 2:
        # A single repeated character carries no information.
        return 0.0
    ratio = shannon_entropy_bits_per_char(s) / math.log2(distinct)
    return min(1.0, max(0.0, ratio))
| def _unquote_key(token: str) -> str: | |
| t = token.strip() | |
| if len(t) >= 2 and t[0] == t[-1] and t[0] in ("'", '"'): | |
| return t[1:-1] | |
| return t | |
| def _parse_quoted_string(token: str) -> str: | |
| t = token.strip() | |
| if len(t) < 2 or t[0] != t[-1] or t[0] not in ('"', "'", "`"): | |
| return "" | |
| body = t[1:-1] | |
| try: | |
| return bytes(body, "utf-8").decode("unicode_escape") | |
| except Exception: | |
| return body | |
| def _strip_inline_comment_env(s: str) -> str: | |
| out = s | |
| for ch in ("#", ";"): | |
| m = re.search(rf"(?<!\\)\s\{ch}", out) | |
| if m: | |
| out = out[: m.start()].rstrip() | |
| return out.strip() | |
def _extract_from_match(m: re.Match[str], allow_bare: bool) -> Optional[Tuple[str, str]]:
    """Turn a regex match into a (key, value) pair, or None when there is no usable value."""
    groups = m.groupdict()

    # obj.prop = "value" form: the key is the property name.
    prop = groups.get("prop")
    if prop:
        parsed = _parse_quoted_string(groups["qval"])
        return (prop, parsed) if parsed else None

    token = groups.get("key")
    if not token:
        return None
    name = _unquote_key(token)

    # Quoted value: unescape it.
    if groups.get("qval"):
        parsed = _parse_quoted_string(groups["qval"])
        return (name, parsed) if parsed else None

    # Bare (unquoted) value: trim comments, trailing commas, stray quotes.
    if allow_bare and groups.get("bval") is not None:
        cleaned = _strip_inline_comment_env(groups["bval"].strip().rstrip(","))
        if not cleaned:
            return None
        if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in ("'", '"'):
            cleaned = cleaned[1:-1]
        return (name, cleaned)

    return None
def key_tokens(key: str) -> list[str]:
    """Split a key into lowercase word tokens on _, -, . and camelCase boundaries."""
    snake = _CAMEL_BOUNDARY_RE.sub("_", key)
    return [part.lower() for part in _TOKEN_SPLIT_RE.split(snake) if part]
def key_matches_keywords(key: str) -> bool:
    """Decide whether *key* names something credential-like.

    Exact KEY_DENYLIST entries ("author", "authorization", ...) always
    lose; exact KEY_PHRASE_ALLOWLIST entries always win; otherwise the key
    is tokenised and matched against KEYWORDS, with multi-word keywords
    requiring all of their tokens to be present.
    """
    lk = key.strip().lower()
    if lk in KEY_DENYLIST:
        return False
    if lk in KEY_PHRASE_ALLOWLIST:
        return True
    toks = key_tokens(key)
    for kw in KEYWORDS:
        lkw = kw.lower()
        if lkw in KEY_PHRASE_ALLOWLIST:
            # Exact phrases were already handled above.
            continue
        kw_parts = key_tokens(lkw)
        if len(kw_parts) > 1:
            # Multi-token keyword: every token must appear in the key.
            if all(part in toks for part in kw_parts):
                return True
            continue
        # NOTE(review): the original special-cased {"auth", "key", "pass"}
        # but both branches were byte-identical, so the distinction was
        # dead code; a single token-membership test preserves behaviour.
        if lkw in toks:
            return True
    return False
def looks_like_secret_value(value: str, min_len: int) -> bool:
    """Heuristic filter: long enough, not a UUID, not a plain word, mixed character classes."""
    v = value.strip()
    if len(v) < min_len:
        return False
    if is_uuid_v1_to_v8(v):
        return False
    # A run of only letters/underscores is a word or identifier, not a secret.
    if re.fullmatch(r"[a-z_]{3,}", v, flags=re.I):
        return False
    # Require at least two of: letters, digits, symbols.
    classes = 0
    if any(c.isalpha() for c in v):
        classes += 1
    if any(c.isdigit() for c in v):
        classes += 1
    if any(not c.isalnum() for c in v):
        classes += 1
    return classes >= 2
def should_scan_file(
    path: Path,
    only_exts: set[str],
    ignore_exts: set[str],
    only_basenames: set[str],
    max_bytes: int,
) -> bool:
    """Return True when *path* qualifies for scanning.

    Unstat-able or oversized files are skipped. Extension-less files are
    scanned only when their (lowercased) basename is listed; otherwise the
    extension must be in only_exts and absent from ignore_exts.
    """
    try:
        size = path.stat().st_size
    except OSError:
        return False
    if size > max_bytes:
        return False
    if path.suffix == "" and path.name.lower() in only_basenames:
        return True
    ext = path.suffix.lower().lstrip(".")
    if not ext or ext in ignore_exts:
        return False
    return ext in only_exts
def iter_input_files(paths: Iterable[Path]) -> Iterator[Path]:
    """Yield each given file, plus every file under each given directory (recursive)."""
    for entry in paths:
        if entry.is_dir():
            for dirpath, _dirnames, filenames in os.walk(entry):
                base = Path(dirpath)
                for filename in filenames:
                    candidate = base / filename
                    # os.walk can list entries that are no longer regular files.
                    if candidate.is_file():
                        yield candidate
        elif entry.is_file():
            yield entry
def scan_file(path: Path, allow_bare: bool) -> Iterator[Tuple[int, str, str]]:
    """Yield (line_no, key, value) for each assignment-like match in *path*.

    The quoted-value patterns are always tried; the looser bare-value
    pattern only when allow_bare is True. I/O errors end the scan quietly.
    """
    quoted_patterns = (_RX_PROP_ASSIGN, _RX_DOT_SET, _RX_BRACKET_ASSIGN, _RX_ASSIGN_QUOTED)
    try:
        with path.open("r", encoding="utf-8", errors="replace", newline="") as fh:
            for line_no, line in enumerate(fh, start=1):
                if not line.strip():
                    continue
                for pattern in quoted_patterns:
                    for match in pattern.finditer(line):
                        pair = _extract_from_match(match, allow_bare=False)
                        if pair is not None:
                            yield line_no, pair[0], pair[1]
                if allow_bare:
                    for match in _RX_ASSIGN_BARE.finditer(line):
                        pair = _extract_from_match(match, allow_bare=True)
                        if pair is not None:
                            yield line_no, pair[0], pair[1]
    except OSError:
        return
| def _parse_ext_list(s: Optional[str]) -> set[str]: | |
| if not s: | |
| return set() | |
| out: set[str] = set() | |
| for part in re.split(r"[,\s]+", s.strip()): | |
| if part: | |
| out.add(part.lower().lstrip(".")) | |
| return out | |
| def _parse_name_list(s: Optional[str]) -> set[str]: | |
| if not s: | |
| return set() | |
| out: set[str] = set() | |
| for part in re.split(r"[,\s]+", s.strip()): | |
| if part: | |
| out.add(part.lower()) | |
| return out | |
def print_secret_block(ansi: Ansi, hit: Hit) -> None:
    """Pretty-print one discovered secret: banner, location, then key and value."""
    full_path = os.path.abspath(str(hit.path))
    directory = os.path.dirname(full_path)
    base_name = os.path.basename(full_path)
    print("\n")
    print(ansi.red("—" * 40 + "SECRET DISCOVERED!" + "—" * 40))
    print(f"PATH: {ansi.green(f'{directory}{os.sep}')}")
    print(f"FILE: {ansi.orange(base_name)} @ {ansi.yellow(f'line {hit.line_no}')}")
    print("-" * 90)
    print(f"KEY: {ansi.red(hit.name)}")
    print(f"VAL: {ansi.red(hit.value)}")
    print(ansi.red("—" * 100))
    print("\n")
def main(argv: list[str]) -> int:
    """CLI entry point: parse arguments, walk the inputs, report suspected secrets.

    Returns 1 when anything was reported (a red secret block, or — with
    --all — any keyword match), else 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="1..N file paths and/or directories (recursively).")
    ap.add_argument("-e", "--entropy", type=float, default=0.9, help="Entropy threshold (default: 0.9).")
    ap.add_argument("--max-kb", type=int, default=300, help="Max file size in KB (default: 300).")
    ap.add_argument("--min-len", type=int, default=16, help="Min value length to qualify as secret (default: 16).")
    ap.add_argument("--only-ext", default=None, help="Extensions to scan (override defaults).")
    ap.add_argument("--ignore-ext", default=None, help="Extensions to skip.")
    ap.add_argument("--only-name", default=None, help="Basenames (no extension) to scan (override defaults).")
    ap.add_argument("-a", "--all", action="store_true", help="Print both yellow and red. Default: red only.")
    ap.add_argument("--no-ignore-uuid", action="store_true", help="Do not ignore UUID values.")
    ap.add_argument("--no-colour", action="store_true", help="Disable ANSI colours.")
    args = ap.parse_args(argv)
    # Validate the entropy threshold up front (entropy_score is normalised to [0, 1]).
    thr = float(args.entropy)
    if not (0.0 <= thr <= 1.0):
        raise SystemExit("Error: --entropy must be within [0, 1].")
    max_bytes = int(args.max_kb) * 1024
    # CLI overrides replace the defaults entirely rather than extending them.
    only_exts = _parse_ext_list(args.only_ext) or set(DEFAULT_ONLY_EXTS)
    ignore_exts = _parse_ext_list(args.ignore_ext)
    only_basenames = _parse_name_list(args.only_name) or set(DEFAULT_ONLY_BASENAMES)
    # Colours only when writing to a real terminal and not explicitly disabled.
    ansi = Ansi(enabled=(sys.stdout.isatty() and not args.no_colour))
    any_found = False
    inputs = [Path(p).expanduser().resolve() for p in args.paths]
    for fp in iter_input_files(inputs):
        if not should_scan_file(fp, only_exts, ignore_exts, only_basenames, max_bytes):
            continue
        # Bare (unquoted) values are only meaningful in line-oriented formats.
        allow_bare = (
            fp.suffix.lower().lstrip(".") in {"env", "ini", "yaml", "yml", "conf", "toml", "secret"}
            or fp.name.lower() in only_basenames
        )
        for line_no, key, value in scan_file(fp, allow_bare=allow_bare):
            if not key_matches_keywords(key) or value == "":
                continue
            # UUIDs are identifiers, not secrets — skipped unless --no-ignore-uuid.
            if not args.no_ignore_uuid and is_uuid_v1_to_v8(value):
                continue
            ent = entropy_score(value)
            is_secret = ent >= thr and looks_like_secret_value(value, min_len=args.min_len)
            if args.all:
                # Informational (yellow) line for every keyword match, secret or not.
                print(ansi.yellow(f"Keyword detected & value of type str found —> {fp.name} @ {line_no}"))
            if is_secret:
                any_found = True
                print_secret_block(ansi, Hit(name=key, value=value, path=fp, line_no=line_no, entropy=ent))
            elif args.all:
                # NOTE(review): with --all, even non-secret keyword hits flip the
                # exit code to 1 — confirm this is the intended CI semantics.
                any_found = True
    # Non-zero exit signals "findings present" for scripting/CI use.
    return 1 if any_found else 0
# Script entry point: exit status is main()'s return value (1 = findings).
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment