Skip to content

Instantly share code, notes, and snippets.

@SuperPauly
Created February 5, 2026 01:44
Show Gist options
  • Select an option

  • Save SuperPauly/6d66b96d422a0bebf7d5d82451939bc7 to your computer and use it in GitHub Desktop.

Select an option

Save SuperPauly/6d66b96d422a0bebf7d5d82451939bc7 to your computer and use it in GitHub Desktop.
CLI tool that scans common config/secret file formats (JSON, YAML/YML, INI, CONF, .env/.secret) for likely hardcoded credentials. It detects keyword-named assignments (supports =, :, dict["k"]=, .set("k", ...), and obj.k=), ignores UUID v1–v8 values by default, and uses normalised Shannon entropy plus simple heuristics (length and character-class checks) to decide which values to report as secrets.
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import math
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Iterator, Optional, Tuple
# Key-name fragments that suggest a credential. key_matches_keywords() compares
# each entry against the lower-cased tokens of a key name.
KEYWORDS = [
    "key",
    "token",
    "pass",
    "password",
    "passwd",
    "pwd",
    "secret",
    "client_secret",
    "api_key",
    "apikey",
    "access_key",
    "access_token",
    "refresh_token",
    "private_key",
    "public_key",
    "bearer",
    "jwt",
    "oauth",
    "auth",
    "signature",
    "session",
    "cookie",
    "salt",
    "pepper",
    "webhook",
    "dsn",
    "connection_string",
]

# Whole key names (lower-cased) that key_matches_keywords() accepts outright.
KEY_PHRASE_ALLOWLIST = {
    "node_auth_token",
    "npm_token",
    "github_token",
    "gitlab_token",
    "auth_token",
    "access_token",
    "refresh_token",
    "api_key",
    "private_key",
    "client_secret",
    "webhook_secret",
}

# Whole key names (lower-cased) that key_matches_keywords() rejects outright.
KEY_DENYLIST = {
    "author",
    "authorship",
    "authentication",
    "authenticated",
    "authorize",
    "authorised",
    "authorizer",
    "authorisation",
    "authorization",
}

# Default size cap for scanned files, in bytes (300 KiB).
DEFAULT_MAX_BYTES = 300 * 1024

# File extensions (without the dot) scanned when --only-ext is not given.
DEFAULT_ONLY_EXTS = {
    "json",
    "yaml",
    "yml",
    "conf",
    "ini",
    "env",
    "secret",
}

# Suffix-less file names scanned when --only-name is not given.
DEFAULT_ONLY_BASENAMES = {
    ".env",
    ".secret",
    ".config",
}
# --- Regex building blocks ---
# A double-quoted, single-quoted or backtick-quoted string; backslash escapes
# are allowed inside. Captured with its quotes as named group "qval".
_QUOTED_STR = r'(?P<qval>"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'|`(?:\\.|[^`\\])*`)'
# An unquoted value: one or more characters up to the next '#', ';' or line
# break. Captured as named group "bval".
_BARE_VAL = r"(?P<bval>[^#;\r\n]+)"
# A key: either an identifier (letter or underscore first, then letters,
# digits, '_', '.', '-') or a double-/single-quoted string. Named group "key".
_KEY_TOKEN = r'(?P<key>[A-Za-z_][A-Za-z0-9_.-]*|"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\')'
# key = "value"   or   key: "value"
_RX_ASSIGN_QUOTED = re.compile(rf"{_KEY_TOKEN}\s*(?:=|:)\s*{_QUOTED_STR}")
# key = value   or   key: value   (unquoted value, see _BARE_VAL)
_RX_ASSIGN_BARE = re.compile(rf"{_KEY_TOKEN}\s*(?:=|:)\s*{_BARE_VAL}")
# [key] = "value"   e.g. the tail of   dict["k"] = "value"
_RX_BRACKET_ASSIGN = re.compile(rf"\[\s*{_KEY_TOKEN}\s*\]\s*=\s*{_QUOTED_STR}")
# .set(key, "value"   (the closing parenthesis is not part of the pattern)
_RX_DOT_SET = re.compile(rf"\.\s*set\s*\(\s*{_KEY_TOKEN}\s*,\s*{_QUOTED_STR}")
# .prop = "value"   where prop is a plain identifier, captured as group "prop"
_RX_PROP_ASSIGN = re.compile(rf"\.\s*(?P<prop>[A-Za-z_][A-Za-z0-9_]*)\s*=\s*{_QUOTED_STR}")
# Whole-string, case-insensitive match for the 8-4-4-4-12 hex UUID layout with
# a version digit of 1-8 and a variant digit of 8, 9, a or b.
_UUID_V1_TO_V8_RE = re.compile(
r"(?i)^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"
)
# Separator runs inside key names: any mix of '_', '-' and '.'.
_TOKEN_SPLIT_RE = re.compile(r"[_\-.]+")
# Zero-width position between a lowercase letter or digit and an uppercase
# letter, i.e. a camelCase word boundary.
_CAMEL_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
@dataclass(frozen=True)
class Hit:
    """Immutable record describing one reported finding."""

    # Key name the value was assigned to.
    name: str
    # The extracted value.
    value: str
    # File in which the assignment was found.
    path: Path
    # 1-based line number within that file.
    line_no: int
    # Entropy score computed for the value.
    entropy: float
class Ansi:
    """Optional ANSI colouring for terminal output.

    When ``enabled`` is false every method returns its input unchanged.
    """

    def __init__(self, enabled: bool) -> None:
        self.enabled = enabled

    def wrap(self, s: str, code: str) -> str:
        """Surround *s* with the SGR sequence *code* and a reset, if enabled."""
        if not self.enabled:
            return s
        return f"\x1b[{code}m{s}\x1b[0m"

    def red(self, s: str) -> str:
        """Return *s* in red (SGR 31)."""
        return self.wrap(s, "31")

    def green(self, s: str) -> str:
        """Return *s* in green (SGR 32)."""
        return self.wrap(s, "32")

    def yellow(self, s: str) -> str:
        """Return *s* in yellow (SGR 33)."""
        return self.wrap(s, "33")

    def orange(self, s: str) -> str:
        """Return *s* in an orange-ish 256-colour shade (colour 208)."""
        # 256-colour code; works in most modern terminals.
        return self.wrap(s, "38;5;208")
def is_uuid_v1_to_v8(s: str) -> bool:
    """Return True when *s*, ignoring surrounding whitespace, is a UUID.

    The accepted shape is defined by ``_UUID_V1_TO_V8_RE``.
    """
    candidate = s.strip()
    return _UUID_V1_TO_V8_RE.match(candidate) is not None
def shannon_entropy_bits_per_char(s: str) -> float:
    """Return the Shannon entropy of *s* in bits per character.

    An empty string has entropy 0.0.
    """
    if not s:
        return 0.0
    total = len(s)
    # dict.fromkeys keeps first-occurrence order, so the terms below are
    # accumulated in the same order as the characters first appear.
    frequency = dict.fromkeys(s, 0)
    for ch in s:
        frequency[ch] += 1
    bits = 0.0
    for occurrences in frequency.values():
        probability = occurrences / total
        bits -= probability * math.log2(probability)
    return bits
def entropy_score(s: str) -> float:
    """Return the entropy of *s* normalised to the range [0.0, 1.0].

    The per-character Shannon entropy is divided by ``log2`` of the number of
    distinct characters in *s*. Strings that are empty or consist of a single
    repeated character score 0.0.
    """
    if not s:
        return 0.0
    distinct = len(set(s))
    if distinct <= 1:
        return 0.0
    ratio = shannon_entropy_bits_per_char(s) / math.log2(distinct)
    # Clamp into [0, 1].
    if ratio <= 0.0:
        return 0.0
    if ratio >= 1.0:
        return 1.0
    return ratio
def _unquote_key(token: str) -> str:
    """Strip whitespace and one pair of matching ' or " quotes from *token*.

    Tokens that are not wrapped in a matching pair are returned stripped but
    otherwise unchanged.
    """
    stripped = token.strip()
    if len(stripped) < 2:
        return stripped
    first, last = stripped[0], stripped[-1]
    if first == last and first in ("'", '"'):
        return stripped[1:-1]
    return stripped
def _parse_quoted_string(token: str) -> str:
    """Return the contents of a quoted string token with escapes decoded.

    *token* must be wrapped in a matching pair of ``"``, ``'`` or backtick
    quotes (surrounding whitespace is ignored); otherwise ``""`` is returned.
    Backslash escapes such as ``\\n`` or ``\\u0041`` are decoded. If the
    escapes cannot be decoded the raw contents are returned unchanged.
    """
    t = token.strip()
    if len(t) < 2 or t[0] != t[-1] or t[0] not in ('"', "'", "`"):
        return ""
    body = t[1:-1]
    if "\\" not in body:
        # Nothing to decode; returning early also keeps non-ASCII text intact.
        return body
    try:
        # unicode_escape reads its input as Latin-1, so encoding the body as
        # UTF-8 first would turn every non-ASCII character into mojibake.
        # Encode as Latin-1 instead and let backslashreplace rewrite anything
        # outside Latin-1 as a \u/\U escape that the decoder restores.
        return body.encode("latin-1", "backslashreplace").decode("unicode_escape")
    except UnicodeDecodeError:
        # Malformed escape (e.g. a trailing backslash): keep the raw text.
        return body
def _strip_inline_comment_env(s: str) -> str:
    """Drop a trailing inline comment from *s* and strip whitespace.

    A comment starts at the first whitespace character that is immediately
    followed by ``#`` or ``;`` and is not itself preceded by a backslash.
    """
    # Cutting at the earliest of the two markers gives the same result as
    # cutting at '#' first and then at ';' in what remains.
    comment = re.search(r"(?<!\\)\s[#;]", s)
    kept = s if comment is None else s[: comment.start()]
    return kept.strip()
def _extract_from_match(m: re.Match[str], allow_bare: bool) -> Optional[Tuple[str, str]]:
    """Turn a match of one of the assignment patterns into ``(key, value)``.

    The match is inspected through its named groups ``prop``, ``key``,
    ``qval`` and ``bval``. Returns ``None`` when no key is present, when the
    value is empty, or when only a bare value matched and *allow_bare* is
    false.
    """
    groups = m.groupdict()

    # Property assignment (.prop = "value"): the property name is the key.
    prop = groups.get("prop")
    if prop:
        parsed = _parse_quoted_string(groups["qval"])
        return (prop, parsed) if parsed else None

    key_token = groups.get("key")
    if not key_token:
        return None
    key = _unquote_key(key_token)

    quoted = groups.get("qval")
    if quoted:
        parsed = _parse_quoted_string(quoted)
        return (key, parsed) if parsed else None

    bare = groups.get("bval")
    if not allow_bare or bare is None:
        return None

    # Bare value: drop trailing commas and inline comments, then remove one
    # pair of surrounding ' or " quotes if present.
    text = _strip_inline_comment_env(bare.strip().rstrip(","))
    if not text:
        return None
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ("'", '"'):
        text = text[1:-1]
    return (key, text)
def key_tokens(key: str) -> list[str]:
    """Split a key name into lower-case word tokens.

    camelCase boundaries are first turned into underscores; the name is then
    split on runs of ``_``, ``-`` and ``.``. Empty pieces are discarded.
    """
    with_boundaries = _CAMEL_BOUNDARY_RE.sub("_", key)
    pieces = _TOKEN_SPLIT_RE.split(with_boundaries)
    return [piece.lower() for piece in filter(None, pieces)]
def key_matches_keywords(key: str) -> bool:
    """Decide whether a key name looks like it holds a credential.

    The whole lower-cased key is checked against ``KEY_DENYLIST`` (reject) and
    ``KEY_PHRASE_ALLOWLIST`` (accept) first. Otherwise the key is tokenised
    and accepted when any entry of ``KEYWORDS`` that is not in the allowlist
    matches: a single-word keyword must equal one token, and a multi-word
    keyword must have all of its words among the tokens.
    """
    normalized = key.strip().lower()
    if normalized in KEY_DENYLIST:
        return False
    if normalized in KEY_PHRASE_ALLOWLIST:
        return True

    present = set(key_tokens(key))
    for keyword in KEYWORDS:
        keyword = keyword.lower()
        if keyword in KEY_PHRASE_ALLOWLIST:
            # Allowlisted phrases only count as whole-key matches (above).
            continue
        words = key_tokens(keyword)
        if len(words) > 1:
            matched = all(word in present for word in words)
        else:
            matched = keyword in present
        if matched:
            return True
    return False
def looks_like_secret_value(value: str, min_len: int) -> bool:
    """Apply shape heuristics to decide whether *value* could be a secret.

    After stripping whitespace the value is rejected when it is shorter than
    *min_len*, is a UUID, consists only of letters and underscores (three or
    more), or uses fewer than two of the character classes letter / digit /
    other.
    """
    candidate = value.strip()
    if len(candidate) < min_len or is_uuid_v1_to_v8(candidate):
        return False
    # Plain identifier-like words are not treated as secrets.
    if re.fullmatch(r"[a-z_]{3,}", candidate, flags=re.I):
        return False
    classes_present = (
        any(ch.isalpha() for ch in candidate),
        any(ch.isdigit() for ch in candidate),
        any(not ch.isalnum() for ch in candidate),
    )
    return sum(classes_present) >= 2
def should_scan_file(
    path: Path,
    only_exts: set[str],
    ignore_exts: set[str],
    only_basenames: set[str],
    max_bytes: int,
) -> bool:
    """Return True when *path* qualifies for scanning.

    A file qualifies when it can be stat'ed, is no larger than *max_bytes*,
    and either has no suffix and a lower-cased name in *only_basenames*, or
    has a lower-cased extension (without the dot) that is in *only_exts* and
    not in *ignore_exts*.
    """
    try:
        size = path.stat().st_size
    except OSError:
        return False
    if size > max_bytes:
        return False

    suffix = path.suffix
    if not suffix:
        # Suffix-less files (e.g. ".env") are selected by name only.
        return path.name.lower() in only_basenames

    extension = suffix.lower().lstrip(".")
    if not extension:
        return False
    return extension not in ignore_exts and extension in only_exts
def iter_input_files(paths: Iterable[Path]) -> Iterator[Path]:
    """Yield every regular file named by *paths*.

    Directories are walked recursively with ``os.walk``; plain files are
    yielded as given; anything else is ignored.
    """
    for entry in paths:
        if not entry.is_dir():
            if entry.is_file():
                yield entry
            continue
        for dirpath, _subdirs, filenames in os.walk(entry):
            parent = Path(dirpath)
            candidates = (parent / filename for filename in filenames)
            yield from (found for found in candidates if found.is_file())
def scan_file(path: Path, allow_bare: bool) -> Iterator[Tuple[int, str, str]]:
    """Yield ``(line_no, key, value)`` for each assignment found in *path*.

    Every non-blank line is matched against the quoted-value patterns and,
    when *allow_bare* is true, against the bare-value pattern as well. Line
    numbers are 1-based. The file is read as UTF-8 with undecodable bytes
    replaced. If the file cannot be opened or read, iteration simply ends
    (best effort).

    The patterns overlap: ``KEY="v"`` matches both the quoted and the bare
    pattern. An assignment is therefore yielded only once per line for a
    given key and value start offset; the first pattern to match wins, so the
    quoted interpretation takes precedence over the bare one.
    """
    quoted_patterns = (_RX_PROP_ASSIGN, _RX_DOT_SET, _RX_BRACKET_ASSIGN, _RX_ASSIGN_QUOTED)
    try:
        with path.open("r", encoding="utf-8", errors="replace", newline="") as f:
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                # (key, offset where the value starts) already yielded for this line.
                seen: set[tuple[str, int]] = set()
                for rx in quoted_patterns:
                    for m in rx.finditer(line):
                        kv = _extract_from_match(m, allow_bare=False)
                        if not kv:
                            continue
                        marker = (kv[0], m.start("qval"))
                        if marker in seen:
                            continue
                        seen.add(marker)
                        yield line_no, kv[0], kv[1]
                if allow_bare:
                    for m in _RX_ASSIGN_BARE.finditer(line):
                        kv = _extract_from_match(m, allow_bare=True)
                        if not kv:
                            continue
                        # A bare match on a quoted value starts at the opening
                        # quote, the same offset the quoted pattern recorded.
                        marker = (kv[0], m.start("bval"))
                        if marker in seen:
                            continue
                        seen.add(marker)
                        yield line_no, kv[0], kv[1]
    except OSError:
        # Unreadable files are skipped rather than aborting the whole scan.
        return
def _parse_ext_list(s: Optional[str]) -> set[str]:
    """Parse a comma/whitespace separated list of file extensions.

    Each entry is lower-cased and stripped of leading dots. ``None`` or an
    empty string yields an empty set.
    """
    if not s:
        return set()
    pieces = re.split(r"[,\s]+", s.strip())
    return {piece.lower().lstrip(".") for piece in pieces if piece}
def _parse_name_list(s: Optional[str]) -> set[str]:
    """Parse a comma/whitespace separated list of file names.

    Each entry is lower-cased; leading dots are kept. ``None`` or an empty
    string yields an empty set.
    """
    if not s:
        return set()
    pieces = re.split(r"[,\s]+", s.strip())
    return {piece.lower() for piece in pieces if piece}
def print_secret_block(ansi: Ansi, hit: Hit) -> None:
    """Print a coloured, multi-line report for one finding to stdout.

    The report shows the absolute directory, the file name with line number,
    the key and the value, framed by blank lines and rule lines.
    """
    directory, filename = os.path.split(os.path.abspath(str(hit.path)))
    location = ansi.yellow(f"line {hit.line_no}")
    report = (
        "\n",
        ansi.red("—" * 40 + "SECRET DISCOVERED!" + "—" * 40),
        f"PATH: {ansi.green(f'{directory}{os.sep}')}",
        f"FILE: {ansi.orange(filename)} @ {location}",
        "-" * 90,
        f"KEY: {ansi.red(f'{hit.name}')}",
        f"VAL: {ansi.red(f'{hit.value}')}",
        ansi.red("—" * 100),
        "\n",
    )
    for row in report:
        print(row)
def main(argv: list[str]) -> int:
    """Run the scanner over the paths given in *argv*.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        1 when at least one secret was reported (or, with ``--all``, when any
        keyword-named assignment was seen), otherwise 0.

    Raises:
        SystemExit: If ``--entropy`` is outside [0, 1], or on argparse errors.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="1..N file paths and/or directories (recursively).")
    ap.add_argument("-e", "--entropy", type=float, default=0.9, help="Entropy threshold (default: 0.9).")
    ap.add_argument(
        "--max-kb",
        type=int,
        # Derived from the module constant so the two cannot drift apart.
        default=DEFAULT_MAX_BYTES // 1024,
        help=f"Max file size in KB (default: {DEFAULT_MAX_BYTES // 1024}).",
    )
    ap.add_argument("--min-len", type=int, default=16, help="Min value length to qualify as secret (default: 16).")
    ap.add_argument("--only-ext", default=None, help="Extensions to scan (override defaults).")
    ap.add_argument("--ignore-ext", default=None, help="Extensions to skip.")
    ap.add_argument("--only-name", default=None, help="Basenames (no extension) to scan (override defaults).")
    ap.add_argument("-a", "--all", action="store_true", help="Print both yellow and red. Default: red only.")
    ap.add_argument("--no-ignore-uuid", action="store_true", help="Do not ignore UUID values.")
    ap.add_argument("--no-colour", action="store_true", help="Disable ANSI colours.")
    args = ap.parse_args(argv)

    thr = float(args.entropy)
    if not (0.0 <= thr <= 1.0):
        raise SystemExit("Error: --entropy must be within [0, 1].")

    max_bytes = int(args.max_kb) * 1024
    only_exts = _parse_ext_list(args.only_ext) or set(DEFAULT_ONLY_EXTS)
    ignore_exts = _parse_ext_list(args.ignore_ext)
    only_basenames = _parse_name_list(args.only_name) or set(DEFAULT_ONLY_BASENAMES)
    ignore_uuid = not args.no_ignore_uuid
    # Colours only when writing to a terminal and not disabled explicitly.
    ansi = Ansi(enabled=(sys.stdout.isatty() and not args.no_colour))

    any_found = False
    inputs = [Path(p).expanduser().resolve() for p in args.paths]
    for fp in iter_input_files(inputs):
        if not should_scan_file(fp, only_exts, ignore_exts, only_basenames, max_bytes):
            continue
        # Unquoted values are only meaningful in line-oriented config formats.
        allow_bare = (
            fp.suffix.lower().lstrip(".") in {"env", "ini", "yaml", "yml", "conf", "toml", "secret"}
            or fp.name.lower() in only_basenames
        )
        for line_no, key, value in scan_file(fp, allow_bare=allow_bare):
            if not key_matches_keywords(key) or value == "":
                continue
            value_is_uuid = is_uuid_v1_to_v8(value)
            if value_is_uuid and ignore_uuid:
                continue
            ent = entropy_score(value)
            if value_is_uuid:
                # Only reachable with --no-ignore-uuid. looks_like_secret_value()
                # rejects every UUID, which would make the flag a no-op for
                # reported secrets, so apply just the length gate here.
                shape_ok = len(value.strip()) >= args.min_len
            else:
                shape_ok = looks_like_secret_value(value, min_len=args.min_len)
            is_secret = ent >= thr and shape_ok
            if args.all:
                print(ansi.yellow(f"Keyword detected & value of type str found —> {fp.name} @ {line_no}"))
            if is_secret:
                any_found = True
                print_secret_block(ansi, Hit(name=key, value=value, path=fp, line_no=line_no, entropy=ent))
            elif args.all:
                any_found = True
    return 1 if any_found else 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment