diff --git a/plugins/security-guidance/hooks/gitutil.py b/plugins/security-guidance/hooks/gitutil.py index 5495d18f..0144ef08 100644 --- a/plugins/security-guidance/hooks/gitutil.py +++ b/plugins/security-guidance/hooks/gitutil.py @@ -32,12 +32,17 @@ GIT_CMD = [ def _git_rev_parse_head(cwd): """Return the current HEAD SHA, or None if not a git repo / no commits.""" try: + # See #2099: text=True on Windows cp1252 crashes the reader thread on + # any UTF-8 byte undefined in cp1252 (e.g. via a git error message + # referencing a non-ASCII filename in stderr). stdout is a SHA so it + # IS safe; stderr is not. capture_output=True with bytes-by-default + # never decodes, so the reader thread can't crash. result = subprocess.run( [*GIT_CMD, "rev-parse", "HEAD"], - cwd=cwd, capture_output=True, text=True, timeout=5 + cwd=cwd, capture_output=True, timeout=5 ) if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() + return result.stdout.decode("utf-8", errors="replace").strip() return None except (subprocess.TimeoutExpired, FileNotFoundError, OSError): return None @@ -52,13 +57,17 @@ def _find_git_index(cwd): Returns the absolute path to the index file, or None. """ try: + # See #2099: stdout here is a PATH which can contain non-ASCII bytes + # (e.g. C:\אבטחה\repo\.git). text=True decodes via cp1252 strict on + # Windows → crashes the reader thread → returns stdout=None → + # caller does .strip() on None → AttributeError. Decode manually. result = subprocess.run( [*GIT_CMD, "rev-parse", "--git-dir"], - cwd=cwd, capture_output=True, text=True, timeout=5 + cwd=cwd, capture_output=True, timeout=5 ) if result.returncode != 0: return None - git_dir = result.stdout.strip() + git_dir = result.stdout.decode("utf-8", errors="replace").strip() if not os.path.isabs(git_dir): git_dir = os.path.join(cwd, git_dir) index_path = os.path.join(git_dir, "index") @@ -128,9 +137,13 @@ def _temp_index(cwd, untracked_paths=None): else: add_args = None if add_args: + # No stdout used here (only returncode matters), but text=True + # still spawns reader threads that decode stderr — git error + # messages can reference non-ASCII filenames and crash on + # cp1252. See #2099. Drop text=True so bytes stay raw. subprocess.run( [*GIT_CMD, "add", "--intent-to-add"] + add_args, - cwd=cwd, capture_output=True, text=True, timeout=10, + cwd=cwd, capture_output=True, timeout=10, env=env, ) yield env @@ -144,11 +157,17 @@ def _temp_index(cwd, untracked_paths=None): def _git_toplevel(cwd): """Absolute repo root for `cwd`, or None if not in a work tree.""" try: + # See #2099: stdout is a PATH — `C:\אבטחה\repo` returned as UTF-8 + # bytes by git. text=True would decode via cp1252 strict on Windows + # → reader-thread crash. Decode manually with errors="replace". r = subprocess.run( [*GIT_CMD, "rev-parse", "--show-toplevel"], - cwd=cwd, capture_output=True, text=True, timeout=5, + cwd=cwd, capture_output=True, timeout=5, ) - return r.stdout.strip() if r.returncode == 0 and r.stdout.strip() else None + if r.returncode != 0: + return None + path = r.stdout.decode("utf-8", errors="replace").strip() + return path if path else None except (subprocess.TimeoutExpired, FileNotFoundError, OSError): return None @@ -164,13 +183,15 @@ def _git_dir(repo_root): callers can degrade (push-sweep state is best-effort). """ try: + # See #2099: stdout is a PATH (shared gitdir), may be non-ASCII. + # Decode bytes manually to avoid cp1252 reader-thread crash. r = subprocess.run( [*GIT_CMD, "rev-parse", "--git-common-dir"], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) if r.returncode != 0: return None - d = r.stdout.strip() + d = r.stdout.decode("utf-8", errors="replace").strip() return d if os.path.isabs(d) else os.path.join(repo_root, d) except (subprocess.TimeoutExpired, FileNotFoundError, OSError): return None @@ -179,13 +200,15 @@ def _git_dir(repo_root): def _git_rev_list_range(repo_root, base, head="HEAD"): """Shas in `base..head`, oldest→newest. Empty list on error.""" try: + # See #2099: stdout is ASCII SHAs, but stderr can carry git error + # messages referencing non-ASCII filenames — keep bytes raw. r = subprocess.run( [*GIT_CMD, "rev-list", "--reverse", f"{base}..{head}"], - cwd=repo_root, capture_output=True, text=True, timeout=10, + cwd=repo_root, capture_output=True, timeout=10, ) if r.returncode != 0: return [] - return [s for s in r.stdout.strip().split("\n") if s] + return [s for s in r.stdout.decode("utf-8", errors="replace").strip().split("\n") if s] except (subprocess.TimeoutExpired, FileNotFoundError, OSError): return [] @@ -220,9 +243,11 @@ def _git_diff_range(repo_root, base, head="HEAD"): def _detect_main_branch(repo_root): for ref in ("origin/HEAD", "origin/main", "origin/master", "main", "master"): try: + # See #2099: stdout is a SHA but stderr can carry non-ASCII git + # warnings — keep bytes raw to avoid cp1252 reader-thread crash. r = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", ref], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) if r.returncode == 0 and r.stdout.strip(): return ref @@ -410,9 +435,12 @@ def _is_ancestor(cwd, maybe_ancestor, descendant): """True if `maybe_ancestor` is reachable from `descendant` (i.e. HEAD moved forward via commit/merge, not sideways via checkout).""" try: + # See #2099: only returncode matters, but text=True spawns reader + # threads that decode stderr — git error messages can carry non-ASCII + # filenames. Drop text=True to keep bytes raw, avoid cp1252 crash. result = subprocess.run( [*GIT_CMD, "merge-base", "--is-ancestor", maybe_ancestor, descendant], - cwd=cwd, capture_output=True, text=True, timeout=5, + cwd=cwd, capture_output=True, timeout=5, ) return result.returncode == 0 except (subprocess.TimeoutExpired, FileNotFoundError, OSError): diff --git a/plugins/security-guidance/hooks/security_reminder_hook.py b/plugins/security-guidance/hooks/security_reminder_hook.py index 18747d29..275d3e37 100755 --- a/plugins/security-guidance/hooks/security_reminder_hook.py +++ b/plugins/security-guidance/hooks/security_reminder_hook.py @@ -549,7 +549,11 @@ def handle_user_prompt_submit(input_data): elif sha: debug_log(f"Captured git baseline: {sha[:12]}") else: - debug_log("Failed to capture git baseline (not a git repo?)") + # Show cwd so the next reporter can immediately see when this isn't + # actually "not a git repo" but a path-encoding / permissions / git + # invocation failure. See #2099. + debug_log(f"Failed to capture git baseline (cwd={cwd!r}) — not a git repo, " + f"or git invocation failed (check log entries above)") sys.exit(0) @@ -856,23 +860,30 @@ def _detect_prev_upstream(repo_root, bash_output): # @{u}@{1} — only meaningful if an upstream is configured. for ref in ("@{u}@{1}", "@{push}@{1}"): try: + # See #2099: stdout is a SHA but stderr can carry non-ASCII git + # warnings — keep bytes raw to avoid cp1252 reader-thread crash. r = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", ref], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) - if r.returncode == 0 and r.stdout.strip(): - return r.stdout.strip() + sha = r.stdout.decode("utf-8", errors="replace").strip() + if r.returncode == 0 and sha: + return sha except (subprocess.TimeoutExpired, FileNotFoundError, OSError): pass main = _detect_main_branch(repo_root) if main: try: + # See #2099: drop text=True; decode bytes manually so a + # cp1252-undefined byte in git's stderr doesn't crash the + # reader thread. r = subprocess.run( [*GIT_CMD, "merge-base", "HEAD", main], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) - if r.returncode == 0 and r.stdout.strip(): - return r.stdout.strip() + sha = r.stdout.decode("utf-8", errors="replace").strip() + if r.returncode == 0 and sha: + return sha except (subprocess.TimeoutExpired, FileNotFoundError, OSError): pass return None @@ -1324,12 +1335,13 @@ def handle_commit_review_posttooluse(input_data): try: full_shas = [] for s in shas: + # See #2099: drop text=True; decode manually for cp1252 safety. r = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", s], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) if r.returncode == 0: - full_shas.append(r.stdout.strip()) + full_shas.append(r.stdout.decode("utf-8", errors="replace").strip()) _append_reviewed_shas(repo_root, full_shas, vulns_found=len(vulns or [])) except Exception: pass @@ -1531,9 +1543,10 @@ def handle_push_sweep_posttooluse(input_data): # both. head = None try: + # See #2099: drop text=True; decode manually for cp1252 safety. r = subprocess.run([*GIT_CMD, "rev-parse", "HEAD"], cwd=repo_root, - capture_output=True, text=True, timeout=5) - head = r.stdout.strip() if r.returncode == 0 else None + capture_output=True, timeout=5) + head = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else None except (subprocess.TimeoutExpired, FileNotFoundError, OSError): pass push_section = _push_section(bash_output or "") @@ -1563,14 +1576,15 @@ def handle_push_sweep_posttooluse(input_data): quiet_success = False if not (bash_output or "").strip() and not interrupted: try: + # See #2099: drop text=True; decode manually for cp1252 safety. r_cur = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}"], - cwd=repo_root, capture_output=True, text=True, timeout=5) + cwd=repo_root, capture_output=True, timeout=5) r_prev = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}@{1}"], - cwd=repo_root, capture_output=True, text=True, timeout=5) - cur = r_cur.stdout.strip() if r_cur.returncode == 0 else "" - prev_u = r_prev.stdout.strip() if r_prev.returncode == 0 else "" + cwd=repo_root, capture_output=True, timeout=5) + cur = r_cur.stdout.decode("utf-8", errors="replace").strip() if r_cur.returncode == 0 else "" + prev_u = r_prev.stdout.decode("utf-8", errors="replace").strip() if r_prev.returncode == 0 else "" quiet_success = bool(cur and prev_u and cur == head and prev_u != cur) except (subprocess.TimeoutExpired, FileNotFoundError, OSError): pass @@ -1584,11 +1598,12 @@ def handle_push_sweep_posttooluse(input_data): # reviewed-shas state. for local_ref in new_branch_matches: try: + # See #2099: drop text=True; decode manually for cp1252 safety. r = subprocess.run( [*GIT_CMD, "rev-parse", "--verify", "-q", local_ref], - cwd=repo_root, capture_output=True, text=True, timeout=5, + cwd=repo_root, capture_output=True, timeout=5, ) - local_sha = r.stdout.strip() if r.returncode == 0 else "" + local_sha = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else "" except (subprocess.TimeoutExpired, FileNotFoundError, OSError): local_sha = "" if local_sha and local_sha != head: diff --git a/plugins/security-guidance/hooks/sg-python.sh b/plugins/security-guidance/hooks/sg-python.sh index 67119105..9cf8cc07 100755 --- a/plugins/security-guidance/hooks/sg-python.sh +++ b/plugins/security-guidance/hooks/sg-python.sh @@ -22,6 +22,17 @@ # "${CLAUDE_PLUGIN_ROOT}/hooks/security_reminder_hook.py" set -e +# Force UTF-8 for ALL Python filesystem + IO operations (PEP 540). +# Without this, Windows Python defaults `locale.getpreferredencoding()` to +# cp1252 — which makes `text=True` in subprocess.run / open() / json.load +# crash the internal reader thread on any byte that's undefined in cp1252 +# (e.g. the 0x81 byte from ف, present in any path/filename with +# Arabic/Hebrew/CJK characters). See #2056, #2099. +# +# No-op on macOS/Linux (already UTF-8). Must be set BEFORE Python starts — +# changing it from inside the interpreter has no effect. +export PYTHONUTF8=1 + # Git Bash / MSYS on Windows hands script paths to this shim in POSIX form # (`/c/Users/...`). When we exec a Windows `python.exe` (which we do on # Windows since `python3` is the Microsoft Store stub), python interprets the