mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-13 22:26:03 -03:00
Merge pull request #2100 from anthropics/fix-2099-text-true-pythonutf8
security-guidance: purge text=True from subprocess.run + bake PYTHONUTF8=1 (#2099)
This commit is contained in:
commit
c7a3e2ffa0
@ -32,12 +32,17 @@ GIT_CMD = [
|
||||
def _git_rev_parse_head(cwd):
|
||||
"""Return the current HEAD SHA, or None if not a git repo / no commits."""
|
||||
try:
|
||||
# See #2099: text=True on Windows cp1252 crashes the reader thread on
|
||||
# any UTF-8 byte undefined in cp1252 (e.g. via a git error message
|
||||
# referencing a non-ASCII filename in stderr). stdout is a SHA so it
|
||||
# IS safe; stderr is not. capture_output=True with bytes-by-default
|
||||
# never decodes, so the reader thread can't crash.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
return result.stdout.strip()
|
||||
return result.stdout.decode("utf-8", errors="replace").strip()
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
return None
|
||||
@ -52,13 +57,17 @@ def _find_git_index(cwd):
|
||||
Returns the absolute path to the index file, or None.
|
||||
"""
|
||||
try:
|
||||
# See #2099: stdout here is a PATH which can contain non-ASCII bytes
|
||||
# (e.g. C:\אבטחה\repo\.git). text=True decodes via cp1252 strict on
|
||||
# Windows → crashes the reader thread → returns stdout=None →
|
||||
# caller does .strip() on None → AttributeError. Decode manually.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--git-dir"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
git_dir = result.stdout.strip()
|
||||
git_dir = result.stdout.decode("utf-8", errors="replace").strip()
|
||||
if not os.path.isabs(git_dir):
|
||||
git_dir = os.path.join(cwd, git_dir)
|
||||
index_path = os.path.join(git_dir, "index")
|
||||
@ -128,9 +137,13 @@ def _temp_index(cwd, untracked_paths=None):
|
||||
else:
|
||||
add_args = None
|
||||
if add_args:
|
||||
# No stdout used here (only returncode matters), but text=True
|
||||
# still spawns reader threads that decode stderr — git error
|
||||
# messages can reference non-ASCII filenames and crash on
|
||||
# cp1252. See #2099. Drop text=True so bytes stay raw.
|
||||
subprocess.run(
|
||||
[*GIT_CMD, "add", "--intent-to-add"] + add_args,
|
||||
cwd=cwd, capture_output=True, text=True, timeout=10,
|
||||
cwd=cwd, capture_output=True, timeout=10,
|
||||
env=env,
|
||||
)
|
||||
yield env
|
||||
@ -144,11 +157,17 @@ def _temp_index(cwd, untracked_paths=None):
|
||||
def _git_toplevel(cwd):
|
||||
"""Absolute repo root for `cwd`, or None if not in a work tree."""
|
||||
try:
|
||||
# See #2099: stdout is a PATH — `C:\אבטחה\repo` returned as UTF-8
|
||||
# bytes by git. text=True would decode via cp1252 strict on Windows
|
||||
# → reader-thread crash. Decode manually with errors="replace".
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--show-toplevel"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5,
|
||||
cwd=cwd, capture_output=True, timeout=5,
|
||||
)
|
||||
return r.stdout.strip() if r.returncode == 0 and r.stdout.strip() else None
|
||||
if r.returncode != 0:
|
||||
return None
|
||||
path = r.stdout.decode("utf-8", errors="replace").strip()
|
||||
return path if path else None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
return None
|
||||
|
||||
@ -164,13 +183,15 @@ def _git_dir(repo_root):
|
||||
callers can degrade (push-sweep state is best-effort).
|
||||
"""
|
||||
try:
|
||||
# See #2099: stdout is a PATH (shared gitdir), may be non-ASCII.
|
||||
# Decode bytes manually to avoid cp1252 reader-thread crash.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--git-common-dir"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return None
|
||||
d = r.stdout.strip()
|
||||
d = r.stdout.decode("utf-8", errors="replace").strip()
|
||||
return d if os.path.isabs(d) else os.path.join(repo_root, d)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
return None
|
||||
@ -179,13 +200,15 @@ def _git_dir(repo_root):
|
||||
def _git_rev_list_range(repo_root, base, head="HEAD"):
|
||||
"""Shas in `base..head`, oldest→newest. Empty list on error."""
|
||||
try:
|
||||
# See #2099: stdout is ASCII SHAs, but stderr can carry git error
|
||||
# messages referencing non-ASCII filenames — keep bytes raw.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-list", "--reverse", f"{base}..{head}"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=10,
|
||||
cwd=repo_root, capture_output=True, timeout=10,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
return [s for s in r.stdout.strip().split("\n") if s]
|
||||
return [s for s in r.stdout.decode("utf-8", errors="replace").strip().split("\n") if s]
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
return []
|
||||
|
||||
@ -220,9 +243,11 @@ def _git_diff_range(repo_root, base, head="HEAD"):
|
||||
def _detect_main_branch(repo_root):
|
||||
for ref in ("origin/HEAD", "origin/main", "origin/master", "main", "master"):
|
||||
try:
|
||||
# See #2099: stdout is a SHA but stderr can carry non-ASCII git
|
||||
# warnings — keep bytes raw to avoid cp1252 reader-thread crash.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and r.stdout.strip():
|
||||
return ref
|
||||
@ -410,9 +435,12 @@ def _is_ancestor(cwd, maybe_ancestor, descendant):
|
||||
"""True if `maybe_ancestor` is reachable from `descendant` (i.e. HEAD
|
||||
moved forward via commit/merge, not sideways via checkout)."""
|
||||
try:
|
||||
# See #2099: only returncode matters, but text=True spawns reader
|
||||
# threads that decode stderr — git error messages can carry non-ASCII
|
||||
# filenames. Drop text=True to keep bytes raw, avoid cp1252 crash.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "merge-base", "--is-ancestor", maybe_ancestor, descendant],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5,
|
||||
cwd=cwd, capture_output=True, timeout=5,
|
||||
)
|
||||
return result.returncode == 0
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
|
||||
@ -549,7 +549,11 @@ def handle_user_prompt_submit(input_data):
|
||||
elif sha:
|
||||
debug_log(f"Captured git baseline: {sha[:12]}")
|
||||
else:
|
||||
debug_log("Failed to capture git baseline (not a git repo?)")
|
||||
# Show cwd so the next reporter can immediately see when this isn't
|
||||
# actually "not a git repo" but a path-encoding / permissions / git
|
||||
# invocation failure. See #2099.
|
||||
debug_log(f"Failed to capture git baseline (cwd={cwd!r}) — not a git repo, "
|
||||
f"or git invocation failed (check log entries above)")
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
@ -856,23 +860,30 @@ def _detect_prev_upstream(repo_root, bash_output):
|
||||
# @{u}@{1} — only meaningful if an upstream is configured.
|
||||
for ref in ("@{u}@{1}", "@{push}@{1}"):
|
||||
try:
|
||||
# See #2099: stdout is a SHA but stderr can carry non-ASCII git
|
||||
# warnings — keep bytes raw to avoid cp1252 reader-thread crash.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and r.stdout.strip():
|
||||
return r.stdout.strip()
|
||||
sha = r.stdout.decode("utf-8", errors="replace").strip()
|
||||
if r.returncode == 0 and sha:
|
||||
return sha
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
pass
|
||||
main = _detect_main_branch(repo_root)
|
||||
if main:
|
||||
try:
|
||||
# See #2099: drop text=True; decode bytes manually so a
|
||||
# cp1252-undefined byte in git's stderr doesn't crash the
|
||||
# reader thread.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "merge-base", "HEAD", main],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and r.stdout.strip():
|
||||
return r.stdout.strip()
|
||||
sha = r.stdout.decode("utf-8", errors="replace").strip()
|
||||
if r.returncode == 0 and sha:
|
||||
return sha
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
pass
|
||||
return None
|
||||
@ -1324,12 +1335,13 @@ def handle_commit_review_posttooluse(input_data):
|
||||
try:
|
||||
full_shas = []
|
||||
for s in shas:
|
||||
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", s],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0:
|
||||
full_shas.append(r.stdout.strip())
|
||||
full_shas.append(r.stdout.decode("utf-8", errors="replace").strip())
|
||||
_append_reviewed_shas(repo_root, full_shas, vulns_found=len(vulns or []))
|
||||
except Exception:
|
||||
pass
|
||||
@ -1531,9 +1543,10 @@ def handle_push_sweep_posttooluse(input_data):
|
||||
# both.
|
||||
head = None
|
||||
try:
|
||||
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||
r = subprocess.run([*GIT_CMD, "rev-parse", "HEAD"], cwd=repo_root,
|
||||
capture_output=True, text=True, timeout=5)
|
||||
head = r.stdout.strip() if r.returncode == 0 else None
|
||||
capture_output=True, timeout=5)
|
||||
head = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
pass
|
||||
push_section = _push_section(bash_output or "")
|
||||
@ -1563,14 +1576,15 @@ def handle_push_sweep_posttooluse(input_data):
|
||||
quiet_success = False
|
||||
if not (bash_output or "").strip() and not interrupted:
|
||||
try:
|
||||
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||
r_cur = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5)
|
||||
cwd=repo_root, capture_output=True, timeout=5)
|
||||
r_prev = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}@{1}"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5)
|
||||
cur = r_cur.stdout.strip() if r_cur.returncode == 0 else ""
|
||||
prev_u = r_prev.stdout.strip() if r_prev.returncode == 0 else ""
|
||||
cwd=repo_root, capture_output=True, timeout=5)
|
||||
cur = r_cur.stdout.decode("utf-8", errors="replace").strip() if r_cur.returncode == 0 else ""
|
||||
prev_u = r_prev.stdout.decode("utf-8", errors="replace").strip() if r_prev.returncode == 0 else ""
|
||||
quiet_success = bool(cur and prev_u and cur == head and prev_u != cur)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
pass
|
||||
@ -1584,11 +1598,12 @@ def handle_push_sweep_posttooluse(input_data):
|
||||
# reviewed-shas state.
|
||||
for local_ref in new_branch_matches:
|
||||
try:
|
||||
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "--verify", "-q", local_ref],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
local_sha = r.stdout.strip() if r.returncode == 0 else ""
|
||||
local_sha = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else ""
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
local_sha = ""
|
||||
if local_sha and local_sha != head:
|
||||
|
||||
@ -22,6 +22,17 @@
|
||||
# "${CLAUDE_PLUGIN_ROOT}/hooks/security_reminder_hook.py"
|
||||
set -e
|
||||
|
||||
# Force UTF-8 for ALL Python filesystem + IO operations (PEP 540).
|
||||
# Without this, Windows Python defaults `locale.getpreferredencoding()` to
|
||||
# cp1252 — which makes `text=True` in subprocess.run / open() / json.load
|
||||
# crash the internal reader thread on any byte that's undefined in cp1252
|
||||
# (e.g. the 0x81 byte from ف, present in any path/filename with
|
||||
# Arabic/Hebrew/CJK characters). See #2056, #2099.
|
||||
#
|
||||
# No-op on macOS/Linux (already UTF-8). Must be set BEFORE Python starts —
|
||||
# changing it from inside the interpreter has no effect.
|
||||
export PYTHONUTF8=1
|
||||
|
||||
# Git Bash / MSYS on Windows hands script paths to this shim in POSIX form
|
||||
# (`/c/Users/...`). When we exec a Windows `python.exe` (which we do on
|
||||
# Windows since `python3` is the Microsoft Store stub), python interprets the
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user