mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-15 07:06:07 -03:00
Merge pull request #2100 from anthropics/fix-2099-text-true-pythonutf8
security-guidance: purge text=True from subprocess.run + bake PYTHONUTF8=1 (#2099)
This commit is contained in:
commit
c7a3e2ffa0
@ -32,12 +32,17 @@ GIT_CMD = [
|
|||||||
def _git_rev_parse_head(cwd):
|
def _git_rev_parse_head(cwd):
|
||||||
"""Return the current HEAD SHA, or None if not a git repo / no commits."""
|
"""Return the current HEAD SHA, or None if not a git repo / no commits."""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: text=True on Windows cp1252 crashes the reader thread on
|
||||||
|
# any UTF-8 byte undefined in cp1252 (e.g. via a git error message
|
||||||
|
# referencing a non-ASCII filename in stderr). stdout is a SHA so it
|
||||||
|
# IS safe; stderr is not. capture_output=True with bytes-by-default
|
||||||
|
# never decodes, so the reader thread can't crash.
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
cwd=cwd, capture_output=True, timeout=5
|
||||||
)
|
)
|
||||||
if result.returncode == 0 and result.stdout.strip():
|
if result.returncode == 0 and result.stdout.strip():
|
||||||
return result.stdout.strip()
|
return result.stdout.decode("utf-8", errors="replace").strip()
|
||||||
return None
|
return None
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
return None
|
return None
|
||||||
@ -52,13 +57,17 @@ def _find_git_index(cwd):
|
|||||||
Returns the absolute path to the index file, or None.
|
Returns the absolute path to the index file, or None.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout here is a PATH which can contain non-ASCII bytes
|
||||||
|
# (e.g. C:\אבטחה\repo\.git). text=True decodes via cp1252 strict on
|
||||||
|
# Windows → crashes the reader thread → returns stdout=None →
|
||||||
|
# caller does .strip() on None → AttributeError. Decode manually.
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--git-dir"],
|
[*GIT_CMD, "rev-parse", "--git-dir"],
|
||||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
cwd=cwd, capture_output=True, timeout=5
|
||||||
)
|
)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
return None
|
return None
|
||||||
git_dir = result.stdout.strip()
|
git_dir = result.stdout.decode("utf-8", errors="replace").strip()
|
||||||
if not os.path.isabs(git_dir):
|
if not os.path.isabs(git_dir):
|
||||||
git_dir = os.path.join(cwd, git_dir)
|
git_dir = os.path.join(cwd, git_dir)
|
||||||
index_path = os.path.join(git_dir, "index")
|
index_path = os.path.join(git_dir, "index")
|
||||||
@ -128,9 +137,13 @@ def _temp_index(cwd, untracked_paths=None):
|
|||||||
else:
|
else:
|
||||||
add_args = None
|
add_args = None
|
||||||
if add_args:
|
if add_args:
|
||||||
|
# No stdout used here (only returncode matters), but text=True
|
||||||
|
# still spawns reader threads that decode stderr — git error
|
||||||
|
# messages can reference non-ASCII filenames and crash on
|
||||||
|
# cp1252. See #2099. Drop text=True so bytes stay raw.
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[*GIT_CMD, "add", "--intent-to-add"] + add_args,
|
[*GIT_CMD, "add", "--intent-to-add"] + add_args,
|
||||||
cwd=cwd, capture_output=True, text=True, timeout=10,
|
cwd=cwd, capture_output=True, timeout=10,
|
||||||
env=env,
|
env=env,
|
||||||
)
|
)
|
||||||
yield env
|
yield env
|
||||||
@ -144,11 +157,17 @@ def _temp_index(cwd, untracked_paths=None):
|
|||||||
def _git_toplevel(cwd):
|
def _git_toplevel(cwd):
|
||||||
"""Absolute repo root for `cwd`, or None if not in a work tree."""
|
"""Absolute repo root for `cwd`, or None if not in a work tree."""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout is a PATH — `C:\אבטחה\repo` returned as UTF-8
|
||||||
|
# bytes by git. text=True would decode via cp1252 strict on Windows
|
||||||
|
# → reader-thread crash. Decode manually with errors="replace".
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--show-toplevel"],
|
[*GIT_CMD, "rev-parse", "--show-toplevel"],
|
||||||
cwd=cwd, capture_output=True, text=True, timeout=5,
|
cwd=cwd, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
return r.stdout.strip() if r.returncode == 0 and r.stdout.strip() else None
|
if r.returncode != 0:
|
||||||
|
return None
|
||||||
|
path = r.stdout.decode("utf-8", errors="replace").strip()
|
||||||
|
return path if path else None
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -164,13 +183,15 @@ def _git_dir(repo_root):
|
|||||||
callers can degrade (push-sweep state is best-effort).
|
callers can degrade (push-sweep state is best-effort).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout is a PATH (shared gitdir), may be non-ASCII.
|
||||||
|
# Decode bytes manually to avoid cp1252 reader-thread crash.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--git-common-dir"],
|
[*GIT_CMD, "rev-parse", "--git-common-dir"],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
if r.returncode != 0:
|
if r.returncode != 0:
|
||||||
return None
|
return None
|
||||||
d = r.stdout.strip()
|
d = r.stdout.decode("utf-8", errors="replace").strip()
|
||||||
return d if os.path.isabs(d) else os.path.join(repo_root, d)
|
return d if os.path.isabs(d) else os.path.join(repo_root, d)
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
return None
|
return None
|
||||||
@ -179,13 +200,15 @@ def _git_dir(repo_root):
|
|||||||
def _git_rev_list_range(repo_root, base, head="HEAD"):
|
def _git_rev_list_range(repo_root, base, head="HEAD"):
|
||||||
"""Shas in `base..head`, oldest→newest. Empty list on error."""
|
"""Shas in `base..head`, oldest→newest. Empty list on error."""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout is ASCII SHAs, but stderr can carry git error
|
||||||
|
# messages referencing non-ASCII filenames — keep bytes raw.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-list", "--reverse", f"{base}..{head}"],
|
[*GIT_CMD, "rev-list", "--reverse", f"{base}..{head}"],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=10,
|
cwd=repo_root, capture_output=True, timeout=10,
|
||||||
)
|
)
|
||||||
if r.returncode != 0:
|
if r.returncode != 0:
|
||||||
return []
|
return []
|
||||||
return [s for s in r.stdout.strip().split("\n") if s]
|
return [s for s in r.stdout.decode("utf-8", errors="replace").strip().split("\n") if s]
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@ -220,9 +243,11 @@ def _git_diff_range(repo_root, base, head="HEAD"):
|
|||||||
def _detect_main_branch(repo_root):
|
def _detect_main_branch(repo_root):
|
||||||
for ref in ("origin/HEAD", "origin/main", "origin/master", "main", "master"):
|
for ref in ("origin/HEAD", "origin/main", "origin/master", "main", "master"):
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout is a SHA but stderr can carry non-ASCII git
|
||||||
|
# warnings — keep bytes raw to avoid cp1252 reader-thread crash.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
if r.returncode == 0 and r.stdout.strip():
|
if r.returncode == 0 and r.stdout.strip():
|
||||||
return ref
|
return ref
|
||||||
@ -410,9 +435,12 @@ def _is_ancestor(cwd, maybe_ancestor, descendant):
|
|||||||
"""True if `maybe_ancestor` is reachable from `descendant` (i.e. HEAD
|
"""True if `maybe_ancestor` is reachable from `descendant` (i.e. HEAD
|
||||||
moved forward via commit/merge, not sideways via checkout)."""
|
moved forward via commit/merge, not sideways via checkout)."""
|
||||||
try:
|
try:
|
||||||
|
# See #2099: only returncode matters, but text=True spawns reader
|
||||||
|
# threads that decode stderr — git error messages can carry non-ASCII
|
||||||
|
# filenames. Drop text=True to keep bytes raw, avoid cp1252 crash.
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[*GIT_CMD, "merge-base", "--is-ancestor", maybe_ancestor, descendant],
|
[*GIT_CMD, "merge-base", "--is-ancestor", maybe_ancestor, descendant],
|
||||||
cwd=cwd, capture_output=True, text=True, timeout=5,
|
cwd=cwd, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
return result.returncode == 0
|
return result.returncode == 0
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
|
|||||||
@ -549,7 +549,11 @@ def handle_user_prompt_submit(input_data):
|
|||||||
elif sha:
|
elif sha:
|
||||||
debug_log(f"Captured git baseline: {sha[:12]}")
|
debug_log(f"Captured git baseline: {sha[:12]}")
|
||||||
else:
|
else:
|
||||||
debug_log("Failed to capture git baseline (not a git repo?)")
|
# Show cwd so the next reporter can immediately see when this isn't
|
||||||
|
# actually "not a git repo" but a path-encoding / permissions / git
|
||||||
|
# invocation failure. See #2099.
|
||||||
|
debug_log(f"Failed to capture git baseline (cwd={cwd!r}) — not a git repo, "
|
||||||
|
f"or git invocation failed (check log entries above)")
|
||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
@ -856,23 +860,30 @@ def _detect_prev_upstream(repo_root, bash_output):
|
|||||||
# @{u}@{1} — only meaningful if an upstream is configured.
|
# @{u}@{1} — only meaningful if an upstream is configured.
|
||||||
for ref in ("@{u}@{1}", "@{push}@{1}"):
|
for ref in ("@{u}@{1}", "@{push}@{1}"):
|
||||||
try:
|
try:
|
||||||
|
# See #2099: stdout is a SHA but stderr can carry non-ASCII git
|
||||||
|
# warnings — keep bytes raw to avoid cp1252 reader-thread crash.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", ref],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
if r.returncode == 0 and r.stdout.strip():
|
sha = r.stdout.decode("utf-8", errors="replace").strip()
|
||||||
return r.stdout.strip()
|
if r.returncode == 0 and sha:
|
||||||
|
return sha
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
pass
|
pass
|
||||||
main = _detect_main_branch(repo_root)
|
main = _detect_main_branch(repo_root)
|
||||||
if main:
|
if main:
|
||||||
try:
|
try:
|
||||||
|
# See #2099: drop text=True; decode bytes manually so a
|
||||||
|
# cp1252-undefined byte in git's stderr doesn't crash the
|
||||||
|
# reader thread.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "merge-base", "HEAD", main],
|
[*GIT_CMD, "merge-base", "HEAD", main],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
if r.returncode == 0 and r.stdout.strip():
|
sha = r.stdout.decode("utf-8", errors="replace").strip()
|
||||||
return r.stdout.strip()
|
if r.returncode == 0 and sha:
|
||||||
|
return sha
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
pass
|
pass
|
||||||
return None
|
return None
|
||||||
@ -1324,12 +1335,13 @@ def handle_commit_review_posttooluse(input_data):
|
|||||||
try:
|
try:
|
||||||
full_shas = []
|
full_shas = []
|
||||||
for s in shas:
|
for s in shas:
|
||||||
|
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", s],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", s],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
if r.returncode == 0:
|
if r.returncode == 0:
|
||||||
full_shas.append(r.stdout.strip())
|
full_shas.append(r.stdout.decode("utf-8", errors="replace").strip())
|
||||||
_append_reviewed_shas(repo_root, full_shas, vulns_found=len(vulns or []))
|
_append_reviewed_shas(repo_root, full_shas, vulns_found=len(vulns or []))
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@ -1531,9 +1543,10 @@ def handle_push_sweep_posttooluse(input_data):
|
|||||||
# both.
|
# both.
|
||||||
head = None
|
head = None
|
||||||
try:
|
try:
|
||||||
|
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||||
r = subprocess.run([*GIT_CMD, "rev-parse", "HEAD"], cwd=repo_root,
|
r = subprocess.run([*GIT_CMD, "rev-parse", "HEAD"], cwd=repo_root,
|
||||||
capture_output=True, text=True, timeout=5)
|
capture_output=True, timeout=5)
|
||||||
head = r.stdout.strip() if r.returncode == 0 else None
|
head = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else None
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
pass
|
pass
|
||||||
push_section = _push_section(bash_output or "")
|
push_section = _push_section(bash_output or "")
|
||||||
@ -1563,14 +1576,15 @@ def handle_push_sweep_posttooluse(input_data):
|
|||||||
quiet_success = False
|
quiet_success = False
|
||||||
if not (bash_output or "").strip() and not interrupted:
|
if not (bash_output or "").strip() and not interrupted:
|
||||||
try:
|
try:
|
||||||
|
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||||
r_cur = subprocess.run(
|
r_cur = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}"],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}"],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5)
|
cwd=repo_root, capture_output=True, timeout=5)
|
||||||
r_prev = subprocess.run(
|
r_prev = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}@{1}"],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}@{1}"],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5)
|
cwd=repo_root, capture_output=True, timeout=5)
|
||||||
cur = r_cur.stdout.strip() if r_cur.returncode == 0 else ""
|
cur = r_cur.stdout.decode("utf-8", errors="replace").strip() if r_cur.returncode == 0 else ""
|
||||||
prev_u = r_prev.stdout.strip() if r_prev.returncode == 0 else ""
|
prev_u = r_prev.stdout.decode("utf-8", errors="replace").strip() if r_prev.returncode == 0 else ""
|
||||||
quiet_success = bool(cur and prev_u and cur == head and prev_u != cur)
|
quiet_success = bool(cur and prev_u and cur == head and prev_u != cur)
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
pass
|
pass
|
||||||
@ -1584,11 +1598,12 @@ def handle_push_sweep_posttooluse(input_data):
|
|||||||
# reviewed-shas state.
|
# reviewed-shas state.
|
||||||
for local_ref in new_branch_matches:
|
for local_ref in new_branch_matches:
|
||||||
try:
|
try:
|
||||||
|
# See #2099: drop text=True; decode manually for cp1252 safety.
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
[*GIT_CMD, "rev-parse", "--verify", "-q", local_ref],
|
[*GIT_CMD, "rev-parse", "--verify", "-q", local_ref],
|
||||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
cwd=repo_root, capture_output=True, timeout=5,
|
||||||
)
|
)
|
||||||
local_sha = r.stdout.strip() if r.returncode == 0 else ""
|
local_sha = r.stdout.decode("utf-8", errors="replace").strip() if r.returncode == 0 else ""
|
||||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||||
local_sha = ""
|
local_sha = ""
|
||||||
if local_sha and local_sha != head:
|
if local_sha and local_sha != head:
|
||||||
|
|||||||
@ -22,6 +22,17 @@
|
|||||||
# "${CLAUDE_PLUGIN_ROOT}/hooks/security_reminder_hook.py"
|
# "${CLAUDE_PLUGIN_ROOT}/hooks/security_reminder_hook.py"
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# Force UTF-8 for ALL Python filesystem + IO operations (PEP 540).
|
||||||
|
# Without this, Windows Python defaults `locale.getpreferredencoding()` to
|
||||||
|
# cp1252 — which makes `text=True` in subprocess.run (and open() without an
|
||||||
|
# explicit encoding, e.g. ahead of json.load) fail to decode any byte that's
|
||||||
|
# undefined in cp1252, crashing subprocess's internal reader thread (e.g. the
|
||||||
|
# 0x81 byte of ف in UTF-8; typical of Arabic/Hebrew/CJK paths). See #2056, #2099.
|
||||||
|
#
|
||||||
|
# Effectively a no-op on macOS/Linux (locale is normally UTF-8). Must be set BEFORE Python starts —
|
||||||
|
# changing it from inside the interpreter has no effect.
|
||||||
|
export PYTHONUTF8=1
|
||||||
|
|
||||||
# Git Bash / MSYS on Windows hands script paths to this shim in POSIX form
|
# Git Bash / MSYS on Windows hands script paths to this shim in POSIX form
|
||||||
# (`/c/Users/...`). When we exec a Windows `python.exe` (which we do on
|
# (`/c/Users/...`). When we exec a Windows `python.exe` (which we do on
|
||||||
# Windows since `python3` is the Microsoft Store stub), python interprets the
|
# Windows since `python3` is the Microsoft Store stub), python interprets the
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user