Merge pull request #2086 from anthropics/fix-2082-diff-parser-non-ascii

security-guidance: pass core.quotePath=false to diff feeders (#2082)
This commit is contained in:
Mohamed Hegazy 2026-05-29 08:11:25 -07:00 committed by GitHub
commit 9e150cfd48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 4 deletions

View File

@ -199,8 +199,15 @@ def _git_diff_range(repo_root, base, head="HEAD"):
them reviewed otherwise unreviewed commits get permanently silenced.
"""
try:
# core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...`
# headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."`
# vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)`
# in parse_diff_into_files / extract_file_paths_from_diff matches the
# raw form only — quoted headers slip past and the entire file is
# silently dropped from review. See #2082 (sibling of #2056 / #2075).
r = subprocess.run(
[*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head],
[*GIT_CMD, "-c", "core.quotePath=false",
"diff", "-p", "--no-color", "--no-ext-diff", base, head],
cwd=repo_root, capture_output=True, timeout=30,
)
if r.returncode != 0:
@ -436,7 +443,11 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa
# change exists to fix.
return ""
cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers
# so non-ASCII paths aren't C-quoted, which would make them slip past the
# downstream parse_diff_into_files regex. See #2082 (sibling of #2056 / #2075).
cmd = [*GIT_CMD, "-c", "core.quotePath=false",
"diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
try:
with _temp_index(cwd, untracked_paths) as env:
# env is None when no index could be found (bare repo / not a

View File

@ -1174,16 +1174,21 @@ def handle_commit_review_posttooluse(input_data):
resolved = 0
for sha in shas:
try:
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...`
# headers so non-ASCII paths aren't C-quoted, which would make them slip
# past the downstream parse_diff_into_files regex. See #2082 (sibling of
# #2056 / #2075).
if pre_amend_sha:
# Delta review: pre-amend → post-amend. `git diff` (not show)
# so the output is a pure unified diff with no commit header.
result = subprocess.run(
[*GIT_CMD, "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
[*GIT_CMD, "-c", "core.quotePath=false",
"diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
cwd=repo_root, capture_output=True, timeout=15
)
else:
result = subprocess.run(
[*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
[*GIT_CMD, "-c", "core.quotePath=false",
"show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
cwd=repo_root, capture_output=True, timeout=15
)
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: