From 38b298d5b29ece40893d3469ee1b3fe7b234494a Mon Sep 17 00:00:00 2001 From: Mohamed Hegazy Date: Fri, 29 May 2026 07:56:22 -0700 Subject: [PATCH] security-guidance: pass core.quotePath=false to diff feeders (#2082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes anthropics/claude-plugins-official#2082 — diff feeders use git's default quotePath setting, which C-quotes any path with a non-ASCII byte. The downstream parsers in gitutil.parse_diff_into_files / gitutil.extract_file_paths_from_diff match the diff header with `re.match(r'^a/(.+?) b/(.+)$', ...)`, which only sees the raw `a/path b/path` form. The C-quoted `"a/\303\201vila/..."` form slips past the regex, the `continue` fires, and the file is silently dropped from review. Effect: a vulnerable file like `Ávila/payment.py` with `os.system('curl ' + user_input)` never reaches the LLM reviewer. False negative in exactly the direction the plugin exists to catch. Sibling of #2056 / #2075: those fixed the UTF-8 decode of the subprocess output (text=True crashed the reader thread on Windows cp1252). This one fixes the diff-feeder commands themselves — the name-only helpers (_git_name_only, _git_status_porcelain) already pass core.quotePath=false for this exact reason; the diff-text feeders were the holdouts. Fix: add `-c core.quotePath=false` to 4 git invocations: - gitutil._git_diff_range (push-sweep feed) - gitutil.get_git_diff (Stop-hook feed) - security_reminder_hook commit-review `git diff` (amend delta) - security_reminder_hook commit-review `git show` (post-amend) With the flag, git emits raw UTF-8 in the diff header (`a/Ávila/payment.py`), the regex matches, and both files (the non-ASCII vulnerable one + any ASCII control file) flow through to review correctly. Verified locally on macOS Python 3.13: - py_compile clean on both files. - Existing 45 smoke + extensibility tests still pass. - 8 new tests in test_diff_parser_non_ascii.py (added to internal test suite at sg-staging/tests/, not in this PR): * 2 static-shape: gitutil._git_diff_range and get_git_diff both contain `core.quotePath=false` in their source. * 2 commit-review static: every subprocess.run in handle_commit_review_posttooluse that mentions `"diff"` or `"show"` also passes the flag. Catches the regression class where a new diff/show call site is added without plumbing the flag through. * 4 end-to-end with a real git repo containing a `Ávila/payment.py` baseline-and-edit: - WITHOUT flag: header is C-quoted, both parsers drop the non-ASCII file (demonstrates the bug). - WITH flag: header is raw UTF-8, both parsers see the file. - parse_diff_into_files (the other parse path) also keeps the file with the flag. - get_git_diff end-to-end produces unquoted output whose file list includes the non-ASCII path. - 53/53 pass total (45 existing + 8 new) in 3.41s. NOT verified end-to-end with a real CC commit-review fire on a non-ASCII path. The static-shape tests catch the regression and the end-to-end git-repo tests pin parser behavior, but the actual LLM-review-with-vuln-found path requires runtime verification against an Anthropic-API-credentialed CC session. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/security-guidance/hooks/gitutil.py | 15 +++++++++++++-- .../hooks/security_reminder_hook.py | 9 +++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/plugins/security-guidance/hooks/gitutil.py b/plugins/security-guidance/hooks/gitutil.py index 68e6f13a..5495d18f 100644 --- a/plugins/security-guidance/hooks/gitutil.py +++ b/plugins/security-guidance/hooks/gitutil.py @@ -199,8 +199,15 @@ def _git_diff_range(repo_root, base, head="HEAD"): them reviewed — otherwise unreviewed commits get permanently silenced. """ try: + # core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...` + # headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."` + # vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)` + # in parse_diff_into_files / extract_file_paths_from_diff matches the + # raw form only — quoted headers slip past and the entire file is + # silently dropped from review. See #2082 (sibling of #2056 / #2075). r = subprocess.run( - [*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head], + [*GIT_CMD, "-c", "core.quotePath=false", + "diff", "-p", "--no-color", "--no-ext-diff", base, head], cwd=repo_root, capture_output=True, timeout=30, ) if r.returncode != 0: @@ -436,7 +443,11 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa # change exists to fix. return "" - cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec + # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers + # so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files + # regex. See #2082 (sibling of #2056 / #2075). + cmd = [*GIT_CMD, "-c", "core.quotePath=false", + "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec try: with _temp_index(cwd, untracked_paths) as env: # env is None when no index could be found (bare repo / not a diff --git a/plugins/security-guidance/hooks/security_reminder_hook.py b/plugins/security-guidance/hooks/security_reminder_hook.py index ffc9ba3c..ac445959 100755 --- a/plugins/security-guidance/hooks/security_reminder_hook.py +++ b/plugins/security-guidance/hooks/security_reminder_hook.py @@ -1118,16 +1118,21 @@ def handle_commit_review_posttooluse(input_data): resolved = 0 for sha in shas: try: + # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` + # headers so non-ASCII paths aren't C-quoted past the downstream + # parse_diff_into_files regex (sibling of #2056 / #2075). See #2082. if pre_amend_sha: # Delta review: pre-amend → post-amend. `git diff` (not show) # so the output is a pure unified diff with no commit header. result = subprocess.run( - [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"], + [*GIT_CMD, "-c", "core.quotePath=false", + "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"], cwd=repo_root, capture_output=True, timeout=15 ) else: result = subprocess.run( - [*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"], + [*GIT_CMD, "-c", "core.quotePath=false", + "show", "-p", "--no-color", "--no-ext-diff", sha, "--"], cwd=repo_root, capture_output=True, timeout=15 ) except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: