From a40c9f1e834a50651236584b22a09894cdcad431 Mon Sep 17 00:00:00 2001 From: Mohamed Hegazy Date: Sat, 30 May 2026 12:19:38 -0700 Subject: [PATCH] security-guidance: move core.quotePath=false to GIT_CMD globally (#2099 followup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup to PR #2086 (which added the flag to 4 specific git call sites) and PR #2100 (text=True purge for #2099). The Windows reporter for #2099 noticed more git invocations still lacked the flag — rev-parse path queries (--show-toplevel, --git-dir, --git-common-dir), reflog %gs subjects, and `git show <rev>:<path>` all output paths, but the per-site PR #2086 approach missed them. The result: an Arabic-named directory shows up via _git_diff_range but rev-parse-emitted paths get C-quoted, breaking downstream os.path.isabs() checks. Fix: add `-c core.quotePath=false` to GIT_CMD itself as the 4th config-set. Every subprocess.run using the *GIT_CMD splat picks it up automatically — diff feeders, rev-parse path queries, reflog log, ls-files, status, git show. No more per-site flag duplication. This commit: 1. gitutil.py: add -c core.quotePath=false to GIT_CMD. 2. Remove the now-redundant per-site flags at the 7 call sites that previously had inline -c core.quotePath=false (cleanup, since the global setting subsumes them): gitutil.py: _git_diff_range, _git_name_only, _git_status_porcelain, get_git_diff (4 sites) diffstate.py: _list_untracked git ls-files (1 site) security_reminder_hook.py: commit-review git diff + git show (2 sites) Verified locally on latest main (post PR #2100 merge) with macOS Python 3.13: - py_compile clean on all 3 modified files. - Bare main BEFORE my fix: 400/401 pass — 1 failure proves the gap (test_git_cmd_contains_quotepath_false catches the missing flag). - Main + my fix: 401/401 pass. 
- 23 new tests in test_quotepath_global.py (added to internal test suite at sg-staging/tests/, not in this PR): * 1 GIT_CMD-level: GIT_CMD list contains core.quotePath=false as a (-c, value) pair. Single source of truth — single place a future PR will be caught if the flag gets dropped. * 10 static-shape (one per hooks/*.py): every subprocess.run uses the *GIT_CMD splat (no bare git invocation that would bypass the global flag). * 12 end-to-end (parametrized over Arabic, Hebrew, CJK directory names): real git repo, _git_diff_range emits unquoted diff, extract_file_paths_from_diff and parse_diff_into_files keep the non-ASCII path in their output, _git_toplevel returns the non-ASCII path intact. - 1 staleness fix in test_diff_parser_non_ascii.py (test_no_bare_git_diff_or_show_without_flag): updated to accept EITHER inline core.quotePath=false OR *GIT_CMD splat (which globally provides it). NOT verified end-to-end on Windows with a non-ASCII repo root path. The new global-flag test pins the contract permanently, and the parametrized macOS tests confirm parser behavior on ASCII-control paths in non-ASCII directories. The Windows-specific rev-parse quoting behavior follows from the same git contract our macOS test environment exercises (POSIX git always emits raw UTF-8 regardless of quotePath; on Windows the flag is what makes output raw). Closes the #2099 followup specifically about _git_diff_range / rev-parse --show-toplevel / git log %gs paths slipping past. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/security-guidance/hooks/diffstate.py | 4 +- plugins/security-guidance/hooks/gitutil.py | 38 +++++++++++-------- .../hooks/security_reminder_hook.py | 8 ++-- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/plugins/security-guidance/hooks/diffstate.py b/plugins/security-guidance/hooks/diffstate.py index d163715d..3ce9da1b 100644 --- a/plugins/security-guidance/hooks/diffstate.py +++ b/plugins/security-guidance/hooks/diffstate.py @@ -355,9 +355,9 @@ def _list_untracked(cwd): the holdouts.""" try: repo = _git_toplevel(cwd) or cwd + # core.quotePath=false comes from GIT_CMD globally (see gitutil.py). r = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", "ls-files", - "--others", "--exclude-standard", "-z"], + [*GIT_CMD, "ls-files", "--others", "--exclude-standard", "-z"], cwd=repo, capture_output=True, timeout=15, ) if r.returncode != 0: diff --git a/plugins/security-guidance/hooks/gitutil.py b/plugins/security-guidance/hooks/gitutil.py index 0144ef08..4d771b42 100644 --- a/plugins/security-guidance/hooks/gitutil.py +++ b/plugins/security-guidance/hooks/gitutil.py @@ -26,6 +26,17 @@ GIT_CMD = [ "git", "-c", "core.fsmonitor=false", "-c", "core.hooksPath=/dev/null", + # core.quotePath=false: emit raw UTF-8 in path-emitting commands instead + # of C-quoting non-ASCII bytes (default `"\\303\\201vila/..."` vs + # `Ávila/...`). Downstream parsers — both ours (parse_diff_into_files, + # extract_file_paths_from_diff) and Python stdlib (os.path.isabs, + # os.path.join) — expect raw paths and silently drop / mishandle the + # quoted form. Adding the flag globally to GIT_CMD covers every + # subprocess.run site that uses the splat — diff feeders, rev-parse + # path queries (--show-toplevel, --git-dir, --git-common-dir), + # reflog %gs subjects, ls-files, status, etc. — without per-site + # flag duplication. See #2082, #2099. 
+ "-c", "core.quotePath=false", ] @@ -222,15 +233,12 @@ def _git_diff_range(repo_root, base, head="HEAD"): them reviewed — otherwise unreviewed commits get permanently silenced. """ try: - # core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...` - # headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."` - # vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)` - # in parse_diff_into_files / extract_file_paths_from_diff matches the - # raw form only — quoted headers slip past and the entire file is - # silently dropped from review. See #2082 (sibling of #2056 / #2075). + # GIT_CMD globally passes core.quotePath=false (see definition) so + # non-ASCII paths in `diff --git a/... b/...` headers come through as + # raw UTF-8, not C-quoted. Required by the downstream + # parse_diff_into_files / extract_file_paths_from_diff regex. r = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", - "diff", "-p", "--no-color", "--no-ext-diff", base, head], + [*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head], cwd=repo_root, capture_output=True, timeout=30, ) if r.returncode != 0: @@ -355,8 +363,9 @@ def _git_name_only(cwd, base, include_untracked=False): # result.stdout=None, and propagate AttributeError out of the helper. # Same fix shape as diffstate._list_untracked. See #2056. def _run(env): + # core.quotePath=false comes from GIT_CMD globally (see definition). result = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base], + [*GIT_CMD, "diff", "--name-only", "-z", base], cwd=cwd, capture_output=True, timeout=30, env=env, ) @@ -393,9 +402,9 @@ def _git_status_porcelain(cwd): # sibling helpers — a non-ASCII path in the worktree would otherwise # crash the cp1252 reader thread on Windows. See #2056. try: + # core.quotePath=false comes from GIT_CMD globally (see definition). 
r = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", "status", - "--porcelain=v1", "-uall", "-z"], + [*GIT_CMD, "status", "--porcelain=v1", "-uall", "-z"], cwd=cwd, capture_output=True, timeout=30, ) if r.returncode != 0: @@ -471,11 +480,8 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa # change exists to fix. return "" - # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers - # so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files - # regex. See #2082 (sibling of #2056 / #2075). - cmd = [*GIT_CMD, "-c", "core.quotePath=false", - "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec + # core.quotePath=false comes from GIT_CMD globally (see definition). + cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec try: with _temp_index(cwd, untracked_paths) as env: # env is None when no index could be found (bare repo / not a diff --git a/plugins/security-guidance/hooks/security_reminder_hook.py b/plugins/security-guidance/hooks/security_reminder_hook.py index 275d3e37..caa7759e 100755 --- a/plugins/security-guidance/hooks/security_reminder_hook.py +++ b/plugins/security-guidance/hooks/security_reminder_hook.py @@ -1197,18 +1197,18 @@ def handle_commit_review_posttooluse(input_data): # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` # headers so non-ASCII paths aren't C-quoted past the downstream # parse_diff_into_files regex (sibling of #2056 / #2075). See #2082. + # core.quotePath=false comes from GIT_CMD globally (see gitutil.py). if pre_amend_sha: # Delta review: pre-amend → post-amend. `git diff` (not show) # so the output is a pure unified diff with no commit header. 
result = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", - "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"], + [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", + pre_amend_sha, sha, "--"], cwd=repo_root, capture_output=True, timeout=15 ) else: result = subprocess.run( - [*GIT_CMD, "-c", "core.quotePath=false", - "show", "-p", "--no-color", "--no-ext-diff", sha, "--"], + [*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"], cwd=repo_root, capture_output=True, timeout=15 ) except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: