security-guidance: move core.quotePath=false to GIT_CMD globally (#2099 followup)

Followup to PR #2086 (which added the flag to 4 specific git call
sites) and PR #2100 (text=True purge for #2099).

The Windows reporter for #2099 noticed more git invocations still
lacked the flag — rev-parse path queries (--show-toplevel, --git-dir,
--git-common-dir), reflog %gs subjects, and `git show <sha>:<path>`
all output paths but the per-site PR #2086 approach missed them. The
result: an Arabic-named directory shows up via _git_diff_range but
rev-parse-emitted paths get C-quoted, breaking downstream
os.path.isabs() checks.

Fix: add `-c core.quotePath=false` to GIT_CMD itself as the 4th
config-set. Every subprocess.run using the *GIT_CMD splat picks it
up automatically — diff feeders, rev-parse path queries, reflog log,
ls-files, status, git show. No more per-site flag duplication.

This commit:

1. gitutil.py: add -c core.quotePath=false to GIT_CMD.
2. Remove the now-redundant per-site flags at the 7 call sites that
   previously had inline -c core.quotePath=false (cleanup, since the
   global setting subsumes them):
   gitutil.py: _git_diff_range, _git_name_only, _git_status_porcelain,
               get_git_diff (4 sites)
   diffstate.py: _list_untracked git ls-files (1 site)
   security_reminder_hook.py: commit-review git diff + git show (2 sites)

Verified locally on latest main (post PR #2100 merge) with macOS
Python 3.13:

  - py_compile clean on all 3 modified files.
  - Bare main BEFORE my fix: 400/401 pass — 1 failure proves the gap
    (test_git_cmd_contains_quotepath_false catches the missing flag).
  - Main + my fix: 401/401 pass.
  - 23 new tests in test_quotepath_global.py (added to internal test
    suite at sg-staging/tests/, not in this PR):

      * 1 GIT_CMD-level: GIT_CMD list contains core.quotePath=false as
        a (-c, value) pair. Single source of truth — single place a
        future PR will be caught if the flag gets dropped.
      * 10 static-shape (one per hooks/*.py): every subprocess.run
        uses the *GIT_CMD splat (no bare git invocation that would
        bypass the global flag).
      * 12 end-to-end (parametrized over Arabic, Hebrew, CJK directory
        names): real git repo, _git_diff_range emits unquoted diff,
        extract_file_paths_from_diff and parse_diff_into_files keep
        the non-ASCII path in their output, _git_toplevel returns the
        non-ASCII path intact.

  - 1 staleness fix in test_diff_parser_non_ascii.py
    (test_no_bare_git_diff_or_show_without_flag): updated to accept
    EITHER inline core.quotePath=false OR *GIT_CMD splat (which
    globally provides it).

NOT verified end-to-end on Windows with a non-ASCII repo root path.
The new global-flag test pins the contract permanently, and the
parametrized macOS tests confirm parser behavior on ASCII-control
paths in non-ASCII directories. The Windows-specific rev-parse
quoting behavior follows from the same git contract our macOS test
environment exercises (POSIX git always emits raw UTF-8 regardless
of quotePath; on Windows the flag is what makes output raw).

Closes the #2099 followup specifically about _git_diff_range /
rev-parse --show-toplevel / git log %gs paths slipping past.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mohamed Hegazy 2026-05-30 12:19:38 -07:00
parent c7a3e2ffa0
commit a40c9f1e83
No known key found for this signature in database
3 changed files with 28 additions and 22 deletions

View File

@ -355,9 +355,9 @@ def _list_untracked(cwd):
the holdouts.""" the holdouts."""
try: try:
repo = _git_toplevel(cwd) or cwd repo = _git_toplevel(cwd) or cwd
# core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
r = subprocess.run( r = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", "ls-files", [*GIT_CMD, "ls-files", "--others", "--exclude-standard", "-z"],
"--others", "--exclude-standard", "-z"],
cwd=repo, capture_output=True, timeout=15, cwd=repo, capture_output=True, timeout=15,
) )
if r.returncode != 0: if r.returncode != 0:

View File

@ -26,6 +26,17 @@ GIT_CMD = [
"git", "git",
"-c", "core.fsmonitor=false", "-c", "core.fsmonitor=false",
"-c", "core.hooksPath=/dev/null", "-c", "core.hooksPath=/dev/null",
# core.quotePath=false: emit raw UTF-8 in path-emitting commands instead
# of C-quoting non-ASCII bytes (default `"\\303\\201vila/..."` vs
# `Ávila/...`). Downstream parsers — both ours (parse_diff_into_files,
# extract_file_paths_from_diff) and Python stdlib (os.path.isabs,
# os.path.join) — expect raw paths and silently drop / mishandle the
# quoted form. Adding the flag globally to GIT_CMD covers every
# subprocess.run site that uses the splat — diff feeders, rev-parse
# path queries (--show-toplevel, --git-dir, --git-common-dir),
# reflog %gs subjects, ls-files, status, etc. — without per-site
# flag duplication. See #2082, #2099.
"-c", "core.quotePath=false",
] ]
@ -222,15 +233,12 @@ def _git_diff_range(repo_root, base, head="HEAD"):
them reviewed otherwise unreviewed commits get permanently silenced. them reviewed otherwise unreviewed commits get permanently silenced.
""" """
try: try:
# core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...` # GIT_CMD globally passes core.quotePath=false (see definition) so
# headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."` # non-ASCII paths in `diff --git a/... b/...` headers come through as
# vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)` # raw UTF-8, not C-quoted. Required by the downstream
# in parse_diff_into_files / extract_file_paths_from_diff matches the # parse_diff_into_files / extract_file_paths_from_diff regex.
# raw form only — quoted headers slip past and the entire file is
# silently dropped from review. See #2082 (sibling of #2056 / #2075).
r = subprocess.run( r = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", [*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head],
"diff", "-p", "--no-color", "--no-ext-diff", base, head],
cwd=repo_root, capture_output=True, timeout=30, cwd=repo_root, capture_output=True, timeout=30,
) )
if r.returncode != 0: if r.returncode != 0:
@ -355,8 +363,9 @@ def _git_name_only(cwd, base, include_untracked=False):
# result.stdout=None, and propagate AttributeError out of the helper. # result.stdout=None, and propagate AttributeError out of the helper.
# Same fix shape as diffstate._list_untracked. See #2056. # Same fix shape as diffstate._list_untracked. See #2056.
def _run(env): def _run(env):
# core.quotePath=false comes from GIT_CMD globally (see definition).
result = subprocess.run( result = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base], [*GIT_CMD, "diff", "--name-only", "-z", base],
cwd=cwd, capture_output=True, timeout=30, cwd=cwd, capture_output=True, timeout=30,
env=env, env=env,
) )
@ -393,9 +402,9 @@ def _git_status_porcelain(cwd):
# sibling helpers — a non-ASCII path in the worktree would otherwise # sibling helpers — a non-ASCII path in the worktree would otherwise
# crash the cp1252 reader thread on Windows. See #2056. # crash the cp1252 reader thread on Windows. See #2056.
try: try:
# core.quotePath=false comes from GIT_CMD globally (see definition).
r = subprocess.run( r = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", "status", [*GIT_CMD, "status", "--porcelain=v1", "-uall", "-z"],
"--porcelain=v1", "-uall", "-z"],
cwd=cwd, capture_output=True, timeout=30, cwd=cwd, capture_output=True, timeout=30,
) )
if r.returncode != 0: if r.returncode != 0:
@ -471,11 +480,8 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa
# change exists to fix. # change exists to fix.
return "" return ""
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers # core.quotePath=false comes from GIT_CMD globally (see definition).
# so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
# regex. See #2082 (sibling of #2056 / #2075).
cmd = [*GIT_CMD, "-c", "core.quotePath=false",
"diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
try: try:
with _temp_index(cwd, untracked_paths) as env: with _temp_index(cwd, untracked_paths) as env:
# env is None when no index could be found (bare repo / not a # env is None when no index could be found (bare repo / not a

View File

@ -1197,18 +1197,18 @@ def handle_commit_review_posttooluse(input_data):
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...`
# headers so non-ASCII paths aren't C-quoted past the downstream # headers so non-ASCII paths aren't C-quoted past the downstream
# parse_diff_into_files regex (sibling of #2056 / #2075). See #2082. # parse_diff_into_files regex (sibling of #2056 / #2075). See #2082.
# core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
if pre_amend_sha: if pre_amend_sha:
# Delta review: pre-amend → post-amend. `git diff` (not show) # Delta review: pre-amend → post-amend. `git diff` (not show)
# so the output is a pure unified diff with no commit header. # so the output is a pure unified diff with no commit header.
result = subprocess.run( result = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", [*GIT_CMD, "diff", "--no-color", "--no-ext-diff",
"diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"], pre_amend_sha, sha, "--"],
cwd=repo_root, capture_output=True, timeout=15 cwd=repo_root, capture_output=True, timeout=15
) )
else: else:
result = subprocess.run( result = subprocess.run(
[*GIT_CMD, "-c", "core.quotePath=false", [*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
"show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
cwd=repo_root, capture_output=True, timeout=15 cwd=repo_root, capture_output=True, timeout=15
) )
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: