mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-14 14:46:03 -03:00
2193 lines
101 KiB
Python
Executable File
2193 lines
101 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
Security Guidance Plugin for Claude Code
|
||
|
||
A hooks-based plugin that guides Claude toward writing more secure code. It runs as
|
||
UserPromptSubmit, PostToolUse, and Stop hooks via the Claude Code plugin system.
|
||
|
||
## Architecture
|
||
|
||
The plugin has two layers:
|
||
|
||
1. **Pattern-based rules (PostToolUse, every edit)**: Fast regex checks that run on
|
||
every file write. Detects common vulnerabilities like hardcoded secrets, SQL injection,
|
||
command injection, path traversal, and insecure session configs. Injects brief warnings
|
||
via additionalContext.
|
||
|
||
2. **Stop hook (final review)**: When Claude finishes, uses `git diff` against a
|
||
baseline SHA (captured at UserPromptSubmit) to get only the code changed during the
|
||
session. Runs two Haiku analyses on the diff:
|
||
a) Concrete vulnerability scan with severity ratings
|
||
b) Areas-of-concern analysis identifying categories to investigate
|
||
Exits with code 2 to force Claude to continue and address findings.
|
||
|
||
## How the git baseline works
|
||
|
||
On each UserPromptSubmit, the plugin runs `git stash create` to get a SHA representing
|
||
the current working tree state (HEAD + any uncommitted changes). This SHA is saved to
|
||
the session state file. When the Stop hook fires, it runs `git diff <baseline_sha>` to
|
||
get only the changes made since that snapshot. After analysis, the baseline is updated
|
||
so the next Stop hook iteration only sees new changes.
|
||
|
||
This means:
|
||
- Only code Claude actually changed is reviewed (not pre-existing code)
|
||
- Mid-session commits are handled correctly (diff is against the snapshot, not HEAD)
|
||
- Each turn only reviews new changes (baseline updates after each stop hook)
|
||
|
||
## Configuration
|
||
|
||
Kill switches:
|
||
- SECURITY_GUIDANCE_DISABLE: "1" to fully disable the plugin (alias for ENABLE_SECURITY_REMINDER=0)
|
||
- ENABLE_SECURITY_REMINDER: "0" to fully disable the plugin (legacy name)
|
||
|
||
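Per-feature toggles (all default enabled; set to "0" to disable):
- ENABLE_PATTERN_RULES: PostToolUse regex warnings on Edit/Write
- ENABLE_CODE_SECURITY_REVIEW: master switch for all LLM reviews (Stop-hook git-diff review and commit/push reviews)
- ENABLE_STOP_REVIEW: Stop-hook git-diff LLM review only (does not gate the commit/push reviews)
- ENABLE_COMMIT_REVIEW: PostToolUse[Bash] commit security review (also gates the push sweep)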
|
||
|
||
Other:
|
||
- SECURITY_REVIEW_MODEL: Model for LLM review (default: claude-opus-4-7)
|
||
- ANTHROPIC_API_KEY: Required for LLM-based reviews
|
||
- ANTHROPIC_AUTH_TOKEN: Alternative to API key — OAuth access token sent as Bearer auth.
|
||
Claude Code passes this automatically for OAuth-authenticated users.
|
||
"""
|
||
|
||
try:
|
||
import fcntl
|
||
except ImportError:
|
||
fcntl = None
|
||
import contextlib
|
||
import glob
|
||
import json
|
||
import os
|
||
import random
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
import threading
|
||
import urllib.request
|
||
from datetime import datetime
|
||
from enum import IntEnum
|
||
from typing import Optional, Tuple, Dict, Any, List
|
||
|
||
# review_api is the importable surface for the agentic-review prompts,
|
||
# schemas, and pure filters. External callers (e.g. agentic review harnesses)
|
||
# import review_api directly so they run the same eval-covered prompts
|
||
# without going through the CC hook protocol. The underscored names below
|
||
# alias into it so this script stays the single CC-hook entrypoint.
|
||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
import review_api # noqa: E402
|
||
from _base import ( # noqa: E402,F401
|
||
DEBUG_LOG_FILE, DEBUG_LOG_MAX_BYTES, debug_log,
|
||
PROVENANCE_TAG, PROVENANCE_BANNER,
|
||
_read_plugin_version_int, _PV, _USAGE, _USAGE_LOCK,
|
||
_PRICE_PER_MTOK, _PRICE_DEFAULT, _record_usage, _usage_metrics,
|
||
)
|
||
import extensibility # noqa: E402
|
||
from patterns import ( # noqa: E402,F401
|
||
_JS_EXTS, _PY_EXTS, _DOC_EXTS,
|
||
_UNSAFE_DESERIALIZATION_REMINDER, _UNSAFE_YAML_LOAD_REMINDER,
|
||
_UNSAFE_TORCH_LOAD_REMINDER, SECURITY_PATTERNS, RuleId,
|
||
_RULE_NAME_TO_ID, rule_names_to_mask,
|
||
)
|
||
from session_state import ( # noqa: E402,F401
|
||
_state_key, get_state_file, get_lock_file, cleanup_old_state_files,
|
||
load_state, save_state, with_locked_state,
|
||
)
|
||
from gitutil import ( # noqa: E402,F401
|
||
GIT_CMD,
|
||
_git_rev_parse_head, _find_git_index, _diff_pathspec, _temp_index,
|
||
_git_toplevel, _git_dir, _git_rev_list_range, _git_diff_range,
|
||
_detect_main_branch, _git_reflog_recent_commits, _git_name_only,
|
||
_git_status_porcelain, _is_ancestor, get_git_diff,
|
||
SOURCE_CODE_EXTENSIONS, SOURCE_CODE_BASENAMES,
|
||
NON_SOURCE_EXTENSIONLESS_BASENAMES, SKIP_PATH_PATTERNS,
|
||
SKIP_FILE_SUFFIXES, _SECURITY_RISK_PATH_TOKENS,
|
||
_LOW_PRIORITY_SUFFIXES, _LOW_PRIORITY_PATH_TOKENS,
|
||
_prioritize_diff_files, _is_reviewable_source,
|
||
extract_file_paths_from_diff, parse_diff_into_files,
|
||
filter_preexisting_from_diff,
|
||
)
|
||
from diffstate import ( # noqa: E402,F401
|
||
STOP_LOOP_STATE_TTL_SEC, PREVIOUS_FINDINGS_TTL_SEC,
|
||
save_baseline_sha, load_baseline_sha, record_touched_path,
|
||
consume_stop_state, restore_unreviewed_stop_state,
|
||
get_baseline_file_content, capture_git_baseline,
|
||
_REVIEWED_SHAS_BASENAME, _REVIEWED_SHAS_CAP,
|
||
_reviewed_shas_path, _load_reviewed_shas, _append_reviewed_shas,
|
||
UNTRACKED_BASELINE_CAP, _list_untracked, compute_v2_review_set,
|
||
)
|
||
import llm # noqa: E402 module ref for reassignable globals (_last_call_claude_http_error etc.)
|
||
from llm import ( # noqa: E402,F401
|
||
ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN, HAS_API_CREDENTIALS,
|
||
SECURITY_REVIEW_MODEL, CLAUDE_CODE_SYSTEM_PROMPT,
|
||
_last_call_claude_http_error,
|
||
ensure_anthropic_reachable,
|
||
_last_review_truncated_bytes, _auth_prefer_token,
|
||
DIFF_PER_FILE_BYTES, DIFF_TOTAL_BYTES, _AGENTIC_INVESTIGATE_SYSTEM,
|
||
_FINDINGS_SCHEMA, _SURVIVED_SCHEMA, _REWAKE_SUMMARY_BUDGET,
|
||
_cap_files_for_prompt, _build_auth_headers, _call_claude, _call_claude_dual_or,
|
||
_format_vulns_guidance, _format_vulns_summary, _finding_keys, _dedup_against_state,
|
||
analyze_code_security, _agentic_commit_review_enabled, agentic_review,
|
||
analyze_security_concerns,
|
||
)
|
||
|
||
# LLM-based code security review (enabled by default when API key is available).
# Unset or empty string = enabled (default). The comparison is exact: only the
# literal string "0" disables; any other value leaves the feature on.
_enable_code_review_str = os.environ.get("ENABLE_CODE_SECURITY_REVIEW", "1")
ENABLE_CODE_SECURITY_REVIEW = _enable_code_review_str != "0"

# Pattern-based rules (enabled by default; set to "0" to use only LLM review).
# Unset or empty string = enabled (default). The comparison is exact: only the
# literal string "0" disables; any other value leaves the feature on.
_enable_pattern_str = os.environ.get("ENABLE_PATTERN_RULES", "1")
ENABLE_PATTERN_RULES = _enable_pattern_str != "0"
|
||
|
||
# Per-feature kill switches. Each defaults to enabled. Set to "0" to disable
# just that one feature without touching the rest. Motivated by feedback that
# autonomous-agent setups sometimes need to disable specific injection points
# (e.g. the PreToolUse[Task] prompt append, which can read as prompt injection
# to hardened subagents) while keeping the rest of the plugin active. See
# README for a full description of each feature.
# Commit review also honors legacy SECURITY_GUIDANCE_COMMIT_REVIEW=off; see
# is_commit_review_enabled().
# As with the toggles above, only the exact string "0" disables.
ENABLE_COMMIT_REVIEW = os.environ.get("ENABLE_COMMIT_REVIEW", "1") != "0"
# Stop-hook git-diff review only — does NOT gate the commit/push reviews.
# Lets multi-agent / shared-worktree deployments keep the commit reviewer
# (anchored to a fixed SHA from the worker's own `git commit` stdout) while
# turning off the Stop-hook diff (anchored on baseline_sha…HEAD, which a
# sibling agent in the same worktree can move under us). The pre-existing
# ENABLE_CODE_SECURITY_REVIEW gate is shared between Stop and commit/push
# and stays for backwards compat as the all-LLM-review master switch.
ENABLE_STOP_REVIEW = os.environ.get("ENABLE_STOP_REVIEW", "1") != "0"
|
||
|
||
# Master kill switch. Either SECURITY_GUIDANCE_DISABLE=1 or
# ENABLE_SECURITY_REMINDER=0 disables the plugin entirely. Kept as two names
# because ENABLE_SECURITY_REMINDER predates the rename and some users already
# have it baked into shell rc files; SECURITY_GUIDANCE_DISABLE reads correctly
# as a kill switch (no double-negative).
#
# The two names are parsed differently: SECURITY_GUIDANCE_DISABLE is trimmed,
# lower-cased and accepts 1/true/yes/on, whereas ENABLE_SECURITY_REMINDER
# disables only on the exact string "0".
_disable_str = os.environ.get("SECURITY_GUIDANCE_DISABLE", "").strip().lower()
SECURITY_GUIDANCE_DISABLED = (
    _disable_str in ("1", "true", "yes", "on")
    or os.environ.get("ENABLE_SECURITY_REMINDER", "1") == "0"
)
|
||
|
||
# Maximum number of times the stop hook can fire per user turn.
# Allows iterative fixing: Claude stops → review → fix → stop → review again.
# Set to 0 for unlimited (like the old plugin). Default 3 for iterative fixing.
# An empty or non-integer value keeps the default: a ValueError raised here
# would abort the module at import time, i.e. on every hook invocation.
MAX_STOP_HOOK_FIRINGS = 3
with contextlib.suppress(ValueError):
    MAX_STOP_HOOK_FIRINGS = int(os.environ.get("MAX_STOP_HOOK_FIRINGS", "3"))

# Cap on source files sent to the LLM reviewer per Stop fire. A stale baseline
# meeting an ungitignored build directory can produce an enormous spurious
# diff; unbounded diffs burn tokens and risk 400 on context length.
# Same fallback rule as above: unparsable values keep the default of 30.
MAX_DIFF_FILES = 30
with contextlib.suppress(ValueError):
    MAX_DIFF_FILES = int(os.environ.get("MAX_DIFF_FILES", "30"))
|
||
|
||
# Appended to all exit(2) guidance so the asyncRewake auto-turn doesn't
# cause the model to abandon the user's original request.
# The text begins with a blank line ("\n\n") so it reads as a separate
# paragraph after whatever guidance it is appended to.
CONTINUATION_SUFFIX = (
    "\n\nAfter addressing or acknowledging this finding, continue with the "
    "user's original request or continue waiting for their reply — this "
    "review is supplementary feedback, not a replacement for your previous "
    "response."
)
|
||
|
||
def emit_metrics(metrics, rewake_summary=None):
    """
    Print one SyncHookJSONOutput JSON line on stdout for Claude Code.

    asyncRewake (Stop) hooks: CC scans stdout for the first line starting
    with `{` that validates as SyncHookJSONOutput and emits the hook metrics
    event from it. Sync (PostToolUse) hooks: the `metrics` key of the normal
    JSON response is picked up directly.

    Constraints on the payload: keys ^[a-z][a-z0-9_]{0,39}$, values
    bool|finite-number, at most 20 keys (10 in older CC versions).

    Ordering matters: CC keeps only the first 20 keys, so `pv` and the
    tok_*/cost_usd usage block are placed FIRST to survive any overflow. An
    earlier `len(metrics) < 10` guard served the same purpose but went stale:
    once `rate_count` was added to every commit-review emit the with-vulns
    dict reached 10 keys, `pv` was skipped, and findings metrics lost their
    plugin version, breaking per-version breakdowns.

    `rewake_summary` (asyncRewake only) overrides, for this run, the static
    rewakeSummary from hooks.json — the one-liner shown to the user in the
    terminal task notification. It has to share the JSON line with the
    metrics because CC stops scanning stdout after the first `{` line.
    """
    # Keys that must lead the dict: plugin version (unless the caller already
    # supplied one), followed by the usage counters.
    leading = {"pv": _PV} if _PV and "pv" not in metrics else {}
    leading.update(_usage_metrics())

    payload = {"metrics": {**leading, **metrics} if leading else metrics}
    if rewake_summary:
        payload["rewakeSummary"] = rewake_summary
    print(json.dumps(payload), flush=True)
|
||
|
||
# =====================================================================
|
||
# State management
|
||
# =====================================================================
|
||
|
||
#
|
||
# Low-level state-file plumbing (_state_key, get_state_file,
|
||
# get_lock_file, cleanup_old_state_files, load_state, save_state,
|
||
# with_locked_state) moved to session_state.py and re-exported above.
|
||
|
||
def atomic_check_and_mark_warning(session_id, warning_key):
    """
    Atomically check if a warning has been shown and mark it as shown if not.

    Returns True if this is the first time seeing this warning (should show it),
    False if it was already shown (should skip it). When the locked state is
    unavailable (with_locked_state returns None) the answer is True, so the
    warning is shown rather than silently dropped.
    """
    def _check(state):
        # Tolerate a state dict that lacks the key or holds a non-list value
        # (old or corrupted state file), matching how the sibling helpers
        # treat "counters", "rate_limits" and "pending_warnings".
        shown = state.get("shown_warnings")
        if not isinstance(shown, list):
            shown = []
            state["shown_warnings"] = shown
        if warning_key in shown:
            return False
        shown.append(warning_key)
        return True

    result = with_locked_state(session_id, _check)
    return result if result is not None else True
|
||
|
||
def atomic_check_counter(session_id, counter_key, max_count):
    """
    Atomically test a per-session counter against its limit and bump it.

    Returns True (and increments) while the stored count is below
    `max_count`; returns False, leaving the count untouched, once it has
    reached or exceeded the limit. If the locked state cannot be obtained the
    call fails open and returns True.
    """
    def _bump(state):
        table = state.get("counters", {})
        seen = table.get(counter_key, 0)
        if seen < max_count:
            table[counter_key] = seen + 1
            # Re-attach in case "counters" was absent and `table` is a fresh dict.
            state["counters"] = table
            return True
        return False

    outcome = with_locked_state(session_id, _bump)
    return True if outcome is None else outcome
|
||
|
||
def atomic_check_rate_limit(session_id, key, max_per_window, window_s):
    """Rolling-window rate limit: at most `max_per_window` calls in any
    `window_s`-second span, tracked per (session_id, key).

    Returns (allowed: bool, count_in_window: int). The count is taken after
    the decision (it includes this call when allowed) so callers can emit it
    directly as a telemetry gauge.

    Supersedes the session-lifetime `atomic_check_counter` for commit-review
    and push-sweep. Telemetry showed a small but persistent share of sessions
    hitting the lifetime cap; they were multi-day persistent sessions that
    then lost coverage for many later commits, not burst abusers. A rolling
    hour keeps the same cost ceiling for any 1h window while letting long
    sessions regain coverage.

    State layout: rate_limits: {"<key>": [ts, ts, ...]}. The list is pruned
    on every call, which bounds it by max_per_window. No migration from the
    old `counters` dict is needed because the key is different.
    """
    import time as _time
    now = _time.time()
    cutoff = now - window_s

    def _admit(state):
        buckets = state.setdefault("rate_limits", {})
        # Keep only in-window numeric timestamps; anything else is junk from
        # a corrupted state file and is dropped.
        recent = [
            stamp for stamp in buckets.get(key, [])
            if isinstance(stamp, (int, float)) and stamp > cutoff
        ]
        allowed = len(recent) < max_per_window
        if allowed:
            recent.append(now)
        buckets[key] = recent
        return allowed, len(recent)

    verdict = with_locked_state(session_id, _admit)
    # State unavailable → fail-open (same posture as atomic_check_counter).
    return (True, 0) if verdict is None else verdict
|
||
|
||
# =====================================================================
|
||
# Warning outcome tracking
|
||
#
|
||
# Records each pattern warning as pending when it fires. At Stop, sweep
|
||
# all pending entries: re-read each file, re-check patterns, and emit a
|
||
# fixed-vs-unresolved tally. No per-edit work — pending is recorded only
|
||
# when a pattern matches (rare), and the sweep runs once at session end.
|
||
#
|
||
# State key: pending_warnings: {"<file>:<rule>": true}
|
||
# =====================================================================
|
||
|
||
def record_pending_warnings(session_id, file_path, rule_names):
    """Flag each "<file_path>:<rule>" pair as pending so the Stop-hook
    outcome sweep re-checks it later."""
    def _record(state):
        bucket = state.get("pending_warnings")
        if not isinstance(bucket, dict):
            # Missing or malformed entry: start over with an empty mapping.
            bucket = state["pending_warnings"] = {}
        bucket.update({f"{file_path}:{rule}": True for rule in rule_names})

    with_locked_state(session_id, _record)
|
||
|
||
def sweep_pending_warnings(session_id):
    """
    Final Stop-hook sweep over pending_warnings.

    Each recorded file is re-read and re-checked against the patterns. The
    result is (fixed, unresolved, unresolved_mask), and the pending set is
    cleared. A file that cannot be opened (e.g. deleted) counts as fixed —
    the dangerous code is gone. This is telemetry: it never raises, so it
    cannot break the Stop hook.
    """
    def _sweep(state):
        try:
            pending = state.get("pending_warnings")
            if not (isinstance(pending, dict) and pending):
                return 0, 0, 0

            # Group the "<path>:<rule>" keys by path; malformed keys are ignored.
            rules_by_path = {}
            for entry in pending:
                if isinstance(entry, str) and ":" in entry:
                    path, _, rule_name = entry.rpartition(":")
                    rules_by_path.setdefault(path, set()).add(rule_name)

            fixed_count = 0
            still_open = []
            for path, rule_names in rules_by_path.items():
                try:
                    with open(path, "r", errors="replace") as handle:
                        live = {name for name, _ in check_patterns(path, handle.read())}
                except (OSError, IOError):
                    live = set()
                for rule_name in rule_names:
                    if rule_name in live:
                        still_open.append(rule_name)
                    else:
                        fixed_count += 1

            state["pending_warnings"] = {}
            # Only known rule names go into the mask, so a rule renamed or
            # removed since the state was written cannot KeyError
            # rule_names_to_mask.
            recognized = [name for name in still_open if name in _RULE_NAME_TO_ID]
            return fixed_count, len(still_open), rule_names_to_mask(recognized)
        except Exception as e:
            debug_log(f"sweep_pending_warnings failed: {e}")
            return 0, 0, 0

    outcome = with_locked_state(session_id, _sweep)
    return (0, 0, 0) if outcome is None else outcome
|
||
|
||
# =====================================================================
|
||
# Git baseline management
|
||
# =====================================================================
|
||
|
||
# =====================================================================
|
||
# Pattern matching
|
||
# =====================================================================
|
||
|
||
def check_patterns(file_path, content):
    """Evaluate every built-in and user-supplied security pattern against a
    file path and its content.

    Returns a list of (ruleName, reminder) tuples, one for EVERY rule that
    matched (not just the first).
    """
    rel_path = file_path.lstrip("/")
    hits = []

    for rule in list(SECURITY_PATTERNS) + extensibility.user_patterns():
        # path_filter is a gate: when present, the rule is only considered
        # for paths it accepts. It differs from path_check, which is itself
        # a positive match condition (e.g. .github/workflows/). A filter
        # that raises is treated as "does not apply".
        if "path_filter" in rule:
            try:
                rejected = not rule["path_filter"](rel_path)
            except Exception:
                rejected = True
            if rejected:
                continue

        hit = False

        if "path_check" in rule:
            try:
                hit = bool(rule["path_check"](rel_path))
            except Exception:
                hit = False

        if not hit and "substrings" in rule and content:
            hit = any(needle in content for needle in rule["substrings"])

        if not hit and "regex" in rule and content:
            try:
                hit = re.search(rule["regex"], content) is not None
            except Exception:
                hit = False

        if hit:
            hits.append((rule["ruleName"], rule["reminder"]))

    return hits
|
||
|
||
def extract_content_from_input(tool_name, tool_input):
    """Return the text a tool call is about to write, chosen by tool type.

    Write → "content"; Edit → "new_string"; MultiEdit → every edit's
    "new_string" joined with single spaces. Any other tool yields "".
    """
    if tool_name == "Write":
        return tool_input.get("content", "")
    if tool_name == "Edit":
        return tool_input.get("new_string", "")
    if tool_name != "MultiEdit":
        return ""

    edits = tool_input.get("edits", [])
    if not edits:
        return ""
    fragments = [edit.get("new_string", "") for edit in edits]
    return " ".join(fragments)
|
||
|
||
# =====================================================================
|
||
# Hook handlers
|
||
# =====================================================================
|
||
|
||
def handle_user_prompt_submit(input_data):
    """
    UserPromptSubmit handler — snapshot the git baseline SHA.

    Runs on every user prompt and refreshes the baseline so the Stop hook
    reviews only what changed since the latest prompt. Always ends the
    process with exit code 0.

    touched_paths / fire_count / previous_findings are deliberately NOT
    reset here: Stop consumes them (consume_stop_state) or they expire by
    time. When UPS raced the asyncRewake Stop hook, wiping them here caused a
    meaningful share of reviews to be lost because the wipe landed before
    Stop's state read.
    """
    cwd = input_data.get("cwd", "")
    if not cwd:
        debug_log("UPS: no cwd, skipping baseline capture")
        sys.exit(0)

    session_id = input_data.get("session_id", "default")

    # stash-create and ls-files both walk the worktree (~2-5s each in a very
    # large repo). Running them side by side keeps UPS latency ≈ max(both).
    import concurrent.futures as _cf
    with _cf.ThreadPoolExecutor(max_workers=2) as pool:
        baseline_future = pool.submit(capture_git_baseline, cwd)
        untracked_future = pool.submit(_list_untracked, cwd)
        sha = baseline_future.result()
    # The untracked snapshot is always taken. `git stash create` returns
    # empty when there are no TRACKED changes, yet pre-existing untracked
    # files must still be excluded from the next Stop's review_set —
    # otherwise an untracked-only working tree would have every untracked
    # file reviewed on every turn until something tracked is dirtied.
    untracked_now = untracked_future.result() or {}
    head = _git_rev_parse_head(cwd)

    # If the previous turn's Stop hook never ran (user interrupt, follow-up
    # during work, tool-reject, model crash, maxTurns, PostToolUse block…),
    # touched_paths is still populated because consume_stop_state is its only
    # consumer and runs under the state lock. Overwriting baseline_sha now
    # would re-baseline *past* those unreviewed edits and hide them from the
    # next Stop for good. Keeping the old baseline lets the next Stop diff
    # the aborted turn's edits together with the new turn's edits.
    kept_prior_baseline = False

    def _save(state):
        nonlocal kept_prior_baseline
        # Preserve only when an old baseline actually exists. The first UPS
        # of a session can already see touched_paths if PostToolUse somehow
        # ran first (print mode, odd harnesses) — a baseline must still be
        # captured in that case.
        if state.get("touched_paths") and state.get("baseline_sha"):
            kept_prior_baseline = True
            return
        if sha:
            state["baseline_sha"] = sha
            state["head_at_capture"] = head
        # untracked_at_baseline does not depend on the stash yielding a SHA —
        # it is written unconditionally so compute_v2_review_set's
        # preexisting-untracked exclusion works in untracked-only trees.
        state["untracked_at_baseline"] = untracked_now

    with_locked_state(session_id, _save)

    if kept_prior_baseline:
        debug_log(
            "UPS: preserving prior baseline — previous Stop hook never "
            "consumed touched_paths (likely user interrupt / aborted turn)"
        )
    elif sha:
        debug_log(f"Captured git baseline: {sha[:12]}")
    else:
        debug_log("Failed to capture git baseline (not a git repo?)")

    sys.exit(0)
|
||
|
||
def _resolve_amend_pre_sha(repo_root, expected_post_sha=None):
    """Return the pre-amend SHA for a `git commit --amend` that just ran,
    read from the reflog, or None when it cannot be determined safely.

    expected_post_sha: the post-amend SHA the caller parsed from bash stdout
    (or reflog). When given, HEAD@{0} of `repo_root` must start with it
    before the reflog-derived answer is trusted. The comparison is by prefix
    because bash stdout SHAs are abbreviated while reflog %H is 40 chars.
    This guards the cross-repo case (`cd ../other && git commit --amend &&
    cd -`) in which `repo_root` has a recent, unrelated amend of its own.

    Two reflog conditions are required:
      * HEAD@{0}'s subject starts with `commit (amend)`. Otherwise the
        `--amend` regex matched something that did not actually amend
        (`git commit --amend --dry-run`, aliased commands, aborted amends)
        and HEAD@{1} would be the wrong commit.
      * HEAD@{1}'s subject does NOT start with `commit (amend)`. With
        back-to-back amends HEAD@{1} is the previous amend's post state and
        the true original would be HEAD@{2}; that is treated as reaching,
        so None is returned and the caller falls back to a full review.

    Output is captured as bytes and decoded with errors='replace': reflog
    subjects embed commit subjects, which git stores as raw bytes (latin-1,
    cp1252, …). text=True would raise UnicodeDecodeError (a ValueError, not
    an OSError) on non-UTF8 bytes and crash the hook.
    """
    if not repo_root:
        return None

    try:
        proc = subprocess.run(
            [*GIT_CMD, "log", "-g", "-2", "--format=%H|%gs", "HEAD"],
            cwd=repo_root, capture_output=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None
    if proc.returncode != 0:
        return None

    decoded = proc.stdout.decode("utf-8", errors="replace")
    entries = [row.partition("|") for row in decoded.splitlines() if "|" in row]
    if len(entries) < 2:
        return None
    post_sha, _, post_subject = entries[0]
    pre_sha, _, pre_subject = entries[1]

    amend_marker = "commit (amend)"
    if not post_subject.startswith(amend_marker) or pre_subject.startswith(amend_marker):
        return None

    # Cross-repo guard: the post-amend SHA about to be reviewed must match
    # HEAD@{0} of repo_root, otherwise the bash command probably ran in a
    # different repo and this reflog is unrelated. Prefix comparison because
    # expected_post_sha is typically the 7-char abbreviation captured from
    # bash stdout by _COMMIT_SHA_RE (git's default core.abbrev floor) while
    # post_sha is the full 40-char %H — strict equality would always fail and
    # silently disable the delta path.
    if expected_post_sha and not post_sha.startswith(expected_post_sha):
        return None
    return pre_sha or None
|
||
|
||
# Signals that only git itself prints when a real commit object was created.
# pre-commit / lint-staged / husky hook output does NOT emit them, although
# it can contain bracketed labels such as `[pre-commit abc1234]` that would
# otherwise pass for a commit line.
_COMMIT_DIFFSTAT_PATTERNS = [
    # Diffstat summary; may appear anywhere in a line.
    re.compile(r'\b\d+ files? changed'),
    # Per-file mode/rename notes; each must start a line.
    *(
        re.compile(line_start, re.MULTILINE)
        for line_start in (r'^ create mode ', r'^ delete mode ', r'^ rename ')
    ),
]
|
||
|
||
# Capture-group form of the [branch sha] pattern. Mirrors Claude Code's own
# commit-id parsing, but tolerates spaces before the sha (covers
# `[detached HEAD abc1234]`). 7–40 hex chars: git's abbrev floor through full
# sha; the abbrev resolves fine with `git show`. Anchored to line-start so a
# `[hex]` in the commit subject (`[main abc] Revert [e38]`) or trailing hook
# output isn't picked up and fed to `git show`. Group 1 is the sha, which
# must be immediately followed by the closing `]`.
_COMMIT_SHA_RE = re.compile(r'^\[[^\]]*?\b([0-9a-f]{7,40})\]', re.MULTILINE)

# Regex matching `git commit` commands. Mirrors Claude Code's own commit
# detection — it does NOT tolerate `git -c k=v commit` global options, which
# keeps this hook aligned with CC's commit attribution on what counts as a
# commit. `commit` must be followed by whitespace or end of string, so
# `git commit-tree` does not match.
_GIT_COMMIT_RE = re.compile(r'\bgit\s+commit(?:\s|$)')
# A whitespace-preceded `--amend` flag anywhere in the command string.
_GIT_AMEND_RE = re.compile(r'\s--amend\b')
|
||
|
||
# Rolling-window cap on LLM commit-review calls. See atomic_check_rate_limit
# docstring for the rationale that motivated the switch from a lifetime cap.
# `MAX_COMMIT_REVIEWS_PER_SESSION` is read for backward-compat with users who
# tuned it; the value is reinterpreted as per-hour.
#
# Lookup order: MAX_COMMIT_REVIEWS_PER_HOUR, then the legacy name, then 20.
# An empty variable falls through to the next candidate. A non-integer value
# keeps the default of 20 instead of raising ValueError at import time, which
# would abort the hook before it can do anything.
MAX_COMMIT_REVIEWS_PER_HOUR = 20
with contextlib.suppress(ValueError):
    MAX_COMMIT_REVIEWS_PER_HOUR = int(
        os.environ.get("MAX_COMMIT_REVIEWS_PER_HOUR")
        or os.environ.get("MAX_COMMIT_REVIEWS_PER_SESSION")
        or "20"
    )

# Length of the rolling window in seconds (default: one hour). Unparsable
# values keep the default for the same reason as above.
COMMIT_REVIEW_RATE_WINDOW_S = 3600
with contextlib.suppress(ValueError):
    COMMIT_REVIEW_RATE_WINDOW_S = int(
        os.environ.get("COMMIT_REVIEW_RATE_WINDOW_S", "3600")
    )
|
||
|
||
# ─── push-sweep ─────────────────────────────────────────────────────────────
#
# Mirrors Claude Code's own push-command matching — tolerates `git -C <p>` /
# `git -c k=v` global options (and `--name=value` long options) between
# `git` and `push`. The hooks.json `Bash(git push:*)` matcher (subcommand
# prefix) doesn't, but those forms are rare in practice and the python only
# ever runs after CC's matcher fired, so this regex is a defensive re-gate,
# not a widening — `git -C path push` won't reach python unless chained with
# a plain `git push` in the same compound command.
#
# `gh pr create` is intentionally NOT a separate hooks.json matcher: gh runs
# `git push` as a child process, which CC's matcher doesn't observe (it sees
# only the top-level `gh pr create` argv). A separate `Bash(gh pr create:*)`
# entry would buy minimal extra coverage (sessions that push only via gh) at
# the cost of an extra python spawn on every `... && gh pr create` compound
# (the common case). Those sessions are caught on their next standalone `git push`.
_GIT_PUSH_RE = re.compile(
    r'\bgit(?:\s+-[cC]\s+\S+|\s+--\S+=\S+)*\s+push\b'
)

# `git push` output: "abc1234..def5678 branch -> branch" (or `+abc...def` on
# force, `* [new branch]` on first push, which this regex does not match).
# The left sha is where the remote was BEFORE this push — exactly the base we
# need. Captures (old, new, local-ref) so the handler can verify the pushed
# ref == HEAD before diffing — `git push origin other` while on a different
# branch would otherwise diff the wrong range. Both the two-dot and the
# three-dot separator are accepted.
_PUSH_RANGE_RE = re.compile(
    r'^\s*\+?\s*([0-9a-f]{7,40})\.\.\.?([0-9a-f]{7,40})\s+(\S+)\s+->\s+\S+',
    re.MULTILINE,
)
|
||
|
||
# Push-sweep limits, each overridable through an SG_PUSH_SWEEP_* environment
# variable. An empty or non-integer value keeps the default shown here rather
# than raising ValueError at import time, which would abort the hook.
MAX_PUSH_SWEEP_FILES = 30
with contextlib.suppress(ValueError):
    MAX_PUSH_SWEEP_FILES = int(os.environ.get("SG_PUSH_SWEEP_MAX_FILES", "30"))

MAX_PUSH_SWEEP_RANGE = 50
with contextlib.suppress(ValueError):
    MAX_PUSH_SWEEP_RANGE = int(os.environ.get("SG_PUSH_SWEEP_MAX_RANGE", "50"))

PUSH_SWEEP_REPORT_CAP = 3
with contextlib.suppress(ValueError):
    PUSH_SWEEP_REPORT_CAP = int(os.environ.get("SG_PUSH_SWEEP_REPORT_CAP", "3"))
|
||
|
||
def _claim_bash_hook_once(input_data):
    """Ensure only one spawn handles a given Bash tool call.

    `git commit -m x && git push` matches both the `Bash(git commit:*)` and
    the `Bash(git push:*)` hooks.json `if` configs, so CC launches this
    script twice with the SAME `tool_use_id`. The first spawn atomically
    creates a sentinel file under `.git/`; later spawns find it and back
    off. That avoids duplicate LLM calls (and a duplicate asyncRewake) on
    compound commands.

    Returns True when this spawn won the claim, or when de-duplication is
    not possible (no tool_use_id / cwd / git dir, or the sentinel cannot be
    written); returns False when another spawn already holds the claim.

    The sentinel is per-clone (`.git/sg-hook-once-<tool_use_id>`) rather
    than in /tmp so concurrent CC sessions in *different* repos don't
    collide. Sentinels older than 5 minutes are removed opportunistically.
    """
    tuid = input_data.get("tool_use_id")
    cwd = input_data.get("cwd")
    if not (tuid and cwd):
        return True
    gd = _git_dir(_git_toplevel(cwd) or cwd)
    if not gd:
        return True

    # Best-effort GC so stale sentinels don't pile up; every failure is ignored.
    import time as _time
    now = _time.time()
    try:
        leftovers = [n for n in os.listdir(gd) if n.startswith("sg-hook-once-")]
    except OSError:
        leftovers = []
    for leftover in leftovers:
        stale_path = os.path.join(gd, leftover)
        try:
            if now - os.path.getmtime(stale_path) > 300:
                os.unlink(stale_path)
        except OSError:
            pass

    # The id is CC-generated (toolu_<b64ish>) but ends up in a path, so it is
    # defensively reduced to a filesystem-safe basename first.
    safe = re.sub(r"[^A-Za-z0-9_-]", "_", tuid)[:80]
    sentinel = os.path.join(gd, f"sg-hook-once-{safe}")
    try:
        # O_EXCL makes the creation itself the atomic claim.
        os.close(os.open(sentinel, os.O_CREAT | os.O_EXCL | os.O_WRONLY))
    except FileExistsError:
        return False
    except OSError:
        # Sentinel not writable (read-only fs, perms) — go ahead rather than
        # silently dropping the review.
        return True
    return True
|
||
|
||
def is_push_sweep_enabled():
    """Decide whether the push-sweep PostToolUse[Bash] hook should run.

    Push-sweep is on by default. Two switches can turn it off, checked in
    this order:

    1. ENABLE_COMMIT_REVIEW falsy — the unconditional kill switch
       (push-sweep reuses the same review pipeline and budget).
    2. SG_PUSH_SWEEP set to `0` or `off` (case-insensitive, surrounding
       whitespace ignored) — the per-user opt-out.

    `1`/`on`, an unset variable, and any unrecognized value all leave the
    feature enabled.
    """
    if not ENABLE_COMMIT_REVIEW:
        return False
    user_override = os.environ.get("SG_PUSH_SWEEP", "").strip().lower()
    # Only an explicit opt-out disables; everything else keeps the default.
    return user_override not in ("0", "off")
|
||
|
||
# Resolved once when the module is loaded; later changes to SG_PUSH_SWEEP in the
# environment do not affect this flag.
PUSH_SWEEP_ENABLED = is_push_sweep_enabled()
|
||
|
||
def _compute_push_sweep_base(prev_upstream, push_range, reviewed):
|
||
"""Advance the diff base past the contiguous reviewed prefix.
|
||
|
||
Spec: review `git diff B..HEAD` where `B` is the newest commit such that
|
||
`prev_upstream..B` is entirely in `reviewed`. Returns (B, unreviewed_tail).
|
||
`B == None` means the whole range is reviewed (caller should skip).
|
||
`push_range` must be oldest→newest.
|
||
|
||
Examples (✓=reviewed, ✗=not):
|
||
[✓1, ✗2, ✓3] → B=1, tail=[2,3] (cannot trim suffix; Read is at HEAD)
|
||
[✓1, ✓2, ✓3] → B=None (all reviewed → skip)
|
||
[✗1, ✓2, ✗3] → B=prev_upstream, tail=[1,2,3]
|
||
[] → B=None
|
||
"""
|
||
i = 0
|
||
while i < len(push_range) and push_range[i] in reviewed:
|
||
i += 1
|
||
if i == len(push_range):
|
||
return None, []
|
||
base = push_range[i - 1] if i > 0 else prev_upstream
|
||
return base, push_range[i:]
|
||
|
||
def _push_section(bash_output):
|
||
"""Return the slice of `bash_output` that contains the push's range lines.
|
||
|
||
`_PUSH_RANGE_RE` is not push-specific — `git fetch` and `git pull` print
|
||
range lines (`abc..def branch -> origin/branch`) in the same format. On
|
||
chained calls the Bash tool returns combined stdout+stderr, so a naive
|
||
`_PUSH_RANGE_RE.finditer(bash_output)` matches both sections and a
|
||
fetch+push compound trips the multi-ref skip.
|
||
|
||
`git push` prints `To <remote>` immediately before its range lines;
|
||
`git fetch`/`git pull` prints `From <remote>` before theirs. The slice
|
||
is symmetric: start at the LAST `To <remote>` header (strips fetch output
|
||
that ran *before* the push, e.g. `git fetch && git push`), and end at
|
||
the next `From <remote>` after that (strips fetch output that ran
|
||
*after* the push, e.g. `git push && git fetch`).
|
||
|
||
If no `To ` header is present (push failed before connecting, output
|
||
suppressed by `-q`) the full buffer is returned and the caller's
|
||
other guards handle it.
|
||
"""
|
||
if not bash_output:
|
||
return ""
|
||
# Match line-anchored "To " — look for "\nTo " or "To " at start-of-string.
|
||
idx = bash_output.rfind("\nTo ")
|
||
if idx >= 0:
|
||
section = bash_output[idx:]
|
||
elif bash_output.startswith("To "):
|
||
section = bash_output
|
||
else:
|
||
return bash_output
|
||
# Strip a trailing fetch/pull `From <remote>` block (push && fetch /
|
||
# push && pull, or any wrapper that re-syncs after the push).
|
||
end = section.find("\nFrom ")
|
||
if end >= 0:
|
||
section = section[:end]
|
||
return section
|
||
|
||
def _detect_prev_upstream(repo_root, bash_output):
    """Work out where the remote pointed BEFORE this push.

    Sources are tried in order of decreasing precision:

    1. The `abc..def` range parsed from the push section of the Bash
       output — authoritative and exact.
    2. `@{u}@{1}`, then `@{push}@{1}` — the remote-tracking ref's previous
       reflog position. PostToolUse runs after `git push` completes, so
       `@{u}` is already updated and `@{u}@{1}` is the prior value. Only
       meaningful when an upstream is configured.
    3. The merge-base of HEAD with the detected main branch — covers the
       first push of a new branch (`* [new branch]` in output, no upstream
       reflog yet).

    Returns:
        A resolvable ref/sha string, or None when every source fails.
    """

    def _git_stdout(*git_args):
        # Run one short git query in repo_root. Yields the stripped stdout
        # on success with non-empty output, otherwise None (including
        # timeout / git missing / OS errors).
        try:
            proc = subprocess.run(
                [*GIT_CMD, *git_args],
                cwd=repo_root, capture_output=True, text=True, timeout=5,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return None
        if proc.returncode != 0:
            return None
        return proc.stdout.strip() or None

    range_match = _PUSH_RANGE_RE.search(_push_section(bash_output or ""))
    if range_match:
        return range_match.group(1)

    for reflog_ref in ("@{u}@{1}", "@{push}@{1}"):
        previous = _git_stdout("rev-parse", "--verify", "-q", reflog_ref)
        if previous:
            return previous

    main_branch = _detect_main_branch(repo_root)
    if not main_branch:
        return None
    return _git_stdout("merge-base", "HEAD", main_branch)
|
||
|
||
def is_commit_review_enabled():
    """Decide whether the commit-review PostToolUse[Bash] hook should run.

    Commit review is on by default. It is turned off only by:

    1. ENABLE_COMMIT_REVIEW falsy — the unconditional kill switch.
    2. SECURITY_GUIDANCE_COMMIT_REVIEW set to `off` (case-insensitive,
       surrounding whitespace ignored) — the legacy per-user override.

    `on`, an unset variable, and any other value leave it enabled. The
    resulting flag is still emitted as `commit_review_on` in metrics for
    continuity.
    """
    if not ENABLE_COMMIT_REVIEW:
        return False
    legacy_override = os.environ.get(
        "SECURITY_GUIDANCE_COMMIT_REVIEW", ""
    ).strip().lower()
    # "on" and every unrecognized value both resolve to enabled, so the only
    # value that matters is an explicit "off".
    return legacy_override != "off"
|
||
|
||
# Resolved once when the module is loaded; later changes to
# SECURITY_GUIDANCE_COMMIT_REVIEW in the environment do not affect this flag.
COMMIT_REVIEW_ENABLED = is_commit_review_enabled()
|
||
|
||
def _agentic_review_with_race(
    repo_root: str,
    diff_files: List[Tuple[str, str]],
    rel_touched: List[str],
    previous_findings: List[Dict[str, Any]],
) -> Tuple[Optional[str], List[Dict[str, Any]], Dict[str, Any]]:
    """Race the agentic reviewer against a delayed single-shot fallback.

    Agentic starts at t=0. After SG_AGENTIC_RACE_DELAY_S (default 180s), the
    single-shot diff reviewer also starts. Whichever finishes first wins. If
    agentic finishes before the delay elapses, the fallback never runs.

    Both workers are daemon threads, so a losing worker that is still
    running does not keep the process alive.

    Args:
        repo_root: repository root, passed through to agentic_review().
        diff_files: (path, diff text) pairs to review.
        rel_touched: touched paths, passed through to agentic_review().
        previous_findings: earlier findings, passed to the single-shot
            fallback (analyze_code_security) for dedup.

    Returns:
        `(guidance, vulns, metrics)` from the winning reviewer. `metrics` is
        a copy of the winner's dict with these keys added:
          race_winner  : 1 = agentic won, 2 = fallback won (CC accepts only
                         bool/finite-number metric values — strings would
                         discard the dict)
          race_delay_s : the delay actually used
          race_started : 1 if the fallback was actually launched, else 0
        With SG_AGENTIC_NO_RACE=1 the result of agentic_review() is returned
        unchanged and no race keys are added.

    Only the commit-review handler calls this — external harnesses invoke
    agentic_review() directly and are unaffected. SG_AGENTIC_NO_RACE=1
    disables the race for any other caller that wants pure agentic.
    """
    import queue as _queue
    import threading as _th

    if os.environ.get("SG_AGENTIC_NO_RACE") == "1":
        return agentic_review(repo_root, diff_files, rel_touched)

    # A malformed value falls back to the default instead of crashing the
    # hook; a negative value is clamped so the fallback still launches.
    try:
        delay_s = max(0, int(os.environ.get("SG_AGENTIC_RACE_DELAY_S", "180")))
    except ValueError:
        delay_s = 180

    q: "_queue.Queue[Tuple[str, Any]]" = _queue.Queue(maxsize=1)
    fallback_started = _th.Event()
    # Set as soon as agentic has a result. The fallback must key off this
    # event rather than `q.empty()`: the main thread is blocked in q.get()
    # and drains the queue the moment agentic publishes, so by the time the
    # fallback wakes the queue looks empty even though agentic already won.
    agentic_done = _th.Event()

    def _agentic() -> None:
        try:
            r = agentic_review(repo_root, diff_files, rel_touched)
        except Exception as e:  # pragma: no cover
            # A crash is reported as an `agentic_fallback` result; the caller
            # decides whether to run the single-shot reviewer itself.
            r = (None, [], {"agentic_fallback": f"race_crash:{type(e).__name__}"})
        agentic_done.set()
        try:
            q.put_nowait(("agentic", r))
        except _queue.Full:
            # The fallback already published — its result stands.
            pass

    def _fallback() -> None:
        # Wait out the head start; wake early (and do nothing) if agentic
        # finishes within the delay.
        if agentic_done.wait(delay_s):
            return
        fallback_started.set()
        try:
            g, v = analyze_code_security(
                diff_files, is_diff=True, previous_findings=previous_findings
            )
        except Exception:  # pragma: no cover
            g, v = None, []
        try:
            q.put_nowait(("fallback", (g, v, {"agentic": False})))
        except _queue.Full:
            # Agentic published first — its result stands.
            pass

    _th.Thread(target=_agentic, daemon=True).start()
    _th.Thread(target=_fallback, daemon=True).start()

    winner, (g, v, m) = q.get()
    m = dict(m)  # don't mutate the callee's metrics dict
    m["race_winner"] = 1 if winner == "agentic" else 2
    m["race_delay_s"] = delay_s
    m["race_started"] = 1 if fallback_started.is_set() else 0
    return g, v, m
|
||
|
||
def handle_commit_review_posttooluse(input_data):
    """PostToolUse handler for Bash — reviews git commits for security issues.

    Runs as asyncRewake: detects `git commit` in the Bash command, parses
    the resulting SHA(s) from the Bash stdout `[branch sha] msg` line, runs
    `git show -p <sha>` per SHA, sends the combined diff through
    analyze_code_security, and exits with code 2 (stderr findings) to wake
    the model. Deduplicates against the shared previous_findings state so
    the Stop hook won't re-flag the same (filePath, vulnerableCode) pair.

    Args:
        input_data: hook payload dict. Keys read here: `session_id`,
            `cwd`, `tool_input["command"]`, and `tool_response`'s
            `stdout` / `stderr` / `interrupted`.

    This function never returns; every path ends in sys.exit():
        exit 0 — the command is not a git commit, any skip path (a
                 `skip_reason` metric is emitted), no findings, or all
                 findings already known.
        exit 2 — new findings were written to stderr.
    """
    session_id = input_data.get("session_id", "default")
    tool_input = input_data.get("tool_input", {})
    # Same defensive normalization as tool_response below: a null/non-dict
    # payload is treated as "no command" instead of raising AttributeError.
    if not isinstance(tool_input, dict):
        tool_input = {}
    tool_response = input_data.get("tool_response", {})
    cwd = input_data.get("cwd", "")

    command = tool_input.get("command", "")
    if not isinstance(command, str) or not _GIT_COMMIT_RE.search(command):
        # Defensive only — hooks.json's `"if": "Bash(git commit:*)"` is the
        # real gate so CC never spawns python3 for ls/grep/etc. This catches
        # cases where CC's command matching fails open and spawns the hook anyway.
        sys.exit(0)

    debug_log("Commit review: detected git commit in command")

    # Bash tool_response has no exit_code field (only stdout, stderr,
    # interrupted), so success is inferred from the output text — the same
    # heuristic Claude Code itself uses.
    if not isinstance(tool_response, dict):
        tool_response = {}
    stdout = tool_response.get("stdout", "") or ""
    stderr = tool_response.get("stderr", "") or ""
    bash_output = stdout + "\n" + stderr
    interrupted = bool(tool_response.get("interrupted"))

    # Require BOTH a line-anchored `[branch sha]` AND a git-only diffstat
    # signal before treating the tool call as a successful commit. The old
    # `any()` check false-positived on (a) pre-commit/husky/lint-staged hooks
    # emitting labels like `[pre-commit abc1234]`, and on (b) chained
    # `git commit || git log --stat` where `N files changed` appears in output
    # even though the commit itself failed.
    commit_succeeded = (
        not interrupted
        and _COMMIT_SHA_RE.search(bash_output) is not None
        and any(p.search(bash_output) for p in _COMMIT_DIFFSTAT_PATTERNS)
    )

    # commit_review_on emitted on every path so telemetry can filter on
    # commit_review and group by commit_review_on.
    _base = {"commit_review": True, "commit_review_on": COMMIT_REVIEW_ENABLED}

    # Reflog fallback for hidden stdout. Analysis of skip_reason=21 emissions
    # showed a large share were commits that DID succeed
    # but whose `[branch sha]` line was hidden by piping/redirection/-q
    # (e.g., `git commit -m ... 2>&1 | tail -3`). A HEAD@{0}
    # reflog check substantially reduced this skip; follow-up analysis found
    # the residual is dominated by (a) chained commands moving HEAD@{0} past
    # `commit:` (`git commit && git push`), and (b) the `_obvious_noop` guard
    # false-positiving on chained `git status` output after a successful -q
    # commit. Widening to the last-5-entries × 120s scan and dropping the noop
    # guard fixes both. The reviewed-shas dedup below prevents the wider window
    # from re-reviewing a prior Bash call's commit, and is the same file
    # push-sweep reads — so a SHA is reviewed at most once across both
    # surfaces. See _git_reflog_recent_commits docstring for cross-repo /
    # race safety.
    _reflog_shas: List[str] = []
    _skip_21_sub = 0
    if not commit_succeeded and not interrupted and cwd:
        _root = _git_toplevel(cwd)
        _fresh, _stale = _git_reflog_recent_commits(_root)
        if _fresh:
            _already = _load_reviewed_shas(_root)
            _reflog_shas = [s for s in _fresh if s not in _already]
            if _reflog_shas:
                commit_succeeded = True
                debug_log(
                    f"Commit review: stdout had no `[branch sha]`; reflog "
                    f"shows {len(_reflog_shas)} fresh unreviewed commit(s) "
                    f"({_reflog_shas[0][:12]}...)"
                )
            else:
                # Fresh commit(s) in reflog but all already in
                # sg-reviewed-shas — likely a Bash retry or the commit was
                # reviewed via a prior fire. Correct to skip; sub=2 lets telemetry
                # split this from genuine fails.
                _skip_21_sub = 2
        elif _stale:
            _skip_21_sub = 3  # commit entries exist but all >120s old
        else:
            _skip_21_sub = 4  # no commit-action entries — genuine fail

    if not commit_succeeded:
        debug_log("Commit review: commit did not succeed, skipping")
        # skip_21_sub: 1 = interrupted, 2/3/4 = reflog outcomes above;
        # omitted when the reflog scan did not run.
        emit_metrics({"skipped": True, "skip_reason": 21, **_base,
                      **({"skip_21_sub": 1} if interrupted
                         else {"skip_21_sub": _skip_21_sub} if _skip_21_sub
                         else {})})
        sys.exit(0)

    if not COMMIT_REVIEW_ENABLED:
        debug_log("Commit review: disabled, skipping")
        emit_metrics({"skipped": True, "skip_reason": 32, **_base})
        sys.exit(0)

    if not ENABLE_CODE_SECURITY_REVIEW or not HAS_API_CREDENTIALS:
        debug_log("Commit review: LLM review disabled or no API credentials")
        emit_metrics({"skipped": True, "skip_reason": 22, **_base})
        sys.exit(0)

    if not ensure_anthropic_reachable():
        debug_log("Commit review: api.anthropic.com unreachable")
        emit_metrics({"skipped": True, "skip_reason": 24, **_base})
        sys.exit(0)

    if not cwd:
        debug_log("Commit review: no cwd")
        emit_metrics({"skipped": True, "skip_reason": 25, **_base})
        sys.exit(0)

    repo_root = _git_toplevel(cwd)
    if not repo_root:
        debug_log("Commit review: not in a git repo")
        emit_metrics({"skipped": True, "skip_reason": 26, **_base})
        sys.exit(0)

    # Pin the review to the exact SHA the Bash command produced, parsed from
    # its stdout. Reviewing HEAD instead is wrong when the commit was made in
    # a different repo than the hook's cwd (`cd ../other && git commit && cd -`,
    # subshells), or when a second commit lands before this async hook reaches
    # `git show` — both would review an unrelated commit. The reflog-action
    # fallback above is the narrow exception: it only fires when output gave
    # us nothing AND the cwd repo's own reflog confirms a `commit:` just
    # happened there, which rules out the cross-repo case.
    #
    # Take only the LAST match: pre-commit/husky hooks can print bracketed
    # labels like `[pre-commit abc1234]` that precede the real `[branch sha]`
    # line; chained commands like `git commit && git commit` produce multiple
    # real SHAs and we want the most recent. The real commit line is always
    # last in git's own output — the earlier matches are either decoys or
    # superseded commits.
    if _reflog_shas:
        # Output-based detection already failed above; the reflog SHAs are the
        # authoritative ones. Don't re-parse bash_output here — any bracketed
        # token it contains is by construction NOT the `[branch sha]` line
        # (or commit_succeeded would have been True via the fast path). The
        # list is newest-first and may contain >1 entry when a single Bash
        # call made multiple commits (`git commit -m a && git commit -m b`);
        # all are reviewed.
        shas = _reflog_shas
    else:
        all_shas = _COMMIT_SHA_RE.findall(bash_output)
        shas = [all_shas[-1]] if all_shas else []
    if not shas:
        debug_log("Commit review: no SHA in commit output")
        emit_metrics({"skipped": True, "skip_reason": 33, **_base})
        sys.exit(0)
    if _reflog_shas:
        # Observability: track how often the fallback path is hit so
        # future analysis can split on it.
        # `reflog_shas_n` lets telemetry measure how often the widened scan picked
        # up >1 commit (i.e., chained `git commit && git commit`).
        _base = {**_base, "sha_via_reflog": True,
                 "reflog_shas_n": len(_reflog_shas)}

    # `git commit --amend`: review only the delta added by the amend
    # (pre-amend..post-amend) instead of the full amended commit. Without this,
    # the amend re-reviews the entire commit including code already reviewed
    # on the original commit, costing 30-60s of LLM time and re-flagging
    # findings the user may have just amended IN ORDER TO fix. Pre-amend
    # SHA comes from the reflog and is validated to be an amend (see
    # _resolve_amend_pre_sha) — otherwise we fall back to full-commit review.
    #
    # Three guards skip the delta path and fall back to full `git show`
    # review. All three close variants of "chained `git commit && git commit
    # --amend` in one Bash call", which would otherwise enter the delta path,
    # see an empty `git diff sha_wip sha_amend`, emit skip_reason=35, and
    # silently drop the first commit's content from review (no prior
    # PostToolUse fired for it — same Bash call):
    #
    # 1. `not _reflog_shas`: reflog fallback path was taken (both commits'
    #    bash output suppressed via -q / pipe / redirect). The multi-SHA scan
    #    already populates `shas` with every fresh commit (amend + any
    #    pre-amend WIP) and the loop below `git show`s each, so coverage is
    #    correct without delta — and the delta path doesn't compose with a
    #    multi-SHA `shas` list (it would diff every entry against the same
    #    pre-amend SHA). Losing the 30-60s saving on the reflog-fallback
    #    fraction is an acceptable trade.
    #
    # 2. `len(all_shas) <= 1`: both commits visible (no -q). Two `[branch
    #    sha]` lines in bash_output → all_shas len 2. Only defined on the
    #    bash-output path; short-circuit ordering keeps it unevaluated when
    #    `_reflog_shas` is non-empty.
    #
    # 3. `commit_invocations <= 1`: asymmetric — first commit -q, amend
    #    visible. Fast-path fires on the amend's `[branch sha]` line (so
    #    `_reflog_shas` stays empty), all_shas = [sha_amend] (len 1) — guards
    #    1 and 2 both pass. The command string itself is the only remaining
    #    signal that two commits happened. False-positives (e.g.
    #    `git commit --amend -m "fix git commit bug"`) are safe — they fall
    #    back to full review.
    is_amend = bool(_GIT_AMEND_RE.search(command))
    commit_invocations = len(_GIT_COMMIT_RE.findall(command))
    pre_amend_sha = None
    if (is_amend and not _reflog_shas and len(all_shas) <= 1
            and commit_invocations <= 1):
        pre_amend_sha = _resolve_amend_pre_sha(repo_root, expected_post_sha=shas[0])
    if is_amend and pre_amend_sha:
        _base = {**_base, "amend_delta_review": True}
        debug_log(
            f"Commit review: --amend detected; reviewing delta "
            f"{pre_amend_sha[:12]}..{shas[-1][:12]}"
        )

    # --no-color: `color.ui=always` would emit ANSI escapes that corrupt
    # parse_diff_into_files' header match. Bytes + errors='replace': commits
    # can contain non-UTF8 source (latin-1, cp1252) and text=True would raise
    # UnicodeDecodeError outside the except clause.
    diff_files = []
    resolved = 0
    for sha in shas:
        try:
            if pre_amend_sha:
                # Delta review: pre-amend → post-amend. `git diff` (not show)
                # so the output is a pure unified diff with no commit header.
                result = subprocess.run(
                    [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
                    cwd=repo_root, capture_output=True, timeout=15
                )
            else:
                result = subprocess.run(
                    [*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
                    cwd=repo_root, capture_output=True, timeout=15
                )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
            _cmd = "git diff" if pre_amend_sha else "git show"
            debug_log(f"Commit review: {_cmd} {sha} error: {e}")
            continue
        if result.returncode != 0:
            # SHA not in this repo (cross-repo commit) or already gc'd. Better
            # to skip than to fall back to HEAD and review the wrong commit.
            _cmd = "git diff" if pre_amend_sha else "git show"
            debug_log(f"Commit review: {_cmd} {sha} rc={result.returncode}")
            continue
        resolved += 1
        diff_files.extend(parse_diff_into_files(
            result.stdout.decode("utf-8", errors="replace")))

    # Dedup by path. The widened reflog scan can return >1 SHA (e.g.
    # `git commit && git commit --amend` within 120s); a path that appears in
    # both diffs would consume two MAX_DIFF_FILES slots and be re-analyzed.
    # `shas` is newest-first so the first occurrence is the most recent
    # version of the file — keep it.
    if len(shas) > 1:
        _seen = set()
        diff_files = [
            (fp, c) for fp, c in diff_files
            # set.add() returns None, so this both tests and records `fp`.
            if not (fp in _seen or _seen.add(fp))
        ]

    if resolved == 0:
        debug_log("Commit review: no parsed SHA resolved in cwd repo")
        emit_metrics({"skipped": True, "skip_reason": 28, **_base,
                      "shas_found": len(shas)})
        sys.exit(0)

    # Empty amend delta = message-only amend (or whitespace-only that the
    # diff already collapses). No code to review; skip cleanly. skip_reason=35.
    # Gated on resolved > 0 so subprocess failures (caught with `continue`
    # above) don't get mislabeled as message-only — they fall through to
    # skip_reason=28 correctly.
    if pre_amend_sha and not diff_files:
        debug_log("Commit review: --amend produced empty delta (message-only?), skipping")
        emit_metrics({"skipped": True, "skip_reason": 35, **_base,
                      "files_reviewed": 0})
        sys.exit(0)

    debug_log(f"Commit review: {resolved}/{len(shas)} sha(s) resolved, "
              f"{len(diff_files)} files")
    if not diff_files:
        debug_log("Commit review: no reviewable source files in commit")
        emit_metrics({"skipped": True, "skip_reason": 30, **_base})
        sys.exit(0)

    # Large commits (initial scaffolds, big refactors) used to bail here with
    # skip_reason=31. Large multi-file changes are exactly where
    # cross-file source→sink vulns hide. Reviewing nothing is
    # worse than reviewing the riskiest 30 — _cap_files_for_prompt already
    # bounds total bytes downstream so this can't blow context.
    # `diff_files_dropped` lets telemetry measure how often the prioritizer engages
    # and how much it drops; skip_reason=31 is now reserved for the truly
    # pathological case (e.g. >300 source files — almost certainly a bad
    # baseline, not a real commit).
    if len(diff_files) > 10 * MAX_DIFF_FILES:
        debug_log(f"Commit review: pathological diff ({len(diff_files)} files), skipping")
        emit_metrics({"skipped": True, "skip_reason": 31, **_base,
                      "diff_files_count": len(diff_files)})
        sys.exit(0)
    diff_files, _dropped = _prioritize_diff_files(diff_files, MAX_DIFF_FILES)
    if _dropped:
        debug_log(f"Commit review: prioritized to {len(diff_files)} files "
                  f"(dropped {_dropped} lower-risk)")
        _base = {**_base, "diff_files_dropped": _dropped}

    # Rolling-hour rate limit on LLM spend, so only burn a slot once we know
    # we'll actually call analyze_code_security — skip 28/30/31/33 above are
    # free. `rate_count` is emitted on every fire (not just rejections) so
    # telemetry can show how close to the cap sessions run.
    _allowed, _rate_n = atomic_check_rate_limit(
        session_id, "CommitReview",
        MAX_COMMIT_REVIEWS_PER_HOUR, COMMIT_REVIEW_RATE_WINDOW_S)
    _base = {**_base, "rate_count": _rate_n}
    if not _allowed:
        debug_log("Commit review: hourly rate limit reached, skipping")
        emit_metrics({"skipped": True, "skip_reason": 23, **_base})
        sys.exit(0)

    # Read previous_findings for dedup (shared with Stop hook)
    import time as _time
    now = _time.time()

    def _read_previous(state):
        # Findings older than the TTL are treated as absent.
        findings_ts = state.get("previous_findings_ts", 0)
        if (now - findings_ts) > PREVIOUS_FINDINGS_TTL_SEC:
            return []
        return list(state.get("previous_findings", []))

    previous_findings = with_locked_state(session_id, _read_previous) or []

    review_start = _time.time()

    agentic_metrics: Dict[str, Any] = {}
    if _agentic_commit_review_enabled():
        rel_touched = [fp for fp, _ in diff_files]
        concrete_guidance, vulns, _am = _agentic_review_with_race(
            repo_root, diff_files, rel_touched, previous_findings
        )
        agentic_metrics.update(_am)
        # Fall back to single-shot only on agentic FAILURE (SDK/investigate
        # crash). If agentic completed and returned 0 findings, trust that.
        if agentic_metrics.get("agentic_fallback"):
            concrete_guidance, vulns = analyze_code_security(
                diff_files, is_diff=True, previous_findings=previous_findings
            )
    else:
        concrete_guidance, vulns = analyze_code_security(
            diff_files, is_diff=True, previous_findings=previous_findings
        )

    # push-sweep state: record this commit as reviewed (full 40-hex sha) so a
    # later `git push` can advance its diff base past it. Recorded here — after
    # the review ran but before any exit path — so it's marked regardless of
    # whether findings were emitted. `shas` holds abbreviated refs from
    # `[branch sha]`; resolve to full so set-membership in the push-sweep is
    # exact. Best-effort; failures here never block the review result.
    try:
        full_shas = []
        for s in shas:
            r = subprocess.run(
                [*GIT_CMD, "rev-parse", "--verify", "-q", s],
                cwd=repo_root, capture_output=True, text=True, timeout=5,
            )
            if r.returncode == 0:
                full_shas.append(r.stdout.strip())
        _append_reviewed_shas(repo_root, full_shas, vulns_found=len(vulns or []))
    except Exception:
        pass

    review_ms = int((_time.time() - review_start) * 1000)
    # `survived` is the raw self-refute count BEFORE the high/critical-only
    # severity filter; `survived_after_sev` is the count the user actually
    # sees. Include `survived_after_sev` ONLY when the filter actually
    # dropped candidates — otherwise it's redundant with `survived` and eats
    # into CC's 10-key emit cap, pushing files_reviewed/review_ms out of the
    # emitted metrics.
    #
    # CC accepts only booleans and finite numbers as metric values.
    # A null or string value makes CC discard the ENTIRE dict, so:
    #   - candidates/survived are omitted when None (early-return at
    #     candidates==0, or any fallback path)
    #   - agentic_fallback is mapped to an int reason code; the string detail
    #     stays in debug_log for diagnosis
    _sev_raw = agentic_metrics.get("survived")
    _sev_post = agentic_metrics.get("survived_after_sev")
    _cand = agentic_metrics.get("candidates")
    _fb = agentic_metrics.get("agentic_fallback")
    # 1 = SDK import failed (claude_agent_sdk not installed)
    # 2 = investigate stage failed (CLI/network/model error or schema-retry exhausted)
    _fb_code = (1 if _fb and _fb.startswith("import:") else 2) if _fb else None
    _race = agentic_metrics.get("race_winner")
    _agentic_m = (
        # `agentic` = which path produced the result, not which was attempted.
        # On race-loss the _fallback() metrics dict has agentic=False — emitting
        # True there blends the high-find-rate single-shot race-loss bucket into
        # `agentic=true` queries and overstates agentic yield.
        {"agentic": bool(agentic_metrics.get("agentic")),
         **({"candidates": _cand} if _cand is not None else {}),
         **({"survived": _sev_raw} if _sev_raw is not None else {}),
         **({"survived_after_sev": _sev_post}
            if _sev_post is not None and _sev_post != _sev_raw else {}),
         **({"agentic_fallback": _fb_code} if _fb_code is not None else {}),
         # 1 = agentic won, 2 = single-shot fallback won. review_ms already
         # captures timing; race_winner lets telemetry segment recall by which path
         # actually produced the result.
         **({"race_winner": _race} if _race is not None else {})}
        if agentic_metrics.get("agentic") or _fb or _race is not None
        else {}
    )

    if not concrete_guidance:
        debug_log("Commit review: no security issues found")
        emit_metrics({
            "vulns_found": 0, **_base, **_agentic_m,
            "files_reviewed": len(diff_files), "review_ms": review_ms,
            **({
                "api_error": llm._last_call_claude_http_error
            } if llm._last_call_claude_http_error is not None else {}),
        })
        sys.exit(0)

    # Late dedup: drop only what a concurrent Stop hook wrote while our LLM
    # ran. Anything in `previous_findings` (the pre-LLM snapshot) that the
    # LLM chose to re-flag is an intentional "fix incomplete" verdict.
    new_vulns, n_deduped = _dedup_against_state(
        session_id, vulns, prompted=_finding_keys(previous_findings)
    )

    if not new_vulns:
        debug_log("Commit review: all findings already known, skipping")
        emit_metrics({
            "vulns_found": 0, **_base, **_agentic_m, "deduped": n_deduped,
            "files_reviewed": len(diff_files), "review_ms": review_ms,
        })
        sys.exit(0)

    # Record new findings into shared state. Key on (filePath, category) —
    # vulnerableCode bytes drift between fires (diff context lines shift) so
    # matching on it under-dedupes; this aligns with Stop's _record_fire.
    finding_snapshots = [
        {
            "filePath": v.get("filePath", ""),
            "category": v.get("category", "Unknown"),
            "vulnerableCode": v.get("vulnerableCode", ""),
        }
        for v in new_vulns
    ]

    def _record_findings(state):
        # Merge the new snapshots into state, skipping (filePath, category)
        # pairs that are already recorded, and refresh the TTL timestamp.
        existing = [f for f in state.get("previous_findings", []) if isinstance(f, dict)]
        seen = {(f.get("filePath", ""), f.get("category", "")) for f in existing}
        for f in finding_snapshots:
            key = (f["filePath"], f["category"])
            if key not in seen:
                seen.add(key)
                existing.append(f)
        state["previous_findings"] = existing
        state["previous_findings_ts"] = _time.time()
    with_locked_state(session_id, _record_findings)

    # Only critical/high are emitted below; severities outside this dict are
    # not counted at all.
    sev = {"critical": 0, "high": 0, "medium": 0}
    for v in new_vulns:
        s = v.get("severity", "medium")
        if s in sev:
            sev[s] += 1

    emit_metrics({
        "vulns_found": len(new_vulns), **_base, **_agentic_m,
        "critical_count": sev["critical"], "high_count": sev["high"],
        "files_reviewed": len(diff_files), "review_ms": review_ms,
        **({"deduped": n_deduped} if n_deduped else {}),
    }, rewake_summary=_format_vulns_summary(new_vulns, prefix="Commit security review found"))

    # Rebuild guidance from new_vulns only — concrete_guidance from the LLM
    # still lists deduped entries.
    sys.stderr.write(PROVENANCE_BANNER + "\n\n"
                     + _format_vulns_guidance(new_vulns)
                     + CONTINUATION_SUFFIX + "\n")
    sys.exit(2)
|
||
|
||
def handle_push_sweep_posttooluse(input_data):
    """Review the just-pushed range as one diff, advancing the base past the
    contiguous prefix of already-per-commit-reviewed shas.

    Spec: review `git diff B..HEAD` where `B` is the newest commit such that
    `prev_upstream..B` is entirely in `.git/sg-reviewed-shas`. Skip if
    `B == HEAD`. Mark `B..HEAD` reviewed afterward.

    Diff and Read are both at HEAD (push doesn't move the working tree), so the
    agentic reviewer sees a consistent view — a vuln introduced in commit A and
    removed in commit B is absent from the net diff by construction. Any
    reviewed commits in the tail (after the first unreviewed one) are included
    in the diff; their findings are dropped by `_dedup_against_state` against
    `previous_findings` the per-commit hook already recorded.

    Metrics: `push_sweep: True` is the telemetry splitter; `pushed`/`unreviewed`/
    `prefix_advanced` give the funnel; skip_reasons 40-49 are reserved for
    this surface.

    Args:
        input_data: Hook payload dict. Keys read here: `tool_input.command`,
            `tool_response.stdout` / `.stderr` / `.interrupted`, `cwd` and
            `session_id`.

    This function never returns normally; every path ends in `sys.exit`:
        * exit 0 — any skip, or a completed review with no new findings.
        * exit 2 — new findings; guidance is written to stderr first.
    """
    tool_input = input_data.get("tool_input", {}) or {}
    tool_response = input_data.get("tool_response", {}) or {}
    command = tool_input.get("command", "") or ""
    cwd = input_data.get("cwd")
    session_id = input_data.get("session_id", "")
    # stdout and stderr are joined so push output can be scanned as one text
    # regardless of which stream it arrived on.
    bash_output = (
        (tool_response.get("stdout", "") or "")
        + "\n"
        + (tool_response.get("stderr", "") or "")
    )
    interrupted = tool_response.get("interrupted", False)

    # Re-gate: hooks.json `if` matched, but confirm with the broader regex
    # (defensive — `git -C`/`-c` forms won't reach here via the hooks.json
    # prefix matcher alone, but a compound with a plain `git push` would).
    if not _GIT_PUSH_RE.search(command):
        sys.exit(0)

    # Keys shared by every metrics line emitted from this handler. Rebound
    # (not mutated) further down when optional keys are added.
    _base = {"push_sweep": True, "push_sweep_on": PUSH_SWEEP_ENABLED}

    if not PUSH_SWEEP_ENABLED:
        emit_metrics({"skipped": True, "skip_reason": 40, **_base})
        sys.exit(0)
    if interrupted:
        emit_metrics({"skipped": True, "skip_reason": 21, **_base})
        sys.exit(0)
    if not ENABLE_CODE_SECURITY_REVIEW or not HAS_API_CREDENTIALS:
        emit_metrics({"skipped": True, "skip_reason": 22, **_base})
        sys.exit(0)
    if not cwd:
        emit_metrics({"skipped": True, "skip_reason": 25, **_base})
        sys.exit(0)
    repo_root = _git_toplevel(cwd)
    if not repo_root:
        emit_metrics({"skipped": True, "skip_reason": 26, **_base})
        sys.exit(0)

    # Guard: the sweep diffs `base..HEAD` and the agent Reads the working
    # tree, so the pushed ref MUST be HEAD or the review is of the wrong
    # range. `git push origin other` while checked out elsewhere, or a
    # multi-ref push, are skipped (skip_reason 44). Check the new-tip from
    # the `abc..def local -> remote` line against HEAD.
    #
    # Scope range-line detection to the push section of bash_output: a chained
    # `git fetch && git push` produces fetch range lines that the regex would
    # otherwise match too, false-tripping multi-ref. `_push_section` slices
    # forward from the last `To <remote>` header.
    #
    # If there are no range lines, we MUST also see a positive push-success
    # signal (`* [new branch]` or `Everything up-to-date`) AND verify the
    # pushed local ref resolves to HEAD before falling through to the
    # @{u}@{1}/merge-base detection. Without this, two real cases misdirect
    # the sweep: `git push origin feature2` while on `feature1` (no range
    # line, no HEAD check → reviews wrong branch and poisons reviewed-shas),
    # and rejected pushes (no range line, no `interrupted` signal → reviews
    # unpushed local commits and marks them reviewed). skip_reason=46 covers
    # both.
    #
    # NOTE(review): if `git rev-parse HEAD` fails, `head` stays None and
    # neither branch of the guard below runs, so the sweep continues without
    # the pushed-ref check — confirm that fail-open behavior is intended.
    head = None
    try:
        r = subprocess.run([*GIT_CMD, "rev-parse", "HEAD"], cwd=repo_root,
                           capture_output=True, text=True, timeout=5)
        head = r.stdout.strip() if r.returncode == 0 else None
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        pass
    push_section = _push_section(bash_output or "")
    range_matches = list(_PUSH_RANGE_RE.finditer(push_section))
    if range_matches and head:
        # Multi-ref push (multiple range lines) or pushed-tip ≠ HEAD → skip.
        if len(range_matches) > 1:
            emit_metrics({"skipped": True, "skip_reason": 44, **_base})
            sys.exit(0)
        # Group 2 of the range regex is used as the pushed tip; it is compared
        # as a prefix of the full HEAD sha.
        new_tip = range_matches[0].group(2)
        if not head.startswith(new_tip):
            debug_log(f"Push sweep: pushed tip {new_tip} != HEAD {head[:12]}")
            emit_metrics({"skipped": True, "skip_reason": 44, **_base})
            sys.exit(0)
    elif head:
        # No range lines. Need a positive push-success signal — otherwise
        # the push may have failed and we'd review unpushed local commits.
        new_branch_matches = re.findall(
            r"^\s*\*\s+\[new branch\]\s+(\S+)\s+->\s+\S+",
            push_section, re.M)
        up_to_date = "Everything up-to-date" in push_section
        # `git push -q` suppresses all output on success. Distinguish quiet-
        # success from a failed push (which has error text) by checking the
        # upstream's reflog: a successful push leaves @{u}@{1} (the prior
        # value) different from @{u} (now equal to HEAD). A rejected push
        # would not advance @{u}, so this signal is push-specific.
        quiet_success = False
        if not (bash_output or "").strip() and not interrupted:
            try:
                r_cur = subprocess.run(
                    [*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}"],
                    cwd=repo_root, capture_output=True, text=True, timeout=5)
                r_prev = subprocess.run(
                    [*GIT_CMD, "rev-parse", "--verify", "-q", "@{u}@{1}"],
                    cwd=repo_root, capture_output=True, text=True, timeout=5)
                cur = r_cur.stdout.strip() if r_cur.returncode == 0 else ""
                prev_u = r_prev.stdout.strip() if r_prev.returncode == 0 else ""
                quiet_success = bool(cur and prev_u and cur == head and prev_u != cur)
            except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
                pass
        if not (new_branch_matches or up_to_date or quiet_success):
            debug_log("Push sweep: no push-success signal in bash output")
            emit_metrics({"skipped": True, "skip_reason": 46, **_base})
            sys.exit(0)
        # `* [new branch] local -> remote`: verify the pushed local ref
        # resolves to HEAD. `git push origin feature2` while on feature1
        # would otherwise review feature1's commits and poison its
        # reviewed-shas state.
        # NOTE(review): a local ref that cannot be resolved (empty local_sha)
        # passes this check rather than skipping — confirm intended.
        for local_ref in new_branch_matches:
            try:
                r = subprocess.run(
                    [*GIT_CMD, "rev-parse", "--verify", "-q", local_ref],
                    cwd=repo_root, capture_output=True, text=True, timeout=5,
                )
                local_sha = r.stdout.strip() if r.returncode == 0 else ""
            except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
                local_sha = ""
            if local_sha and local_sha != head:
                debug_log(f"Push sweep: new-branch {local_ref} ({local_sha[:12]}) != HEAD {head[:12]}")
                emit_metrics({"skipped": True, "skip_reason": 44, **_base})
                sys.exit(0)

    prev_upstream = _detect_prev_upstream(repo_root, bash_output)
    if not prev_upstream:
        debug_log("Push sweep: could not determine prev_upstream")
        emit_metrics({"skipped": True, "skip_reason": 41, **_base})
        sys.exit(0)

    push_range = _git_rev_list_range(repo_root, prev_upstream, "HEAD")
    if not push_range:
        emit_metrics({"skipped": True, "skip_reason": 42, **_base, "pushed": 0})
        sys.exit(0)
    if len(push_range) > MAX_PUSH_SWEEP_RANGE:
        # Huge first-push of a long-lived branch — Stop hook is the backstop.
        emit_metrics({"skipped": True, "skip_reason": 43, **_base,
                      "pushed": len(push_range)})
        sys.exit(0)

    reviewed = _load_reviewed_shas(repo_root)
    # `base` is the diff base (None when nothing is left to review); `tail`
    # is the part of the pushed range that still needs covering.
    base, tail = _compute_push_sweep_base(prev_upstream, push_range, reviewed)
    prefix_advanced = len(push_range) - len(tail)
    if base is None:
        debug_log("Push sweep: every pushed commit already reviewed")
        emit_metrics({**_base, "pushed": len(push_range), "unreviewed": 0,
                      "prefix_advanced": prefix_advanced})
        sys.exit(0)

    debug_log(f"Push sweep: range={len(push_range)} prefix_advanced="
              f"{prefix_advanced} base={base[:12]} tail={len(tail)}")

    diff_text = _git_diff_range(repo_root, base, "HEAD")
    if diff_text is None:
        # Diff failed (non-zero exit / 30s timeout / git missing). Do NOT
        # mark `tail` reviewed — we did not actually review it. Marking
        # them would silently advance the prefix past unreviewed commits
        # forever (the whole point of push-sweep is to catch outside-CC
        # commits, and a 50-commit range over large files can hit the
        # 30s timeout). skip_reason=45 lets a retry / smaller subsequent
        # push still cover them, mirroring how skip_reason=31 handles
        # too-many-files without recording the tail.
        emit_metrics({**_base, "pushed": len(push_range),
                      "unreviewed": len(tail), "skip_reason": 45})
        sys.exit(0)
    diff_files = parse_diff_into_files(diff_text)
    if not diff_files:
        emit_metrics({**_base, "pushed": len(push_range),
                      "unreviewed": len(tail), "skip_reason": 30})
        # Still mark tail reviewed — there's nothing to review.
        _append_reviewed_shas(repo_root, tail, vulns_found=0)
        sys.exit(0)
    # Same prioritize-don't-bail logic as commit-review (see comment there).
    # push-sweep ranges are net diffs over many commits so they hit the cap
    # more often; reviewing the riskiest MAX_PUSH_SWEEP_FILES is strictly
    # better than reviewing none. We still mark `tail` reviewed afterward —
    # the dropped files are by construction the low-risk ones (config, .gen,
    # tests, migrations), and NOT advancing the base would make the next
    # push re-hit the same overflow with an even larger range. Per-commit
    # review remains the primary surface for those files. The 10×
    # pathological guard stays so a 500-file vendored-dir push doesn't burn
    # a counter slot.
    if len(diff_files) > 10 * MAX_PUSH_SWEEP_FILES:
        emit_metrics({**_base, "pushed": len(push_range),
                      "unreviewed": len(tail), "skip_reason": 31,
                      "diff_files_count": len(diff_files)})
        sys.exit(0)
    diff_files, _dropped = _prioritize_diff_files(diff_files, MAX_PUSH_SWEEP_FILES)
    if _dropped:
        _base = {**_base, "diff_files_dropped": _dropped}

    # The rate limit is checked only after all the cheap skips above. It uses
    # its own "PushSweep" key but the commit-review limit and window constants.
    _allowed, _rate_n = atomic_check_rate_limit(
        session_id, "PushSweep",
        MAX_COMMIT_REVIEWS_PER_HOUR, COMMIT_REVIEW_RATE_WINDOW_S)
    _base = {**_base, "rate_count": _rate_n}
    if not _allowed:
        emit_metrics({"skipped": True, "skip_reason": 23, **_base})
        sys.exit(0)

    import time as _time
    now = _time.time()
    # Read previously delivered findings under the state lock; entries older
    # than PREVIOUS_FINDINGS_TTL_SEC are treated as absent.
    previous_findings = with_locked_state(
        session_id,
        lambda s: list(s.get("previous_findings", []))
        if (now - s.get("previous_findings_ts", 0)) <= PREVIOUS_FINDINGS_TTL_SEC
        else []
    ) or []

    review_start = _time.time()
    rel_touched = [fp for fp, _ in diff_files]
    if _agentic_commit_review_enabled():
        concrete_guidance, vulns, agentic_metrics = _agentic_review_with_race(
            repo_root, diff_files, rel_touched, previous_findings
        )
        # The agentic path flags when it could not produce a result; rerun
        # with the single-shot analysis in that case.
        if agentic_metrics.get("agentic_fallback"):
            concrete_guidance, vulns = analyze_code_security(
                diff_files, is_diff=True, previous_findings=previous_findings
            )
    else:
        concrete_guidance, vulns = analyze_code_security(
            diff_files, is_diff=True, previous_findings=previous_findings
        )
        agentic_metrics = {}
    review_ms = int((_time.time() - review_start) * 1000)

    # The tail is now covered by this net-diff review.
    # NOTE(review): this runs unconditionally after the review call, so a
    # review that failed and returned no vulns would still mark the tail
    # reviewed. handle_stop_hook checks `llm._last_call_claude_http_error`
    # for that case; confirm whether the same check is wanted here.
    _append_reviewed_shas(repo_root, tail, vulns_found=len(vulns or []))

    new_vulns, n_deduped = _dedup_against_state(
        session_id, vulns or [], prompted=_finding_keys(previous_findings)
    )

    # Metrics — keep within the 10-key cap; agentic sub-metrics are dropped
    # here in favour of the push-sweep funnel keys (telemetry can join on session_id
    # to the per-commit fires for agentic detail). rewake_summary must ride
    # this line (CC reads only the first {-prefixed stdout line); it's a
    # no-op when new_vulns is empty since we exit 0 below.
    # NOTE(review): when `diff_files_dropped` and `deduped` are both present
    # this dict has 11 keys, and `deduped` is inserted last — confirm that
    # losing it to the cap in that case is acceptable.
    emit_metrics({
        **_base, "pushed": len(push_range), "unreviewed": len(tail),
        "prefix_advanced": prefix_advanced, "vulns_found": len(new_vulns),
        "files_reviewed": len(diff_files), "review_ms": review_ms,
        **({"deduped": n_deduped} if n_deduped else {}),
    }, rewake_summary=_format_vulns_summary(new_vulns, prefix="Push security review found"))

    if not new_vulns:
        debug_log("Push sweep: no new findings")
        sys.exit(0)

    # First-push of a big branch can surface many findings at once across
    # week-old code. Report only the top-N by severity so the asyncRewake
    # isn't a wall of text; the rest are counted in telemetry (vulns_found is
    # the full count) but are deliberately NOT written to previous_findings
    # (see the recording step below). Stable sort: severity, then category
    # for determinism in tests.
    _sev_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    new_vulns.sort(key=lambda v: (_sev_rank.get(v.get("severity", "medium"), 2),
                                  v.get("category", "")))
    reported = new_vulns[:PUSH_SWEEP_REPORT_CAP]
    n_suppressed = len(new_vulns) - len(reported)

    # Record only the REPORTED findings into shared state. previous_findings
    # means "the user was told about this — don't repeat it"; suppressed
    # findings were NOT told, so recording them would silently bury them
    # against any future commit-review/Stop that touches the same code. The
    # range is marked reviewed in `.git/sg-reviewed-shas` regardless, so the
    # push-sweep itself won't re-find them; leaving them out of
    # previous_findings keeps the door open for the per-commit hook to
    # surface them later if the code is touched again.
    snapshots = [
        {"filePath": v.get("filePath", ""),
         "category": v.get("category", "Unknown"),
         "vulnerableCode": v.get("vulnerableCode", "")}
        for v in reported
    ]
    def _record(state):
        # Runs under the session state lock. Merges `snapshots` into the
        # stored list, deduplicating on (filePath, category), and refreshes
        # the timestamp used for the TTL check above.
        existing = [f for f in state.get("previous_findings", [])
                    if isinstance(f, dict)]
        seen = {(f.get("filePath", ""), f.get("category", "")) for f in existing}
        for f in snapshots:
            k = (f["filePath"], f["category"])
            if k not in seen:
                seen.add(k); existing.append(f)
        state["previous_findings"] = existing
        state["previous_findings_ts"] = _time.time()
    with_locked_state(session_id, _record)

    # Prefer the LLM's formatted guidance (richer context, fix suggestions)
    # when NOTHING was dropped from the LLM's full vuln list; fall back to
    # re-formatting from `reported` whenever either the cap suppressed
    # findings OR `_dedup_against_state` dropped findings the user has
    # already been shown. concrete_guidance is built against the LLM's
    # full pre-dedup list, so leaking it past dedup re-surfaces findings
    # the per-commit hook already reported (the [✓1, ✗2, ✓3] case where
    # the tail reviewed commits' findings are in previous_findings).
    if n_suppressed or n_deduped:
        guidance = _format_vulns_guidance(reported) or ""
    else:
        guidance = concrete_guidance or _format_vulns_guidance(reported) or ""
    sys.stderr.write(
        PROVENANCE_BANNER + "\n\n" + guidance + CONTINUATION_SUFFIX + "\n"
    )
    sys.exit(2)
|
||
|
||
def handle_stop_hook(input_data):
    """
    Handle the Stop hook — final security check using git diff.

    Diffs against the baseline SHA captured at UserPromptSubmit to review
    only code changed during this turn. Runs one LLM analysis
    (`analyze_code_security`; the second, areas-of-concern analysis is
    disabled — see the NOTE in the body). When findings remain after dedup,
    writes guidance to stderr and exits with code 2 to force Claude to
    continue and fix issues; otherwise exits 0.

    Also sweeps pending pattern warnings to emit a session-level
    fixed/unresolved tally; the sweep needs no LLM and measures
    pattern-rule efficacy.

    Args:
        input_data: Hook payload dict. Keys read here: `session_id`,
            `stop_hook_active` and `cwd`.

    This function never returns normally; every path ends in `sys.exit`.
    """
    session_id = input_data.get("session_id", "default")
    stop_hook_active = input_data.get("stop_hook_active", False)
    cwd = input_data.get("cwd", "")

    # Recursion guard FIRST — consume_stop_state clears touched_paths, and CC
    # sets stop_hook_active session-wide while any asyncRewake Stop is in
    # flight, so a concurrent active=True fire winning the lock would discard
    # paths the concurrent active=False fire needs.
    if stop_hook_active:
        debug_log("Stop hook: stop_hook_active=True, skipping to avoid recursion")
        emit_metrics({"skipped": True, "skip_reason": 1, "diff_strategy_v2": True})
        sys.exit(0)

    # Snapshot all state under one lock BEFORE any slow work (sweep file I/O,
    # git, network). asyncRewake Stop runs in the background; the next turn's
    # UPS/PostToolUse can fire while we're still here. The snapshot is immune
    # to those writes — they affect the NEXT Stop fire's snapshot.
    snap = consume_stop_state(session_id)
    fire_count = snap["fire_count"]
    touched_paths = snap["touched_paths"]
    baseline_sha = snap["baseline_sha"]
    snap_baseline = baseline_sha  # pre-reassignment value for restore-on-transient-skip
    head_at_capture = snap["head_at_capture"]
    untracked_at_baseline = snap.get("untracked_at_baseline") or {}
    previous_findings = snap["previous_findings"]

    # Sweep pattern-warning outcomes (pure local work; stop_hook_active is
    # already guaranteed False here so no double-count guard needed).
    sweep = {}
    warn_fixed, warn_unresolved, warn_unresolved_mask = sweep_pending_warnings(session_id)
    if warn_fixed or warn_unresolved:
        sweep = {
            "warn_fixed": warn_fixed,
            "warn_unresolved": warn_unresolved,
            "warn_unresolved_mask": warn_unresolved_mask,
        }

    # Filled in by compute_v2_review_set below. `_skip` reads this name at
    # call time, so skips after that point include the computed values.
    v2_metrics = {}

    def _skip(reason, restore=False, **extra):
        # Emit a "skipped" metrics line and exit 0. With restore=True the
        # consumed touched_paths / baseline snapshot is handed back first
        # via restore_unreviewed_stop_state.
        if restore:
            restore_unreviewed_stop_state(session_id, touched_paths, snap_baseline)
        # CC truncates metrics to 10 keys by
        # insertion order. v2_metrics (3) must precede sweep (3) so the v2
        # diagnostics survive when extra adds touched_paths_count + ip_* keys.
        emit_metrics({
            "skipped": True, "skip_reason": reason, "fire_index": fire_count + 1,
            "diff_strategy_v2": True,
            **v2_metrics, **extra, **sweep,
        })
        sys.exit(0)

    # Limit stop hook firings per asyncRewake loop to prevent infinite loops.
    # fire_count auto-expires after STOP_LOOP_STATE_TTL_SEC so a stale count
    # from a prior turn doesn't block this one.
    if MAX_STOP_HOOK_FIRINGS > 0 and fire_count >= MAX_STOP_HOOK_FIRINGS:
        debug_log(f"Stop hook: already fired {fire_count} times (max {MAX_STOP_HOOK_FIRINGS}), skipping")
        _skip(2)

    if not ENABLE_CODE_SECURITY_REVIEW or not HAS_API_CREDENTIALS:
        debug_log("Stop hook: LLM review disabled or no API credentials")
        _skip(3)

    # Stop-hook-only kill switch — placed after consume_stop_state so
    # touched_paths is still cleared each turn (a disabled Stop hook that
    # never consumed state would accumulate stale paths) and after the sweep
    # so pattern-warning efficacy metrics still emit. The commit/push reviews
    # have their own gates (ENABLE_COMMIT_REVIEW / ENABLE_CODE_SECURITY_REVIEW).
    if not ENABLE_STOP_REVIEW:
        debug_log("Stop hook: ENABLE_STOP_REVIEW=0")
        # 50+ for opt-out skips that aren't push-sweep (which owns 40-49).
        _skip(50)

    # Unreachable API is the one skip here that restores the consumed state.
    if not ensure_anthropic_reachable():
        debug_log("Stop hook: api.anthropic.com unreachable")
        _skip(10, restore=True)

    if not cwd:
        debug_log("Stop hook: no cwd")
        _skip(4)

    review_paths, diff_base, repo_root, untracked, v2_metrics = compute_v2_review_set(
        cwd, baseline_sha, head_at_capture, untracked_at_baseline
    )
    if not review_paths:
        debug_log("Stop hook: empty review set")
        _skip(9, touched_paths_count=len(touched_paths))
    debug_log(f"Stop hook: review_set={len(review_paths)} base={diff_base[:12]} dirty_now={v2_metrics['dirty_now_count']} changed_since={v2_metrics['changed_since_count']}")
    # Run from repo_root so the toplevel-relative review_paths resolve.
    # Diff CONTENT against the turn-start stash (baseline_sha) so the LLM
    # sees only this-turn edits — diffing against HEAD includes the user's
    # pre-turn uncommitted WIP, which inflates review_ms and can re-flag
    # the same pre-existing pattern every turn. The file LIST still comes
    # from git state (compute_v2_review_set), so Bash/subagent edits are
    # caught either way. Fall back to diff_base (HEAD/head_at_capture)
    # when the stash is missing or pruned.
    content_base = baseline_sha or diff_base
    diff_output = get_git_diff(repo_root, content_base, full_context=False,
                               paths=review_paths, untracked_paths=untracked)
    if diff_output is None and content_base != diff_base:
        debug_log(f"Stop hook: diff against {content_base[:12]} failed — falling back to {diff_base}")
        diff_output = get_git_diff(repo_root, diff_base, full_context=False,
                                   paths=review_paths, untracked_paths=untracked)
    # filter_preexisting_from_diff needs a resolvable pre-turn ref; fall
    # back to HEAD when UPS never captured a baseline (print mode).
    if not baseline_sha:
        baseline_sha = "HEAD"

    if not diff_output or not diff_output.strip():
        debug_log("Stop hook: no changes since baseline")
        _skip(6)

    # Parse diff into per-file content
    diff_files = parse_diff_into_files(diff_output)
    if not diff_files:
        debug_log("Stop hook: no source code files in diff")
        _skip(7)

    # Mirror commit-review: hard-bail only on pathological diffs (>300 files,
    # usually a bad baseline), otherwise prioritize by security-risk path
    # tokens and review the top MAX_DIFF_FILES. Stop is the only surface for
    # uncommitted edits; the old hard-skip at >30 files dropped the 31-300
    # bucket entirely, which is where cross-file source→sink vulns hide.
    # _cap_files_for_prompt already bounds bytes downstream.
    _stop_dropped = 0
    if len(diff_files) > 10 * MAX_DIFF_FILES:
        debug_log(f"Stop hook: pathological diff ({len(diff_files)} files > "
                  f"{10 * MAX_DIFF_FILES}), skipping")
        _skip(8, diff_files_count=len(diff_files))
    if len(diff_files) > MAX_DIFF_FILES:
        diff_files, _stop_dropped = _prioritize_diff_files(
            diff_files, MAX_DIFF_FILES)
        debug_log(f"Stop hook: prioritized to {len(diff_files)} files "
                  f"(dropped {_stop_dropped} lower-risk)")

    # Filter out pre-existing content from file rewrites
    diff_files = filter_preexisting_from_diff(diff_files, cwd, baseline_sha)

    debug_log(f"Stop hook: reviewing {len(diff_files)} changed files (standard diff)")

    import time as _time
    stop_review_start = _time.time()

    # Stop hook is single-shot only. Agentic review is wired into
    # handle_commit_review_posttooluse (PostToolUse on `git commit`) — commits
    # are slower-OK and benefit from the deeper context-reading loop.
    concrete_guidance, vulns = analyze_code_security(
        diff_files, is_diff=True, previous_findings=previous_findings
    )
    # NOTE: analyze_security_concerns disabled — it produces too many false positives
    # on pre-existing patterns in starter code. The concrete vulnerability analysis
    # is more precise and has severity filtering (high/critical only).

    stop_review_elapsed = _time.time() - stop_review_start
    debug_log(f"Stop hook: LLM reviews took {stop_review_elapsed:.1f}s total")

    review_ms = int(stop_review_elapsed * 1000)
    fire_index = fire_count + 1

    # Late dedup: drop only what a concurrent commit-review wrote while our
    # LLM ran. Anything already in `previous_findings` (the consume_stop_state
    # snapshot) that the LLM re-flagged is an intentional "fix incomplete"
    # verdict and passes through.
    if vulns:
        vulns, n_deduped = _dedup_against_state(
            session_id, vulns, prompted=_finding_keys(previous_findings)
        )
        if n_deduped and not vulns:
            debug_log("Stop hook: all findings already delivered by commit-review")
            _skip(35, deduped=n_deduped, review_ms=review_ms)
        # Rebuild the guidance text from the post-dedup list.
        concrete_guidance = _format_vulns_guidance(vulns)

    if concrete_guidance:
        # NOTE(review): `vulns` is iterated and passed to len() below without
        # the `or []` guard that handle_push_sweep_posttooluse applies —
        # presumably analyze_code_security never returns guidance together
        # with a None vuln list; confirm.
        finding_snapshots = [
            {
                "filePath": v.get("filePath", ""),
                "category": v.get("category", "Unknown"),
                "vulnerableCode": v.get("vulnerableCode", ""),
            }
            for v in vulns
        ]
        # Update baseline so next stop hook iteration only sees new changes
        new_sha = capture_git_baseline(cwd)
        new_untracked_baseline = _list_untracked(cwd) if new_sha else None

        def _record_fire(state):
            # Runs under the session state lock: bumps the fire counter,
            # merges this fire's findings, and advances the baseline when a
            # new one was captured.
            state["stop_hook_fire_count"] = fire_index
            state["stop_hook_fire_count_ts"] = _time.time()
            # Re-read under lock — the commit-review PostToolUse hook may have
            # appended findings since consume_stop_state snapshotted.
            # Dedupe on (filePath, category) — vulnerableCode includes diff
            # context lines that drift between fires, so byte-identical
            # matching let the same finding accumulate as "new" each fire.
            existing = [f for f in state.get("previous_findings", []) if isinstance(f, dict)]
            seen = {(f.get("filePath", ""), f.get("category", "")) for f in existing}
            for f in finding_snapshots:
                key = (f["filePath"], f["category"])
                if key not in seen:
                    seen.add(key)
                    existing.append(f)
            state["previous_findings"] = existing
            state["previous_findings_ts"] = _time.time()
            if new_sha:
                state["baseline_sha"] = new_sha
                state["untracked_at_baseline"] = new_untracked_baseline
        with_locked_state(session_id, _record_fire)

        if new_sha:
            debug_log(f"Updated git baseline after stop hook: {new_sha[:12]}")

        # Only critical/high/medium are tallied; any other severity value is
        # left uncounted.
        sev = {"critical": 0, "high": 0, "medium": 0}
        for v in vulns:
            s = v.get("severity", "medium")
            if s in sev:
                sev[s] += 1
        # 8 base keys + at most 2 sweep keys = 10 (cap). Drop the mask here.
        # untracked_baseline_n is the signal for whether the UPS-time
        # untracked-snapshot capture actually ran.
        # NOTE(review): the dict below has 9 unconditional keys, plus the
        # optional diff_truncated, plus up to 2 sweep keys — so the "8 base
        # keys" figure looks stale and the total can exceed 10, in which case
        # the trailing sweep keys are the ones an insertion-order cap would
        # drop. Confirm.
        sweep_trimmed = {k: v for k, v in sweep.items() if k != "warn_unresolved_mask"}
        emit_metrics({
            "vulns_found": len(vulns),
            "untracked_baseline_n": len(untracked_at_baseline),
            "diff_strategy_v2": True,
            "critical_count": sev["critical"],
            "high_count": sev["high"],
            "files_reviewed": len(diff_files),
            "touched_paths_count": len(touched_paths),
            "review_ms": review_ms,
            "fire_index": fire_index,
            **({"diff_truncated": llm._last_review_truncated_bytes}
               if llm._last_review_truncated_bytes else {}),
            **sweep_trimmed,
        }, rewake_summary=_format_vulns_summary(vulns))

        # Exit code 2 with stderr forces Claude to continue and fix
        sys.stderr.write(PROVENANCE_BANNER + "\n\n" + concrete_guidance + CONTINUATION_SUFFIX + "\n")
        sys.exit(2)

    # No guidance to deliver. If the LLM call itself failed, hand the consumed
    # state back so the unreviewed paths are not lost.
    if llm._last_call_claude_http_error is not None:
        debug_log(f"Stop hook: API call failed with status {llm._last_call_claude_http_error}")
        restore_unreviewed_stop_state(session_id, touched_paths, snap_baseline)
    else:
        debug_log("Stop hook: no security issues found")
    # CC truncates metrics to 10 keys by
    # insertion order. The previous **sweep,**v2_metrics tail meant the 3
    # v2_metrics keys were always sliced off this most-common path, so the
    # diff-strategy diagnostics never reached telemetry. Drop sweep here (it's
    # PostToolUse-warning state, orthogonal to diff-strategy comparison).
    # 6 base + optional api_error + 3 v2_metrics = ≤10.
    # NOTE(review): the optional diff_truncated key is not in that count; with
    # both api_error and diff_truncated present the total is 11 — confirm.
    emit_metrics({
        "vulns_found": 0,
        "diff_strategy_v2": True,
        "files_reviewed": len(diff_files),
        "touched_paths_count": len(touched_paths),
        "review_ms": review_ms,
        "fire_index": fire_index,
        **({"api_error": llm._last_call_claude_http_error} if llm._last_call_claude_http_error is not None else {}),
        **({"diff_truncated": llm._last_review_truncated_bytes}
           if llm._last_review_truncated_bytes else {}),
        **v2_metrics,
    })
    sys.exit(0)
|
||
|
||
# Path of the marker file used to throttle detached SDK-bootstrap spawns.
# It lives in the directory named by SECURITY_WARNINGS_STATE_DIR when that
# variable is set and non-empty, otherwise in ~/.claude/security. Only the
# file's mtime is consulted (by _maybe_bootstrap_agent_sdk_async).
_SDK_BOOTSTRAP_THROTTLE = os.path.join(
    os.getenv("SECURITY_WARNINGS_STATE_DIR")
    or os.path.expanduser("~/.claude/security"),
    ".sdk_bootstrap_spawned",
)
|
||
|
||
def _maybe_bootstrap_agent_sdk_async():
    """Spawn `ensure_agent_sdk.py` in the background if the SDK is missing.

    Exists for remote-pod sessions (CLAUDE_CODE_SYNC_PLUGIN_INSTALL=true),
    where plugins are synced only after SessionStart has fired, so the
    SessionStart bootstrap hook never runs and the agentic commit reviewer
    would otherwise always fall back. A PostToolUse fire proves the plugin
    is registered, which makes it a safe place to retry the bootstrap.

    Behavior:
      * Returns immediately when `claude_agent_sdk` is already importable
        (the normal case on local installs).
      * Returns when the throttle file was touched less than 300 seconds
        ago, so a burst of hook fires does not spawn many subprocesses
        while the environment is still building.
      * Otherwise touches the throttle file and launches the bootstrap
        script detached, with all standard streams sent to DEVNULL, so the
        hook never waits on it. The script is idempotent and lock-guarded,
        so a repeated spawn is harmless.

    Strictly best-effort: any exception is swallowed so a bootstrap problem
    can never break the hook itself.
    """
    try:
        from importlib.util import find_spec
        from time import time as _now

        if find_spec("claude_agent_sdk") is not None:
            return

        # Age of the throttle marker; None when it does not exist yet (or
        # cannot be stat'ed), which means "not throttled".
        try:
            throttle_age = _now() - os.path.getmtime(_SDK_BOOTSTRAP_THROTTLE)
        except OSError:
            throttle_age = None
        if throttle_age is not None and throttle_age < 300:
            return

        throttle_dir = os.path.dirname(_SDK_BOOTSTRAP_THROTTLE)
        os.makedirs(throttle_dir, exist_ok=True)
        # Touch the throttle BEFORE spawning so that several PostToolUse
        # fires landing in the same second do not each start a subprocess.
        with open(_SDK_BOOTSTRAP_THROTTLE, "w"):
            pass

        plugin_dir = os.path.dirname(os.path.abspath(__file__))
        bootstrap_cmd = [sys.executable,
                         os.path.join(plugin_dir, "ensure_agent_sdk.py")]
        subprocess.Popen(
            bootstrap_cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )
    except Exception:
        pass  # best-effort; never break the hook over a bootstrap attempt
|
||
|
||
def main():
    """Main hook function.

    Reads one JSON hook payload from stdin and dispatches on its
    ``hook_event_name`` / ``tool_name``:

    * ``UserPromptSubmit`` -> ``handle_user_prompt_submit``
    * ``Stop`` -> ``handle_stop_hook``
    * ``PostToolUse`` for ``Bash`` -> commit review or push sweep, run at
      most once per claimed tool use
    * ``Edit`` / ``Write`` / ``MultiEdit`` / ``NotebookEdit`` -> pattern
      checks, with warnings printed to stdout as hook JSON

    Malformed input (undecodable bytes, invalid JSON, or a payload that is
    not a JSON object) is reported through ``emit_metrics`` and the process
    exits 0 rather than crashing the hook.
    """
    debug_log(f"Hook called with args: {sys.argv}")

    # Master kill switch — honors ENABLE_SECURITY_REMINDER=0 (legacy) and
    # SECURITY_GUIDANCE_DISABLE=1 (clearer name, no double negative). Emit
    # empty metrics so asyncRewake hooks (Stop) don't hang waiting for stdout
    # output that never comes.
    if SECURITY_GUIDANCE_DISABLED:
        emit_metrics({"skipped": True, "skip_reason": -1})
        sys.exit(0)

    # Periodically clean up old state files (10% chance per run)
    if random.random() < 0.1:
        cleanup_old_state_files()

    # Read input from stdin. json.JSONDecodeError and UnicodeDecodeError
    # (undecodable stdin bytes) are both ValueError subclasses.
    try:
        raw_input = sys.stdin.read()
        input_data = json.loads(raw_input)
    except ValueError as e:
        debug_log(f"JSON decode error: {e}")
        emit_metrics({"skipped": True, "skip_reason": -2})
        sys.exit(0)

    # Valid JSON that is not an object (e.g. a list or a bare string) has no
    # .get(); treat it like any other malformed payload and reuse the same
    # skip code as the decode-error path.
    if not isinstance(input_data, dict):
        debug_log(f"Unexpected hook payload type: {type(input_data).__name__}")
        emit_metrics({"skipped": True, "skip_reason": -2})
        sys.exit(0)

    session_id = input_data.get("session_id", "default")
    tool_name = input_data.get("tool_name", "")
    # Normalise a missing, null, or non-object tool_input to {} so the Bash
    # and edit paths below read it the same way.
    tool_input = input_data.get("tool_input") or {}
    if not isinstance(tool_input, dict):
        tool_input = {}
    hook_event_name = input_data.get("hook_event_name", "")
    debug_log(f"Processing: hook_event={hook_event_name}, tool={tool_name}")

    # Load project-specific security guidance and custom patterns once
    # per invocation. Failures are non-fatal (debug-logged) so a malformed
    # config never prevents the built-in checks from running.
    extensibility.load_for_session(input_data.get("cwd"))

    # Remote-pod SDK-bootstrap rescue: PostToolUse is the earliest hook event
    # that is guaranteed to fire *after* async plugin sync (its firing proves
    # the plugin is registered), so it's where we recover the SessionStart
    # bootstrap that remote pods miss under CLAUDE_CODE_SYNC_PLUGIN_INSTALL.
    # Fires on Edit/Write too (not just Bash), so the venv is usually built
    # before the first `git commit`.
    if hook_event_name == "PostToolUse":
        _maybe_bootstrap_agent_sdk_async()

    # Handle UserPromptSubmit — capture git baseline
    if hook_event_name == "UserPromptSubmit":
        handle_user_prompt_submit(input_data)
        return

    # Handle Stop hook — final security check
    if hook_event_name == "Stop":
        handle_stop_hook(input_data)
        return

    # Handle PostToolUse[Bash] — commit review or push sweep (asyncRewake).
    #
    # hooks.json has two `if` configs under the Bash matcher (`git commit:*`
    # and `git push:*`). CC evaluates each `if` independently and spawns this
    # script ONCE PER MATCH — so `git commit -m x && git push` spawns python
    # twice with the same command string and the same tool_use_id. The python
    # cannot tell which `if` fired it.
    #
    # Routing therefore MUST check commit FIRST so that compound commit+push
    # commands continue to hit commit-review (the pre-existing behaviour) on
    # the commit-matcher invocation. The push-matcher invocation of the SAME
    # compound command is deduped by `_claim_bash_hook_once` below: the second
    # spawn loses the tool_use_id sentinel race and exits early with
    # `bash_hook_dedup`, so commit-review runs exactly once. The alternative —
    # checking push first — would silently DROP commit-review
    # on `git commit && git push`, which is a regression.
    #
    # The push-sweep does NOT run on the compound call. That's acceptable: the
    # just-made commit is recorded by commit-review, so the next standalone
    # push sees it as reviewed and the sweep base advances past it. Older
    # unreviewed commits in the range are caught on that next push.
    if tool_name == "Bash" and hook_event_name == "PostToolUse":
        cmd = tool_input.get("command", "") or ""
        if not (_GIT_COMMIT_RE.search(cmd) or _GIT_PUSH_RE.search(cmd)):
            return
        if not _claim_bash_hook_once(input_data):
            # Another spawn for this same tool_use_id already claimed the
            # work (compound matched multiple `if` configs). Emit a single
            # metric so telemetry can count how often the de-dupe kicks in.
            print(json.dumps({"metrics": {"bash_hook_dedup": True}}), flush=True)
            sys.exit(0)
        if _GIT_COMMIT_RE.search(cmd):
            handle_commit_review_posttooluse(input_data)
        elif _GIT_PUSH_RE.search(cmd):
            handle_push_sweep_posttooluse(input_data)
        return

    # Handle PostToolUse — pattern-based checks only (no LLM review per-edit)
    if tool_name in ["Edit", "Write", "MultiEdit", "NotebookEdit"]:
        file_path = tool_input.get("file_path") or tool_input.get("notebook_path") or ""
        if not file_path:
            sys.exit(0)

        # Skip plan files. Require a path-component boundary after the plans
        # directory so a sibling such as "~/.claude/plans-old/x.py" is not
        # mistaken for a plan file and silently exempted from the checks.
        plans_dir = os.path.expanduser("~/.claude/plans")
        if file_path.startswith(plans_dir):
            boundary = file_path[len(plans_dir):len(plans_dir) + 1]
            if boundary in ("", "/", os.sep):
                sys.exit(0)

        record_touched_path(session_id, file_path)

        content = extract_content_from_input(tool_name, tool_input)

        all_guidance = []
        raw_pattern_matches = []
        if ENABLE_PATTERN_RULES:
            pattern_matches = check_patterns(file_path, content)
            raw_pattern_matches = pattern_matches
            if pattern_matches:
                debug_log(f"Pattern matches for {file_path}: {[r for r, _ in pattern_matches]}")

            # For Write tool, filter out patterns that existed in the baseline version
            # This prevents flagging pre-existing insecure patterns when Claude rewrites a file
            if tool_name == "Write" and pattern_matches:
                cwd = os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
                baseline_content = get_baseline_file_content(session_id, file_path, cwd)
                if baseline_content is not None:
                    baseline_matches = set(r for r, _ in check_patterns(file_path, baseline_content))
                    pattern_matches = [(r, msg) for r, msg in pattern_matches if r not in baseline_matches]
                    if pattern_matches:
                        debug_log(f"New patterns (not in baseline): {[r for r, _ in pattern_matches]}")
                    else:
                        debug_log("All patterns existed in baseline, skipping")

            # Show each (file, rule) warning only when the helper reports it
            # as newly marked for this session.
            for rule_name, reminder in pattern_matches:
                warning_key = f"{file_path}-{rule_name}"
                if atomic_check_and_mark_warning(session_id, warning_key):
                    all_guidance.append(reminder)

            # Record matched rules as pending so the Stop-hook sweep can
            # later tally fixed vs unresolved. Only runs when patterns match.
            if pattern_matches:
                record_pending_warnings(session_id, file_path,
                                        [r for r, _ in pattern_matches])

        # Emit metrics when raw patterns matched (even if all were baseline-suppressed
        # or dedup'd — pattern_hits reflects warnings actually shown, may be 0).
        # Gate on raw matches so clean edits don't flood the metrics event.
        # rule_id: RuleId of the first raw match (values stay small/enumerable in telemetry)
        # rule_mask: bitmask of ALL raw matches — POPCOUNT gives raw hit count,
        # (mask >> N) & 1 tests for a specific rule
        if raw_pattern_matches:
            raw_names = [r for r, _ in raw_pattern_matches]
            output = {"metrics": {
                "pattern_hits": len(all_guidance),
                # User-defined patterns (rule_name="user:*") have no static
                # RuleId; emit -1 so the metrics pipeline can distinguish.
                "rule_id": int(_RULE_NAME_TO_ID.get(raw_names[0], -1)),
                "rule_mask": rule_names_to_mask(raw_names),
                **({"pv": _PV} if _PV else {}),
            }}
            if all_guidance:
                output["hookSpecificOutput"] = {
                    "hookEventName": "PostToolUse",
                    "additionalContext": PROVENANCE_TAG + "\n\n" + "\n\n".join(all_guidance),
                }
            print(json.dumps(output))
        elif all_guidance:
            # Defensive: pattern rules disabled but guidance somehow set (shouldn't happen)
            print(json.dumps({
                "hookSpecificOutput": {
                    "hookEventName": "PostToolUse",
                    "additionalContext": PROVENANCE_TAG + "\n\n" + "\n\n".join(all_guidance),
                }
            }))

    sys.exit(0)

# Run the hook only when executed as a script, not when imported.
if __name__ == "__main__":
    main()