From 009392eee4342644992ddab1feb4649c5d99b84e Mon Sep 17 00:00:00 2001 From: Mohamed Hegazy Date: Mon, 1 Jun 2026 13:22:40 -0700 Subject: [PATCH] =?UTF-8?q?security-guidance:=205=20venv-specific=20err=5F?= =?UTF-8?q?kind=20categories=20+=20stderr=5Fsignature=20bucket=20(2.0.1=20?= =?UTF-8?q?=E2=86=92=202.0.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #2112's telemetry visibility surfaced an immediate finding from the first 3h of v2.0.1 data: **2,406 phase=2 / err=99 sessions** — "venv stage / uncategorized" — dominating BUILD_FAILED. The original err_kind detection patterns were all pip-flavored (pip_no_match, dns_fail, ssl_verify, etc.) and didn't catch venv-creation failure modes, so they all collapsed to the catch-all _uncategorized (99) bucket. This PR fills the gap on two axes. ## 1. Five new venv-specific err_kind categories (codes 11-15) Each gated on `err_phase == "venv"` so the same substring doesn't mis-fire in pip-phase failures: - 11 `venv_ensurepip_fail` — Debian/Ubuntu without python3-venv installed; stderr matches "ensurepip is not available" or "ensurepip ... returned non-zero". Predicted to be the biggest chunk based on Linux distro market share. - 12 `venv_path_too_long` — Windows MAX_PATH (260) or POSIX ENAMETOOLONG. Triggered when state_dir + venv layout exceeds the path limit (deep Lib/site-packages//<...> paths). - 13 `venv_no_module` — `python3 -m venv` itself missing ("No module named 'venv'"). Rare but distinctive. - 14 `venv_already_exists` — Errno 17 / "file exists" — sentinel race past O_EXCL or stale dir survived `--clear`. - 15 `venv_setup_failed` — generic "virtual environment was not created successfully" catch-all for venv setup failures that don't match a more specific category. All 5 occupy reserved slots in SDK_BOOTSTRAP_ERR_CODES per the APPEND-ONLY contract from PR #2112. ## 2. `sdk_bootstrap_stderr_sig` integer hash For "other:" err_kinds (which encode to _uncategorized = 99), emit a bounded integer hash (0-999) of the first ~30 chars of the stderr tail. This restores cardinality to the _uncategorized bucket in BQ aggregation without unbounded keyspace — same stderr message always maps to the same bucket, so a real failure mode replicating across thousands of machines clusters cleanly. Bounded at 1000 buckets: well below any "high cardinality" alarm but wide enough to distinguish ~30 distinct dominant patterns (birthday-paradox collision probability ~50% at ~37 distinct inputs). The field auto-omits (`if sig:` gate) when err_kind is categorized — no key-budget cost on the common-case categorized failures. ## Version bump 2.0.1 → 2.0.2 PR #2114 confirmed the version-bump mechanism is the only way to propagate code changes to the existing fleet — without a bump, CC's plugin updater short-circuits on string-equality of installation version vs marketplace version. Following the policy we established: **bump patch on every functional PR**. By 17:31:42Z on 2026-06-01 (1m22s after #2114 merged), v2.0.1 was already appearing in BQ. v2.0.2 should follow the same propagation curve — ~30% adoption within 3 hours, full convergence within a few days. ## Verified locally - py_compile clean. - 15 new tests in test_venv_failure_deepdive.py (added to internal test suite at sg-staging/tests/, not in this PR): * 5 parametrized: each new err_kind maps to its expected code (11-15). * 1 APPEND-ONLY regression: existing codes 1-10 + 99 unchanged. * 6 stderr_sig: non-other inputs → 0; None/empty → 0; deterministic same-input → same-output; bounded to 0-999; distinct inputs → distinct hashes (5/5 with P(collision) ≈ 1%); leading-chars focus (path-varying stderr with shared 30-char prefix collide as designed). * 1 static-shape catcher: every new `err_kind = "venv_..."` branch in main() is guarded by `err_phase == "venv"`. Catches the regression where someone adds a venv pattern without the phase gate and starts mis-categorizing pip-phase failures. * 1 map-coverage: all err_kind strings assigned anywhere in ensure_agent_sdk.main() are present in SDK_BOOTSTRAP_ERR_CODES (catches new categories added in code but forgotten in the map). * 1 emit-shape: the metric block uses `_encode_stderr_sig`, the `sdk_bootstrap_stderr_sig` key is written conditionally on `if sig:`. Catches the regression where someone removes the helper or makes the emit unconditional (would pad every categorized BUILD_FAILED row with a zero-valued field). - Full suite: 452/452 pass + 2 skipped (live API tests, opt-in). ## What this unblocks in BQ ```sql -- For the 2,406 sessions/3h that were phase=2/err=99 on v2.0.1, -- v2.0.2+ will split them across the new categories. Query: SELECT CAST(JSON_VALUE(additional_metadata, "$.sdk_bootstrap_err") AS INT64) AS err, CAST(JSON_VALUE(additional_metadata, "$.sdk_bootstrap_stderr_sig") AS INT64) AS sig, COUNT(*) AS sessions FROM `proj-product-data-nhme.raw_events.claude_code_internal_event` WHERE _PARTITIONTIME >= ... AND CAST(JSON_VALUE(additional_metadata, "$.sdk_bootstrap") AS INT64) = 3 AND CAST(JSON_VALUE(additional_metadata, "$.sdk_bootstrap_phase") AS INT64) = 2 -- venv GROUP BY err, sig ORDER BY sessions DESC ``` Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude-plugin/marketplace.json | 2 +- .../.claude-plugin/plugin.json | 2 +- .../hooks/ensure_agent_sdk.py | 123 ++++++++++++++++-- 3 files changed, 111 insertions(+), 16 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2c8c0d0a..9dc8b4f4 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -2186,7 +2186,7 @@ { "name": "security-guidance", "description": "Security review for Claude-generated code. Pattern-based warnings on edits, LLM-powered diff review on Stop, and an agentic commit reviewer that catches injection, XSS, SSRF, hardcoded secrets, and 25+ other vulnerability classes.", - "version": "2.0.1", + "version": "2.0.2", "author": { "name": "Anthropic", "email": "support@anthropic.com" diff --git a/plugins/security-guidance/.claude-plugin/plugin.json b/plugins/security-guidance/.claude-plugin/plugin.json index ccdedcdd..63a9c3da 100644 --- a/plugins/security-guidance/.claude-plugin/plugin.json +++ b/plugins/security-guidance/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "security-guidance", - "version": "2.0.1", + "version": "2.0.2", "description": "Security review for Claude-generated code. Pattern-based warnings on edits, LLM-powered diff review on Stop, and an agentic commit reviewer that catches injection, XSS, SSRF, hardcoded secrets, and 25+ other vulnerability classes.", "author": { "name": "David Dworken", diff --git a/plugins/security-guidance/hooks/ensure_agent_sdk.py b/plugins/security-guidance/hooks/ensure_agent_sdk.py index 4c5f8423..827c70ac 100644 --- a/plugins/security-guidance/hooks/ensure_agent_sdk.py +++ b/plugins/security-guidance/hooks/ensure_agent_sdk.py @@ -65,21 +65,41 @@ SDK_BOOTSTRAP_PHASE_CODES = { "main": 4, # uncaught exception above main() } SDK_BOOTSTRAP_ERR_CODES = { - "pip_no_match": 1, - "dns_fail": 2, - "conn_refused": 3, - "ssl_verify": 4, - "perm_denied": 5, - "no_pip": 6, - "disk_full": 7, - "proxy_auth": 8, - "stderr_timeout": 9, # pip stderr containing "timeout"/"timed out" - "subprocess_timeout": 10, # subprocess.TimeoutExpired (>120s) - # 11–98 reserved for future categories; APPEND-ONLY. + "pip_no_match": 1, + "dns_fail": 2, + "conn_refused": 3, + "ssl_verify": 4, + "perm_denied": 5, + "no_pip": 6, + "disk_full": 7, + "proxy_auth": 8, + "stderr_timeout": 9, # pip stderr containing "timeout"/"timed out" + "subprocess_timeout": 10, # subprocess.TimeoutExpired (>120s) + # Venv-stage specific categories added after PR #2112 telemetry surfaced + # 2,406 phase=2/err=99 sessions in the first 3h of v2.0.1 — venv phase + # failing in ways the original pip-flavored patterns didn't catch. These + # all split out of what was previously collapsing to _uncategorized. + "venv_ensurepip_fail": 11, # Debian/Ubuntu missing python3-venv; + # stderr mentions ensurepip non-zero exit + # or "ensurepip is not available" + "venv_path_too_long": 12, # Windows MAX_PATH (260) or POSIX + # ENAMETOOLONG — venv writes deep paths + # under state_dir/agent-sdk-venv/Lib/... + "venv_no_module": 13, # `python3 -m venv` itself missing — "No + # module named 'venv'" / "No module named venv" + "venv_already_exists": 14, # Errno 17 / "file exists" — sentinel race + # past O_EXCL or stale dir survived --clear + "venv_setup_failed": 15, # Generic "virtual environment was not + # created successfully" — catches the long + # tail of venv setup failures that don't + # match a more specific category above + # 16–98 reserved for future categories; APPEND-ONLY. # 99 catches everything else (including "exc:" and "other:" # — the original string is debug-loggable but the integer is what makes - # it to telemetry). - "_uncategorized": 99, + # it to telemetry). For the "other:" tail, `sdk_bootstrap_stderr_sig` + # carries a bounded integer hash so we can still distinguish patterns + # in BQ aggregation. + "_uncategorized": 99, } @@ -107,6 +127,37 @@ def _encode_err_kind(s): return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"] +def _encode_stderr_sig(err_kind): + """Bounded integer hash of the stderr tail captured in "other:" + err_kinds. Lets us distinguish patterns INSIDE the _uncategorized + (code 99) bucket without unbounded cardinality. + + Returns 0 for non-"other:" err_kinds (so the field auto-omits from + emit_metrics on categorized failures — see the emit block in main()). + + Strategy: take the tail's first ~30 chars (post-lowercase, post-trim), + SHA-1, fold the first 2 bytes to 0–999. Different stderr messages + cluster into different buckets; same stderr always maps to the same + bucket. Cardinality is bounded at 1000, well below any "high + cardinality" alarm — and a real failure mode typically produces + near-identical stderr across thousands of machines, so 1000 buckets + is comfortably wide. + + Why first ~30 chars: stderr like "ERROR: Command failed: " varies the tail wildly (paths) but the categorization signal + is in the leading words. Dropping the suffix focuses the hash on + the discriminative part. + """ + if not err_kind or not err_kind.startswith("other:"): + return 0 + import hashlib + tail = err_kind[len("other:"):].strip().lower()[:30] + if not tail: + return 0 + h = hashlib.sha1(tail.encode("utf-8", errors="replace")).digest() + return int.from_bytes(h[:2], "big") % 1000 + + def _sdk_on_syspath() -> bool: # find_spec is ~10ms; actually importing the SDK pulls in # transitive deps and costs ~800ms — too heavy for a @@ -245,7 +296,34 @@ def main() -> tuple[int, str, str]: else: stderr_str = str(stderr_b) s = stderr_str.lower() - if "no matching distribution" in s or "could not find a version" in s: + # Venv-specific patterns checked FIRST — they overlap with some pip + # patterns (e.g. "no module named ensurepip" could match no_pip OR + # venv_ensurepip_fail; the venv-stage interpretation is the right + # one when err_phase=="venv"). Order is venv-most-specific → + # pip-historical → generic. + if err_phase == "venv" and ( + "ensurepip is not available" in s + or ("ensurepip" in s and "returned non-zero" in s) + or "the virtual environment was not created" in s and "ensurepip" in s + ): + err_kind = "venv_ensurepip_fail" + elif err_phase == "venv" and ( + "[errno 36]" in s + or "file name too long" in s + or "path too long" in s + ): + err_kind = "venv_path_too_long" + elif err_phase == "venv" and ( + "no module named venv" in s + or "no module named 'venv'" in s + ): + err_kind = "venv_no_module" + elif err_phase == "venv" and ( + "[errno 17]" in s + or ("file exists" in s and "venv" in s) + ): + err_kind = "venv_already_exists" + elif "no matching distribution" in s or "could not find a version" in s: err_kind = "pip_no_match" elif "name or service not known" in s or "name resolution" in s \ or "nodename nor servname" in s or "temporary failure in name" in s: @@ -264,6 +342,15 @@ def main() -> tuple[int, str, str]: err_kind = "proxy_auth" elif "timeout" in s or "timed out" in s: err_kind = "stderr_timeout" + elif err_phase == "venv" and ( + "virtual environment was not created" in s + or "error: command" in s and "venv" in s + ): + # Generic venv-setup catch-all — matched AFTER the more specific + # venv patterns above so we don't shadow them, but BEFORE the + # other: fallback so generic venv setup failures get their own + # bucket instead of polluting the long-tail signature space. + err_kind = "venv_setup_failed" else: # First 60 chars of the last non-empty stderr line — bounded to # stay inside CC's metric value-length budget. Real failure modes @@ -372,6 +459,14 @@ if __name__ == "__main__": # failure path, e.g. state_dir.mkdir perm-denied). metrics["sdk_bootstrap_phase"] = _encode_phase(err_phase or "pre") metrics["sdk_bootstrap_err"] = _encode_err_kind(err_kind) + # For "other:" (encoded err==99), emit a bounded integer + # hash of the stderr tail so BQ can distinguish patterns inside + # the _uncategorized bucket without unbounded cardinality. Zero + # when err_kind is categorized — the schema reader treats 0 as + # "no signal", matching the absence convention. + sig = _encode_stderr_sig(err_kind) + if sig: + metrics["sdk_bootstrap_stderr_sig"] = sig pv = _plugin_version_int() if pv: metrics["pv"] = pv