mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-05-11 14:05:52 -03:00
fix(telegram): prevent zombie pollers from blocking new sessions with 409 Conflict (#1349)
* fix(telegram): prevent zombie pollers from blocking new sessions

  The MCP server runs as a grandchild of the CLI (via `bun run start` → shell → `bun server.ts`). When the CLI is killed uncleanly (SIGKILL, crash, terminal close), the grandchild survives as an orphan and keeps long-polling getUpdates indefinitely. Telegram allows only one consumer per token, so every subsequent session sees 409 Conflict and the existing retry loop spins forever.

  Three layered mitigations:

  - PID lockfile (STATE_DIR/bot.pid): on startup, SIGTERM any stale holder before claiming the slot, so a fresh session always wins.
  - Orphan watchdog: every 5s check for parent reparenting (POSIX ppid change) or a dead stdin pipe, and self-terminate. Covers cases where the existing stdin end/close events never fire through the wrapper.
  - 409 retry cap: give up after 8 attempts (~28s) instead of looping forever, and bail immediately if shutdown has begun.

  Also adds a SIGHUP handler and removes the pidfile on clean shutdown (only if still owned by this process).

* chore(telegram): bump version to 0.0.5

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
1057d02c53
commit
58578a456a
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "telegram",
|
"name": "telegram",
|
||||||
"description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
|
"description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
|
||||||
"version": "0.0.4",
|
"version": "0.0.5",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"telegram",
|
"telegram",
|
||||||
"messaging",
|
"messaging",
|
||||||
|
|||||||
@ -51,6 +51,22 @@ if (!TOKEN) {
|
|||||||
process.exit(1)
|
process.exit(1)
|
||||||
}
|
}
|
||||||
const INBOX_DIR = join(STATE_DIR, 'inbox')
|
const INBOX_DIR = join(STATE_DIR, 'inbox')
|
||||||
|
const PID_FILE = join(STATE_DIR, 'bot.pid')
|
||||||
|
|
||||||
|
// Telegram allows exactly one getUpdates consumer per token. If a previous
|
||||||
|
// session crashed (SIGKILL, terminal closed) its server.ts grandchild can
|
||||||
|
// survive as an orphan and hold the slot forever, so every new session sees
|
||||||
|
// 409 Conflict. Kill any stale holder before we start polling.
|
||||||
|
mkdirSync(STATE_DIR, { recursive: true, mode: 0o700 })
|
||||||
|
try {
|
||||||
|
const stale = parseInt(readFileSync(PID_FILE, 'utf8'), 10)
|
||||||
|
if (stale > 1 && stale !== process.pid) {
|
||||||
|
process.kill(stale, 0)
|
||||||
|
process.stderr.write(`telegram channel: replacing stale poller pid=${stale}\n`)
|
||||||
|
process.kill(stale, 'SIGTERM')
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
writeFileSync(PID_FILE, String(process.pid))
|
||||||
|
|
||||||
// Last-resort safety net — without these the process dies silently on any
|
// Last-resort safety net — without these the process dies silently on any
|
||||||
// unhandled promise rejection. With them it logs and keeps serving tools.
|
// unhandled promise rejection. With them it logs and keeps serving tools.
|
||||||
@ -621,6 +637,9 @@ function shutdown(): void {
|
|||||||
if (shuttingDown) return
|
if (shuttingDown) return
|
||||||
shuttingDown = true
|
shuttingDown = true
|
||||||
process.stderr.write('telegram channel: shutting down\n')
|
process.stderr.write('telegram channel: shutting down\n')
|
||||||
|
try {
|
||||||
|
if (parseInt(readFileSync(PID_FILE, 'utf8'), 10) === process.pid) rmSync(PID_FILE)
|
||||||
|
} catch {}
|
||||||
// bot.stop() signals the poll loop to end; the current getUpdates request
|
// bot.stop() signals the poll loop to end; the current getUpdates request
|
||||||
// may take up to its long-poll timeout to return. Force-exit after 2s.
|
// may take up to its long-poll timeout to return. Force-exit after 2s.
|
||||||
setTimeout(() => process.exit(0), 2000)
|
setTimeout(() => process.exit(0), 2000)
|
||||||
@ -630,6 +649,19 @@ process.stdin.on('end', shutdown)
|
|||||||
process.stdin.on('close', shutdown)
|
process.stdin.on('close', shutdown)
|
||||||
process.on('SIGTERM', shutdown)
|
process.on('SIGTERM', shutdown)
|
||||||
process.on('SIGINT', shutdown)
|
process.on('SIGINT', shutdown)
|
||||||
|
process.on('SIGHUP', shutdown)
|
||||||
|
|
||||||
|
// Orphan watchdog: stdin events above don't reliably fire when the parent
|
||||||
|
// chain (`bun run` wrapper → shell → us) is severed by a crash. Poll for
|
||||||
|
// reparenting (POSIX) or a dead stdin pipe and self-terminate.
|
||||||
|
const bootPpid = process.ppid
|
||||||
|
setInterval(() => {
|
||||||
|
const orphaned =
|
||||||
|
(process.platform !== 'win32' && process.ppid !== bootPpid) ||
|
||||||
|
process.stdin.destroyed ||
|
||||||
|
process.stdin.readableEnded
|
||||||
|
if (orphaned) shutdown()
|
||||||
|
}, 5000).unref()
|
||||||
|
|
||||||
// Commands are DM-only. Responding in groups would: (1) leak pairing codes via
|
// Commands are DM-only. Responding in groups would: (1) leak pairing codes via
|
||||||
// /status to other group members, (2) confirm bot presence in non-allowlisted
|
// /status to other group members, (2) confirm bot presence in non-allowlisted
|
||||||
@ -975,7 +1007,15 @@ void (async () => {
|
|||||||
})
|
})
|
||||||
return // bot.stop() was called — clean exit from the loop
|
return // bot.stop() was called — clean exit from the loop
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
if (shuttingDown) return
|
||||||
if (err instanceof GrammyError && err.error_code === 409) {
|
if (err instanceof GrammyError && err.error_code === 409) {
|
||||||
|
if (attempt >= 8) {
|
||||||
|
process.stderr.write(
|
||||||
|
`telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
|
||||||
|
`another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
const delay = Math.min(1000 * attempt, 15000)
|
const delay = Math.min(1000 * attempt, 15000)
|
||||||
const detail = attempt === 1
|
const detail = attempt === 1
|
||||||
? ' — another instance is polling (zombie session, or a second Claude Code running?)'
|
? ' — another instance is polling (zombie session, or a second Claude Code running?)'
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user