diff --git a/container/agent-runner/src/formatter.ts b/container/agent-runner/src/formatter.ts index c0475b2..348d5ab 100644 --- a/container/agent-runner/src/formatter.ts +++ b/container/agent-runner/src/formatter.ts @@ -66,6 +66,18 @@ export function isClearCommand(msg: MessageInRow): boolean { return text.toLowerCase().startsWith('/clear'); } +/** + * True for any chat that needs the outer loop's command path: /clear plus + * admin/passthrough slash commands the SDK can only dispatch when they are + * a query's first input. Used by the follow-up poller to bail out and let + * the outer loop reopen the query. + */ +export function isRunnerCommand(msg: MessageInRow): boolean { + if (msg.kind !== 'chat' && msg.kind !== 'chat-sdk') return false; + const cat = categorizeMessage(msg).category; + return cat === 'admin' || cat === 'passthrough'; +} + // eslint-disable-next-line @typescript-eslint/no-explicit-any function extractSenderId(msg: MessageInRow, content: any): string | null { const raw: string | null = content?.senderId || content?.author?.userId || null; diff --git a/container/agent-runner/src/poll-loop.ts b/container/agent-runner/src/poll-loop.ts index 986489f..e825184 100644 --- a/container/agent-runner/src/poll-loop.ts +++ b/container/agent-runner/src/poll-loop.ts @@ -7,7 +7,7 @@ import { migrateLegacyContinuation, setContinuation, } from './db/session-state.js'; -import { formatMessages, extractRouting, categorizeMessage, isClearCommand, stripInternalTags, type RoutingContext } from './formatter.js'; +import { formatMessages, extractRouting, categorizeMessage, isClearCommand, isRunnerCommand, stripInternalTags, type RoutingContext } from './formatter.js'; import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js'; const POLL_INTERVAL_MS = 1000; @@ -255,30 +255,46 @@ async function processQuery( let done = false; // Concurrent polling: push follow-ups into the active query as they arrive. - // We do NOT force-end the stream on silence — keeping the query open is - // strictly cheaper than close+reopen (no cold prompt cache, no reconnect). + // We do NOT force-end the stream on silence — keeping the query open avoids + // re-spawning the SDK subprocess (~few seconds) and re-loading the .jsonl + // transcript on every turn. The Anthropic prompt cache is server-side with + // a 5-min TTL keyed on prefix hash, so stream lifecycle does NOT affect + // cache lifetime — close+reopen within 5 min still gets cache hits. // Stream liveness is decided host-side via the heartbeat file + processing // claim age (see src/host-sweep.ts); if something is truly stuck, the host // will kill the container and messages get reset to pending. let pollInFlight = false; + let endedForCommand = false; const pollHandle = setInterval(() => { - if (done || pollInFlight) return; + if (done || pollInFlight || endedForCommand) return; pollInFlight = true; void (async () => { try { - // Skip system messages (MCP tool responses) and /clear (needs fresh query). + const pending = getPendingMessages(); + + // Slash commands need a fresh query: /clear resets the SDK's + // resume id (fixed at sdkQuery() time); admin/passthrough commands + // (/compact, /cost, …) only dispatch when they're the first input + // of a query — pushed mid-stream they arrive as plain text and + // the SDK never runs them. End the stream and leave the rows + // pending; the outer loop handles them on next iteration via the + // canonical command path + formatMessagesWithCommands. + if (pending.some((m) => isRunnerCommand(m))) { + log('Pending slash command — ending stream so outer loop can process'); + endedForCommand = true; + query.end(); + return; + } + + // Skip system messages (MCP tool responses). // Thread routing is the router's concern — if a message landed in this // session, the agent should see it. Per-thread sessions already isolate // threads into separate containers; shared sessions intentionally merge // everything. Filtering on thread_id here caused deadlocks when the // initial batch and follow-ups had mismatched thread_ids (e.g. a // host-generated welcome trigger with null thread vs a Discord DM reply). - const newMessages = getPendingMessages().filter((m) => { - if (m.kind === 'system') return false; - if ((m.kind === 'chat' || m.kind === 'chat-sdk') && isClearCommand(m)) return false; - return true; - }); + const newMessages = pending.filter((m) => m.kind !== 'system'); if (newMessages.length === 0) return; const newIds = newMessages.map((m) => m.id);