refactor(agent-runner): decouple provider interface from Claude specifics

Reshape AgentProvider so provider-specific assumptions stop leaking into the generic layer. No change to what reaches sdkQuery() — same values, different plumbing.

- QueryInput: opaque `continuation` replaces `sessionId` + `resumeAt`; `systemContext.instructions` replaces ambiguous `systemPrompt`; `mcpServers`, `env`, `additionalDirectories` move to `ProviderOptions` at construction time.
- AgentProvider gains `isSessionInvalid(err)` and `supportsNativeSlashCommands` so the poll-loop stops regex-matching Claude error strings and gates passthrough slash commands per provider.
- ClaudeProvider owns `CLAUDE_CODE_AUTO_COMPACT_WINDOW` and the stale-session regex internally.
- ProviderEvent.activity kept and documented as the liveness signal (fires on every SDK message so the idle timer stays honest during long tool runs); init carries `continuation` instead of `sessionId`.
- poll-loop drops mcpServers/env/systemPrompt from its config; admin user id now passed explicitly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 10:25:29 +03:00
parent e07158e194
commit b63dd186df
8 changed files with 156 additions and 114 deletions
--- a/container/agent-runner/src/poll-loop.ts
+++ b/container/agent-runner/src/poll-loop.ts
@@ -4,7 +4,7 @@ import { writeMessageOut } from './db/messages-out.js';
 import { touchHeartbeat, clearStaleProcessingAcks } from './db/connection.js';
 import { getStoredSessionId, setStoredSessionId, clearStoredSessionId } from './db/session-state.js';
 import { formatMessages, extractRouting, categorizeMessage, type RoutingContext } from './formatter.js';
-import type { AgentProvider, AgentQuery, McpServerConfig, ProviderEvent } from './providers/types.js';
+import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js';

 const POLL_INTERVAL_MS = 1000;
 const ACTIVE_POLL_INTERVAL_MS = 500;
@@ -21,10 +21,11 @@ function generateId(): string {
 export interface PollLoopConfig {
  provider: AgentProvider;
  cwd: string;
-  mcpServers: Record<string, McpServerConfig>;
-  systemPrompt?: string;
-  env: Record<string, string | undefined>;
-  additionalDirectories?: string[];
+  systemContext?: {
+    instructions?: string;
+  };
+  /** Admin user ID for permission checks on admin commands (e.g. /clear). */
+  adminUserId?: string;
 }

 /**
@@ -38,15 +39,14 @@ export interface PollLoopConfig {
 * 6. Loop
 */
 export async function runPollLoop(config: PollLoopConfig): Promise<void> {
-  // Resume the SDK session from a prior container run if one was persisted.
-  // The SDK's .jsonl transcripts live in the shared ~/.claude mount, so the
-  // conversation history is already on disk — we just need the session ID
-  // to tell the SDK which one to continue.
-  let sessionId: string | undefined = getStoredSessionId();
-  let resumeAt: string | undefined;
+  // Resume the agent's prior session from a previous container run if one
+  // was persisted. The continuation is opaque to the poll-loop — the
+  // provider decides how to use it (Claude resumes a .jsonl transcript,
+  // other providers may reload a thread ID, etc.).
+  let continuation: string | undefined = getStoredSessionId();

-  if (sessionId) {
-    log(`Resuming SDK session ${sessionId}`);
+  if (continuation) {
+    log(`Resuming agent session ${continuation}`);
  }

  // Clear leftover 'processing' acks from a previous crashed container.
@@ -75,7 +75,7 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
    const routing = extractRouting(messages);

    // Handle commands: categorize chat messages
-    const adminUserId = config.env.NANOCLAW_ADMIN_USER_ID;
+    const adminUserId = config.adminUserId;
    const normalMessages = [];
    const commandIds: string[] = [];

@@ -110,9 +110,8 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
        }
        // Handle admin commands directly
        if (cmdInfo.command === '/clear') {
-          log('Clearing session (resetting sessionId)');
-          sessionId = undefined;
-          resumeAt = undefined;
+          log('Clearing session (resetting continuation)');
+          continuation = undefined;
          clearStoredSessionId();
          writeMessageOut({
            id: generateId(),
@@ -149,43 +148,37 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
      continue;
    }

-    // Format messages: passthrough commands get raw text, others get XML
-    const prompt = formatMessagesWithCommands(normalMessages);
+    // Format messages: passthrough commands get raw text (only if the
+    // provider natively handles slash commands), others get XML.
+    const prompt = formatMessagesWithCommands(normalMessages, config.provider.supportsNativeSlashCommands);

    log(`Processing ${normalMessages.length} message(s), kinds: ${[...new Set(normalMessages.map((m) => m.kind))].join(',')}`);

    const query = config.provider.query({
      prompt,
-      sessionId,
-      resumeAt,
+      continuation,
      cwd: config.cwd,
-      mcpServers: config.mcpServers,
-      systemPrompt: config.systemPrompt,
-      env: config.env,
-      additionalDirectories: config.additionalDirectories,
+      systemContext: config.systemContext,
    });

    // Process the query while concurrently polling for new messages
    const processingIds = ids.filter((id) => !commandIds.includes(id));
    try {
      const result = await processQuery(query, routing, config, processingIds);
-      if (result.sessionId && result.sessionId !== sessionId) {
-        sessionId = result.sessionId;
-        setStoredSessionId(sessionId);
+      if (result.continuation && result.continuation !== continuation) {
+        continuation = result.continuation;
+        setStoredSessionId(continuation);
      }
-      if (result.resumeAt) resumeAt = result.resumeAt;
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      log(`Query error: ${errMsg}`);

-      // Stale/corrupt session recovery: if the SDK can't find the session
-      // we asked it to resume, clear the stored ID so the next attempt
-      // starts fresh. The transcript .jsonl can go missing after a crash
-      // mid-write, manual deletion, or disk-full.
-      if (sessionId && /no conversation found|ENOENT.*\.jsonl|session.*not found/i.test(errMsg)) {
-        log(`Stale session detected (${sessionId}) — clearing for next retry`);
-        sessionId = undefined;
-        resumeAt = undefined;
+      // Stale/corrupt continuation recovery: ask the provider whether
+      // this error means the stored continuation is unusable, and clear
+      // it so the next attempt starts fresh.
+      if (continuation && config.provider.isSessionInvalid(err)) {
+        log(`Stale session detected (${continuation}) — clearing for next retry`);
+        continuation = undefined;
        clearStoredSessionId();
      }

@@ -207,17 +200,16 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {

 /**
 * Format messages, handling passthrough commands differently.
- * Passthrough commands (e.g., /foo) are sent raw (no XML wrapping).
- * Admin commands from authorized users are formatted as system commands.
- * Normal messages get standard XML formatting.
+ * When the provider handles slash commands natively (Claude Code),
+ * passthrough commands are sent raw (no XML wrapping) so the SDK can
+ * dispatch them. Otherwise they fall through to standard XML formatting.
 */
-function formatMessagesWithCommands(messages: MessageInRow[]): string {
-  // Check if any message is a passthrough command
+function formatMessagesWithCommands(messages: MessageInRow[], nativeSlashCommands: boolean): string {
  const parts: string[] = [];
  const normalBatch: MessageInRow[] = [];

  for (const msg of messages) {
-    if (msg.kind === 'chat' || msg.kind === 'chat-sdk') {
+    if (nativeSlashCommands && (msg.kind === 'chat' || msg.kind === 'chat-sdk')) {
      const cmdInfo = categorizeMessage(msg);
      if (cmdInfo.category === 'passthrough' || cmdInfo.category === 'admin') {
        // Flush normal batch first
@@ -241,12 +233,11 @@ function formatMessagesWithCommands(messages: MessageInRow[]): string {
 }

 interface QueryResult {
-  sessionId?: string;
-  resumeAt?: string;
+  continuation?: string;
 }

 async function processQuery(query: AgentQuery, routing: RoutingContext, config: PollLoopConfig, processingIds: string[]): Promise<QueryResult> {
-  let querySessionId: string | undefined;
+  let queryContinuation: string | undefined;
  let done = false;
  let lastEventTime = Date.now();

@@ -289,7 +280,7 @@ async function processQuery(query: AgentQuery, routing: RoutingContext, config:
      touchHeartbeat();

      if (event.type === 'init') {
-        querySessionId = event.sessionId;
+        queryContinuation = event.continuation;
      } else if (event.type === 'result' && event.text) {
        dispatchResultText(event.text, routing);
      }
@@ -299,13 +290,13 @@ async function processQuery(query: AgentQuery, routing: RoutingContext, config:
    clearInterval(pollHandle);
  }

-  return { sessionId: querySessionId };
+  return { continuation: queryContinuation };
 }

 function handleEvent(event: ProviderEvent, _routing: RoutingContext): void {
  switch (event.type) {
    case 'init':
-      log(`Session: ${event.sessionId}`);
+      log(`Session: ${event.continuation}`);
      break;
    case 'result':
      log(`Result: ${event.text ? event.text.slice(0, 200) : '(empty)'}`);