diff --git a/container/agent-runner/src/poll-loop.ts b/container/agent-runner/src/poll-loop.ts index bd48db2..2846337 100644 --- a/container/agent-runner/src/poll-loop.ts +++ b/container/agent-runner/src/poll-loop.ts @@ -21,6 +21,20 @@ function generateId(): string { return `msg-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; } +const AUTH_REQUIRED_USER_TEXT = + "I can't reach my Anthropic credentials right now. The operator running NanoClaw needs to re-run setup, or run `claude` in the project directory on the machine I'm running on."; + +function writeAuthRequiredMessage(routing: RoutingContext): void { + writeMessageOut({ + id: generateId(), + kind: 'chat', + platform_id: routing.platformId, + channel_type: routing.channelType, + thread_id: routing.threadId, + content: JSON.stringify({ text: AUTH_REQUIRED_USER_TEXT }), + }); +} + export interface PollLoopConfig { provider: AgentProvider; /** @@ -171,7 +185,7 @@ export async function runPollLoop(config: PollLoopConfig): Promise { const skippedSet = new Set(skipped); const processingIds = ids.filter((id) => !commandIds.includes(id) && !skippedSet.has(id)); try { - const result = await processQuery(query, routing, processingIds, config.providerName); + const result = await processQuery(query, routing, processingIds, config.provider, config.providerName); if (result.continuation && result.continuation !== continuation) { continuation = result.continuation; setContinuation(config.providerName, continuation); @@ -189,15 +203,18 @@ export async function runPollLoop(config: PollLoopConfig): Promise { clearContinuation(config.providerName); } - // Write error response so the user knows something went wrong - writeMessageOut({ - id: generateId(), - kind: 'chat', - platform_id: routing.platformId, - channel_type: routing.channelType, - thread_id: routing.threadId, - content: JSON.stringify({ text: `Error: ${errMsg}` }), - }); + if (config.provider.isAuthRequired?.(errMsg)) { + writeAuthRequiredMessage(routing); + } else { + writeMessageOut({ + id: generateId(), + kind: 'chat', + platform_id: routing.platformId, + channel_type: routing.channelType, + thread_id: routing.threadId, + content: JSON.stringify({ text: `Error: ${errMsg}` }), + }); + } } // Ensure completed even if processQuery ended without a result event @@ -249,6 +266,7 @@ async function processQuery( query: AgentQuery, routing: RoutingContext, initialBatchIds: string[], + provider: AgentProvider, providerName: string, ): Promise { let queryContinuation: string | undefined; @@ -310,7 +328,11 @@ async function processQuery( // at all — either way the turn is finished. markCompleted(initialBatchIds); if (event.text) { - dispatchResultText(event.text, routing); + if (provider.isAuthRequired?.(event.text)) { + writeAuthRequiredMessage(routing); + } else { + dispatchResultText(event.text, routing); + } } } } diff --git a/container/agent-runner/src/providers/claude.ts b/container/agent-runner/src/providers/claude.ts index fbb077c..6dcdb5a 100644 --- a/container/agent-runner/src/providers/claude.ts +++ b/container/agent-runner/src/providers/claude.ts @@ -236,6 +236,14 @@ const CLAUDE_CODE_AUTO_COMPACT_WINDOW = '165000'; */ const STALE_SESSION_RE = /no conversation found|ENOENT.*\.jsonl|session.*not found/i; +/** + * Auth-required detection. Matches Claude Code's output when no usable + * credential is available — "Not logged in · Please run /login" or + * "Invalid API key · Please run /login". The user can't run /login from + * chat, so the poll-loop substitutes a host-aware message. + */ +const AUTH_REQUIRED_RE = /(Not logged in|Invalid API key)[\s\S]*?Please run \/login/i; + export class ClaudeProvider implements AgentProvider { readonly supportsNativeSlashCommands = true; @@ -259,6 +267,10 @@ export class ClaudeProvider implements AgentProvider { return STALE_SESSION_RE.test(msg); } + isAuthRequired(text: string): boolean { + return AUTH_REQUIRED_RE.test(text); + } + query(input: QueryInput): AgentQuery { const stream = new MessageStream(); stream.push(input.prompt); diff --git a/container/agent-runner/src/providers/types.ts b/container/agent-runner/src/providers/types.ts index 55ab919..99833a7 100644 --- a/container/agent-runner/src/providers/types.ts +++ b/container/agent-runner/src/providers/types.ts @@ -14,6 +14,14 @@ export interface AgentProvider { * (missing transcript, unknown session, etc.) and should be cleared. */ isSessionInvalid(err: unknown): boolean; + + /** + * True if the given text/error indicates the underlying SDK or CLI has no + * usable Anthropic auth (e.g. Claude Code's "Not logged in · Please run + * /login"). The poll-loop swaps the raw output for a host-aware message + * since the user can't run /login from chat. + */ + isAuthRequired?(text: string): boolean; } /** diff --git a/src/container-runner.ts b/src/container-runner.ts index 029b5fe..dc71248 100644 --- a/src/container-runner.ts +++ b/src/container-runner.ts @@ -435,20 +435,18 @@ async function buildContainerArgs( } // OneCLI gateway — injects HTTPS_PROXY + certs so container API calls - // are routed through the agent vault for credential injection. - try { - if (agentIdentifier) { - await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier }); - } - const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier }); - if (onecliApplied) { - log.info('OneCLI gateway applied', { containerName }); - } else { - log.warn('OneCLI gateway not applied — container will have no credentials', { containerName }); - } - } catch (err) { - log.warn('OneCLI gateway error — container will have no credentials', { containerName, err }); + // are routed through the agent vault for credential injection. Treated as + // a transient hard failure: if we can't wire the gateway, we don't spawn. + // The caller (router or host-sweep) catches the throw, leaves the inbound + // message pending, and the next sweep tick retries. + if (agentIdentifier) { + await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier }); } + const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier }); + if (!onecliApplied) { + throw new Error('OneCLI gateway not applied — refusing to spawn container without credentials'); + } + log.info('OneCLI gateway applied', { containerName }); // Host gateway args.push(...hostGatewayArgs()); diff --git a/src/host-sweep.ts b/src/host-sweep.ts index 4dc2fb7..ff88fb0 100644 --- a/src/host-sweep.ts +++ b/src/host-sweep.ts @@ -168,7 +168,14 @@ async function sweepSession(session: Session): Promise { const dueCount = countDueMessages(inDb); if (dueCount > 0 && !isContainerRunning(session.id)) { log.info('Waking container for due messages', { sessionId: session.id, count: dueCount }); - await wakeContainer(session); + try { + await wakeContainer(session); + } catch (err) { + // Transient spawn failure (e.g. OneCLI gateway down). Leave messages + // pending so the next sweep tick retries; don't abort the rest of + // the sweep cycle for other sessions. + log.warn('wakeContainer failed — will retry on next sweep', { sessionId: session.id, err }); + } } const alive = isContainerRunning(session.id); diff --git a/src/router.ts b/src/router.ts index 3cf0192..e429977 100644 --- a/src/router.ts +++ b/src/router.ts @@ -27,7 +27,7 @@ import { getMessagingGroupWithAgentCount, } from './db/messaging-groups.js'; import { findSessionForAgent } from './db/sessions.js'; -import { startTypingRefresh } from './modules/typing/index.js'; +import { startTypingRefresh, stopTypingRefresh } from './modules/typing/index.js'; import { log } from './log.js'; import { resolveSession, writeSessionMessage, writeOutboundDirect } from './session-manager.js'; import { wakeContainer } from './container-runner.js'; @@ -450,7 +450,15 @@ async function deliverToAgent( startTypingRefresh(session.id, session.agent_group_id, event.channelType, event.platformId, event.threadId); const freshSession = getSession(session.id); if (freshSession) { - await wakeContainer(freshSession); + try { + await wakeContainer(freshSession); + } catch (err) { + // Transient spawn failure (e.g. OneCLI gateway down). The inbound + // row is already persisted — host-sweep will retry the wake on its + // next tick. Don't bubble out of the channel adapter. + log.warn('wakeContainer failed — host-sweep will retry', { sessionId: freshSession.id, err }); + stopTypingRefresh(freshSession.id); + } } } }