fix(credentials): address review feedback
- wakeContainer now never throws — returns Promise<boolean>, catches internally. Closes the regression risk for the 5 awaited callers in agent-to-agent, interactive, and approvals/response-handler that the previous version left unwrapped. Router uses the boolean to stop the typing indicator on transient failure; host-sweep just awaits. - Tighten AUTH_REQUIRED_RE: anchor to start-of-string with the specific `·` (U+00B7) separator the CLI uses, so an agent that quotes the banner mid-sentence in a normal reply doesn't trip the classifier. - Log a one-line note from writeAuthRequiredMessage so substitutions are visible when debugging "user got the credentials message but I don't see why." - Add unit tests for ClaudeProvider.isAuthRequired covering both banner variants, trailing content, mid-sentence quoting, leading-prose quoting, alternate separators, and unrelated text. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,7 @@ const AUTH_REQUIRED_USER_TEXT =
|
||||
"I can't reach my Anthropic credentials right now. The operator running NanoClaw needs to re-run setup, or run `claude` in the project directory on the machine I'm running on.";
|
||||
|
||||
function writeAuthRequiredMessage(routing: RoutingContext): void {
|
||||
log('Auth-required detected — substituting host-aware message for the user');
|
||||
writeMessageOut({
|
||||
id: generateId(),
|
||||
kind: 'chat',
|
||||
|
||||
37
container/agent-runner/src/providers/claude.test.ts
Normal file
37
container/agent-runner/src/providers/claude.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
|
||||
import { ClaudeProvider } from './claude.js';
|
||||
|
||||
describe('ClaudeProvider.isAuthRequired', () => {
|
||||
const provider = new ClaudeProvider();
|
||||
|
||||
it('matches the "Not logged in" banner', () => {
|
||||
expect(provider.isAuthRequired('Not logged in · Please run /login')).toBe(true);
|
||||
});
|
||||
|
||||
it('matches the "Invalid API key" banner', () => {
|
||||
expect(provider.isAuthRequired('Invalid API key · Please run /login')).toBe(true);
|
||||
});
|
||||
|
||||
it('matches with trailing content after the banner', () => {
|
||||
expect(provider.isAuthRequired('Not logged in · Please run /login\n\nstack trace …')).toBe(true);
|
||||
});
|
||||
|
||||
it('does not match when the agent quotes the phrase mid-sentence', () => {
|
||||
const quoted = "The error 'Invalid API key · Please run /login' means your auth has expired.";
|
||||
expect(provider.isAuthRequired(quoted)).toBe(false);
|
||||
});
|
||||
|
||||
it('does not match when the agent leads its reply with the phrase in prose', () => {
|
||||
const prose = '"Not logged in · Please run /login" is a Claude Code error.';
|
||||
expect(provider.isAuthRequired(prose)).toBe(false);
|
||||
});
|
||||
|
||||
it('does not match a different separator (defensive against typos in agent output)', () => {
|
||||
expect(provider.isAuthRequired('Not logged in - Please run /login')).toBe(false);
|
||||
});
|
||||
|
||||
it('does not match unrelated text', () => {
|
||||
expect(provider.isAuthRequired('Tool execution failed: timeout')).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -237,12 +237,16 @@ const CLAUDE_CODE_AUTO_COMPACT_WINDOW = '165000';
|
||||
const STALE_SESSION_RE = /no conversation found|ENOENT.*\.jsonl|session.*not found/i;
|
||||
|
||||
/**
|
||||
* Auth-required detection. Matches Claude Code's output when no usable
|
||||
* Auth-required detection. Matches Claude Code's banner when no usable
|
||||
* credential is available — "Not logged in · Please run /login" or
|
||||
* "Invalid API key · Please run /login". The user can't run /login from
|
||||
* chat, so the poll-loop substitutes a host-aware message.
|
||||
*
|
||||
* Anchored to start-of-string with the specific `·` separator (U+00B7)
|
||||
* the CLI uses, so an agent that quotes the phrase verbatim mid-sentence
|
||||
* in a normal reply doesn't trip the classifier.
|
||||
*/
|
||||
const AUTH_REQUIRED_RE = /(Not logged in|Invalid API key)[\s\S]*?Please run \/login/i;
|
||||
const AUTH_REQUIRED_RE = /^(Not logged in|Invalid API key)\s*·\s*Please run \/login/;
|
||||
|
||||
export class ClaudeProvider implements AgentProvider {
|
||||
readonly supportsNativeSlashCommands = true;
|
||||
|
||||
@@ -58,7 +58,7 @@ const activeContainers = new Map<string, { process: ChildProcess; containerName:
|
||||
* a duplicate container against the same session directory, producing
|
||||
* racy double-replies.
|
||||
*/
|
||||
const wakePromises = new Map<string, Promise<void>>();
|
||||
const wakePromises = new Map<string, Promise<boolean>>();
|
||||
|
||||
export function getActiveContainerCount(): number {
|
||||
return activeContainers.size;
|
||||
@@ -73,20 +73,32 @@ export function isContainerRunning(sessionId: string): boolean {
|
||||
* (the in-flight wake promise is reused).
|
||||
*
|
||||
* The container runs the v2 agent-runner which polls the session DB.
|
||||
*
|
||||
* Contract: never throws. Returns `true` on successful spawn, `false` on
|
||||
* transient spawn failure (e.g. OneCLI gateway unreachable). Callers don't
|
||||
* need to wrap — the inbound row stays pending and host-sweep retries on
|
||||
* its next tick. Callers that care (e.g. the router's typing indicator)
|
||||
* can branch on the boolean.
|
||||
*/
|
||||
export function wakeContainer(session: Session): Promise<void> {
|
||||
export function wakeContainer(session: Session): Promise<boolean> {
|
||||
if (activeContainers.has(session.id)) {
|
||||
log.debug('Container already running', { sessionId: session.id });
|
||||
return Promise.resolve();
|
||||
return Promise.resolve(true);
|
||||
}
|
||||
const existing = wakePromises.get(session.id);
|
||||
if (existing) {
|
||||
log.debug('Container wake already in-flight — joining existing promise', { sessionId: session.id });
|
||||
return existing;
|
||||
}
|
||||
const promise = spawnContainer(session).finally(() => {
|
||||
wakePromises.delete(session.id);
|
||||
});
|
||||
const promise = spawnContainer(session)
|
||||
.then(() => true)
|
||||
.catch((err) => {
|
||||
log.warn('wakeContainer failed — host-sweep will retry', { sessionId: session.id, err });
|
||||
return false;
|
||||
})
|
||||
.finally(() => {
|
||||
wakePromises.delete(session.id);
|
||||
});
|
||||
wakePromises.set(session.id, promise);
|
||||
return promise;
|
||||
}
|
||||
|
||||
@@ -168,14 +168,9 @@ async function sweepSession(session: Session): Promise<void> {
|
||||
const dueCount = countDueMessages(inDb);
|
||||
if (dueCount > 0 && !isContainerRunning(session.id)) {
|
||||
log.info('Waking container for due messages', { sessionId: session.id, count: dueCount });
|
||||
try {
|
||||
await wakeContainer(session);
|
||||
} catch (err) {
|
||||
// Transient spawn failure (e.g. OneCLI gateway down). Leave messages
|
||||
// pending so the next sweep tick retries; don't abort the rest of
|
||||
// the sweep cycle for other sessions.
|
||||
log.warn('wakeContainer failed — will retry on next sweep', { sessionId: session.id, err });
|
||||
}
|
||||
// wakeContainer never throws — transient spawn failures (OneCLI down,
|
||||
// etc.) return false and leave messages pending for the next tick.
|
||||
await wakeContainer(session);
|
||||
}
|
||||
|
||||
const alive = isContainerRunning(session.id);
|
||||
|
||||
@@ -450,15 +450,11 @@ async function deliverToAgent(
|
||||
startTypingRefresh(session.id, session.agent_group_id, event.channelType, event.platformId, event.threadId);
|
||||
const freshSession = getSession(session.id);
|
||||
if (freshSession) {
|
||||
try {
|
||||
await wakeContainer(freshSession);
|
||||
} catch (err) {
|
||||
// Transient spawn failure (e.g. OneCLI gateway down). The inbound
|
||||
// row is already persisted — host-sweep will retry the wake on its
|
||||
// next tick. Don't bubble out of the channel adapter.
|
||||
log.warn('wakeContainer failed — host-sweep will retry', { sessionId: freshSession.id, err });
|
||||
stopTypingRefresh(freshSession.id);
|
||||
}
|
||||
const woke = await wakeContainer(freshSession);
|
||||
// wakeContainer never throws — it returns false on transient spawn
|
||||
// failure (host-sweep retries). Stop the typing indicator we just
|
||||
// started so it doesn't leak; the inbound row stays pending.
|
||||
if (!woke) stopTypingRefresh(freshSession.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user