feat(setup): Claude-assisted error recovery with resume-at-step retry

When a setup step fails — whether hard via fail() or soft via the "What's left" / "Skipping the first chat" notes — offer to ask Claude to diagnose. On consent, spawn `claude -p --output-format stream-json` with a scrolling 3-line action window ("Reading x", "Running y") so the 1–4 minute investigations feel active rather than hung. No hard timeout: debugging can take time, Ctrl-C is the escape hatch. The prompt is minimal: one-paragraph framing, failed step name + msg + hint, and a list of file references (not contents). Claude's Read/Grep tools fetch what they need. A per-step map in claude-assist.ts gives the most relevant files per step; the rest is README + auto.ts + logs/setup.log + the per-step raw log. Claude responds with REASON + COMMAND lines. We show the reason in a clack note, prefill the command via setup/run-suggested.sh (bash 4+ readline, 3.x fallback to Enter-to-run), and eval on the user's confirm. When the user runs a fix, fail() now offers to retry the failing step rather than aborting. setup/logs.ts tracks successfully-completed step names in-memory; fail() threads those as NANOCLAW_SKIP on a spawnSync retry, so the child picks up exactly where the parent left off — no rebuilding containers or reinstalling OneCLI. Other polish in this change: - fitToWidth + dimWrap in lib/theme.ts to prevent long spinner labels from soft-wrapping (each terminal row stacks a stale copy otherwise). - Shorter container step label ("Preparing your assistant's sandbox…") so it fits on narrow terminals. - Wordmark anchored in the clack intro line on every run. - All 25 existing fail() call sites updated to await fail(...) since fail is now async. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:42:32 +03:00
parent dfcbab5364
commit 4859d8fb2d
8 changed files with 589 additions and 37 deletions
--- a/setup/lib/claude-assist.ts
+++ b/setup/lib/claude-assist.ts
@@ -0,0 +1,410 @@
+/**
+ * Offer Claude-assisted debugging when a setup step fails.
+ *
+ * Flow:
+ *   1. Check `claude` is on PATH and has a working credential. If not,
+ *      silently skip — pre-auth failures can't use this path.
+ *   2. Ask the user for consent ("Want me to ask Claude for a fix?").
+ *   3. Build a minimal prompt: the one-paragraph situation, the failing
+ *      step's name/message/hint, and a short list of *file references*
+ *      (not contents) so Claude can Read what it needs on its own.
+ *   4. Spawn `claude -p --output-format stream-json` with no hard timeout
+ *      (Ctrl-C aborts) and a scrolling window of recent tool actions.
+ *   5. Parse `REASON:` / `COMMAND:` out of the response. Show the reason
+ *      in a clack note, then hand off to `setup/run-suggested.sh` for
+ *      editable pre-fill + exec.
+ *
+ * Skippable with NANOCLAW_SKIP_CLAUDE_ASSIST=1 for CI/scripted runs.
+ */
+import { execSync, spawn } from 'child_process';
+import fs from 'fs';
+import path from 'path';
+
+import * as p from '@clack/prompts';
+import k from 'kleur';
+
+import { ensureAnswer } from './runner.js';
+import { fitToWidth } from './theme.js';
+
+/**
+ * Description of the failed setup step, used to build the prompt that is
+ * sent to Claude.
+ */
+export interface AssistContext {
+  /** Step name; lookup key into STEP_FILES and echoed as "Failed step". */
+  stepName: string;
+  /** Error text; included in the prompt as "Error shown to the user". */
+  msg: string;
+  /** Optional hint; included in the prompt as "Hint shown to the user". */
+  hint?: string;
+  /** Absolute path to the per-step raw log, if the caller has one. */
+  rawLogPath?: string;
+}
+
+/**
+ * File-path hints per step. Claude reads these on its own via its Read tool
+ * rather than us stuffing contents into the prompt. Keys are step names as
+ * they appear in fail() calls; values are repo-relative paths.
+ *
+ * A step name with no entry here contributes no step-specific files; the
+ * prompt then lists only the big-picture files and the logs.
+ */
+const STEP_FILES: Record<string, string[]> = {
+  bootstrap: ['setup.sh', 'setup/install-node.sh', 'nanoclaw.sh'],
+  environment: ['setup/environment.ts'],
+  container: [
+    'setup/container.ts',
+    'setup/install-docker.sh',
+    'container/Dockerfile',
+  ],
+  onecli: ['setup/onecli.ts'],
+  auth: [
+    'setup/auth.ts',
+    'setup/register-claude-token.sh',
+    'setup/install-claude.sh',
+  ],
+  mounts: ['setup/mounts.ts'],
+  service: ['setup/service.ts'],
+  'cli-agent': ['setup/cli-agent.ts', 'scripts/init-cli-agent.ts'],
+  channel: ['setup/auto.ts'],
+  verify: ['setup/verify.ts'],
+  // Channel-specific sub-steps:
+  'telegram-install': ['setup/add-telegram.sh', 'setup/channels/telegram.ts'],
+  'telegram-validate': ['setup/channels/telegram.ts'],
+  'pair-telegram': ['setup/pair-telegram.ts', 'setup/channels/telegram.ts'],
+  'discord-install': ['setup/add-discord.sh', 'setup/channels/discord.ts'],
+  'init-first-agent': [
+    'scripts/init-first-agent.ts',
+    'setup/channels/telegram.ts',
+    'setup/channels/discord.ts',
+  ],
+};
+
+// Listed first in every prompt's references, whichever step failed.
+const BIG_PICTURE_FILES = ['README.md', 'setup/auto.ts'];
+
+/**
+ * Walk the user through Claude-assisted recovery for one failed step.
+ *
+ * Resolves to `true` once the user has agreed to run the command Claude
+ * proposed and the helper has been handed that command, so the caller can
+ * offer a retry instead of aborting. Every other path resolves to `false`:
+ * assist disabled via NANOCLAW_SKIP_CLAUDE_ASSIST=1, `claude` not on PATH,
+ * consent declined, no usable reply, no parseable command, or the user
+ * chose not to run it.
+ *
+ * @param ctx Details of the failure that go into the prompt.
+ * @param projectRoot Directory Claude and the helper script run in;
+ *   defaults to the current working directory.
+ */
+export async function offerClaudeAssist(
+  ctx: AssistContext,
+  projectRoot: string = process.cwd(),
+): Promise<boolean> {
+  const optedOut = process.env.NANOCLAW_SKIP_CLAUDE_ASSIST === '1';
+  if (optedOut || !isClaudeUsable()) return false;
+
+  const consent = await p.confirm({
+    message: 'Want me to ask Claude to diagnose this?',
+    initialValue: true,
+  });
+  if (!ensureAnswer(consent)) return false;
+
+  const response = await queryClaudeUnderSpinner(
+    buildPrompt(ctx, projectRoot),
+    projectRoot,
+  );
+  if (!response) return false;
+
+  const suggestion = parseResponse(response);
+  if (suggestion === null) {
+    // Show the start of the raw reply so the user still gets something.
+    p.log.warn("Claude responded but I couldn't parse a command out of it.");
+    p.log.message(k.dim(response.trim().slice(0, 500)));
+    return false;
+  }
+
+  const { reason, command } = suggestion;
+  p.note(`${reason}\n\n${k.cyan('$')} ${command}`, "Claude's suggestion");
+
+  // Defaults to "no": running a generated command must be an explicit choice.
+  const approval = await p.confirm({
+    message: 'Run this command? (you can edit it before executing)',
+    initialValue: false,
+  });
+  if (!ensureAnswer(approval)) return false;
+
+  await runSuggested(command, projectRoot);
+  return true;
+}
+
+/**
+ * Report whether a `claude` executable can be found on PATH.
+ *
+ * Only availability is checked. Authentication is deliberately not
+ * pre-checked with a separate round trip: a real query is attempted and
+ * its error is surfaced if the token isn't registered.
+ */
+function isClaudeUsable(): boolean {
+  let found = true;
+  try {
+    execSync('command -v claude', { stdio: 'ignore' });
+  } catch {
+    found = false;
+  }
+  return found;
+}
+
+/**
+ * Compose the diagnosis prompt sent to Claude.
+ *
+ * The prompt holds a short framing paragraph, the failed step with its
+ * error (and hint, when present), a de-duplicated list of file references
+ * for Claude to read on its own, and the REASON/COMMAND answer format.
+ *
+ * @param ctx Details of the failed step.
+ * @param projectRoot Base used to turn `ctx.rawLogPath` into a relative path.
+ */
+function buildPrompt(ctx: AssistContext, projectRoot: string): string {
+  // Point at the step's own raw log when known, else at the log directory.
+  const rawLogRef = ctx.rawLogPath
+    ? path.relative(projectRoot, ctx.rawLogPath)
+    : 'logs/setup-steps/';
+
+  // A Set drops repeated paths while keeping first-seen order.
+  const references = [
+    ...new Set([
+      ...BIG_PICTURE_FILES,
+      ...(STEP_FILES[ctx.stepName] ?? []),
+      'logs/setup.log',
+      rawLogRef,
+    ]),
+  ];
+  const referenceLines = references.map((ref) => `  - ${ref}`);
+
+  const situation = [
+    "I'm trying to set up NanoClaw on my machine and ran into an issue",
+    'during the setup flow. Please read the referenced files to understand',
+    'the flow and the step that failed, look at the logs to see what went',
+    'wrong, then suggest a single bash command I can run to fix it.',
+  ];
+
+  const failure = [
+    `Failed step: ${ctx.stepName}`,
+    `Error shown to the user: ${ctx.msg}`,
+    ctx.hint ? `Hint shown to the user: ${ctx.hint}\n` : '',
+  ];
+
+  const answerFormat = [
+    'Respond in EXACTLY this format, nothing before or after:',
+    '',
+    'REASON: <one short line describing the root cause>',
+    'COMMAND: <single bash command, one line, no backticks>',
+    '',
+    'If no safe single command can fix it, respond with:',
+    'REASON: <why>',
+    'COMMAND: none',
+  ];
+
+  return [
+    ...situation,
+    '',
+    ...failure,
+    'References (read as needed with your Read tool):',
+    ...referenceLines,
+    '',
+    ...answerFormat,
+  ].join('\n');
+}
+
+/**
+ * Fixed-height scrolling window for Claude's progress.
+ *
+ * Clack's spinner only owns one line, so long tool-use breadcrumbs wrap
+ * and blow out the gutter. Instead we manage a 4-line window ourselves:
+ * a spinner header + 3 lines showing the most recent tool actions. On
+ * each update we use raw ANSI (cursor up, clear line) to redraw in
+ * place. When the query finishes we clear the whole block and emit a
+ * single `p.log.success` / `p.log.error` so the flow continues in
+ * standard clack style.
+ */
+const WINDOW_SIZE = 3;
+const SPINNER_FRAMES = ['◒', '◐', '◓', '◑'];
+const HIDE_CURSOR = '\x1b[?25l';
+const SHOW_CURSOR = '\x1b[?25h';
+
+/**
+ * Pipe `prompt` into `claude -p` (stream-json output) inside `projectRoot`
+ * and render the progress window while it runs.
+ *
+ * @param prompt Text written to the child's stdin.
+ * @param projectRoot Working directory for the `claude` process.
+ * @returns The assistant's text blocks, one per line, when the process
+ *   exits 0 with non-empty text; otherwise `null`, after logging the last
+ *   lines of stderr.
+ */
+async function queryClaudeUnderSpinner(
+  prompt: string,
+  projectRoot: string,
+): Promise<string | null> {
+  const out = process.stdout;
+  const start = Date.now();
+  const actions: string[] = [];
+  let frameIdx = 0;
+
+  const redraw = (): void => {
+    // Move cursor back to the start of the block (WINDOW_SIZE + 1 = header + window).
+    out.write(`\x1b[${WINDOW_SIZE + 1}A`);
+
+    const elapsed = Math.round((Date.now() - start) / 1000);
+    const icon = SPINNER_FRAMES[frameIdx % SPINNER_FRAMES.length];
+    const suffix = ` (${elapsed}s)`;
+    const header = fitToWidth('Asking Claude to diagnose…', suffix);
+    out.write(`\x1b[2K${k.cyan(icon)}  ${header}${k.dim(suffix)}\n`);
+
+    // Show the last WINDOW_SIZE actions; unused rows keep only the gutter.
+    for (let i = 0; i < WINDOW_SIZE; i++) {
+      const idx = actions.length - WINDOW_SIZE + i;
+      const action = idx >= 0 ? actions[idx] : '';
+      out.write('\x1b[2K');
+      if (action) {
+        out.write(`${k.gray('│')}  ${k.dim(`▸ ${fitToWidth(action, '')}`)}`);
+      } else {
+        out.write(k.gray('│'));
+      }
+      out.write('\n');
+    }
+  };
+
+  const clearBlock = (): void => {
+    // Blank every row of the block, then park the cursor at its top.
+    out.write(`\x1b[${WINDOW_SIZE + 1}A`);
+    for (let i = 0; i < WINDOW_SIZE + 1; i++) {
+      out.write('\x1b[2K\n');
+    }
+    out.write(`\x1b[${WINDOW_SIZE + 1}A`);
+  };
+
+  // Seed the block: move cursor to a fresh line, then write (header + window)
+  // blank lines so `redraw()`'s cursor-up math lands correctly. Hide the
+  // cursor for the duration so the redraw doesn't flicker.
+  out.write(HIDE_CURSOR);
+  for (let i = 0; i < WINDOW_SIZE + 1; i++) out.write('\n');
+  redraw();
+
+  // If the user Ctrl-C's during the query, we never reach `finish()` —
+  // add an exit hook so the cursor comes back regardless.
+  const restoreCursorOnExit = (): void => {
+    out.write(SHOW_CURSOR);
+  };
+  process.once('exit', restoreCursorOnExit);
+
+  const frameTick = setInterval(() => {
+    frameIdx++;
+    redraw();
+  }, 250);
+
+  return new Promise((resolve) => {
+    let lineBuf = '';
+    let finalText = '';
+    let stderr = '';
+    let settled = false;
+
+    const finish = (
+      kind: 'ok' | 'error',
+      payload: string | null,
+    ): void => {
+      clearInterval(frameTick);
+      clearBlock();
+      out.write(SHOW_CURSOR);
+      process.off('exit', restoreCursorOnExit);
+      const elapsed = Math.round((Date.now() - start) / 1000);
+      const suffix = ` (${elapsed}s)`;
+      if (kind === 'ok') {
+        p.log.success(`${fitToWidth('Claude replied.', suffix)}${k.dim(suffix)}`);
+        resolve(payload);
+      } else {
+        p.log.error(
+          `${fitToWidth("Claude couldn't help here.", suffix)}${k.dim(suffix)}`,
+        );
+        const tail = stderr.trim().split('\n').slice(-3).join('\n');
+        if (tail) p.log.message(k.dim(tail));
+        resolve(null);
+      }
+    };
+
+    // Parse one stream-json line and route it to the window / final text.
+    const consumeLine = (line: string): void => {
+      if (!line.trim()) return;
+      try {
+        const event = JSON.parse(line) as StreamEvent;
+        handleStreamEvent(event, {
+          setAction: (a) => {
+            actions.push(a);
+            redraw();
+          },
+          appendText: (t) => {
+            // Text blocks are not guaranteed to end with a newline. Keep
+            // each on its own line so a REASON:/COMMAND: answer that follows
+            // earlier narration still starts at a line boundary, which is
+            // what the line-anchored parsing of the reply requires.
+            finalText += `${t}\n`;
+          },
+        });
+      } catch {
+        // Malformed or non-JSON line — ignore.
+      }
+    };
+
+    // No hard timeout — debugging can take a long time, and the cost of
+    // cutting Claude off mid-investigation is worse than letting the
+    // spinner run. The user can Ctrl-C if they want to abort.
+    const child = spawn(
+      'claude',
+      [
+        '-p',
+        '--output-format',
+        'stream-json',
+        '--verbose',
+        '--permission-mode',
+        'bypassPermissions',
+      ],
+      { cwd: projectRoot, stdio: ['pipe', 'pipe', 'pipe'] },
+    );
+
+    // Decode at the stream level so a multi-byte UTF-8 character split
+    // across two chunks is not corrupted by per-chunk toString().
+    child.stdout.setEncoding('utf8');
+    child.stderr.setEncoding('utf8');
+
+    child.stdout.on('data', (chunk: string) => {
+      lineBuf += chunk;
+      let idx: number;
+      while ((idx = lineBuf.indexOf('\n')) !== -1) {
+        const line = lineBuf.slice(0, idx);
+        lineBuf = lineBuf.slice(idx + 1);
+        consumeLine(line);
+      }
+    });
+    child.stderr.on('data', (chunk: string) => {
+      stderr += chunk;
+    });
+    child.on('close', (code) => {
+      if (settled) return;
+      settled = true;
+      // Flush a final event that was not newline-terminated.
+      consumeLine(lineBuf);
+      lineBuf = '';
+      if (code === 0 && finalText.trim()) finish('ok', finalText);
+      else finish('error', null);
+    });
+    child.on('error', () => {
+      if (settled) return;
+      settled = true;
+      finish('error', null);
+    });
+
+    // If `claude` fails to start or exits before reading its input, the
+    // write below surfaces as an 'error' on stdin; without a listener that
+    // would crash setup. The child's own 'error'/'close' events already
+    // report the failure, so there is nothing more to do here.
+    child.stdin.on('error', () => {});
+    child.stdin.end(prompt);
+  });
+}
+
+// Minimal shape of the stream-json events we care about. Claude emits
+// many more, but we only read tool_use blocks (for breadcrumbs) and text
+// blocks (to reassemble the final REASON/COMMAND answer).
+interface StreamEvent {
+  // Event discriminator; handleStreamEvent ignores all but 'assistant'.
+  type: string;
+  // Present on the events we read; absent content is treated as empty.
+  message?: {
+    content?: Array<
+      // Assistant prose; appended to the final answer text.
+      | { type: 'text'; text: string }
+      // Tool invocation; name + input are turned into a breadcrumb line.
+      | { type: 'tool_use'; name: string; input: Record<string, unknown> }
+    >;
+  };
+}
+
+/**
+ * Dispatch the content blocks of one stream-json event to the callbacks.
+ *
+ * Only `assistant` events are considered. Text blocks go to
+ * `cb.appendText`, tool_use blocks are formatted into a breadcrumb and go
+ * to `cb.setAction`, and any other block type is ignored.
+ */
+function handleStreamEvent(
+  event: StreamEvent,
+  cb: { setAction: (a: string) => void; appendText: (t: string) => void },
+): void {
+  if (event.type === 'assistant') {
+    (event.message?.content ?? []).forEach((block) => {
+      switch (block.type) {
+        case 'text':
+          cb.appendText(block.text);
+          break;
+        case 'tool_use':
+          cb.setAction(formatToolUse(block.name, block.input));
+          break;
+      }
+    });
+  }
+}
+
+/**
+ * Turn a tool invocation into a short breadcrumb for the progress window.
+ *
+ * Read, Bash, Grep and Glob get a tailored label built from their input
+ * (commands are whitespace-collapsed and clipped to 60 characters,
+ * patterns to 40); any other tool is shown as `Using <name>`.
+ */
+function formatToolUse(name: string, input: Record<string, unknown>): string {
+  const clip = (text: string, max: number): string =>
+    text.length > max ? `${text.slice(0, max)}…` : text;
+  // Missing (null/undefined) input fields read as the empty string.
+  const field = (key: string): string => String(input[key] ?? '');
+
+  switch (name) {
+    case 'Read':
+      return `Reading ${shortenPath(field('file_path'))}`;
+    case 'Bash':
+      return `Running ${clip(field('command').replace(/\s+/g, ' ').trim(), 60)}`;
+    case 'Grep':
+      return `Searching for "${clip(field('pattern'), 40)}"`;
+    case 'Glob':
+      return `Finding ${clip(field('pattern'), 40)}`;
+    default:
+      return `Using ${name}`;
+  }
+}
+
+/**
+ * Strip the current working directory prefix from `abs` so paths inside
+ * the project display as relative; any other path is returned unchanged.
+ */
+function shortenPath(abs: string): string {
+  const prefix = `${process.cwd()}/`;
+  if (!abs.startsWith(prefix)) return abs;
+  return abs.slice(prefix.length);
+}
+
+/**
+ * Extract the `REASON:` / `COMMAND:` pair from Claude's reply.
+ *
+ * @param raw Full reply text.
+ * @returns The reason and command, or `null` when either field is missing
+ *   or the command is empty or `none` (Claude found no safe single fix).
+ */
+function parseResponse(
+  raw: string,
+): { reason: string; command: string } | null {
+  // Accept the fields anywhere in the output — Claude sometimes wraps the
+  // answer in a trailing explanation we can safely ignore.
+  const reasonMatch = raw.match(/^\s*REASON:\s*(.+?)\s*$/m);
+  const commandMatch = raw.match(/^\s*COMMAND:\s*(.+?)\s*$/m);
+  if (!reasonMatch || !commandMatch) return null;
+
+  let command = commandMatch[1].trim();
+  // The prompt asks for no backticks, but a reply may still wrap the whole
+  // command in one markdown pair; run verbatim, bash would treat that as a
+  // command substitution. Unwrap only when the backticks enclose the
+  // entire command (no inner backticks), so real substitutions survive.
+  const wrapped = /^`([^`]+)`$/.exec(command);
+  if (wrapped) command = wrapped[1].trim();
+
+  if (!command || command.toLowerCase() === 'none') return null;
+  return { reason: reasonMatch[1].trim(), command };
+}
+
+/**
+ * Hand `command` to `setup/run-suggested.sh` (run with bash, stdio
+ * inherited so the user interacts with it directly) and wait for it to end.
+ *
+ * Never rejects: a missing helper is logged, and both a spawn error and a
+ * normal close simply resolve.
+ */
+async function runSuggested(
+  command: string,
+  projectRoot: string,
+): Promise<void> {
+  const helper = path.join(projectRoot, 'setup/run-suggested.sh');
+  if (!fs.existsSync(helper)) {
+    p.log.error(`Missing helper: ${helper}`);
+    return;
+  }
+
+  await new Promise<void>((done) => {
+    const proc = spawn('bash', [helper, command], {
+      cwd: projectRoot,
+      stdio: 'inherit',
+    });
+    proc.on('close', () => done());
+    proc.on('error', () => done());
+  });
+}
--- a/setup/lib/runner.ts
+++ b/setup/lib/runner.ts
@@ -11,13 +11,15 @@
 *
 * See docs/setup-flow.md for the three-level output contract.
 */
-import { spawn } from 'child_process';
+import { spawn, spawnSync } from 'child_process';
 import fs from 'fs';

 import * as p from '@clack/prompts';
 import k from 'kleur';

 import * as setupLog from '../logs.js';
+import { offerClaudeAssist } from './claude-assist.js';
+import { fitToWidth } from './theme.js';

 export type Fields = Record<string, string>;
 export type Block = { type: string; fields: Fields };
@@ -261,23 +263,25 @@ async function runUnderSpinner<
 ): Promise<T> {
  const s = p.spinner();
  const start = Date.now();
-  s.start(labels.running);
+  s.start(fitToWidth(labels.running, ' (999s)'));
  const tick = setInterval(() => {
    const elapsed = Math.round((Date.now() - start) / 1000);
-    s.message(`${labels.running} ${k.dim(`(${elapsed}s)`)}`);
+    const suffix = ` (${elapsed}s)`;
+    s.message(`${fitToWidth(labels.running, suffix)}${k.dim(suffix)}`);
  }, 1000);

  const result = await work();

  clearInterval(tick);
  const elapsed = Math.round((Date.now() - start) / 1000);
+  const suffix = ` (${elapsed}s)`;
  if (result.ok) {
    const isSkipped = result.terminal?.fields.STATUS === 'skipped';
    const msg = isSkipped && labels.skipped ? labels.skipped : labels.done;
-    s.stop(`${msg} ${k.dim(`(${elapsed}s)`)}`);
+    s.stop(`${fitToWidth(msg, suffix)}${k.dim(suffix)}`);
  } else {
    const failMsg = labels.failed ?? labels.running.replace(/…$/, ' failed');
-    s.stop(`${failMsg} ${k.dim(`(${elapsed}s)`)}`, 1);
+    s.stop(`${fitToWidth(failMsg, suffix)}${k.dim(suffix)}`, 1);
    dumpTranscriptOnFailure(result.transcript);
  }
  return result;
@@ -301,12 +305,53 @@ export function dumpTranscriptOnFailure(transcript: string): void {
 * Abort the setup run with a user-facing error, logging the abort to the
 * progression log. Takes the step name explicitly so callers are clear
 * about which step they're failing from — no hidden module state.
+ *
+ * Before aborting we offer Claude-assisted debugging. Callers must
+ * `await fail(...)` so the offer can actually run before we call
+ * process.exit. The return type is `Promise<never>`; control-flow
+ * narrowing still works after `await`.
 */
-export function fail(stepName: string, msg: string, hint?: string): never {
+export async function fail(
+  stepName: string,
+  msg: string,
+  hint?: string,
+  rawLogPath?: string,
+): Promise<never> {
  setupLog.abort(stepName, msg);
  p.log.error(msg);
  if (hint) p.log.message(k.dim(hint));
  p.log.message(k.dim('Logs: logs/setup.log · Raw: logs/setup-steps/'));
+
+  const ranFix = await offerClaudeAssist({ stepName, msg, hint, rawLogPath });
+
+  // If the user just ran a Claude-suggested fix, offer to resume the flow
+  // at the step that failed instead of aborting. We re-exec via spawnSync
+  // and pass NANOCLAW_SKIP with every step that already completed so the
+  // child skips them and picks up where we left off.
+  if (ranFix) {
+    const retry = ensureAnswer(
+      await p.confirm({
+        message: `Fix applied. Retry the ${stepName} step?`,
+        initialValue: true,
+      }),
+    );
+    if (retry) {
+      // Merge any skip list we were started with and the steps completed
+      // in this process, without duplicates.
+      const existingSkip = (process.env.NANOCLAW_SKIP ?? '')
+        .split(',')
+        .map((s) => s.trim())
+        .filter(Boolean);
+      const skipList = [
+        ...new Set([...existingSkip, ...setupLog.completedStepNames()]),
+      ].join(',');
+      p.log.step(`Retrying from ${stepName}…`);
+      const result = spawnSync('pnpm', ['--silent', 'run', 'setup:auto'], {
+        stdio: 'inherit',
+        env: { ...process.env, NANOCLAW_SKIP: skipList },
+      });
+      if (result.error) {
+        p.log.error(`Couldn't restart setup: ${result.error.message}`);
+      }
+      // `status` is null when the child was killed by a signal or never
+      // started; neither is a successful retry, so don't report exit 0.
+      process.exit(result.status ?? 1);
+    }
+  }
+
  p.cancel('Setup aborted.');
  process.exit(1);
 }
--- a/setup/lib/theme.ts
+++ b/setup/lib/theme.ts
@@ -77,6 +77,28 @@ function visibleLength(s: string): number {
  return s.replace(ANSI_RE, '').length;
 }

+/**
+ * Truncate a label so the final line — base + reserved suffix — fits in
+ * the terminal width. Use on spinner labels that get an elapsed counter
+ * appended: if the total exceeds terminal width, clack's cursor-up
+ * redraw math breaks and each tick stacks a copy of the line instead
+ * of replacing it.
+ *
+ * @param base Label to fit. Returned unchanged when its visible length
+ *   (ANSI styling excluded) is within budget; otherwise cut and ended
+ *   with `…`.
+ * @param suffix Text the caller will append after `fitToWidth()` returns
+ *   (e.g. ` (999s)`). Only its visible width is reserved; it is not
+ *   included in the returned string.
+ */
+export function fitToWidth(base: string, suffix: string): string {
+  // `columns` is undefined when stdout is not a TTY; assume 80 then.
+  const cols = process.stdout.columns ?? 80;
+  // Overhead we reserve before sizing the label:
+  //   spinner icon (1) + 2 padding spaces = 3
+  //   clack's animated ellipsis after the label = up to 3 (". " -> "...")
+  //   1-char safety margin so wide-char glyphs don't tip over the edge
+  // Total reserved budget = 7 cols plus the caller's suffix. The label is
+  // never squeezed below 20 columns, even on very narrow terminals.
+  const budget = Math.max(20, cols - 7 - visibleLength(suffix));
+  // Measure the label the same way as the suffix, so ANSI styling alone
+  // never triggers a truncation.
+  if (visibleLength(base) <= budget) return base;
+  return base.slice(0, budget - 1) + '…';
+}
+
 function wrapLine(line: string, width: number): string {
  if (visibleLength(line) <= width) return line;
  const words = line.split(' ');