feat(setup): Claude-assisted error recovery with resume-at-step retry

When a setup step fails — whether hard via fail() or soft via the "What's left" / "Skipping the first chat" notes — offer to ask Claude to diagnose. On consent, spawn `claude -p --output-format stream-json` with a scrolling 3-line action window ("Reading x", "Running y") so the 1–4 minute investigations feel active rather than hung. No hard timeout: debugging can take time, Ctrl-C is the escape hatch. The prompt is minimal: one-paragraph framing, failed step name + msg + hint, and a list of file references (not contents). Claude's Read/Grep tools fetch what they need. A per-step map in claude-assist.ts gives the most relevant files per step; the rest is README + auto.ts + logs/setup.log + the per-step raw log. Claude responds with REASON + COMMAND lines. We show the reason in a clack note, prefill the command via setup/run-suggested.sh (bash 4+ readline, 3.x fallback to Enter-to-run), and eval on the user's confirm. When the user runs a fix, fail() now offers to retry the failing step rather than aborting. setup/logs.ts tracks successfully-completed step names in-memory; fail() threads those as NANOCLAW_SKIP on a spawnSync retry, so the child picks up exactly where the parent left off — no rebuilding containers or reinstalling OneCLI. Other polish in this change: - fitToWidth + dimWrap in lib/theme.ts to prevent long spinner labels from soft-wrapping (each terminal row stacks a stale copy otherwise). - Shorter container step label ("Preparing your assistant's sandbox…") so it fits on narrow terminals. - Wordmark anchored in the clack intro line on every run. - All 25 existing fail() call sites updated to await fail(...) since fail is now async. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:42:32 +03:00
parent dfcbab5364
commit 4859d8fb2d
8 changed files with 589 additions and 37 deletions
--- a/setup/lib/runner.ts
+++ b/setup/lib/runner.ts
@@ -11,13 +11,15 @@
 *
 * See docs/setup-flow.md for the three-level output contract.
 */
-import { spawn } from 'child_process';
+import { spawn, spawnSync } from 'child_process';
 import fs from 'fs';

 import * as p from '@clack/prompts';
 import k from 'kleur';

 import * as setupLog from '../logs.js';
+import { offerClaudeAssist } from './claude-assist.js';
+import { fitToWidth } from './theme.js';

 export type Fields = Record<string, string>;
 export type Block = { type: string; fields: Fields };
@@ -261,23 +263,25 @@ async function runUnderSpinner<
 ): Promise<T> {
  const s = p.spinner();
  const start = Date.now();
-  s.start(labels.running);
+  s.start(fitToWidth(labels.running, ' (999s)'));
  const tick = setInterval(() => {
    const elapsed = Math.round((Date.now() - start) / 1000);
-    s.message(`${labels.running} ${k.dim(`(${elapsed}s)`)}`);
+    const suffix = ` (${elapsed}s)`;
+    s.message(`${fitToWidth(labels.running, suffix)}${k.dim(suffix)}`);
  }, 1000);

  const result = await work();

  clearInterval(tick);
  const elapsed = Math.round((Date.now() - start) / 1000);
+  const suffix = ` (${elapsed}s)`;
  if (result.ok) {
    const isSkipped = result.terminal?.fields.STATUS === 'skipped';
    const msg = isSkipped && labels.skipped ? labels.skipped : labels.done;
-    s.stop(`${msg} ${k.dim(`(${elapsed}s)`)}`);
+    s.stop(`${fitToWidth(msg, suffix)}${k.dim(suffix)}`);
  } else {
    const failMsg = labels.failed ?? labels.running.replace(/…$/, ' failed');
-    s.stop(`${failMsg} ${k.dim(`(${elapsed}s)`)}`, 1);
+    s.stop(`${fitToWidth(failMsg, suffix)}${k.dim(suffix)}`, 1);
    dumpTranscriptOnFailure(result.transcript);
  }
  return result;
@@ -301,12 +305,53 @@ export function dumpTranscriptOnFailure(transcript: string): void {
 * Abort the setup run with a user-facing error, logging the abort to the
 * progression log. Takes the step name explicitly so callers are clear
 * about which step they're failing from — no hidden module state.
+ *
+ * Before aborting we offer Claude-assisted debugging. Callers must
+ * `await fail(...)` so the offer can actually run before we call
+ * process.exit. The return type is `Promise<never>`; control-flow
+ * narrowing still works after `await`.
 */
-export function fail(stepName: string, msg: string, hint?: string): never {
+export async function fail(
+  stepName: string,
+  msg: string,
+  hint?: string,
+  rawLogPath?: string,
+): Promise<never> {
  setupLog.abort(stepName, msg);
  p.log.error(msg);
  if (hint) p.log.message(k.dim(hint));
  p.log.message(k.dim('Logs: logs/setup.log · Raw: logs/setup-steps/'));
+
+  const ranFix = await offerClaudeAssist({ stepName, msg, hint, rawLogPath });
+
+  // If the user just ran a Claude-suggested fix, offer to resume the flow
+  // at the step that failed instead of aborting. We re-exec via spawnSync
+  // and pass NANOCLAW_SKIP with every step that already completed so the
+  // child skips them and picks up where we left off.
+  if (ranFix) {
+    const retry = ensureAnswer(
+      await p.confirm({
+        message: `Fix applied. Retry the ${stepName} step?`,
+        initialValue: true,
+      }),
+    );
+    if (retry) {
+      const existingSkip = (process.env.NANOCLAW_SKIP ?? '')
+        .split(',')
+        .map((s) => s.trim())
+        .filter(Boolean);
+      const skipList = [
+        ...new Set([...existingSkip, ...setupLog.completedStepNames()]),
+      ].join(',');
+      p.log.step(`Retrying from ${stepName}…`);
+      const result = spawnSync('pnpm', ['--silent', 'run', 'setup:auto'], {
+        stdio: 'inherit',
+        env: { ...process.env, NANOCLAW_SKIP: skipList },
+      });
+      process.exit(result.status ?? 0);
+    }
+  }
+
  p.cancel('Setup aborted.');
  process.exit(1);
 }