feat(setup): Claude-assisted error recovery with resume-at-step retry
When a setup step fails — whether hard via fail() or soft via the
"What's left" / "Skipping the first chat" notes — offer to ask Claude
to diagnose. On consent, spawn `claude -p --output-format stream-json`
with a scrolling 3-line action window ("Reading x", "Running y") so
the 1–4 minute investigations feel active rather than hung. No hard
timeout: debugging can take time, Ctrl-C is the escape hatch.
The prompt is minimal: one-paragraph framing, failed step name + msg +
hint, and a list of file references (not contents). Claude's Read/Grep
tools fetch what they need. A per-step map in claude-assist.ts gives
the most relevant files per step; the rest is README + auto.ts +
logs/setup.log + the per-step raw log.
Claude responds with REASON + COMMAND lines. We show the reason in a
clack note, prefill the command via setup/run-suggested.sh (readline
prefill on bash 4+; bash 3.x falls back to Enter-to-run), and eval it
once the user confirms.
When the user runs a fix, fail() now offers to retry the failing step
rather than aborting. setup/logs.ts tracks successfully-completed step
names in-memory; fail() threads those as NANOCLAW_SKIP on a spawnSync
retry, so the child picks up exactly where the parent left off — no
rebuilding containers or reinstalling OneCLI.
Other polish in this change:
- fitToWidth + dimWrap in lib/theme.ts to prevent long spinner labels
from soft-wrapping (each terminal row stacks a stale copy otherwise).
- Shorter container step label ("Preparing your assistant's sandbox…")
so it fits on narrow terminals.
- Wordmark anchored in the clack intro line on every run.
- All 25 existing fail() call sites updated to await fail(...) since
fail is now async.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,13 +11,15 @@
|
||||
*
|
||||
* See docs/setup-flow.md for the three-level output contract.
|
||||
*/
|
||||
import { spawn } from 'child_process';
|
||||
import { spawn, spawnSync } from 'child_process';
|
||||
import fs from 'fs';
|
||||
|
||||
import * as p from '@clack/prompts';
|
||||
import k from 'kleur';
|
||||
|
||||
import * as setupLog from '../logs.js';
|
||||
import { offerClaudeAssist } from './claude-assist.js';
|
||||
import { fitToWidth } from './theme.js';
|
||||
|
||||
export type Fields = Record<string, string>;
|
||||
export type Block = { type: string; fields: Fields };
|
||||
@@ -261,23 +263,25 @@ async function runUnderSpinner<
|
||||
): Promise<T> {
|
||||
const s = p.spinner();
|
||||
const start = Date.now();
|
||||
s.start(labels.running);
|
||||
s.start(fitToWidth(labels.running, ' (999s)'));
|
||||
const tick = setInterval(() => {
|
||||
const elapsed = Math.round((Date.now() - start) / 1000);
|
||||
s.message(`${labels.running} ${k.dim(`(${elapsed}s)`)}`);
|
||||
const suffix = ` (${elapsed}s)`;
|
||||
s.message(`${fitToWidth(labels.running, suffix)}${k.dim(suffix)}`);
|
||||
}, 1000);
|
||||
|
||||
const result = await work();
|
||||
|
||||
clearInterval(tick);
|
||||
const elapsed = Math.round((Date.now() - start) / 1000);
|
||||
const suffix = ` (${elapsed}s)`;
|
||||
if (result.ok) {
|
||||
const isSkipped = result.terminal?.fields.STATUS === 'skipped';
|
||||
const msg = isSkipped && labels.skipped ? labels.skipped : labels.done;
|
||||
s.stop(`${msg} ${k.dim(`(${elapsed}s)`)}`);
|
||||
s.stop(`${fitToWidth(msg, suffix)}${k.dim(suffix)}`);
|
||||
} else {
|
||||
const failMsg = labels.failed ?? labels.running.replace(/…$/, ' failed');
|
||||
s.stop(`${failMsg} ${k.dim(`(${elapsed}s)`)}`, 1);
|
||||
s.stop(`${fitToWidth(failMsg, suffix)}${k.dim(suffix)}`, 1);
|
||||
dumpTranscriptOnFailure(result.transcript);
|
||||
}
|
||||
return result;
|
||||
@@ -301,12 +305,53 @@ export function dumpTranscriptOnFailure(transcript: string): void {
|
||||
* Abort the setup run with a user-facing error, logging the abort to the
|
||||
* progression log. Takes the step name explicitly so callers are clear
|
||||
* about which step they're failing from — no hidden module state.
|
||||
*
|
||||
* Before aborting we offer Claude-assisted debugging. Callers must
|
||||
* `await fail(...)` so the offer can actually run before we call
|
||||
* process.exit. The return type is `Promise<never>`; control-flow
|
||||
* narrowing still works after `await`.
|
||||
*/
|
||||
export function fail(stepName: string, msg: string, hint?: string): never {
|
||||
export async function fail(
  stepName: string,
  msg: string,
  hint?: string,
  rawLogPath?: string,
): Promise<never> {
  // stepName   — the step being failed; logged and used in the retry prompt.
  // msg        — user-facing error text.
  // hint       — optional dimmed follow-up line shown under the error.
  // rawLogPath — optional path forwarded to the Claude-assist offer.
  // Never returns normally: every path through this function ends in
  // process.exit.
  setupLog.abort(stepName, msg);
  p.log.error(msg);
  if (hint) p.log.message(k.dim(hint));
  p.log.message(k.dim('Logs: logs/setup.log · Raw: logs/setup-steps/'));

  const ranFix = await offerClaudeAssist({ stepName, msg, hint, rawLogPath });

  // If the user just ran a Claude-suggested fix, offer to resume the flow
  // at the step that failed instead of aborting. We re-exec via spawnSync
  // and pass NANOCLAW_SKIP with every step that already completed so the
  // child skips them and picks up where we left off.
  if (ranFix) {
    const retry = ensureAnswer(
      await p.confirm({
        message: `Fix applied. Retry the ${stepName} step?`,
        initialValue: true,
      }),
    );
    if (retry) {
      // Preserve any skip list we were launched with (e.g. we are already
      // a retry child) and merge in the steps completed during this run.
      const existingSkip = (process.env.NANOCLAW_SKIP ?? '')
        .split(',')
        .map((s) => s.trim())
        .filter(Boolean);
      const skipList = [
        ...new Set([...existingSkip, ...setupLog.completedStepNames()]),
      ].join(',');

      p.log.step(`Retrying from ${stepName}…`);
      const result = spawnSync('pnpm', ['--silent', 'run', 'setup:auto'], {
        stdio: 'inherit',
        env: { ...process.env, NANOCLAW_SKIP: skipList },
      });

      // spawnSync sets `error` when the child could not be started at all
      // (e.g. pnpm is not on PATH). Surface that instead of exiting quietly.
      if (result.error) {
        p.log.error(`Could not restart setup: ${result.error.message}`);
      }

      // `status` is null when the child never ran or was killed by a
      // signal. Neither is a success, so fall back to a non-zero code
      // rather than reporting exit 0.
      process.exit(result.status ?? 1);
    }
  }

  p.cancel('Setup aborted.');
  process.exit(1);
}
|
||||
|
||||
Reference in New Issue
Block a user