Files
nanoclaw/src/circuit-breaker.ts
gavrielc 336e01d2a1 fix circuit-breaker off-by-one, ENOENT, and reset-on-throw + tests
- getDelay indexed by attempt (1-based) into a 0-indexed array, so the
  leading 0 was unreachable and every "after a crash" delay was shifted
  up one slot. Use attempt - 1 so the documented schedule (0s → 0s →
  10s → 30s → 2min → 5min → 15min cap) actually holds.
- enforceStartupBackoff runs before initDb (which creates DATA_DIR), so
  on a fresh checkout fs.writeFileSync hit ENOENT. write() now
  mkdirSync's DATA_DIR first.
- shutdown() didn't run resetCircuitBreaker if teardownChannelAdapters
  threw, so a graceful exit with a teardown error would be counted as a
  crash on the next start. Wrap teardown in try/finally.
- Adds src/circuit-breaker.test.ts: state transitions, full schedule
  (parameterized), reset-window expiry, malformed file, and the
  fresh-install path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 22:51:11 +03:00

85 lines
2.4 KiB
TypeScript

import fs from 'fs';
import path from 'path';
import { DATA_DIR } from './config.js';
import { log } from './log.js';
// Location of the persisted breaker state file inside DATA_DIR.
const CB_PATH = path.join(DATA_DIR, 'circuit-breaker.json');
// If the previously recorded startup is at least this old, it is ignored and
// the attempt counter restarts at 1.
const RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
// Startup delay in seconds, looked up with `attempt - 1`.
// Index = number of consecutive crashes (0 = clean start, attempt 1).
// Lookups past the end are clamped to the last entry, so 6+ crashes are
// capped at 15min.
const BACKOFF_SCHEDULE_S = [0, 0, 10, 30, 120, 300, 900];
/**
 * Shape of the record persisted as JSON at CB_PATH. One record is written on
 * every startup and removed again by resetCircuitBreaker().
 */
interface CircuitBreakerState {
// 1-based startup counter: 1 when there is no usable previous record,
// otherwise the previous record's attempt plus one.
attempt: number;
// ISO-8601 string (Date#toISOString) of the moment this startup was recorded.
timestamp: string;
}
/**
 * Loads the persisted breaker state from CB_PATH.
 *
 * The file content is treated as untrusted: it is only returned when it is a
 * JSON object whose `attempt` is a positive integer and whose `timestamp` is a
 * string that `Date` can parse. Anything else is handled like a missing file
 * so a hand-edited or truncated file cannot produce string concatenation
 * (`"3" + 1`), NaN attempts, or NaN elapsed times in the caller.
 *
 * @returns The stored state, or `null` when the file is missing, unreadable,
 *   not valid JSON, or does not have the expected shape.
 */
function read(): CircuitBreakerState | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(CB_PATH, 'utf-8'));
  } catch {
    // No file (first run or clean previous shutdown) or corrupt JSON.
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) {
    return null;
  }
  const { attempt, timestamp } = parsed as Record<string, unknown>;

  if (typeof attempt !== 'number' || !Number.isInteger(attempt) || attempt < 1) {
    return null;
  }
  if (typeof timestamp !== 'string' || Number.isNaN(Date.parse(timestamp))) {
    return null;
  }

  // Rebuild the object so unexpected extra keys are not carried along.
  return { attempt, timestamp };
}
/**
 * Persists the given breaker state to CB_PATH as 2-space-indented JSON with a
 * trailing newline, replacing any previous content.
 *
 * @param state - Record to store.
 */
function write(state: CircuitBreakerState): void {
  // initDb is what normally creates DATA_DIR, but the breaker runs before it,
  // so make sure the directory is there (fresh checkout case).
  fs.mkdirSync(DATA_DIR, { recursive: true });

  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  fs.writeFileSync(CB_PATH, serialized);
}
/**
 * Maps a 1-based startup attempt to its backoff delay.
 *
 * The schedule is indexed by `attempt - 1`. The index is clamped to the
 * bounds of BACKOFF_SCHEDULE_S, so attempts past the end of the schedule get
 * the last (largest) delay and attempts below 1 get the first entry instead
 * of reading outside the array.
 *
 * @param attempt - 1-based startup attempt number.
 * @returns Delay in seconds; 0 when `attempt` is NaN.
 */
function getDelay(attempt: number): number {
  const lastIdx = BACKOFF_SCHEDULE_S.length - 1;
  const idx = Math.min(Math.max(Math.floor(attempt) - 1, 0), lastIdx);
  // A NaN attempt propagates through min/max and misses the array; fall back
  // to "no delay" rather than returning undefined.
  return BACKOFF_SCHEDULE_S[idx] ?? 0;
}
/**
 * Clears the persisted breaker state by deleting the file at CB_PATH, so the
 * next startup is treated as attempt 1.
 *
 * This is best-effort and does not propagate filesystem errors. A missing
 * file is ignored silently (there is nothing to reset). Any other failure is
 * logged, because a state file that could not be removed will make the next
 * startup count as a crash.
 */
export function resetCircuitBreaker(): void {
  try {
    fs.unlinkSync(CB_PATH);
    log.info('Circuit breaker reset on clean shutdown');
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    if (code === 'ENOENT') {
      return;
    }
    log.warn('Circuit breaker reset failed; next startup may be counted as a crash', {
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
/**
 * Records this startup in the breaker state file and, when the previous
 * startups were not followed by a reset, waits before letting startup
 * continue.
 *
 * Steps:
 *  1. Read the previous record. With no record, or a record at least
 *     RESET_WINDOW_MS old, this is attempt 1; otherwise it is the previous
 *     attempt plus one.
 *  2. Persist the new attempt together with the current time.
 *  3. Look up the delay for the attempt and, if it is positive, sleep for
 *     that many seconds.
 *
 * @returns A promise that resolves once any required delay has elapsed.
 */
export async function enforceStartupBackoff(): Promise<void> {
  const startedAt = new Date();
  const previous = read();

  // Assume a fresh start; only a recent leftover record bumps the counter.
  let attempt = 1;
  if (previous) {
    const sinceLastMs = startedAt.getTime() - new Date(previous.timestamp).getTime();
    const withinWindow = sinceLastMs < RESET_WINDOW_MS;
    if (withinWindow) {
      attempt = previous.attempt + 1;
      log.warn('Previous startup was not a clean shutdown', {
        previousAttempt: previous.attempt,
        previousTimestamp: previous.timestamp,
        elapsedSec: Math.round(sinceLastMs / 1000),
      });
    } else {
      log.info('Circuit breaker reset — last startup was over 1h ago', {
        previousAttempt: previous.attempt,
        previousTimestamp: previous.timestamp,
      });
    }
  }

  // Persist before sleeping so a crash during the wait is still counted.
  write({ attempt, timestamp: startedAt.toISOString() });

  const delaySec = getDelay(attempt);
  if (!(delaySec > 0)) {
    return;
  }

  const waitMs = delaySec * 1000;
  const resumeAt = new Date(startedAt.getTime() + waitMs).toISOString();
  log.warn('Circuit breaker: delaying startup due to repeated crashes', {
    attempt,
    delaySec,
    resumeAt,
  });
  await new Promise((resolve) => setTimeout(resolve, waitMs));
  log.info('Circuit breaker: backoff complete, resuming startup', { attempt });
}