fix(container): clear orphan heartbeat before spawn
After a container exits, its .heartbeat file is left behind with the mtime of its last SDK activity. When the same session spawns a new container, the host sweep's ceiling check reads that stale mtime and kills the freshly-spawned container within seconds — before the new instance has had time to touch the file itself. The sweep already has a carve-out for "no heartbeat file" (treated as a fresh spawn, given grace), so simply removing the orphan at spawn time restores the intended semantics.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -36,7 +36,7 @@ import {
|
|||||||
type ProviderContainerContribution,
|
type ProviderContainerContribution,
|
||||||
type VolumeMount,
|
type VolumeMount,
|
||||||
} from './providers/provider-container-registry.js';
|
} from './providers/provider-container-registry.js';
|
||||||
import { markContainerRunning, markContainerStopped, sessionDir, writeSessionRouting } from './session-manager.js';
|
import { heartbeatPath, markContainerRunning, markContainerStopped, sessionDir, writeSessionRouting } from './session-manager.js';
|
||||||
import type { AgentGroup, Session } from './types.js';
|
import type { AgentGroup, Session } from './types.js';
|
||||||
|
|
||||||
const onecli = new OneCLI({ url: ONECLI_URL, apiKey: ONECLI_API_KEY });
|
const onecli = new OneCLI({ url: ONECLI_URL, apiKey: ONECLI_API_KEY });
|
||||||
@@ -131,6 +131,12 @@ async function spawnContainer(session: Session): Promise<void> {
|
|||||||
|
|
||||||
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
|
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
|
||||||
|
|
||||||
|
// Clear any orphan heartbeat from a previous container instance — the
|
||||||
|
// sweep's ceiling check treats a missing file as "fresh spawn, give grace"
|
||||||
|
// (host-sweep.ts line 87). Without this, the stale mtime can trigger an
|
||||||
|
// immediate kill before the new container touches the file itself.
|
||||||
|
fs.rmSync(heartbeatPath(agentGroup.id, session.id), { force: true });
|
||||||
|
|
||||||
const container = spawn(CONTAINER_RUNTIME_BIN, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
const container = spawn(CONTAINER_RUNTIME_BIN, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
||||||
|
|
||||||
activeContainers.set(session.id, { process: container, containerName });
|
activeContainers.set(session.id, { process: container, containerName });
|
||||||
|
|||||||
Reference in New Issue
Block a user