feat: race-free on-wake messages and explicit restart CLI

Decouple container restart from config updates — config CLI ops now only
write to the DB; restart is a separate `ncl groups restart` command with
--rebuild and --message flags. Add on_wake column to messages_in so wake
messages are only picked up by a fresh container's first poll, preventing
dying containers from stealing them during the SIGTERM grace window.
killContainer accepts an onExit callback for race-free respawn. Agent-
called restart auto-scopes to the calling session.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-05-09 19:02:15 +03:00
parent 08698da0d2
commit be3a8a97c6
12 changed files with 381 additions and 61 deletions

View File

@@ -196,7 +196,8 @@ export function initTestSessionDb(): { inbound: Database; outbound: Database } {
platform_id TEXT,
channel_type TEXT,
thread_id TEXT,
content TEXT NOT NULL
content TEXT NOT NULL,
on_wake INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE delivered (
message_out_id TEXT PRIMARY KEY,

View File

@@ -49,7 +49,7 @@ function getMaxMessagesPerPrompt(): number {
* sees the prior context it missed. Host's countDueMessages gates waking on
* trigger=1 separately (see src/db/session-db.ts).
*/
export function getPendingMessages(): MessageInRow[] {
export function getPendingMessages(isFirstPoll = false): MessageInRow[] {
const inbound = openInboundDb();
const outbound = getOutboundDb();
@@ -59,10 +59,11 @@ export function getPendingMessages(): MessageInRow[] {
`SELECT * FROM messages_in
WHERE status = 'pending'
AND (process_after IS NULL OR datetime(process_after) <= datetime('now'))
AND (on_wake = 0 OR ?1 = 1)
ORDER BY seq DESC
LIMIT ?`,
LIMIT ?2`,
)
.all(getMaxMessagesPerPrompt()) as MessageInRow[];
.all(isFirstPoll ? 1 : 0, getMaxMessagesPerPrompt()) as MessageInRow[];
if (pending.length === 0) return [];

View File

@@ -14,13 +14,18 @@ afterEach(() => {
closeSessionDb();
});
function insertMessage(id: string, kind: string, content: object, opts?: { processAfter?: string; trigger?: 0 | 1 }) {
function insertMessage(
id: string,
kind: string,
content: object,
opts?: { processAfter?: string; trigger?: 0 | 1; onWake?: 0 | 1 },
) {
getInboundDb()
.prepare(
`INSERT INTO messages_in (id, kind, timestamp, status, process_after, trigger, content)
VALUES (?, ?, datetime('now'), 'pending', ?, ?, ?)`,
`INSERT INTO messages_in (id, kind, timestamp, status, process_after, trigger, on_wake, content)
VALUES (?, ?, datetime('now'), 'pending', ?, ?, ?, ?)`,
)
.run(id, kind, opts?.processAfter ?? null, opts?.trigger ?? 1, JSON.stringify(content));
.run(id, kind, opts?.processAfter ?? null, opts?.trigger ?? 1, opts?.onWake ?? 0, JSON.stringify(content));
}
describe('formatter', () => {
@@ -131,6 +136,58 @@ describe('accumulate gate (trigger column)', () => {
});
});
describe('on_wake filtering', () => {
it('first poll returns on_wake=1 messages', () => {
insertMessage('m1', 'chat', { sender: 'system', text: 'Resuming.' }, { onWake: 1 });
const messages = getPendingMessages(true);
expect(messages).toHaveLength(1);
expect(messages[0].id).toBe('m1');
});
it('subsequent polls skip on_wake=1 messages', () => {
insertMessage('m1', 'chat', { sender: 'system', text: 'Resuming.' }, { onWake: 1 });
const messages = getPendingMessages(false);
expect(messages).toHaveLength(0);
});
it('normal messages returned regardless of isFirstPoll', () => {
insertMessage('m1', 'chat', { sender: 'A', text: 'hello' });
expect(getPendingMessages(true)).toHaveLength(1);
// Reset: mark completed so we can re-test with a fresh message
markCompleted(['m1']);
insertMessage('m2', 'chat', { sender: 'A', text: 'hello again' });
expect(getPendingMessages(false)).toHaveLength(1);
});
it('mixed batch: first poll returns both normal and on_wake messages', () => {
insertMessage('m1', 'chat', { sender: 'A', text: 'user msg' });
insertMessage('m2', 'chat', { sender: 'system', text: 'Resuming.' }, { onWake: 1 });
const messages = getPendingMessages(true);
expect(messages).toHaveLength(2);
expect(messages.map((m) => m.id).sort()).toEqual(['m1', 'm2']);
});
it('mixed batch: subsequent poll returns only normal messages', () => {
insertMessage('m1', 'chat', { sender: 'A', text: 'user msg' });
insertMessage('m2', 'chat', { sender: 'system', text: 'Resuming.' }, { onWake: 1 });
const messages = getPendingMessages(false);
expect(messages).toHaveLength(1);
expect(messages[0].id).toBe('m1');
});
it('on_wake defaults to 0 for inserts without explicit value', () => {
getInboundDb()
.prepare(
`INSERT INTO messages_in (id, kind, timestamp, status, content)
VALUES ('m1', 'chat', datetime('now'), 'pending', '{"text":"hi"}')`,
)
.run();
// Should be returned even on non-first poll (on_wake=0)
expect(getPendingMessages(false)).toHaveLength(1);
});
});
describe('routing', () => {
it('should extract routing from messages', () => {
getInboundDb()

View File

@@ -67,9 +67,11 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
clearStaleProcessingAcks();
let pollCount = 0;
let isFirstPoll = true;
while (true) {
// Skip system messages — they're responses for MCP tools (e.g., ask_user_question)
const messages = getPendingMessages().filter((m) => m.kind !== 'system');
const messages = getPendingMessages(isFirstPoll).filter((m) => m.kind !== 'system');
isFirstPoll = false;
pollCount++;
// Periodic heartbeat so we know the loop is alive