v2: split session DB into inbound/outbound for write isolation

Eliminates SQLite write contention across the host-container mount
boundary by splitting the single session.db into two files, each with
exactly one writer:

  inbound.db  — host writes (messages_in, delivered tracking)
  outbound.db — container writes (messages_out, processing_ack)

Key changes:
- Host uses even seq numbers, container uses odd (collision-free)
- Container heartbeat via file touch instead of DB UPDATE
- Scheduling MCP tools now emit system actions via messages_out
  (host applies them to inbound.db during delivery)
- Host sweep reads processing_ack + heartbeat file for stale detection
- OneCLI ensureAgent() call added (it was missing from v2, which caused
  applyContainerConfig to reject unknown agent identifiers)

Verified: tsc clean, 327 tests pass, real e2e through Docker works.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-04-09 12:17:31 +03:00
parent 320176e7e8
commit 82cb363f84
19 changed files with 738 additions and 347 deletions

View File

@@ -1,5 +1,6 @@
import { getPendingMessages, markProcessing, markCompleted, touchProcessing, type MessageInRow } from './db/messages-in.js';
import { getPendingMessages, markProcessing, markCompleted, type MessageInRow } from './db/messages-in.js';
import { writeMessageOut } from './db/messages-out.js';
import { touchHeartbeat, clearStaleProcessingAcks } from './db/connection.js';
import { formatMessages, extractRouting, categorizeMessage, type RoutingContext } from './formatter.js';
import type { AgentProvider, AgentQuery, McpServerConfig, ProviderEvent } from './providers/types.js';
@@ -38,6 +39,10 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
let sessionId: string | undefined;
let resumeAt: string | undefined;
// Clear leftover 'processing' acks from a previously crashed container.
// This lets the new container re-process those messages.
clearStaleProcessingAcks();
let pollCount = 0;
while (true) {
// Skip system messages — they're responses for MCP tools (e.g., ask_user_question)
@@ -260,7 +265,7 @@ async function processQuery(query: AgentQuery, routing: RoutingContext, config:
for await (const event of query.events) {
lastEventTime = Date.now();
handleEvent(event, routing);
touchProcessing(processingIds);
touchHeartbeat();
if (event.type === 'init') {
querySessionId = event.sessionId;