Clear a stale agent session so the next retry starts fresh.

Adds deleteSession() to src/db.ts and calls it from runAgent() in
src/index.ts when the agent's error text matches a missing-session pattern.

NOTE(review): this patch was rebuilt from a copy that had been collapsed onto
a single line. Leading indentation is assumed to be 2-space, and the
<string, string> type arguments on getAllSessions() are assumed to have been
stripped from that copy. Confirm both against the tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/src/db.ts b/src/db.ts
index 7fba354..5aaf0b1 100644
--- a/src/db.ts
+++ b/src/db.ts
@@ -561,6 +561,10 @@ export function setSession(groupFolder: string, sessionId: string): void {
   ).run(groupFolder, sessionId);
 }
 
+export function deleteSession(groupFolder: string): void {
+  db.prepare('DELETE FROM sessions WHERE group_folder = ?').run(groupFolder);
+}
+
 export function getAllSessions(): Record<string, string> {
   const rows = db
     .prepare('SELECT group_folder, session_id FROM sessions')
diff --git a/src/index.ts b/src/index.ts
index eaf9432..e186c40 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -33,6 +33,7 @@ import {
   getAllChats,
   getAllRegisteredGroups,
   getAllSessions,
+  deleteSession,
   getAllTasks,
   getLastBotMessageTimestamp,
   getMessagesSince,
@@ -402,6 +403,24 @@ async function runAgent(
   }
 
   if (output.status === 'error') {
+    // Detect stale/corrupt session — clear it so the next retry starts fresh.
+    // The session .jsonl can go missing after a crash mid-write, manual
+    // deletion, or disk-full. The existing backoff in group-queue.ts
+    // handles the retry; we just need to remove the broken session ID.
+    const isStaleSession =
+      sessionId &&
+      output.error &&
+      /no conversation found|ENOENT.*\.jsonl|session.*not found/i.test(output.error);
+
+    if (isStaleSession) {
+      logger.warn(
+        { group: group.name, staleSessionId: sessionId, error: output.error },
+        'Stale session detected — clearing for next retry',
+      );
+      delete sessions[group.folder];
+      deleteSession(group.folder);
+    }
+
     logger.error(
       { group: group.name, error: output.error },
       'Container agent error',