fix(host-sweep): parse SQLite timestamps as UTC, not local time
SQLite TIMESTAMP columns store UTC without a zone marker. `Date.parse`
treats timezoneless ISO strings as local time, so on any non-UTC host
every claim and processAfter looks (TZ offset) hours stale. That makes
fresh claims trip the kill-claim path on the first sweep tick — every
container gets killed within seconds of spawn.
Two affected sites in host-sweep.ts:
- decideStuckAction reads claim.status_changed and computes claimAge.
On a TZ=Europe/Madrid host (UTC+2), a claim made 5s ago looks
7205s old and exceeds CLAIM_STUCK_MS (60s).
- The orphan retry loop reads msg.processAfter and skips messages
rescheduled into the future. On the same host, future timestamps
look (TZ offset) hours in the past, so the skip is missed and
tries gets bumped on every tick.
Fix: introduce parseSqliteUtc(s) which appends "Z" only when no zone
marker is present, then call it from both sites. Behavior under
TZ=UTC is unchanged.
Verified on a production v2 install on TZ=Europe/Madrid: with the
patch applied, an idle container survived 30+ minutes without being
killed (previously: killed within 60s of spawn).
Tests: 5 new cases covering the bare/Z/+offset/invalid input matrix
and a TZ-independence check. All 19 host-sweep tests pass and tsc
clears against main.
This commit is contained in:
@@ -12,6 +12,7 @@ import {
|
||||
CLAIM_STUCK_MS,
|
||||
_resetStuckProcessingRowsForTesting,
|
||||
decideStuckAction,
|
||||
parseSqliteUtc,
|
||||
} from './host-sweep.js';
|
||||
import type { Session } from './types.js';
|
||||
|
||||
@@ -292,3 +293,44 @@ describe('resetStuckProcessingRows — orphan claim cleanup', () => {
|
||||
expect(row.tries).toBe(1); // not bumped, the skip path held
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseSqliteUtc', () => {
  // Regression guard. Timestamps read back from SQLite carry no zone marker,
  // and Date.parse reads such strings as local time. On a non-UTC host that
  // made every claim look (TZ offset) hours stale, which tripped kill-claim
  // on freshly-claimed messages. parseSqliteUtc adds "Z" only when the input
  // has no marker of its own, so the result is anchored to UTC on any host.

  // Every fixture below denotes this same instant.
  const noonUtc = Date.parse('2026-04-20T12:00:00.000Z');

  /** Assert that each input parses to the shared reference instant. */
  const expectAllNoonUtc = (inputs: readonly string[]): void => {
    for (const input of inputs) {
      expect(parseSqliteUtc(input)).toBe(noonUtc);
    }
  };

  it('treats a SQLite-style timestamp (no zone) as UTC', () => {
    expectAllNoonUtc(['2026-04-20 12:00:00', '2026-04-20T12:00:00', '2026-04-20T12:00:00.000']);
  });

  it('preserves an explicit Z marker', () => {
    expectAllNoonUtc(['2026-04-20T12:00:00.000Z', '2026-04-20T12:00:00z']);
  });

  it('preserves an explicit numeric offset', () => {
    expectAllNoonUtc([
      // 14:00 at +02:00 is 12:00 UTC (with and without the colon)
      '2026-04-20T14:00:00+02:00',
      '2026-04-20T14:00:00+0200',
      // 07:00 at -05:00 is 12:00 UTC
      '2026-04-20T07:00:00-05:00',
    ]);
  });

  it('returns NaN for unparseable input', () => {
    const parsed = parseSqliteUtc('not a date');
    expect(Number.isNaN(parsed)).toBe(true);
  });

  it('does not drift across host timezones for SQLite-style input', () => {
    // A bare string handed straight to Date.parse yields a host-dependent
    // value; going through the helper must equal the explicitly-UTC parse.
    const bare = '2026-04-20T12:00:00';
    const explicitUtc = Date.parse(bare + 'Z');
    expect(parseSqliteUtc(bare)).toBe(explicitUtc);
  });
});
|
||||
|
||||
@@ -47,6 +47,17 @@ import { openInboundDb, openOutboundDb, openOutboundDbRw, inboundDbPath, heartbe
|
||||
import { isContainerRunning, killContainer, wakeContainer } from './container-runner.js';
|
||||
import type { Session } from './types.js';
|
||||
|
||||
/**
 * Parse a timestamp string read from SQLite into epoch milliseconds, treating
 * input that carries no zone marker as UTC.
 *
 * SQLite TIMESTAMP columns store UTC without a timezone marker. Date.parse
 * treats timezoneless ISO date-time strings as local time, so on non-UTC
 * hosts every timestamp looks (TZ offset) hours stale — leading to spurious
 * kill-claim decisions on freshly-claimed messages. A "Z" is appended when no
 * zone marker is present so Date.parse interprets the string as UTC.
 *
 * @param s - Timestamp text such as `2026-04-20 12:00:00`,
 *   `2026-04-20T12:00:00.000Z` or `2026-04-20T14:00:00+02:00`.
 * @returns Milliseconds since the Unix epoch, or `NaN` if the text cannot be
 *   parsed.
 */
export function parseSqliteUtc(s: string): number {
  // Date-only ISO strings are already interpreted as UTC by Date.parse;
  // appending "Z" would turn them into the non-standard "YYYY-MM-DDZ".
  if (/^\d{4}-\d{2}-\d{2}$/.test(s)) return Date.parse(s);

  // SQLite separates date and time with a space, while the ECMAScript
  // date-time format requires "T". Normalise so the string handed to
  // Date.parse is in the standard format rather than an engine-specific one.
  const iso = s.replace(/^(\d{4}-\d{2}-\d{2}) (?=\d{2}:\d{2})/, '$1T');

  // The alternation is grouped so that "$" anchors BOTH the Z marker and the
  // numeric offset; ungrouped, a "z" anywhere in the string would count as a
  // zone marker and suppress the UTC suffix.
  const hasZoneMarker = /(?:[zZ]|[+-]\d{2}:?\d{2})$/.test(iso);

  return Date.parse(hasZoneMarker ? iso : `${iso}Z`);
}
|
||||
|
||||
const SWEEP_INTERVAL_MS = 60_000;
|
||||
// Absolute idle ceiling for a running container. If the heartbeat file hasn't
|
||||
// been touched in this long, the container is either stuck or doing genuinely
|
||||
@@ -95,7 +106,7 @@ export function decideStuckAction(args: {
|
||||
|
||||
const tolerance = Math.max(CLAIM_STUCK_MS, declaredBashMs ?? 0);
|
||||
for (const claim of claims) {
|
||||
const claimedAt = Date.parse(claim.status_changed);
|
||||
const claimedAt = parseSqliteUtc(claim.status_changed);
|
||||
if (Number.isNaN(claimedAt)) continue;
|
||||
const claimAge = now - claimedAt;
|
||||
if (claimAge <= tolerance) continue;
|
||||
@@ -275,7 +286,7 @@ function resetStuckProcessingRows(
|
||||
// Already rescheduled for a future retry — don't bump tries again. The
|
||||
// wake path (sweep step 2) will fire when process_after elapses and a
|
||||
// fresh container will clean the orphan claim on startup.
|
||||
if (msg.processAfter && Date.parse(msg.processAfter) > now) continue;
|
||||
if (msg.processAfter && parseSqliteUtc(msg.processAfter) > now) continue;
|
||||
|
||||
if (msg.tries >= MAX_TRIES) {
|
||||
markMessageFailed(inDb, msg.id);
|
||||
|
||||
Reference in New Issue
Block a user