From b9d302524e8b40be7e194e866cf2dd33d853fdc7 Mon Sep 17 00:00:00 2001 From: robbyczgw-cla Date: Wed, 29 Apr 2026 15:01:09 +0000 Subject: [PATCH 1/2] fix(session-manager): derive attachment extension from mimeType and att.type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a channel bridge passes an attachment without an explicit `name`, extractAttachmentFiles fell back to `attachment-<timestamp>` with no extension. Agents could not tell whether the file was a JPEG, PDF, or audio clip, and tools keyed on extension (image viewers, exiftool, etc.) misbehaved. Two cases are now covered: 1. Channels that set `mimeType` but no `name` (Discord/Slack documents, Telegram document uploads). A small MIME-to-extension table covers the common content types — image/*, audio/*, video/*, pdf, zip, txt, json. Unknown MIMEs fall back to the unsuffixed name. 2. Channels that set `att.type` but no `mimeType` (Telegram photos, stickers, voice, animations). The chat-sdk bridge sets a coarse media-class (`photo` / `sticker` / `voice` / `video` / `animation`) which is reliable enough to derive a canonical extension. Telegram GIFs are MP4 under the hood. The existing isSafeAttachmentName security guard is preserved — the derived name still passes through it before disk I/O. The new lookup tables emit static values from internal maps and cannot construct a path-traversal payload; attacker-controlled att.name continues to flow through the same validator. --- src/session-manager.ts | 56 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/session-manager.ts b/src/session-manager.ts index 996a750..342c155 100644 --- a/src/session-manager.ts +++ b/src/session-manager.ts @@ -230,6 +230,60 @@ export function writeSessionMessage( updateSession(sessionId, { last_active: new Date().toISOString() }); } +// Map common MIME types to canonical file extensions. 
Used to derive a +// usable suffix when the channel bridge passes an attachment without an +// explicit `name`. Without an extension, agents (and humans) can't tell +// what kind of file landed in the inbox. +const MIME_TO_EXT: Record<string, string> = { + 'image/jpeg': 'jpg', + 'image/png': 'png', + 'image/webp': 'webp', + 'image/gif': 'gif', + 'image/heic': 'heic', + 'audio/ogg': 'ogg', + 'audio/mpeg': 'mp3', + 'audio/wav': 'wav', + 'audio/mp4': 'm4a', + 'video/mp4': 'mp4', + 'video/webm': 'webm', + 'video/quicktime': 'mov', + 'application/pdf': 'pdf', + 'text/plain': 'txt', + 'application/json': 'json', + 'application/zip': 'zip', +}; + +// Fallback when `mimeType` is missing — Telegram photos and stickers arrive +// without an explicit MIME on the attachment object. The channel bridge sets +// `att.type` to a coarse media-class (`photo` / `sticker` / `voice` / etc.) +// which is reliable enough to derive a canonical extension. Telegram's GIFs +// are actually MP4, hence `animation: 'mp4'`. +const TYPE_TO_EXT: Record<string, string> = { + image: 'jpg', + photo: 'jpg', + sticker: 'webp', + voice: 'ogg', + audio: 'mp3', + video: 'mp4', + animation: 'mp4', +}; + +function extForMime(mime: string | undefined): string { + if (!mime) return ''; + const clean = mime.split(';')[0].trim().toLowerCase(); + return MIME_TO_EXT[clean] ?? ''; +} + +function deriveAttachmentName(att: Record<string, unknown>): string { + const explicit = att.name as string | undefined; + if (explicit) return explicit; + let ext = extForMime(att.mimeType as string | undefined); + if (!ext && typeof att.type === 'string') { + ext = TYPE_TO_EXT[att.type.toLowerCase()] ?? ''; + } + return ext ? `attachment-${Date.now()}.${ext}` : `attachment-${Date.now()}`; +} + /** * If message content has attachments with base64 `data`, save them to * the session's inbox directory and replace with `localPath`. 
@@ -259,7 +313,7 @@ function extractAttachmentFiles( // this guard, `path.join(inboxDir, '../../...')` writes anywhere the // host process has fs permission — see Signal Desktop's Nov 2025 // attachment-fileName advisory for the same archetype. - const rawName = (att.name as string | undefined) ?? `attachment-${Date.now()}`; + const rawName = deriveAttachmentName(att); const filename = isSafeAttachmentName(rawName) ? rawName : `attachment-${Date.now()}`; if (filename !== rawName) { log.warn('Refused unsafe attachment filename — would escape inbox', { From 2a3be9ec7fc00b687489674258d6c8ffb35ce742 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Thu, 30 Apr 2026 09:40:44 +0300 Subject: [PATCH 2/2] extract attachment-naming, harden mimeType guard, add tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the MIME/type-to-extension maps and derivation helpers out of session-manager.ts into a dedicated attachment-naming module — keeps session-manager focused on session lifecycle and gives the helpers a natural home for unit tests alongside the existing attachment-safety module. Two small fixes alongside the extraction: - extForMime now guards `typeof mime !== 'string'` before .split, so a buggy bridge passing `mimeType: { ... }` (object) no longer crashes the inbound write loop. - deriveAttachmentName computes Date.now() once per call instead of twice, and tightens the explicit-name check to a string-and-truthy guard so non-string values fall through to derivation. Adds attachment-naming.test.ts with 11 cases covering MIME normalization (case + parameters), Telegram type fallback, the non-string defensive guard, and the bare-timestamp fallback. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/attachment-naming.test.ts | 71 +++++++++++++++++++++++++++++++++++ src/attachment-naming.ts | 69 ++++++++++++++++++++++++++++++++++ src/session-manager.ts | 55 +-------------------------- 3 files changed, 141 insertions(+), 54 deletions(-) create mode 100644 src/attachment-naming.test.ts create mode 100644 src/attachment-naming.ts diff --git a/src/attachment-naming.test.ts b/src/attachment-naming.test.ts new file mode 100644 index 0000000..5ca13f1 --- /dev/null +++ b/src/attachment-naming.test.ts @@ -0,0 +1,71 @@ +import { describe, it, expect } from 'vitest'; + +import { deriveAttachmentName, extForMime } from './attachment-naming.js'; + +describe('extForMime', () => { + it('returns empty for undefined / non-string / empty', () => { + expect(extForMime(undefined)).toBe(''); + expect(extForMime('')).toBe(''); + expect(extForMime({})).toBe(''); + expect(extForMime(null)).toBe(''); + expect(extForMime(42)).toBe(''); + }); + + it('maps common MIME types to canonical extensions', () => { + expect(extForMime('image/jpeg')).toBe('jpg'); + expect(extForMime('application/pdf')).toBe('pdf'); + expect(extForMime('audio/ogg')).toBe('ogg'); + }); + + it('strips parameters and is case-insensitive', () => { + expect(extForMime('image/JPEG; foo=bar')).toBe('jpg'); + expect(extForMime(' Application/PDF ')).toBe('pdf'); + expect(extForMime('text/plain; charset=utf-8')).toBe('txt'); + }); + + it('returns empty for unknown MIMEs', () => { + expect(extForMime('application/octet-stream')).toBe(''); + expect(extForMime('application/x-totally-made-up')).toBe(''); + }); +}); + +describe('deriveAttachmentName', () => { + it('returns explicit name when set, no derivation', () => { + expect(deriveAttachmentName({ name: 'photo.jpg', mimeType: 'application/pdf' })).toBe('photo.jpg'); + }); + + it('ignores empty / non-string explicit name and falls through to derivation', () => { + const out = deriveAttachmentName({ name: '', mimeType: 
'application/pdf' }); + expect(out).toMatch(/^attachment-\d+\.pdf$/); + + const out2 = deriveAttachmentName({ name: 42, mimeType: 'application/pdf' }); + expect(out2).toMatch(/^attachment-\d+\.pdf$/); + }); + + it('derives extension from mimeType when no name', () => { + expect(deriveAttachmentName({ mimeType: 'application/pdf' })).toMatch(/^attachment-\d+\.pdf$/); + expect(deriveAttachmentName({ mimeType: 'image/jpeg' })).toMatch(/^attachment-\d+\.jpg$/); + }); + + it('falls back to att.type when mimeType is missing (Telegram photos/stickers)', () => { + expect(deriveAttachmentName({ type: 'photo' })).toMatch(/^attachment-\d+\.jpg$/); + expect(deriveAttachmentName({ type: 'sticker' })).toMatch(/^attachment-\d+\.webp$/); + expect(deriveAttachmentName({ type: 'voice' })).toMatch(/^attachment-\d+\.ogg$/); + expect(deriveAttachmentName({ type: 'animation' })).toMatch(/^attachment-\d+\.mp4$/); + }); + + it('case-insensitive att.type lookup', () => { + expect(deriveAttachmentName({ type: 'PHOTO' })).toMatch(/^attachment-\d+\.jpg$/); + }); + + it('returns bare timestamp when nothing matches', () => { + expect(deriveAttachmentName({})).toMatch(/^attachment-\d+$/); + expect(deriveAttachmentName({ mimeType: 'application/octet-stream' })).toMatch(/^attachment-\d+$/); + expect(deriveAttachmentName({ type: 'mystery-class' })).toMatch(/^attachment-\d+$/); + }); + + it('does not crash on non-string mimeType (defensive against buggy bridges)', () => { + expect(() => deriveAttachmentName({ mimeType: { foo: 'bar' } })).not.toThrow(); + expect(deriveAttachmentName({ mimeType: { foo: 'bar' } })).toMatch(/^attachment-\d+$/); + }); +}); diff --git a/src/attachment-naming.ts b/src/attachment-naming.ts new file mode 100644 index 0000000..2dfe8c1 --- /dev/null +++ b/src/attachment-naming.ts @@ -0,0 +1,69 @@ +/** + * Derive a safe, extensioned filename for inbound attachments when the + * channel bridge passes data without an explicit `name`. + * + * Two-step lookup: + * 1. 
`mimeType` → extension (Discord/Slack documents, Telegram document + * uploads — channels that set the MIME but not a filename). + * 2. `att.type` → extension (Telegram photos/stickers/voice/animations — + * coarse media-class set by the chat-sdk bridge with no MIME). + * + * Output is still passed through `isSafeAttachmentName` at the call site. + * The maps emit static values, so no derivation path can construct a + * traversal payload — only an attacker-controlled `att.name` can, and that + * goes through the safety guard unchanged. + */ + +// Map common MIME types to canonical file extensions. Without an extension, +// agents (and humans) can't tell what kind of file landed in the inbox, and +// tools keyed on extension (image viewers, exiftool, etc.) misbehave. +const MIME_TO_EXT: Record<string, string> = { + 'image/jpeg': 'jpg', + 'image/png': 'png', + 'image/webp': 'webp', + 'image/gif': 'gif', + 'image/heic': 'heic', + 'audio/ogg': 'ogg', + 'audio/mpeg': 'mp3', + 'audio/wav': 'wav', + 'audio/mp4': 'm4a', + 'video/mp4': 'mp4', + 'video/webm': 'webm', + 'video/quicktime': 'mov', + 'application/pdf': 'pdf', + 'text/plain': 'txt', + 'application/json': 'json', + 'application/zip': 'zip', +}; + +// Fallback when `mimeType` is missing — Telegram photos and stickers arrive +// without an explicit MIME on the attachment object. The channel bridge sets +// `att.type` to a coarse media-class (`photo` / `sticker` / `voice` / etc.) +// which is reliable enough to derive a canonical extension. Telegram's GIFs +// are actually MP4, hence `animation: 'mp4'`. +const TYPE_TO_EXT: Record<string, string> = { + image: 'jpg', + photo: 'jpg', + sticker: 'webp', + voice: 'ogg', + audio: 'mp3', + video: 'mp4', + animation: 'mp4', +}; + +export function extForMime(mime: unknown): string { + if (typeof mime !== 'string' || !mime) return ''; + const clean = mime.split(';')[0].trim().toLowerCase(); + return MIME_TO_EXT[clean] ?? 
''; +} + +export function deriveAttachmentName(att: Record<string, unknown>): string { + const explicit = att.name; + if (typeof explicit === 'string' && explicit) return explicit; + let ext = extForMime(att.mimeType); + if (!ext && typeof att.type === 'string') { + ext = TYPE_TO_EXT[att.type.toLowerCase()] ?? ''; + } + const ts = Date.now(); + return ext ? `attachment-${ts}.${ext}` : `attachment-${ts}`; +} diff --git a/src/session-manager.ts b/src/session-manager.ts index 342c155..7751fba 100644 --- a/src/session-manager.ts +++ b/src/session-manager.ts @@ -14,6 +14,7 @@ import type Database from 'better-sqlite3'; import fs from 'fs'; import path from 'path'; +import { deriveAttachmentName } from './attachment-naming.js'; import { isSafeAttachmentName } from './attachment-safety.js'; import type { OutboundFile } from './channels/adapter.js'; import { DATA_DIR } from './config.js'; @@ -230,60 +231,6 @@ export function writeSessionMessage( updateSession(sessionId, { last_active: new Date().toISOString() }); } -// Map common MIME types to canonical file extensions. Used to derive a -// usable suffix when the channel bridge passes an attachment without an -// explicit `name`. Without an extension, agents (and humans) can't tell -// what kind of file landed in the inbox. -const MIME_TO_EXT: Record<string, string> = { - 'image/jpeg': 'jpg', - 'image/png': 'png', - 'image/webp': 'webp', - 'image/gif': 'gif', - 'image/heic': 'heic', - 'audio/ogg': 'ogg', - 'audio/mpeg': 'mp3', - 'audio/wav': 'wav', - 'audio/mp4': 'm4a', - 'video/mp4': 'mp4', - 'video/webm': 'webm', - 'video/quicktime': 'mov', - 'application/pdf': 'pdf', - 'text/plain': 'txt', - 'application/json': 'json', - 'application/zip': 'zip', -}; - -// Fallback when `mimeType` is missing — Telegram photos and stickers arrive -// without an explicit MIME on the attachment object. The channel bridge sets -// `att.type` to a coarse media-class (`photo` / `sticker` / `voice` / etc.) -// which is reliable enough to derive a canonical extension. 
Telegram's GIFs -// are actually MP4, hence `animation: 'mp4'`. -const TYPE_TO_EXT: Record<string, string> = { - image: 'jpg', - photo: 'jpg', - sticker: 'webp', - voice: 'ogg', - audio: 'mp3', - video: 'mp4', - animation: 'mp4', -}; - -function extForMime(mime: string | undefined): string { - if (!mime) return ''; - const clean = mime.split(';')[0].trim().toLowerCase(); - return MIME_TO_EXT[clean] ?? ''; -} - -function deriveAttachmentName(att: Record<string, unknown>): string { - const explicit = att.name as string | undefined; - if (explicit) return explicit; - let ext = extForMime(att.mimeType as string | undefined); - if (!ext && typeof att.type === 'string') { - ext = TYPE_TO_EXT[att.type.toLowerCase()] ?? ''; - } - return ext ? `attachment-${Date.now()}.${ext}` : `attachment-${Date.now()}`; -} - /** * If message content has attachments with base64 `data`, save them to * the session's inbox directory and replace with `localPath`.