diff --git a/src/channels/chat-sdk-bridge.ts b/src/channels/chat-sdk-bridge.ts
index 02ddeba..21c5088 100644
--- a/src/channels/chat-sdk-bridge.ts
+++ b/src/channels/chat-sdk-bridge.ts
@@ -57,10 +57,18 @@ export interface ChatSdkBridgeConfig {
    * way and the default depends on installation style.
    */
   supportsThreads: boolean;
+  /**
+   * Optional transform applied to outbound text/markdown before it reaches the
+   * adapter. Used by channels that need to sanitize for a platform-specific
+   * quirk (e.g. Telegram's legacy Markdown parse mode).
+   */
+  transformOutboundText?: (text: string) => string;
 }
 
 export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter {
   const { adapter } = config;
+  const transformText = (t: string): string =>
+    config.transformOutboundText ? config.transformOutboundText(t) : t;
   let chat: Chat;
   let state: SqliteStateAdapter;
   let setupConfig: ChannelSetup;
@@ -321,7 +329,7 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
 
       if (content.operation === 'edit' && content.messageId) {
         await adapter.editMessage(tid, content.messageId as string, {
-          markdown: (content.text as string) || (content.markdown as string) || '',
+          markdown: transformText((content.text as string) || (content.markdown as string) || ''),
         });
         return;
       }
@@ -370,7 +378,8 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
       }
 
       // Normal message
-      const text = (content.markdown as string) || (content.text as string);
+      const rawText = (content.markdown as string) || (content.text as string);
+      const text = rawText ? transformText(rawText) : rawText;
       if (text) {
         // Attach files if present (FileUpload format: { data, filename })
         const fileUploads = message.files?.map((f: { data: Buffer; filename: string }) => ({
diff --git a/src/channels/telegram-markdown-sanitize.test.ts b/src/channels/telegram-markdown-sanitize.test.ts
new file mode 100644
--- /dev/null
+++ b/src/channels/telegram-markdown-sanitize.test.ts
@@ -0,0 +1,79 @@
+import { describe, it, expect } from 'vitest';
+import { sanitizeTelegramLegacyMarkdown } from './telegram-markdown-sanitize.js';
+
+describe('sanitizeTelegramLegacyMarkdown', () => {
+  it('downgrades CommonMark **bold** to legacy *bold*', () => {
+    expect(sanitizeTelegramLegacyMarkdown('**Host path**')).toBe('*Host path*');
+  });
+
+  it('downgrades CommonMark __bold__ to legacy _italic_', () => {
+    expect(sanitizeTelegramLegacyMarkdown('__label__')).toBe('_label_');
+  });
+
+  it('leaves balanced legacy *bold* and _italic_ alone', () => {
+    expect(sanitizeTelegramLegacyMarkdown('a *b* c _d_ e')).toBe('a *b* c _d_ e');
+  });
+
+  it('preserves inline code spans untouched', () => {
+    const input = 'see `file_name.py` and `**not bold**` here';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
+  });
+
+  it('preserves fenced code blocks untouched', () => {
+    const input = '```\nfoo_bar **baz**\n```';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
+  });
+
+  it('strips formatting chars on odd delimiter count (unbalanced *)', () => {
+    expect(sanitizeTelegramLegacyMarkdown('a * b *c*')).toBe('a  b c');
+  });
+
+  it('strips formatting chars on odd delimiter count (unbalanced _)', () => {
+    expect(sanitizeTelegramLegacyMarkdown('file_name has _one italic_')).toBe(
+      'filename has one italic',
+    );
+  });
+
+  it('strips brackets when unbalanced', () => {
+    expect(sanitizeTelegramLegacyMarkdown('see [docs here')).toBe('see docs here');
+  });
+
+  it('leaves matched brackets (e.g. links) alone when counts balance', () => {
+    const input = 'see [docs](https://example.com) for more';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
+  });
+
+  it('fixes the real failing message', () => {
+    const input =
+      'Sure! What do you want to mount, and where should it appear inside the container?\n\n' +
+      '- **Host path** (on your machine): e.g. `~/projects/webapp`\n' +
+      '- **Container path**: e.g. `workspace/webapp`\n' +
+      '- **Read-only or read-write?**';
+    const out = sanitizeTelegramLegacyMarkdown(input);
+    expect(out).not.toContain('**');
+    expect(out).toContain('*Host path*');
+    expect(out).toContain('`~/projects/webapp`');
+    expect((out.match(/\*/g) ?? []).length % 2).toBe(0);
+  });
+
+  it('is a no-op on empty string', () => {
+    expect(sanitizeTelegramLegacyMarkdown('')).toBe('');
+  });
+
+  it('replaces dash list bullets with • so the adapter does not re-emit `*` markers', () => {
+    expect(sanitizeTelegramLegacyMarkdown('- one\n- two')).toBe('• one\n• two');
+  });
+
+  it('preserves indented list structure', () => {
+    expect(sanitizeTelegramLegacyMarkdown('  - nested')).toBe('  • nested');
+  });
+
+  it('does not merge an empty bullet into the following line', () => {
+    expect(sanitizeTelegramLegacyMarkdown('- \n- two')).toBe('• \n• two');
+  });
+
+  it('keeps placeholder-shaped input that has no stored code segment', () => {
+    const input = 'x \x00CODE5\x00 y';
+    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
+  });
+});
diff --git a/src/channels/telegram-markdown-sanitize.ts b/src/channels/telegram-markdown-sanitize.ts
new file mode 100644
--- /dev/null
+++ b/src/channels/telegram-markdown-sanitize.ts
@@ -0,0 +1,66 @@
+/**
+ * Sanitize outbound text for Telegram's legacy `Markdown` parse mode.
+ *
+ * WORKAROUND: The @chat-adapter/telegram adapter hardcodes parse_mode=Markdown
+ * (legacy) but its converter emits CommonMark. Messages with `**bold**`, odd
+ * delimiter counts, or malformed links are rejected by Telegram and dropped
+ * after retries. Remove this once upstream ships real mode-aware conversion
+ * (vercel/chat PR #367 adds the knob; a follow-up is needed for the converter).
+ */
+
+const CODE_PATTERN = /```[\s\S]*?```|`[^`\n]*`/g;
+const PLACEHOLDER_PREFIX = '\x00CODE';
+const PLACEHOLDER_SUFFIX = '\x00';
+
+/**
+ * Rewrite `input` so it only uses constructs this module treats as safe for
+ * legacy Markdown. Inline code spans and fenced blocks are set aside first
+ * and restored verbatim at the end.
+ *
+ * @param input - Outbound text/markdown; an empty string is returned as-is.
+ * @returns The sanitized text.
+ */
+export function sanitizeTelegramLegacyMarkdown(input: string): string {
+  if (!input) return input;
+
+  // Swap code for NUL-delimited placeholders so the rewrites below skip it.
+  const codeSegments: string[] = [];
+  let text = input.replace(CODE_PATTERN, (m) => {
+    codeSegments.push(m);
+    return `${PLACEHOLDER_PREFIX}${codeSegments.length - 1}${PLACEHOLDER_SUFFIX}`;
+  });
+
+  // The adapter re-parses and re-stringifies markdown before sending, which
+  // rewrites `- item` list bullets into `* item` — injecting unbalanced
+  // asterisks that Telegram's legacy Markdown parser then rejects. Replace
+  // list bullets with a plain Unicode bullet so the adapter treats the line
+  // as prose. Only spaces/tabs are matched around the marker: `\s` would also
+  // match newlines and merge an empty bullet into the following line.
+  text = text.replace(/^([ \t]*)[-+][ \t]+/gm, '$1• ');
+
+  // Downgrade CommonMark double delimiters to their single legacy form.
+  text = text.replace(/\*\*([^*\n]+?)\*\*/g, '*$1*');
+  text = text.replace(/__([^_\n]+?)__/g, '_$1_');
+
+  // An odd count means some delimiter is unpaired; drop all emphasis markers.
+  const starCount = (text.match(/\*/g) ?? []).length;
+  const underCount = (text.match(/_/g) ?? []).length;
+  if (starCount % 2 !== 0 || underCount % 2 !== 0) {
+    text = text.replace(/[*_]/g, '');
+  }
+
+  // Likewise drop every bracket when `[` and `]` do not pair up.
+  const openBrackets = (text.match(/\[/g) ?? []).length;
+  const closeBrackets = (text.match(/\]/g) ?? []).length;
+  if (openBrackets !== closeBrackets) {
+    text = text.replace(/[[\]]/g, '');
+  }
+
+  // Restore code. A placeholder-shaped sequence with no stored segment must
+  // have come from the input itself, so it is kept rather than replaced with
+  // the string "undefined".
+  return text.replace(
+    new RegExp(`${PLACEHOLDER_PREFIX}(\\d+)${PLACEHOLDER_SUFFIX}`, 'g'),
+    (match, i) => codeSegments[Number(i)] ?? match,
+  );
+}
diff --git a/src/channels/telegram.ts b/src/channels/telegram.ts
index 6580770..939ac37 100644
--- a/src/channels/telegram.ts
+++ b/src/channels/telegram.ts
@@ -9,6 +9,7 @@ import { readEnvFile } from '../env.js';
 import { log } from '../log.js';
 import { createMessagingGroup, getMessagingGroupByPlatform, updateMessagingGroup } from '../db/messaging-groups.js';
 import { createChatSdkBridge, type ReplyContext } from './chat-sdk-bridge.js';
+import { sanitizeTelegramLegacyMarkdown } from './telegram-markdown-sanitize.js';
 import { registerChannelAdapter } from './channel-registry.js';
 import type { ChannelAdapter, ChannelSetup, InboundMessage } from './adapter.js';
 import { tryConsume } from './telegram-pairing.js';
@@ -155,6 +156,7 @@ registerChannelAdapter('telegram', {
       concurrency: 'concurrent',
       extractReplyContext,
       supportsThreads: false,
+      transformOutboundText: sanitizeTelegramLegacyMarkdown,
     });
 
     const botUsernamePromise = fetchBotUsername(token);