From 96b3c796c5275e1e36f6f7f33235a28406de6a2a Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Tue, 12 May 2026 21:46:50 -0400 Subject: [PATCH] feat: file attachment support in chat (images + text/code files) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Text files (.md, .py, .js, .json, etc.): read client-side and injected into the message body as a fenced code block — works with all backends with zero model capability requirements. Images (PNG/JPG/WebP/GIF, max 5 MB): encoded as base64 data URL on the client and sent as a separate attachment field. Backend formats them as OpenAI multimodal content (text + image_url) for local_openai backends. Claude CLI and Gemini CLI see the text message with a "📎 filename.png" note; image data is never written to session history. - index.html: 📎 button + hidden file input in mode-select row; attachment-row preview area with thumbnail (images) or filename chip - app.js: _resolveAttachment(), file reader, clearAttachment(); sendMessage/sendOrchestrate updated to allow no-text sends when a file is pending; attachment spread into chat payload for images - chat.py: Attachment model; attachment field on ChatRequest; llm_attachment extracted in _stream_chat and passed to complete() - llm_client.py: attachment param through complete()/_dispatch()/_local(); _local() builds multimodal content array for vision calls - style.css: #attach-btn, #attachment-row, #attachment-preview, thumb Co-Authored-By: Claude Sonnet 4.6 --- cortex/llm_client.py | 30 +++++++++-- cortex/routers/chat.py | 21 ++++++++ cortex/static/app.js | 114 +++++++++++++++++++++++++++++++++++---- cortex/static/index.html | 13 +++++ cortex/static/style.css | 52 ++++++++++++++++++ 5 files changed, 215 insertions(+), 15 deletions(-) diff --git a/cortex/llm_client.py b/cortex/llm_client.py index 0b2be5b..a3ae37e 100644 --- a/cortex/llm_client.py +++ b/cortex/llm_client.py @@ -51,6 +51,7 @@ async def complete( role: str = 
"chat", slot: str | None = None, max_tokens: int = 2048, + attachment: dict | None = None, ) -> tuple[str, str]: """ Returns (response_text, actual_backend_used). @@ -96,7 +97,7 @@ async def complete( fallback = _FALLBACK.get(primary, "claude") try: - response = await _dispatch(primary, system_prompt, messages, resolved_cfg) + response = await _dispatch(primary, system_prompt, messages, resolved_cfg, attachment=attachment) return response, primary except Exception as e: err_str = str(e) @@ -116,11 +117,12 @@ async def _dispatch( system_prompt: str, messages: list[dict], model_cfg: dict | None, + attachment: dict | None = None, ) -> str: if backend == "gemini": return await _gemini(system_prompt, messages) if backend == "local": - return await _local(system_prompt, messages, model_cfg) + return await _local(system_prompt, messages, model_cfg, attachment=attachment) return await _claude(system_prompt, messages, model_cfg) @@ -166,11 +168,17 @@ async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | No return await _run(cmd, timeout=settings.timeout_claude, env=env) -async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | None = None) -> str: +async def _local( + system_prompt: str, + messages: list[dict], + model_cfg: dict | None = None, + attachment: dict | None = None, +) -> str: """OpenAI-compatible backend β€” Open WebUI / Ollama. model_cfg is pre-resolved by complete() via model_registry. Falls back to registry lookup if not provided. + attachment: optional image dict {filename, mime_type, data} for vision calls. 
""" import httpx @@ -200,8 +208,20 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non msgs: list[dict] = [] if system_prompt: msgs.append({"role": "system", "content": system_prompt}) - # Strip any non-standard metadata fields before sending to the API - msgs.extend({"role": m["role"], "content": m["content"]} for m in messages) + + # Build message list; inject image into the last user message when present. + for i, m in enumerate(messages): + is_last = (i == len(messages) - 1) + if is_last and m["role"] == "user" and attachment: + content: list[dict] = [{"type": "text", "text": m["content"]}] + content.append({ + "type": "image_url", + "image_url": {"url": attachment["data"]}, + }) + msgs.append({"role": "user", "content": content}) + else: + # Strip non-standard metadata fields before sending to the API + msgs.append({"role": m["role"], "content": m["content"]}) url = api_url.rstrip("/") + chat_path headers: dict[str, str] = {} diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py index 85ed050..a0af5bd 100644 --- a/cortex/routers/chat.py +++ b/cortex/routers/chat.py @@ -42,6 +42,12 @@ def _role_model_label(username: str, role: str, actual_backend: str) -> str: return _backend_label(actual_backend, username, role) +class Attachment(BaseModel): + filename: str + mime_type: str + data: str # base64 data URL for images (e.g. 
"data:image/png;base64,...") + + class ChatRequest(BaseModel): message: str session_id: str | None = None @@ -55,6 +61,7 @@ class ChatRequest(BaseModel): off_record: bool = False # skip session log (in-memory context preserved) user: str = "scott" persona: str = "inara" + attachment: Attachment | None = None # image attachment (text files injected client-side) class BackendRequest(BaseModel): @@ -103,6 +110,19 @@ async def _stream_chat(req: ChatRequest): mode="otr" if req.off_record else "chat", ) history = load_session(session_id) + + # req.message already contains the full user text: + # - text files: client embedded content as a fenced code block + # - images: client added "πŸ“Ž filename.png" note; image data is in req.attachment + # History always stores text only β€” base64 image data is never written to disk. + llm_attachment: dict | None = None + if req.attachment and req.attachment.mime_type.startswith("image/"): + llm_attachment = { + "filename": req.attachment.filename, + "mime_type": req.attachment.mime_type, + "data": req.attachment.data, + } + history.append({"role": "user", "content": req.message, "off_record": req.off_record}) task = asyncio.create_task(complete( @@ -111,6 +131,7 @@ async def _stream_chat(req: ChatRequest): model=req.model, role=req.chat_role, slot=req.slot, + attachment=llm_attachment, )) try: diff --git a/cortex/static/app.js b/cortex/static/app.js index 5b28249..508d5a2 100644 --- a/cortex/static/app.js +++ b/cortex/static/app.js @@ -535,6 +535,94 @@ addMessage('system', `Model: ${entry.label}`); }); + // ── File attachment ────────────────────────────────────────── + const attachBtn = document.getElementById('attach-btn'); + const fileInput = document.getElementById('file-input'); + const attachRow = document.getElementById('attachment-row'); + const attachName = document.getElementById('attachment-name'); + const attachClear = document.getElementById('attachment-clear'); + const attachThumb = 
document.getElementById('attachment-thumb'); + + const _IMG_TYPES = new Set(['image/png', 'image/jpeg', 'image/webp', 'image/gif']); + const _TXT_EXTS = new Set(['.md','.txt','.py','.js','.ts','.jsx','.tsx','.json','.yaml','.yml','.toml','.html','.css','.sh','.csv','.xml','.rs','.go','.java','.c','.cpp','.h','.rb','.php','.swift','.kt','.sql','.env','.ini','.cfg','.log']); + const MAX_IMAGE_B = 5 * 1024 * 1024; // 5 MB + const MAX_TEXT_B = 100 * 1024; // 100 KB + + let _pendingAttach = null; // {type:'image'|'text', filename, mime_type, data} + + function _isTextFile(file) { + if (file.type.startsWith('text/') || file.type === 'application/json') return true; + const ext = '.' + file.name.split('.').pop().toLowerCase(); + return _TXT_EXTS.has(ext); + } + + function _langHint(filename) { + const ext = filename.split('.').pop().toLowerCase(); + const m = {py:'python',js:'javascript',ts:'typescript',jsx:'jsx',tsx:'tsx',json:'json',yaml:'yaml',yml:'yaml',toml:'toml',html:'html',css:'css',sh:'bash',md:'markdown',rs:'rust',go:'go',java:'java',c:'c',cpp:'cpp',h:'c',rb:'ruby',php:'php',swift:'swift',kt:'kotlin',sql:'sql'}; + return m[ext] || ''; + } + + function clearAttachment() { + _pendingAttach = null; + fileInput.value = ''; + attachRow.style.display = 'none'; + if (attachThumb) { attachThumb.src = ''; attachThumb.style.display = 'none'; } + } + + /** + * Resolve the pending attachment into send-ready values. + * - Text files: inject file content as a fenced code block in the message. + * displayText = serverText = injected content (what the model sees). + * - Images: keep text separate; pass image as payloadAttachment for vision APIs. + * serverText includes a πŸ“Ž filename note for non-vision backends. 
+ */ + function _resolveAttachment(inputText) { + if (!_pendingAttach) return { displayText: inputText, serverText: inputText, payloadAttachment: null }; + const { type, filename, mime_type, data } = _pendingAttach; + if (type === 'text') { + const lang = _langHint(filename); + const block = `πŸ“Ž ${filename}\n\`\`\`${lang}\n${data.trimEnd()}\n\`\`\``; + const serverText = inputText ? `${inputText}\n\n${block}` : block; + return { displayText: serverText, serverText, payloadAttachment: null }; + } + // Image + const note = `πŸ“Ž ${filename}`; + const displayText = inputText ? `${inputText}\n${note}` : note; + return { displayText, serverText: displayText, payloadAttachment: { filename, mime_type, data } }; + } + + attachBtn.addEventListener('click', () => fileInput.click()); + attachClear.addEventListener('click', clearAttachment); + + fileInput.addEventListener('change', () => { + const file = fileInput.files[0]; + if (!file) return; + fileInput.value = ''; // reset so the same file can be re-selected + + const isImg = _IMG_TYPES.has(file.type); + const isTxt = !isImg && _isTextFile(file); + + if (!isImg && !isTxt) { showToast('Unsupported file type'); return; } + if (isImg && file.size > MAX_IMAGE_B) { showToast('Image too large (max 5 MB)'); return; } + if (isTxt && file.size > MAX_TEXT_B) { showToast('Text file too large (max 100 KB)'); return; } + + const reader = new FileReader(); + reader.onload = (e) => { + _pendingAttach = { type: isImg ? 'image' : 'text', filename: file.name, mime_type: file.type || 'text/plain', data: e.target.result }; + attachName.textContent = file.name; + if (isImg && attachThumb) { + attachThumb.src = e.target.result; + attachThumb.style.display = 'block'; + attachRow.querySelector('#attachment-icon').style.display = 'none'; + } else if (attachThumb) { + attachThumb.style.display = 'none'; + attachRow.querySelector('#attachment-icon').style.display = ''; + } + attachRow.style.display = 'flex'; + }; + isImg ? 
reader.readAsDataURL(file) : reader.readAsText(file); + }); + // ── Sessions panel ─────────────────────────────────────────── sessionsBtn.addEventListener('click', async (e) => { @@ -1308,8 +1396,8 @@ } async function sendMessage() { - const text = inputEl.value.trim(); - if (!text || activeController) return; + const rawText = inputEl.value.trim(); + if ((!rawText && !_pendingAttach) || activeController) return; const wasNewSession = !sessionId; @@ -1323,10 +1411,12 @@ activeController = new AbortController(); const isOtr = current_mode === 'otr'; + const { displayText, serverText, payloadAttachment } = _resolveAttachment(rawText); + clearAttachment(); const userHistIdx = currentHistory.length; - currentHistory.push({ role: 'user', content: text }); - const userMsgDiv = addMessage('user', text); + currentHistory.push({ role: 'user', content: serverText }); + const userMsgDiv = addMessage('user', displayText); attachHistoryControls(userMsgDiv, userHistIdx); if (isOtr) setMessageMeta(userMsgDiv, {otr: true}); scrollToBottom(); @@ -1334,7 +1424,7 @@ const thinkingDiv = addMessage('assistant thinking', '✨ thinking…'); const payload = { - message: text, + message: serverText, session_id: sessionId, tier: currentTier, include_long: memLong, @@ -1345,6 +1435,7 @@ slot: activeChatModel()?.slot || null, user: CORTEX_USER, persona: CORTEX_PERSONA, + ...(payloadAttachment ? 
{ attachment: payloadAttachment } : {}), }; await _doSend(payload, thinkingDiv, wasNewSession); @@ -1509,8 +1600,8 @@ } async function sendOrchestrate() { - const text = inputEl.value.trim(); - if (!text || activeController) return; + const rawText = inputEl.value.trim(); + if ((!rawText && !_pendingAttach) || activeController) return; inputEl.value = ''; syncHeight(); @@ -1521,13 +1612,16 @@ activeController = new AbortController(); - currentHistory.push({ role: 'user', content: text }); - const userMsgDiv = addMessage('user', text); + const { displayText, serverText } = _resolveAttachment(rawText); + clearAttachment(); + + currentHistory.push({ role: 'user', content: serverText }); + const userMsgDiv = addMessage('user', displayText); scrollToBottom(); const thinkingDiv = addMessage('assistant thinking', '⚑ working…'); - await _doOrchestrate(text, thinkingDiv, userMsgDiv); + await _doOrchestrate(serverText, thinkingDiv, userMsgDiv); activeController = null; setProcessing(false); diff --git a/cortex/static/index.html b/cortex/static/index.html index 6a0988b..c97ffab 100644 --- a/cortex/static/index.html +++ b/cortex/static/index.html @@ -180,6 +180,19 @@ + + + + + +
diff --git a/cortex/static/style.css b/cortex/static/style.css index 5ac01f3..0f2f0bc 100644 --- a/cortex/static/style.css +++ b/cortex/static/style.css @@ -861,6 +861,58 @@ } #tools-toggle.local-on:hover { box-shadow: 0 0 10px var(--amber-glow); } + #attach-btn { + background: var(--bg); + border: 1px solid rgba(255,255,255,0.1); + border-radius: 6px; + color: rgba(255,255,255,0.3); + font-size: 0.95rem; + padding: 3px 7px; + cursor: pointer; + transition: color 0.15s, border-color 0.15s; + } + #attach-btn:hover { color: rgba(255,255,255,0.6); border-color: rgba(255,255,255,0.25); } + + #attachment-row { + padding: 0.3rem 0.5rem; + border-bottom: 1px solid var(--border); + } + #attachment-preview { + display: inline-flex; + align-items: center; + gap: 0.4rem; + background: var(--bg-alt); + border: 1px solid var(--border); + border-radius: 6px; + padding: 0.2rem 0.5rem; + font-size: 0.82rem; + max-width: 100%; + } + #attachment-thumb { + max-height: 2.4rem; + max-width: 3.5rem; + border-radius: 3px; + object-fit: contain; + } + #attachment-name { + color: var(--text-mid); + max-width: 220px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + #attachment-clear { + background: none; + border: none; + color: var(--muted); + cursor: pointer; + padding: 0 0.15rem; + font-size: 0.78rem; + line-height: 1; + flex-shrink: 0; + } + #attachment-clear:hover { color: var(--text); } + #input { flex: 1; background: var(--bg);