From 96b3c796c5275e1e36f6f7f33235a28406de6a2a Mon Sep 17 00:00:00 2001
From: Scott Idem <stidem@gmail.com>
Date: Tue, 12 May 2026 21:46:50 -0400
Subject: [PATCH] feat: file attachment support in chat (images + text/code
 files)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Text files (.md, .py, .js, .json, etc., max 100 KB): read client-side
and injected into the message body as a fenced code block — works with
all backends with zero model capability requirements.

Images (PNG/JPG/WebP/GIF, max 5 MB): encoded as a base64 data URL on
the client and sent as a separate attachment field. The backend formats
them as OpenAI multimodal content (text + image_url) for local_openai
backends. Claude CLI and Gemini CLI see only the text message with a
"📎 filename.png" note; image data is never written to session history.

- index.html: 📎 button + hidden file input in mode-select row;
  attachment-row preview area with thumbnail (images) or filename chip
- app.js: _resolveAttachment(), file reader, clearAttachment();
  sendMessage/sendOrchestrate updated to allow no-text sends when a
  file is pending; image attachment spread into the chat payload by
  sendMessage only (sendOrchestrate forwards just the 📎 filename note)
- chat.py: Attachment model; attachment field on ChatRequest;
  llm_attachment extracted in _stream_chat and passed to complete()
- llm_client.py: attachment param through complete()/_dispatch()/_local();
  _local() builds multimodal content array for vision calls
- style.css: #attach-btn, #attachment-row, #attachment-preview, thumb

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cortex/llm_client.py     |  30 +++++++++--
 cortex/routers/chat.py   |  21 ++++++++
 cortex/static/app.js     | 114 +++++++++++++++++++++++++++++++++++----
 cortex/static/index.html |  13 +++++
 cortex/static/style.css  |  52 ++++++++++++++++++
 5 files changed, 215 insertions(+), 15 deletions(-)

diff --git a/cortex/llm_client.py b/cortex/llm_client.py
index 0b2be5b..a3ae37e 100644
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -51,6 +51,7 @@ async def complete(
     role: str = "chat",
     slot: str | None = None,
     max_tokens: int = 2048,
+    attachment: dict | None = None,
 ) -> tuple[str, str]:
     """
     Returns (response_text, actual_backend_used).
@@ -96,7 +97,7 @@ async def complete(
     fallback = _FALLBACK.get(primary, "claude")
 
     try:
-        response = await _dispatch(primary, system_prompt, messages, resolved_cfg)
+        response = await _dispatch(primary, system_prompt, messages, resolved_cfg, attachment=attachment)
         return response, primary
     except Exception as e:
         err_str = str(e)
@@ -116,11 +117,12 @@ async def _dispatch(
     system_prompt: str,
     messages: list[dict],
     model_cfg: dict | None,
+    attachment: dict | None = None,
 ) -> str:
     if backend == "gemini":
         return await _gemini(system_prompt, messages)
     if backend == "local":
-        return await _local(system_prompt, messages, model_cfg)
+        return await _local(system_prompt, messages, model_cfg, attachment=attachment)
     return await _claude(system_prompt, messages, model_cfg)
 
 
@@ -166,11 +168,17 @@ async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | No
     return await _run(cmd, timeout=settings.timeout_claude, env=env)
 
 
-async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | None = None) -> str:
+async def _local(
+    system_prompt: str,
+    messages: list[dict],
+    model_cfg: dict | None = None,
+    attachment: dict | None = None,
+) -> str:
     """OpenAI-compatible backend — Open WebUI / Ollama.
 
     model_cfg is pre-resolved by complete() via model_registry.
     Falls back to registry lookup if not provided.
+    attachment: optional image dict {filename, mime_type, data} for vision calls.
     """
     import httpx
 
@@ -200,8 +208,20 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
     msgs: list[dict] = []
     if system_prompt:
         msgs.append({"role": "system", "content": system_prompt})
-    # Strip any non-standard metadata fields before sending to the API
-    msgs.extend({"role": m["role"], "content": m["content"]} for m in messages)
+
+    # Build message list; inject image into the last user message when present.
+    for i, m in enumerate(messages):
+        is_last = (i == len(messages) - 1)
+        if is_last and m["role"] == "user" and attachment:
+            content: list[dict] = [{"type": "text", "text": m["content"]}]
+            content.append({
+                "type": "image_url",
+                "image_url": {"url": attachment["data"]},
+            })
+            msgs.append({"role": "user", "content": content})
+        else:
+            # Strip non-standard metadata fields before sending to the API
+            msgs.append({"role": m["role"], "content": m["content"]})
 
     url = api_url.rstrip("/") + chat_path
     headers: dict[str, str] = {}
diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py
index 85ed050..a0af5bd 100644
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -42,6 +42,12 @@ def _role_model_label(username: str, role: str, actual_backend: str) -> str:
     return _backend_label(actual_backend, username, role)
 
 
+class Attachment(BaseModel):  # file attachment payload carried on ChatRequest.attachment
+    filename: str  # file name as sent by the client
+    mime_type: str  # MIME type as sent by the client (not verified by this model)
+    data: str  # base64 data URL for images (e.g. "data:image/png;base64,...")
+
+
 class ChatRequest(BaseModel):
     message: str
     session_id: str | None = None
@@ -55,6 +61,7 @@ class ChatRequest(BaseModel):
     off_record: bool = False        # skip session log (in-memory context preserved)
     user: str = "scott"
     persona: str = "inara"
+    attachment: Attachment | None = None  # image attachment (text files injected client-side)
 
 
 class BackendRequest(BaseModel):
@@ -103,6 +110,19 @@ async def _stream_chat(req: ChatRequest):
         mode="otr" if req.off_record else "chat",
     )
     history = load_session(session_id)
+
+    # req.message already contains the full user text:
+    # - text files: client embedded content as a fenced code block
+    # - images: client added "📎 filename.png" note; image data is in req.attachment
+    # History always stores text only — base64 image data is never written to disk.
+    llm_attachment: dict | None = None
+    if req.attachment and req.attachment.mime_type.startswith("image/"):
+        llm_attachment = {
+            "filename": req.attachment.filename,
+            "mime_type": req.attachment.mime_type,
+            "data": req.attachment.data,
+        }
+
     history.append({"role": "user", "content": req.message, "off_record": req.off_record})
 
     task = asyncio.create_task(complete(
@@ -111,6 +131,7 @@ async def _stream_chat(req: ChatRequest):
         model=req.model,
         role=req.chat_role,
         slot=req.slot,
+        attachment=llm_attachment,
     ))
 
     try:
diff --git a/cortex/static/app.js b/cortex/static/app.js
index 5b28249..508d5a2 100644
--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -535,6 +535,94 @@
             addMessage('system', `Model: ${entry.label}`);
         });
 
+        // ── File attachment ──────────────────────────────────────────
+        const attachBtn   = document.getElementById('attach-btn');        // 📎 button that opens the file picker
+        const fileInput   = document.getElementById('file-input');        // hidden <input type="file">
+        const attachRow   = document.getElementById('attachment-row');    // preview row, shown while a file is pending
+        const attachName  = document.getElementById('attachment-name');   // filename label inside the preview
+        const attachClear = document.getElementById('attachment-clear');  // ✕ button that discards the pending file
+        const attachThumb = document.getElementById('attachment-thumb');  // <img> thumbnail, used for images only
+
+        const _IMG_TYPES = new Set(['image/png', 'image/jpeg', 'image/webp', 'image/gif']);  // MIME types accepted as images
+        const _TXT_EXTS  = new Set(['.md','.txt','.py','.js','.ts','.jsx','.tsx','.json','.yaml','.yml','.toml','.html','.css','.sh','.csv','.xml','.rs','.go','.java','.c','.cpp','.h','.rb','.php','.swift','.kt','.sql','.env','.ini','.cfg','.log']);  // extensions accepted as text when the MIME type is not text/* or JSON
+        const MAX_IMAGE_B = 5 * 1024 * 1024;   // image size cap in bytes (5 MB)
+        const MAX_TEXT_B  = 100 * 1024;          // text file size cap in bytes (100 KB)
+
+        let _pendingAttach = null;  // {type:'image'|'text', filename, mime_type, data}; data is a data URL (image) or the file's text
+
+        function _isTextFile(file) {
+            if (file.type.startsWith('text/') || file.type === 'application/json') return true;
+            const ext = '.' + file.name.split('.').pop().toLowerCase();
+            return _TXT_EXTS.has(ext);
+        }
+
+        function _langHint(filename) {
+            const ext = filename.split('.').pop().toLowerCase();
+            const m = {py:'python',js:'javascript',ts:'typescript',jsx:'jsx',tsx:'tsx',json:'json',yaml:'yaml',yml:'yaml',toml:'toml',html:'html',css:'css',sh:'bash',md:'markdown',rs:'rust',go:'go',java:'java',c:'c',cpp:'cpp',h:'c',rb:'ruby',php:'php',swift:'swift',kt:'kotlin',sql:'sql'};
+            return m[ext] || '';
+        }
+
+        function clearAttachment() {
+            _pendingAttach = null;
+            fileInput.value = '';
+            attachRow.style.display = 'none';
+            if (attachThumb) { attachThumb.src = ''; attachThumb.style.display = 'none'; }
+        }
+
+        /**
+         * Resolve the pending attachment into send-ready values.
+         * - Text files: inject file content as a fenced code block in the message.
+         *   displayText = serverText = injected content (what the model sees).
+         * - Images: keep text separate; pass image as payloadAttachment for vision APIs.
+         *   serverText includes a 📎 filename note for non-vision backends.
+         */
+        function _resolveAttachment(inputText) {
+            if (!_pendingAttach) return { displayText: inputText, serverText: inputText, payloadAttachment: null };
+            const { type, filename, mime_type, data } = _pendingAttach;
+            if (type === 'text') {
+                const lang  = _langHint(filename);
+                const block = `📎 ${filename}\n\`\`\`${lang}\n${data.trimEnd()}\n\`\`\``;
+                const serverText = inputText ? `${inputText}\n\n${block}` : block;
+                return { displayText: serverText, serverText, payloadAttachment: null };
+            }
+            // Image
+            const note        = `📎 ${filename}`;
+            const displayText = inputText ? `${inputText}\n${note}` : note;
+            return { displayText, serverText: displayText, payloadAttachment: { filename, mime_type, data } };
+        }
+
+        attachBtn.addEventListener('click', () => fileInput.click());
+        attachClear.addEventListener('click', clearAttachment);
+
+        fileInput.addEventListener('change', () => {
+            const file = fileInput.files[0];
+            if (!file) return;
+            fileInput.value = '';  // reset so the same file can be re-selected
+
+            const isImg = _IMG_TYPES.has(file.type);
+            const isTxt = !isImg && _isTextFile(file);
+
+            if (!isImg && !isTxt) { showToast('Unsupported file type'); return; }
+            if (isImg && file.size > MAX_IMAGE_B) { showToast('Image too large (max 5 MB)'); return; }
+            if (isTxt && file.size > MAX_TEXT_B)  { showToast('Text file too large (max 100 KB)'); return; }
+
+            const reader = new FileReader();
+            reader.onload = (e) => {
+                _pendingAttach = { type: isImg ? 'image' : 'text', filename: file.name, mime_type: file.type || 'text/plain', data: e.target.result };
+                attachName.textContent = file.name;
+                if (isImg && attachThumb) {
+                    attachThumb.src = e.target.result;
+                    attachThumb.style.display = 'block';
+                    attachRow.querySelector('#attachment-icon').style.display = 'none';
+                } else if (attachThumb) {
+                    attachThumb.style.display = 'none';
+                    attachRow.querySelector('#attachment-icon').style.display = '';
+                }
+                attachRow.style.display = 'flex';
+            };
+            isImg ? reader.readAsDataURL(file) : reader.readAsText(file);
+        });
+
         // ── Sessions panel ───────────────────────────────────────────
 
         sessionsBtn.addEventListener('click', async (e) => {
@@ -1308,8 +1396,8 @@
         }
 
         async function sendMessage() {
-            const text = inputEl.value.trim();
-            if (!text || activeController) return;
+            const rawText = inputEl.value.trim();
+            if ((!rawText && !_pendingAttach) || activeController) return;
 
             const wasNewSession = !sessionId;
 
@@ -1323,10 +1411,12 @@
             activeController = new AbortController();
 
             const isOtr = current_mode === 'otr';
+            const { displayText, serverText, payloadAttachment } = _resolveAttachment(rawText);
+            clearAttachment();
 
             const userHistIdx = currentHistory.length;
-            currentHistory.push({ role: 'user', content: text });
-            const userMsgDiv = addMessage('user', text);
+            currentHistory.push({ role: 'user', content: serverText });
+            const userMsgDiv = addMessage('user', displayText);
             attachHistoryControls(userMsgDiv, userHistIdx);
             if (isOtr) setMessageMeta(userMsgDiv, {otr: true});
             scrollToBottom();
@@ -1334,7 +1424,7 @@
             const thinkingDiv = addMessage('assistant thinking', '✨ thinking…');
 
             const payload = {
-                message: text,
+                message: serverText,
                 session_id: sessionId,
                 tier: currentTier,
                 include_long: memLong,
@@ -1345,6 +1435,7 @@
                 slot: activeChatModel()?.slot || null,
                 user: CORTEX_USER,
                 persona: CORTEX_PERSONA,
+                ...(payloadAttachment ? { attachment: payloadAttachment } : {}),
             };
 
             await _doSend(payload, thinkingDiv, wasNewSession);
@@ -1509,8 +1600,8 @@
         }
 
         async function sendOrchestrate() {
-            const text = inputEl.value.trim();
-            if (!text || activeController) return;
+            const rawText = inputEl.value.trim();
+            if ((!rawText && !_pendingAttach) || activeController) return;
 
             inputEl.value = '';
             syncHeight();
@@ -1521,13 +1612,16 @@
 
             activeController = new AbortController();
 
-            currentHistory.push({ role: 'user', content: text });
-            const userMsgDiv = addMessage('user', text);
+            const { displayText, serverText } = _resolveAttachment(rawText);
+            clearAttachment();
+
+            currentHistory.push({ role: 'user', content: serverText });
+            const userMsgDiv = addMessage('user', displayText);
             scrollToBottom();
 
             const thinkingDiv = addMessage('assistant thinking', '⚡ working…');
 
-            await _doOrchestrate(text, thinkingDiv, userMsgDiv);
+            await _doOrchestrate(serverText, thinkingDiv, userMsgDiv);
 
             activeController = null;
             setProcessing(false);
diff --git a/cortex/static/index.html b/cortex/static/index.html
index 6a0988b..c97ffab 100644
--- a/cortex/static/index.html
+++ b/cortex/static/index.html
@@ -180,6 +180,19 @@
             <button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
             <!-- Tools toggle — routes through the orchestrator tool loop when active -->
             <button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
+            <!-- Attach file — images (vision) or text/code files; accept list mirrors _IMG_TYPES / _TXT_EXTS in app.js -->
+            <button id="attach-btn" title="Attach image or text file">📎</button>
+            <input type="file" id="file-input" style="display:none"
+                   accept="image/png,image/jpeg,image/webp,image/gif,text/plain,text/markdown,application/json,.md,.txt,.py,.js,.ts,.jsx,.tsx,.json,.yaml,.yml,.toml,.html,.css,.sh,.csv,.xml,.rs,.go,.java,.c,.cpp,.h,.rb,.php,.swift,.kt,.sql,.env,.ini,.cfg,.log">
+        </div>
+        <!-- Attachment preview — shown when a file is pending; app.js shows the thumbnail for images and the 📎 icon otherwise -->
+        <div id="attachment-row" style="display:none">
+            <div id="attachment-preview">
+                <img id="attachment-thumb" alt="" style="display:none">
+                <span id="attachment-icon">📎</span>
+                <span id="attachment-name"></span>
+                <button id="attachment-clear" title="Remove attachment">✕</button>
+            </div>
         </div>
         <textarea id="input" rows="1" placeholder="Message…" autofocus></textarea>
         <div id="send-col">
diff --git a/cortex/static/style.css b/cortex/static/style.css
index 5ac01f3..0f2f0bc 100644
--- a/cortex/static/style.css
+++ b/cortex/static/style.css
@@ -861,6 +861,58 @@
         }
         #tools-toggle.local-on:hover { box-shadow: 0 0 10px var(--amber-glow); }
 
+        #attach-btn {  /* 📎 button that opens the file picker */
+            background: var(--bg);
+            border: 1px solid rgba(255,255,255,0.1);
+            border-radius: 6px;
+            color: rgba(255,255,255,0.3);
+            font-size: 0.95rem;
+            padding: 3px 7px;
+            cursor: pointer;
+            transition: color 0.15s, border-color 0.15s;
+        }
+        #attach-btn:hover { color: rgba(255,255,255,0.6); border-color: rgba(255,255,255,0.25); }
+
+        #attachment-row {  /* preview strip; its display value is toggled inline (none / flex) */
+            padding: 0.3rem 0.5rem;
+            border-bottom: 1px solid var(--border);
+        }
+        #attachment-preview {  /* chip holding thumbnail or icon, filename and clear button */
+            display: inline-flex;
+            align-items: center;
+            gap: 0.4rem;
+            background: var(--bg-alt);
+            border: 1px solid var(--border);
+            border-radius: 6px;
+            padding: 0.2rem 0.5rem;
+            font-size: 0.82rem;
+            max-width: 100%;
+        }
+        #attachment-thumb {  /* image thumbnail, scaled to fit without cropping */
+            max-height: 2.4rem;
+            max-width: 3.5rem;
+            border-radius: 3px;
+            object-fit: contain;
+        }
+        #attachment-name {  /* single-line filename, truncated with an ellipsis */
+            color: var(--text-mid);
+            max-width: 220px;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            white-space: nowrap;
+        }
+        #attachment-clear {  /* borderless ✕ button; flex-shrink: 0 keeps it from collapsing */
+            background: none;
+            border: none;
+            color: var(--muted);
+            cursor: pointer;
+            padding: 0 0.15rem;
+            font-size: 0.78rem;
+            line-height: 1;
+            flex-shrink: 0;
+        }
+        #attachment-clear:hover { color: var(--text); }
+
         #input {
             flex: 1;
             background: var(--bg);