From 6ca588959d5af8b3dcd88057dc6305bf30413b7f Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Tue, 16 Jun 2026 21:07:42 -0400 Subject: [PATCH] feat: root agent hierarchy at Level 1 from both orchestrators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini and OpenAI orchestrators now inject _agent_level=1 into spawn_agent args so the full 3-level chain works: Persona (L1) → Specialist (L2) → Support (L3, cannot delegate further). Co-Authored-By: Claude Sonnet 4.6 --- cortex/openai_orchestrator.py | 4 ++++ cortex/orchestrator_engine.py | 2 ++ documentation/ROADMAP.md | 1 + documentation/TODO__Agents.md | 24 ++++++++++++++++++++---- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/cortex/openai_orchestrator.py b/cortex/openai_orchestrator.py index d866d37..11864c7 100644 --- a/cortex/openai_orchestrator.py +++ b/cortex/openai_orchestrator.py @@ -509,6 +509,8 @@ async def _execute_tool( args = json.loads(arguments_json) except json.JSONDecodeError: args = {} + if name == "spawn_agent": + args = {**args, "_agent_level": 1} try: return await call_tool(name, args, callables) except Exception as e: @@ -524,6 +526,8 @@ async def _execute_tool_dict( ) -> str: """Execute a tool from a pre-parsed args dict.""" _, callables = get_tools_for_role(user_role, tool_list) + if name == "spawn_agent": + args = {**args, "_agent_level": 1} try: return await call_tool(name, args, callables) except Exception as e: diff --git a/cortex/orchestrator_engine.py b/cortex/orchestrator_engine.py index fa1fa34..c470dbe 100644 --- a/cortex/orchestrator_engine.py +++ b/cortex/orchestrator_engine.py @@ -468,6 +468,8 @@ async def _claude_handoff( async def _execute_tool(name: str, args: dict, callables: dict | None = None) -> str: """Execute a single tool call, catching all exceptions.""" + if name == "spawn_agent": + args = {**args, "_agent_level": 1} try: return await call_tool(name, args, callables) except Exception as e: diff --git 
a/documentation/ROADMAP.md b/documentation/ROADMAP.md index 7020631..af3fc2c 100644 --- a/documentation/ROADMAP.md +++ b/documentation/ROADMAP.md @@ -77,3 +77,4 @@ - **RAG via Open WebUI** — feed Nextcloud docs into local knowledge collections; possible complement to AE Journals search - **Multi-host local models** — per-user config already supports multiple hosts; routing logic TBD - **WhatsApp** — requires Business API account or a bridge; not started +- **Docling** (https://github.com/docling-project/docling) — IBM Research doc parser; converts PDF/DOCX/PPTX/images → clean Markdown/JSON for LLM ingestion; would enhance file attachments and the knowledge import pipeline diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md index b341864..97b6a5f 100644 --- a/documentation/TODO__Agents.md +++ b/documentation/TODO__Agents.md @@ -97,10 +97,10 @@ pipelines) unusable without freezing the orchestrator. - [x] **L2→L3 boundary enforcement** — `spawn_agent` param `_agent_level` (default 2); when `child_level >= 3`, auto-adds `spawn_agent` + `aider_run` to deny_tools so Level 3 children cannot delegate; level stored in AgentRecord — 2026-06-03 -- [ ] **`_agent_level=1` from main orchestrators** — Gemini and OpenAI orchestrators - should pass `_agent_level=1` when calling spawn_agent so the hierarchy is rooted - correctly; currently defaults to 2 (children become Level 3, which is safe but - means Level 1 cannot spawn Level 2 that itself spawns Level 3) +- [x] **`_agent_level=1` from main orchestrators** — Gemini and OpenAI orchestrators + inject `_agent_level=1` into args in `_execute_tool` / `_execute_tool_dict` so the + hierarchy is rooted correctly; Persona spawns Level 2 agents that can themselves + spawn Level 3 — 2026-06-16 **Phase 3 — `aider_run` async:** - [x] **`aider_run` background mode** — added `background: bool = False` and @@ -313,6 +313,22 @@ and lint commands; model/key come from env vars (not committed). 
- `cortex/routers/` already has pattern; add `gitea.py` - Gitea Actions (CI) for "run tests on push" — simpler than custom runner +### [Intelligence] Docling — document parsing for LLM ingestion +https://github.com/docling-project/docling + +IBM Research library that converts PDF, DOCX, PPTX, XLSX, images, HTML, and more into +clean Markdown or structured JSON ready for LLM consumption. Three integration points: + +- **File attachment enrichment** — today, non-text attachments are limited; Docling would + let Cortex extract structured content from PDFs and Office docs before injecting into context +- **Knowledge ingestion pipeline** — convert Nextcloud files (meeting notes, contracts, reports) + into AE Journal entries or a local RAG corpus; complements the existing markdown import script +- **`file_read` / `web_read` enhancement** — optionally route binary file types through Docling + for clean extraction instead of raw binary handling + +Install: `pip install docling`; GPU-accelerated OCR support available. +No design started — evaluate before integrating. + ### [Local] RAG via Open WebUI Open WebUI has a full RAG pipeline (file upload → embed → knowledge collections → reference in chat). Could feed Nextcloud docs or session logs into a local knowledge