feat: Intelligence Layer Phase 1 — orchestrator service

Adds the Gemini API orchestrator (ReAct tool loop → Claude responder): Orchestrator engine + router: - orchestrator_engine.py: Gemini API tool loop, Claude CLI handoff - routers/orchestrator.py: POST /orchestrate (async job queue), GET /orchestrate/{job_id} Tools (cortex/tools/): - web.py: DuckDuckGo web search (no key required) - ae_knowledge.py: ae_journal_search + ae_journal_entry_create (AE V3 API) - ae_tasks.py: ae_task_list (reads agents_sync Kanban filesystem) - files.py: file_read (path-allowlisted to safe dirs) Config + deps: - config.py: orchestrator, DuckDuckGo, and AE API settings - requirements.txt: google-genai, duckduckgo-search - .env.default: reference config with all new keys documented Docs: - CLAUDE.md, README.md, documentation/ added to repo - Port references updated 7331 → 8000 throughout - Default model updated to gemini-2.5-flash Tested: ae_task_list, ae_journal_search, web_search all working end-to-end. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 19:37:49 -04:00
parent 23f8659aaa
commit ed472ce9a0
15 changed files with 1840 additions and 1 deletions
--- a/cortex/tools/init.py
+++ b/cortex/tools/init.py
@@ -0,0 +1,193 @@
+"""
+Orchestrator tool registry.
+
+Each tool has two parts:
+  1. A Gemini FunctionDeclaration — tells the model what the tool does and what args it takes
+  2. A Python async callable — the actual implementation
+
+To add a new tool:
+  1. Implement it in a tools/<domain>.py module
+  2. Import it here and add its async callable to _CALLABLES
+  3. Add a FunctionDeclaration below and include it in TOOL_DECLARATIONS
+
+IMPORTANT: These tools are separate from the ae_* MCP tools used by the fleet agents.
+           Do not modify the ae_* MCP server to support orchestrator needs.
+"""
+
+from collections.abc import Awaitable, Callable
+
+from google.genai import types
+
+from tools.web import search as _web_search
+from tools.ae_knowledge import journal_search as _ae_journal_search
+from tools.ae_knowledge import journal_entry_create as _ae_journal_entry_create
+from tools.ae_tasks import task_list as _ae_task_list
+from tools.files import file_read as _file_read
+
+
+# ---------------------------------------------------------------------------
+# Gemini function declarations
+# ---------------------------------------------------------------------------
+
+# Schema advertised to the model for the "web_search" tool (implemented by
+# tools.web.search). "query" is required; "max_results" is optional.
+# NOTE(review): the "default 5" in the description presumably mirrors
+# settings.ddg_max_results — confirm the two agree. The max of 10 matches the
+# clamp applied in tools.web.search.
+_web_search_declaration = types.FunctionDeclaration(
+    name="web_search",
+    description=(
+        "Search the web for current information. Use this when you need up-to-date "
+        "facts, news, documentation, or anything not in your training data."
+    ),
+    parameters=types.Schema(
+        type=types.Type.OBJECT,
+        properties={
+            "query": types.Schema(
+                type=types.Type.STRING,
+                description="The search query string",
+            ),
+            "max_results": types.Schema(
+                type=types.Type.INTEGER,
+                description="Number of results to return (default 5, max 10)",
+            ),
+        },
+        required=["query"],
+    ),
+)
+
+# Schema advertised to the model for the "ae_journal_search" tool (implemented
+# by tools.ae_knowledge.journal_search). Only "query" is required; "journal_id"
+# narrows the search to one journal and "max_results" bounds the page size.
+_ae_journal_search_declaration = types.FunctionDeclaration(
+    name="ae_journal_search",
+    description=(
+        "Search the Aether Journals knowledge base by keyword. "
+        "Use this to look up notes, documentation, meeting summaries, or any saved knowledge. "
+        "Always search before creating a new entry to avoid duplicates."
+    ),
+    parameters=types.Schema(
+        type=types.Type.OBJECT,
+        properties={
+            "query": types.Schema(
+                type=types.Type.STRING,
+                description="Keyword or phrase to search for",
+            ),
+            "journal_id": types.Schema(
+                type=types.Type.STRING,
+                description=(
+                    "Optional: scope search to a specific journal by its id_random. "
+                    "Omit to search all journals."
+                ),
+            ),
+            "max_results": types.Schema(
+                type=types.Type.INTEGER,
+                description="Maximum number of entries to return (default 10)",
+            ),
+        },
+        required=["query"],
+    ),
+)
+
+# Schema advertised to the model for the "ae_journal_entry_create" tool
+# (implemented by tools.ae_knowledge.journal_entry_create). "journal_id",
+# "title" and "content" are required; "summary" and "tags" are optional.
+_ae_journal_entry_create_declaration = types.FunctionDeclaration(
+    name="ae_journal_entry_create",
+    description=(
+        "Create a new entry in an Aether Journal. "
+        "Use this to save notes, summaries, or any content the user wants to store. "
+        "Always call ae_journal_search first to check for existing entries on the same topic."
+    ),
+    parameters=types.Schema(
+        type=types.Type.OBJECT,
+        properties={
+            "journal_id": types.Schema(
+                type=types.Type.STRING,
+                description=(
+                    "The id_random of the target journal. "
+                    "Ask the user which journal to write to if not specified."
+                ),
+            ),
+            "title": types.Schema(
+                type=types.Type.STRING,
+                description="Entry title",
+            ),
+            "content": types.Schema(
+                type=types.Type.STRING,
+                description="Full entry content (markdown supported)",
+            ),
+            "summary": types.Schema(
+                type=types.Type.STRING,
+                description="Optional short summary (1-2 sentences)",
+            ),
+            # Tags travel as one comma-separated string; the implementation
+            # splits them into a list before sending them to the API.
+            "tags": types.Schema(
+                type=types.Type.STRING,
+                description="Optional comma-separated tags (e.g. 'wireguard, networking, homelab')",
+            ),
+        },
+        required=["journal_id", "title", "content"],
+    ),
+)
+
+# Schema advertised to the model for the "ae_task_list" tool (implemented by
+# tools.ae_tasks.task_list). No parameter is required; "include_done" is an
+# optional flag that adds the completed bucket to the listing.
+_ae_task_list_declaration = types.FunctionDeclaration(
+    name="ae_task_list",
+    description=(
+        "List tasks from the agents_sync Kanban board (todo and in-progress). "
+        "Use this when asked about current work, pending tasks, or project status."
+    ),
+    parameters=types.Schema(
+        type=types.Type.OBJECT,
+        properties={
+            "include_done": types.Schema(
+                type=types.Type.BOOLEAN,
+                description="If true, also include completed tasks (default false)",
+            ),
+        },
+    ),
+)
+
+# Schema advertised to the model for the "file_read" tool (implemented by
+# tools.files.file_read). "path" is required; "max_lines" is optional.
+# The directories named in the description mirror the allowlist in
+# tools/files.py — keep the two in sync when the allowlist changes.
+_file_read_declaration = types.FunctionDeclaration(
+    name="file_read",
+    description=(
+        "Read a local file and return its contents. "
+        "Allowed directories: ~/agents_sync/, ~/OSIT_dev/, ~/DgrZone_Nextcloud/, ~/OSIT_Nextcloud/. "
+        "Use this to read documentation, notes, CLAUDE.md files, or config references. "
+        "If given a directory path, returns a directory listing instead."
+    ),
+    parameters=types.Schema(
+        type=types.Type.OBJECT,
+        properties={
+            "path": types.Schema(
+                type=types.Type.STRING,
+                description=(
+                    "Absolute or home-relative path to the file "
+                    "(e.g. ~/agents_sync/CLAUDE.md or /home/scott/agents_sync/tasks/01_todo/)"
+                ),
+            ),
+            "max_lines": types.Schema(
+                type=types.Type.INTEGER,
+                description="Optional line limit (default 500)",
+            ),
+        },
+        required=["path"],
+    ),
+)
+
+
+# ---------------------------------------------------------------------------
+# Registry: maps tool name → async callable
+# ---------------------------------------------------------------------------
+
+_CALLABLES: dict[str, callable] = {
+    "web_search": _web_search,
+    "ae_journal_search": _ae_journal_search,
+    "ae_journal_entry_create": _ae_journal_entry_create,
+    "ae_task_list": _ae_task_list,
+    "file_read": _file_read,
+}
+
+# Gemini Tool object — pass this to GenerateContentConfig
+TOOL_DECLARATIONS = [
+    types.Tool(function_declarations=[
+        _web_search_declaration,
+        _ae_journal_search_declaration,
+        _ae_journal_entry_create_declaration,
+        _ae_task_list_declaration,
+        _file_read_declaration,
+    ])
+]
+
+
+async def call_tool(name: str, args: dict) -> str:
+    """Dispatch a tool call by name. Returns result as a string."""
+    fn = _CALLABLES.get(name)
+    if fn is None:
+        return f"Unknown tool: {name}"
+    return await fn(**args)
--- a/cortex/tools/ae_knowledge.py
+++ b/cortex/tools/ae_knowledge.py
@@ -0,0 +1,177 @@
+"""
+Aether Platform knowledge tools — journal search and entry creation.
+
+These tools give the orchestrator read/write access to the AE Journals module,
+which serves as the primary long-term knowledge base.
+
+Auth: x-aether-api-key + x-account-id headers (same pattern as agents_sync scripts).
+API:  V3 CRUD — POST /v3/crud/journal_entry/search, POST /v3/crud/journal/{id}/journal_entry/
+"""
+
+import asyncio
+import logging
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+def _headers() -> dict:
+    return {
+        "x-aether-api-key": settings.ae_api_key,
+        "x-account-id": settings.ae_account_id,
+        "Content-Type": "application/json",
+    }
+
+
+def _check_config() -> str | None:
+    """Return an error string if AE API is not configured, else None."""
+    if not settings.ae_api_key or not settings.ae_account_id:
+        return (
+            "AE API not configured. Set AE_API_KEY and AE_ACCOUNT_ID in .env. "
+            "Values are the same as agents_sync/mcp/.env."
+        )
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Tool: ae_journal_search
+# ---------------------------------------------------------------------------
+
+async def journal_search(query: str, journal_id: str | None = None, max_results: int = 10) -> str:
+    """Search AE Journal entries by keyword.
+
+    Searches across the default_qry_str field (title + content excerpt).
+    Optionally scoped to a specific journal by journal_id (id_random).
+    Returns a markdown-formatted list of matching entries.
+    """
+    err = _check_config()
+    if err:
+        return err
+
+    return await asyncio.to_thread(_sync_journal_search, query, journal_id, max_results)
+
+
+def _sync_journal_search(query: str, journal_id: str | None, max_results: int) -> str:
+    import requests
+
+    url = f"{settings.ae_api_url}/v3/crud/journal_entry/search"
+    search_body = {
+        "and_filters": [
+            {"field": "default_qry_str", "op": "icontains", "value": query}
+        ],
+        "page_size": max_results,
+    }
+
+    params = {}
+    if journal_id:
+        params["for_obj_type"] = "journal"
+        params["for_obj_id"] = journal_id
+
+    try:
+        resp = requests.post(
+            url,
+            headers=_headers(),
+            params=params,
+            json=search_body,
+            timeout=settings.ae_api_timeout,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+    except Exception as e:
+        logger.warning("ae_journal_search failed: %s", e)
+        return f"Journal search error: {e}"
+
+    entries = data.get("data", [])
+    if not entries:
+        return f"No journal entries found matching: {query}"
+
+    lines = [f"Journal entries matching **{query}** ({len(entries)} result(s)):\n"]
+    for entry in entries:
+        title = entry.get("name") or "(untitled)"
+        entry_id = entry.get("id_random", "")
+        journal_name = entry.get("journal_name") or entry.get("parent_name") or ""
+        summary = entry.get("summary") or ""
+        content_preview = (entry.get("content") or "")[:200].replace("\n", " ")
+
+        header = f"**{title}**"
+        if journal_name:
+            header += f" ({journal_name})"
+        if entry_id:
+            header += f" — id: `{entry_id}`"
+
+        lines.append(header)
+        if summary:
+            lines.append(f"  Summary: {summary}")
+        if content_preview:
+            lines.append(f"  {content_preview}…")
+        lines.append("")
+
+    return "\n".join(lines).strip()
+
+
+# ---------------------------------------------------------------------------
+# Tool: ae_journal_entry_create
+# ---------------------------------------------------------------------------
+
+async def journal_entry_create(
+    journal_id: str,
+    title: str,
+    content: str,
+    summary: str = "",
+    tags: str = "",
+) -> str:
+    """Create a new entry in an AE Journal.
+
+    Args:
+        journal_id: The id_random of the target journal (use ae_journal_search to find it,
+                    or ask the user which journal to write to).
+        title:      Entry title (name field).
+        content:    Full entry content (markdown supported).
+        summary:    Optional short summary (1-2 sentences).
+        tags:       Optional comma-separated tags.
+
+    Returns a confirmation with the new entry's id_random, or an error message.
+    """
+    err = _check_config()
+    if err:
+        return err
+
+    return await asyncio.to_thread(
+        _sync_journal_entry_create, journal_id, title, content, summary, tags
+    )
+
+
+def _sync_journal_entry_create(
+    journal_id: str, title: str, content: str, summary: str, tags: str
+) -> str:
+    import requests
+
+    url = f"{settings.ae_api_url}/v3/crud/journal/{journal_id}/journal_entry/"
+    data: dict = {"name": title, "content": content}
+    if summary:
+        data["summary"] = summary
+    if tags:
+        data["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
+
+    try:
+        resp = requests.post(
+            url,
+            headers=_headers(),
+            json=data,
+            timeout=settings.ae_api_timeout,
+        )
+        resp.raise_for_status()
+        result = resp.json()
+    except Exception as e:
+        logger.warning("ae_journal_entry_create failed: %s", e)
+        return f"Journal entry creation error: {e}"
+
+    entry_id = (
+        result.get("data", {}).get("id_random")
+        or result.get("id_random")
+        or "unknown"
+    )
+    return f"Journal entry created. id: `{entry_id}`, title: \"{title}\", journal: `{journal_id}`"
--- a/cortex/tools/ae_tasks.py
+++ b/cortex/tools/ae_tasks.py
@@ -0,0 +1,100 @@
+"""
+Aether task list tool — reads the agents_sync Kanban board.
+
+Reads task JSON files directly from the agents_sync filesystem rather than
+making an HTTP call, since the tasks directory is always locally available
+(synced via Syncthing). This avoids needing a separate API endpoint for tasks.
+
+Structure:
+  agents_sync/tasks/01_todo/       — pending tasks
+  agents_sync/tasks/02_in_progress/ — active tasks
+  agents_sync/tasks/03_done/       — completed tasks (not included by default)
+"""
+
+import asyncio
+import json
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Resolved at import time — agents_sync is always at ~/agents_sync on this machine.
+# If the path doesn't exist the tool returns a helpful error rather than crashing.
+_AGENTS_SYNC = Path.home() / "agents_sync"
+_TASKS_ROOT = _AGENTS_SYNC / "tasks"
+
+
+async def task_list(include_done: bool = False) -> str:
+    """List tasks from the agents_sync Kanban board.
+
+    Reads the todo and in_progress buckets (and optionally done).
+    Returns a markdown summary grouped by status.
+
+    Args:
+        include_done: If True, also include completed tasks (can be noisy).
+    """
+    return await asyncio.to_thread(_sync_task_list, include_done)
+
+
+def _sync_task_list(include_done: bool) -> str:
+    if not _TASKS_ROOT.exists():
+        return f"Task directory not found: {_TASKS_ROOT}"
+
+    buckets = [
+        ("01_todo", "Todo"),
+        ("02_in_progress", "In Progress"),
+    ]
+    if include_done:
+        buckets.append(("03_done", "Done"))
+
+    sections: list[str] = []
+    total = 0
+
+    for dir_name, label in buckets:
+        bucket_dir = _TASKS_ROOT / dir_name
+        if not bucket_dir.exists():
+            continue
+
+        tasks = _read_bucket(bucket_dir)
+        total += len(tasks)
+        if not tasks:
+            continue
+
+        lines = [f"## {label} ({len(tasks)})\n"]
+        for task in tasks:
+            title = task.get("title") or task.get("name") or "(untitled)"
+            assigned = task.get("assigned_to") or ""
+            task_id = task.get("id") or ""
+            desc = task.get("description") or ""
+
+            header = f"- **{title}**"
+            if assigned:
+                header += f" (assigned: {assigned})"
+            if task_id:
+                header += f" — `{task_id}`"
+            lines.append(header)
+
+            if desc:
+                # First sentence / 120 chars of description
+                short = desc.split(".")[0][:120]
+                lines.append(f"  {short}")
+
+        sections.append("\n".join(lines))
+
+    if not sections:
+        return "No tasks found on the Kanban board."
+
+    header_line = f"# Kanban Board — {total} task(s)\n"
+    return header_line + "\n\n".join(sections)
+
+
+def _read_bucket(bucket_dir: Path) -> list[dict]:
+    """Read and parse all JSON task files in a bucket directory."""
+    tasks = []
+    for path in sorted(bucket_dir.glob("*.json")):
+        try:
+            data = json.loads(path.read_text())
+            tasks.append(data)
+        except Exception as e:
+            logger.warning("Failed to read task file %s: %s", path, e)
+    return tasks
--- a/cortex/tools/files.py
+++ b/cortex/tools/files.py
@@ -0,0 +1,112 @@
+"""
+File read tool — restricted to known-safe directory roots.
+
+Lets the orchestrator read local files (documentation, notes, config references)
+without exposing arbitrary filesystem access. All paths are resolved and checked
+against an allowlist of roots before any read is performed.
+"""
+
+import asyncio
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Directories the orchestrator is allowed to read from.
+# Paths are resolved (symlinks followed, ~ expanded) at import time.
+_ALLOWED_ROOTS: list[Path] = [
+    Path.home() / "agents_sync",
+    Path.home() / "OSIT_dev",
+    Path.home() / "DgrZone_Nextcloud",
+    Path.home() / "OSIT_Nextcloud",
+]
+
+# Hard cap on file size to prevent accidental context blowout
+_MAX_BYTES = 50_000   # ~50 KB
+_MAX_LINES = 500
+
+
+async def file_read(path: str, max_lines: int | None = None) -> str:
+    """Read a local file and return its contents as a string.
+
+    Only files within allowed directories can be read:
+      ~/agents_sync/, ~/OSIT_dev/, ~/DgrZone_Nextcloud/, ~/OSIT_Nextcloud/
+
+    Args:
+        path:      Absolute or home-relative path to the file (e.g. ~/agents_sync/CLAUDE.md).
+        max_lines: Optional line limit (default 500, hard cap). Use for large files.
+
+    Returns the file contents (truncated if over the size limit), or an error message.
+    """
+    return await asyncio.to_thread(_sync_file_read, path, max_lines)
+
+
+def _sync_file_read(path: str, max_lines: int | None) -> str:
+    # Expand ~ and resolve to absolute path
+    try:
+        resolved = Path(path).expanduser().resolve()
+    except Exception as e:
+        return f"Invalid path: {e}"
+
+    # Security check — must be under an allowed root
+    if not _is_allowed(resolved):
+        allowed_str = ", ".join(str(r) for r in _ALLOWED_ROOTS)
+        return (
+            f"Access denied: {resolved}\n"
+            f"Allowed directories: {allowed_str}"
+        )
+
+    if not resolved.exists():
+        return f"File not found: {resolved}"
+
+    if not resolved.is_file():
+        # If it's a directory, list its contents instead
+        try:
+            entries = sorted(resolved.iterdir())
+            names = [e.name + ("/" if e.is_dir() else "") for e in entries[:100]]
+            return f"Directory listing for {resolved}:\n" + "\n".join(names)
+        except Exception as e:
+            return f"Cannot list directory: {e}"
+
+    # Read the file
+    try:
+        raw = resolved.read_bytes()
+    except Exception as e:
+        return f"Read error: {e}"
+
+    # Binary files
+    try:
+        text = raw.decode("utf-8")
+    except UnicodeDecodeError:
+        return f"Binary file (not readable as text): {resolved}  [{len(raw)} bytes]"
+
+    # Apply line limit
+    limit = min(max_lines or _MAX_LINES, _MAX_LINES)
+    lines = text.splitlines()
+    truncated = False
+
+    if len(lines) > limit:
+        lines = lines[:limit]
+        truncated = True
+
+    # Apply byte cap as a final safety net
+    result = "\n".join(lines)
+    if len(result) > _MAX_BYTES:
+        result = result[:_MAX_BYTES]
+        truncated = True
+
+    if truncated:
+        result += f"\n\n… [truncated — file has {len(text.splitlines())} lines total]"
+
+    return result
+
+
+def _is_allowed(resolved: Path) -> bool:
+    """Check that resolved path is under one of the allowed roots."""
+    for root in _ALLOWED_ROOTS:
+        try:
+            resolved.relative_to(root)
+            return True
+        except ValueError:
+            continue
+    return False
--- a/cortex/tools/web.py
+++ b/cortex/tools/web.py
@@ -0,0 +1,50 @@
+"""
+Web search tool — DuckDuckGo backend.
+
+Uses the duckduckgo-search library. Set DDG_API_KEY in .env for a paid account
+(higher rate limits). The free unauthenticated tier works for moderate usage.
+"""
+
+import asyncio
+import logging
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+
+async def search(query: str, max_results: int | None = None) -> str:
+    """Search DuckDuckGo and return results as a formatted string.
+
+    Returns a markdown-formatted list of results: title, URL, and snippet.
+    The orchestrator includes this in the context it passes to Claude.
+    """
+    n = min(max_results or settings.ddg_max_results, 10)
+    results = await asyncio.to_thread(_sync_search, query, n)
+    if not results:
+        return f"No results found for: {query}"
+
+    lines = [f"Search results for: **{query}**\n"]
+    for i, r in enumerate(results, 1):
+        lines.append(f"{i}. [{r['title']}]({r['href']})")
+        if r.get("body"):
+            lines.append(f"   {r['body']}")
+        lines.append("")
+
+    return "\n".join(lines).strip()
+
+
+def _sync_search(query: str, max_results: int) -> list[dict]:
+    """Synchronous DuckDuckGo search — run via asyncio.to_thread."""
+    from duckduckgo_search import DDGS
+
+    kwargs = {}
+    if settings.ddg_api_key:
+        # Paid account — pass token for higher rate limits
+        kwargs["headers"] = {"Authorization": f"Bearer {settings.ddg_api_key}"}
+
+    try:
+        with DDGS(**kwargs) as ddgs:
+            return list(ddgs.text(query, max_results=max_results))
+    except Exception as e:
+        logger.warning("DuckDuckGo search error: %s", e)
+        return []