feat: Intelligence Layer Phase 1 — orchestrator service

Adds the Gemini API orchestrator (ReAct tool loop → Claude responder):

Orchestrator engine + router:
- orchestrator_engine.py: Gemini API tool loop, Claude CLI handoff
- routers/orchestrator.py: POST /orchestrate (async job queue), GET /orchestrate/{job_id}

Tools (cortex/tools/):
- web.py: DuckDuckGo web search (no key required)
- ae_knowledge.py: ae_journal_search + ae_journal_entry_create (AE V3 API)
- ae_tasks.py: ae_task_list (reads agents_sync Kanban filesystem)
- files.py: file_read (path-allowlisted to safe dirs)

Config + deps:
- config.py: orchestrator, DuckDuckGo, and AE API settings
- requirements.txt: google-genai, duckduckgo-search
- .env.default: reference config with all new keys documented

Docs:
- CLAUDE.md, README.md, documentation/ added to repo
- Port references updated 7331 → 8000 throughout
- Default model updated to gemini-2.5-flash

Tested: ae_task_list, ae_journal_search, web_search all working end-to-end.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-03-18 19:37:49 -04:00
parent 23f8659aaa
commit ed472ce9a0
15 changed files with 1840 additions and 1 deletions

193
cortex/tools/__init__.py Normal file
View File

@@ -0,0 +1,193 @@
"""
Orchestrator tool registry.
Each tool has two parts:
1. A Gemini FunctionDeclaration — tells the model what the tool does and what args it takes
2. A Python async callable — the actual implementation
To add a new tool:
1. Implement it in a tools/<domain>.py module
2. Import it here and add a FunctionDeclaration for it below
3. Register the callable in _CALLABLES and include the declaration in TOOL_DECLARATIONS
IMPORTANT: These tools are separate from the ae_* MCP tools used by the fleet agents.
Do not modify the ae_* MCP server to support orchestrator needs.
"""
import inspect
from collections.abc import Awaitable, Callable

from google.genai import types

from tools.ae_knowledge import journal_entry_create as _ae_journal_entry_create
from tools.ae_knowledge import journal_search as _ae_journal_search
from tools.ae_tasks import task_list as _ae_task_list
from tools.files import file_read as _file_read
from tools.web import search as _web_search
# ---------------------------------------------------------------------------
# Gemini function declarations
# ---------------------------------------------------------------------------
# Schema advertised to the model for the ``web_search`` tool.
# Only ``query`` is mandatory; ``max_results`` is optional.
_web_search_declaration = types.FunctionDeclaration(
    name="web_search",
    description=(
        "Search the web for current information. "
        "Use this when you need up-to-date "
        "facts, news, documentation, or anything not in your training data."
    ),
    parameters=types.Schema(
        type=types.Type.OBJECT,
        required=["query"],
        properties={
            "query": types.Schema(
                description="The search query string",
                type=types.Type.STRING,
            ),
            "max_results": types.Schema(
                description="Number of results to return (default 5, max 10)",
                type=types.Type.INTEGER,
            ),
        },
    ),
)
# Schema advertised to the model for the ``ae_journal_search`` tool.
# ``query`` is mandatory; ``journal_id`` and ``max_results`` are optional.
_ae_journal_search_declaration = types.FunctionDeclaration(
    name="ae_journal_search",
    description=(
        "Search the Aether Journals knowledge base by keyword. "
        "Use this to look up notes, documentation, meeting summaries, "
        "or any saved knowledge. "
        "Always search before creating a new entry to avoid duplicates."
    ),
    parameters=types.Schema(
        type=types.Type.OBJECT,
        required=["query"],
        properties={
            "query": types.Schema(
                description="Keyword or phrase to search for",
                type=types.Type.STRING,
            ),
            "journal_id": types.Schema(
                description=(
                    "Optional: scope search to a specific journal by its id_random. "
                    "Omit to search all journals."
                ),
                type=types.Type.STRING,
            ),
            "max_results": types.Schema(
                description="Maximum number of entries to return (default 10)",
                type=types.Type.INTEGER,
            ),
        },
    ),
)
# Schema advertised to the model for the ``ae_journal_entry_create`` tool.
# ``journal_id``, ``title`` and ``content`` are mandatory; ``summary`` and
# ``tags`` are optional. Tags travel as one comma-separated string.
_ae_journal_entry_create_declaration = types.FunctionDeclaration(
    name="ae_journal_entry_create",
    description=(
        "Create a new entry in an Aether Journal. "
        "Use this to save notes, summaries, or any content the user wants to store. "
        "Always call ae_journal_search first to check for existing entries "
        "on the same topic."
    ),
    parameters=types.Schema(
        type=types.Type.OBJECT,
        required=["journal_id", "title", "content"],
        properties={
            "journal_id": types.Schema(
                description=(
                    "The id_random of the target journal. "
                    "Ask the user which journal to write to if not specified."
                ),
                type=types.Type.STRING,
            ),
            "title": types.Schema(
                description="Entry title",
                type=types.Type.STRING,
            ),
            "content": types.Schema(
                description="Full entry content (markdown supported)",
                type=types.Type.STRING,
            ),
            "summary": types.Schema(
                description="Optional short summary (1-2 sentences)",
                type=types.Type.STRING,
            ),
            "tags": types.Schema(
                description=(
                    "Optional comma-separated tags "
                    "(e.g. 'wireguard, networking, homelab')"
                ),
                type=types.Type.STRING,
            ),
        },
    ),
)
# Schema advertised to the model for the ``ae_task_list`` tool.
# The tool has no mandatory arguments; ``include_done`` is optional.
_ae_task_list_declaration = types.FunctionDeclaration(
    name="ae_task_list",
    description=(
        "List tasks from the agents_sync Kanban board (todo and in-progress). "
        "Use this when asked about current work, pending tasks, "
        "or project status."
    ),
    parameters=types.Schema(
        type=types.Type.OBJECT,
        properties={
            "include_done": types.Schema(
                description="If true, also include completed tasks (default false)",
                type=types.Type.BOOLEAN,
            ),
        },
    ),
)
# Schema advertised to the model for the ``file_read`` tool.
# ``path`` is mandatory; ``max_lines`` is optional.
_file_read_declaration = types.FunctionDeclaration(
    name="file_read",
    description=(
        "Read a local file and return its contents. "
        "Allowed directories: ~/agents_sync/, ~/OSIT_dev/, "
        "~/DgrZone_Nextcloud/, ~/OSIT_Nextcloud/. "
        "Use this to read documentation, notes, CLAUDE.md files, "
        "or config references. "
        "If given a directory path, returns a directory listing instead."
    ),
    parameters=types.Schema(
        type=types.Type.OBJECT,
        required=["path"],
        properties={
            "path": types.Schema(
                description=(
                    "Absolute or home-relative path to the file "
                    "(e.g. ~/agents_sync/CLAUDE.md or "
                    "/home/scott/agents_sync/tasks/01_todo/)"
                ),
                type=types.Type.STRING,
            ),
            "max_lines": types.Schema(
                description="Optional line limit (default 500)",
                type=types.Type.INTEGER,
            ),
        },
    ),
)
# ---------------------------------------------------------------------------
# Registry: maps tool name → async callable
# ---------------------------------------------------------------------------
# Dispatch table used by ``call_tool``: tool name -> implementation.
# Every value is an async callable that takes the tool's declared arguments as
# keyword arguments and returns its result as text.
# (The builtin ``callable`` is a function, not a type, so the annotation uses
# ``collections.abc.Callable`` instead.)
_CALLABLES: dict[str, Callable[..., Awaitable[str]]] = {
    "web_search": _web_search,
    "ae_journal_search": _ae_journal_search,
    "ae_journal_entry_create": _ae_journal_entry_create,
    "ae_task_list": _ae_task_list,
    "file_read": _file_read,
}
# A single Gemini Tool that bundles every function declaration defined above.
# Pass this list to GenerateContentConfig.
TOOL_DECLARATIONS = [
    types.Tool(
        function_declarations=[
            _web_search_declaration,
            _ae_journal_search_declaration,
            _ae_journal_entry_create_declaration,
            _ae_task_list_declaration,
            _file_read_declaration,
        ],
    ),
]
async def call_tool(name: str, args: dict) -> str:
    """Dispatch a tool call by name and return the tool's result as a string.

    Args:
        name: Tool name; must be a key of ``_CALLABLES``.
        args: Keyword arguments for the tool. ``None`` or an empty mapping
            means "no arguments".

    Returns:
        The tool's result. If the tool name is unknown, or the arguments do
        not fit the tool's signature (missing required or unexpected names),
        a readable error message is returned instead of raising, so the
        caller can feed the problem back to the model.
    """
    fn = _CALLABLES.get(name)
    if fn is None:
        return f"Unknown tool: {name}"

    # Arguments originate from the model and may be absent or malformed.
    # Validate them against the signature before calling so that a bad call
    # is reported as a tool error rather than an unhandled TypeError.
    kwargs = dict(args or {})
    try:
        inspect.signature(fn).bind(**kwargs)
    except TypeError as exc:
        return f"Invalid arguments for tool {name}: {exc}"

    return await fn(**kwargs)

View File

@@ -0,0 +1,177 @@
"""
Aether Platform knowledge tools — journal search and entry creation.
These tools give the orchestrator read/write access to the AE Journals module,
which serves as the primary long-term knowledge base.
Auth: x-aether-api-key + x-account-id headers (same pattern as agents_sync scripts).
API: V3 CRUD — POST /v3/crud/journal_entry/search, POST /v3/crud/journal/{id}/journal_entry/
"""
import asyncio
import logging
from config import settings
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------
def _headers() -> dict:
    """Build the HTTP headers sent with each AE API request.

    The API key and account id are read from ``settings`` on every call;
    the content type is always JSON.
    """
    auth = {
        "x-aether-api-key": settings.ae_api_key,
        "x-account-id": settings.ae_account_id,
    }
    return {**auth, "Content-Type": "application/json"}
def _check_config() -> str | None:
    """Validate that the AE API credentials are present.

    Returns ``None`` when both ``settings.ae_api_key`` and
    ``settings.ae_account_id`` are set; otherwise returns an error message
    suitable for handing straight back as the tool result.
    """
    if settings.ae_api_key and settings.ae_account_id:
        return None
    return (
        "AE API not configured. Set AE_API_KEY and AE_ACCOUNT_ID in .env. "
        "Values are the same as agents_sync/mcp/.env."
    )
# ---------------------------------------------------------------------------
# Tool: ae_journal_search
# ---------------------------------------------------------------------------
async def journal_search(query: str, journal_id: str | None = None, max_results: int = 10) -> str:
    """Search AE Journal entries by keyword.

    The keyword is matched against the ``default_qry_str`` field. When
    ``journal_id`` (an ``id_random``) is given, the search is scoped to that
    journal. The blocking HTTP request runs in a worker thread.

    Returns a markdown-formatted list of matching entries, or an error
    message if the AE API is not configured or the request fails.
    """
    if config_error := _check_config():
        return config_error
    report = await asyncio.to_thread(
        _sync_journal_search, query, journal_id, max_results
    )
    return report
def _sync_journal_search(query: str, journal_id: str | None, max_results: int) -> str:
    """Blocking implementation of ``journal_search``.

    POSTs a case-insensitive "contains" filter on ``default_qry_str`` to the
    V3 journal_entry search endpoint and renders the hits as markdown.

    Args:
        query: Keyword or phrase to search for.
        journal_id: Optional ``id_random`` of a journal to scope the search to.
        max_results: Sent to the API as ``page_size``.

    Returns:
        A markdown list of entries, a "no entries" message, or an error
        message. Request, HTTP-status and JSON-decoding failures are logged
        and reported as text; they are never raised.
    """
    import requests

    url = f"{settings.ae_api_url}/v3/crud/journal_entry/search"
    search_body = {
        "and_filters": [
            {"field": "default_qry_str", "op": "icontains", "value": query}
        ],
        "page_size": max_results,
    }
    params = {}
    if journal_id:
        params["for_obj_type"] = "journal"
        params["for_obj_id"] = journal_id

    try:
        resp = requests.post(
            url,
            headers=_headers(),
            params=params,
            json=search_body,
            timeout=settings.ae_api_timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        logger.warning("ae_journal_search failed: %s", e)
        return f"Journal search error: {e}"

    # The payload comes from a remote service: verify its shape before using
    # dict/list methods on it, otherwise an unexpected body would escape this
    # function as an AttributeError.
    if not isinstance(data, dict):
        logger.warning(
            "ae_journal_search: unexpected response type %s", type(data).__name__
        )
        return "Journal search error: unexpected response format"

    raw_entries = data.get("data")
    if not isinstance(raw_entries, list):
        raw_entries = []
    entries = [entry for entry in raw_entries if isinstance(entry, dict)]
    if not entries:
        return f"No journal entries found matching: {query}"

    lines = [f"Journal entries matching **{query}** ({len(entries)} result(s)):\n"]
    for entry in entries:
        title = entry.get("name") or "(untitled)"
        entry_id = entry.get("id_random", "")
        journal_name = entry.get("journal_name") or entry.get("parent_name") or ""
        summary = entry.get("summary") or ""
        # Keep the preview on one line and short enough for the context window.
        content_preview = (entry.get("content") or "")[:200].replace("\n", " ")

        header = f"**{title}**"
        if journal_name:
            header += f" ({journal_name})"
        if entry_id:
            header += f" — id: `{entry_id}`"
        lines.append(header)
        if summary:
            lines.append(f" Summary: {summary}")
        if content_preview:
            lines.append(f" {content_preview}")
        lines.append("")
    return "\n".join(lines).strip()
# ---------------------------------------------------------------------------
# Tool: ae_journal_entry_create
# ---------------------------------------------------------------------------
async def journal_entry_create(
    journal_id: str,
    title: str,
    content: str,
    summary: str = "",
    tags: str = "",
) -> str:
    """Create a new entry in an AE Journal.

    Args:
        journal_id: The id_random of the target journal.
        title: Entry title (sent as the ``name`` field).
        content: Full entry content (markdown supported).
        summary: Optional short summary (1-2 sentences).
        tags: Optional comma-separated tags.

    Returns a confirmation containing the new entry's id_random, or an error
    message if the AE API is not configured or the request fails. The
    blocking HTTP request runs in a worker thread.
    """
    if config_error := _check_config():
        return config_error
    call_args = (journal_id, title, content, summary, tags)
    return await asyncio.to_thread(_sync_journal_entry_create, *call_args)
def _sync_journal_entry_create(
    journal_id: str, title: str, content: str, summary: str, tags: str
) -> str:
    """Blocking implementation of ``journal_entry_create``.

    POSTs the entry to ``/v3/crud/journal/{journal_id}/journal_entry/``.

    Args:
        journal_id: ``id_random`` of the target journal; percent-encoded
            before it is placed in the URL path.
        title: Entry title (sent as ``name``).
        content: Full entry content.
        summary: Optional summary; omitted from the request when empty.
        tags: Optional comma-separated tags; sent as a list of stripped,
            non-empty strings and omitted when empty.

    Returns:
        A confirmation message containing the new entry's ``id_random``
        (``unknown`` if the response does not carry one), or an error
        message. Request, HTTP-status and JSON-decoding failures are logged
        and reported as text; they are never raised.
    """
    import requests
    from urllib.parse import quote

    # journal_id is supplied by the model. Encode it so that characters such
    # as "/", "?" or ".." segments cannot redirect the request to another path.
    safe_journal_id = quote(str(journal_id), safe="")
    url = f"{settings.ae_api_url}/v3/crud/journal/{safe_journal_id}/journal_entry/"

    body: dict = {"name": title, "content": content}
    if summary:
        body["summary"] = summary
    if tags:
        body["tags"] = [t.strip() for t in tags.split(",") if t.strip()]

    try:
        resp = requests.post(
            url,
            headers=_headers(),
            json=body,
            timeout=settings.ae_api_timeout,
        )
        resp.raise_for_status()
        result = resp.json()
    except Exception as e:
        logger.warning("ae_journal_entry_create failed: %s", e)
        return f"Journal entry creation error: {e}"

    # The entry already exists at this point, so an unexpected response
    # shape (e.g. "data": null, or a list) must not raise: a crash here
    # would look like a failed create and invite a duplicate retry.
    entry_id = None
    if isinstance(result, dict):
        payload = result.get("data")
        if isinstance(payload, dict):
            entry_id = payload.get("id_random")
        entry_id = entry_id or result.get("id_random")
    entry_id = entry_id or "unknown"

    return f"Journal entry created. id: `{entry_id}`, title: \"{title}\", journal: `{journal_id}`"

100
cortex/tools/ae_tasks.py Normal file
View File

@@ -0,0 +1,100 @@
"""
Aether task list tool — reads the agents_sync Kanban board.
Reads task JSON files directly from the agents_sync filesystem rather than
making an HTTP call, since the tasks directory is always locally available
(synced via Syncthing). This avoids needing a separate API endpoint for tasks.
Structure:
agents_sync/tasks/01_todo/ — pending tasks
agents_sync/tasks/02_in_progress/ — active tasks
agents_sync/tasks/03_done/ — completed tasks (not included by default)
"""
import asyncio
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
# agents_sync is expected at ~/agents_sync; the home directory is looked up
# once, when this module is imported. A missing tasks directory is reported by
# the tool as an error message rather than raising.
_AGENTS_SYNC = Path.home().joinpath("agents_sync")
_TASKS_ROOT = _AGENTS_SYNC.joinpath("tasks")
async def task_list(include_done: bool = False) -> str:
    """List tasks from the agents_sync Kanban board.

    Covers the todo and in-progress buckets, plus the done bucket when
    requested. The filesystem work runs in a worker thread.

    Args:
        include_done: If True, also include completed tasks (can be noisy).

    Returns a markdown summary grouped by status.
    """
    board = await asyncio.to_thread(_sync_task_list, include_done)
    return board
def _sync_task_list(include_done: bool) -> str:
    """Blocking implementation of ``task_list``.

    Walks the bucket directories under ``_TASKS_ROOT`` in board order and
    renders each non-empty bucket as a markdown section. Buckets whose
    directory is missing are skipped.

    Returns the rendered board, a "no tasks" message when every bucket is
    empty, or an error message when the tasks directory does not exist.
    """
    if not _TASKS_ROOT.exists():
        return f"Task directory not found: {_TASKS_ROOT}"

    def render(task: dict) -> list[str]:
        """Return the markdown line(s) for a single task."""
        title = task.get("title") or task.get("name") or "(untitled)"
        assigned = task.get("assigned_to") or ""
        task_id = task.get("id") or ""
        desc = task.get("description") or ""

        pieces = [f"- **{title}**"]
        if assigned:
            pieces.append(f" (assigned: {assigned})")
        if task_id:
            pieces.append(f" — `{task_id}`")
        rendered = ["".join(pieces)]
        if desc:
            # Text before the first period, capped at 120 characters.
            short = desc.split(".")[0][:120]
            rendered.append(f" {short}")
        return rendered

    wanted = [("01_todo", "Todo"), ("02_in_progress", "In Progress")]
    if include_done:
        wanted.append(("03_done", "Done"))

    sections: list[str] = []
    total = 0
    for dir_name, label in wanted:
        bucket_dir = _TASKS_ROOT / dir_name
        if not bucket_dir.exists():
            continue
        tasks = _read_bucket(bucket_dir)
        total += len(tasks)
        if not tasks:
            continue
        block_lines = [f"## {label} ({len(tasks)})\n"]
        for task in tasks:
            block_lines.extend(render(task))
        sections.append("\n".join(block_lines))

    if not sections:
        return "No tasks found on the Kanban board."
    return f"# Kanban Board — {total} task(s)\n" + "\n\n".join(sections)
def _read_bucket(bucket_dir: Path) -> list[dict]:
    """Read and parse all JSON task files in a bucket directory.

    Files are visited in sorted filename order. Files that cannot be read,
    are not valid UTF-8 or JSON, or whose top-level value is not a JSON
    object are logged and skipped, so every returned item is a ``dict``.

    Args:
        bucket_dir: Directory containing ``*.json`` task files.

    Returns:
        The parsed task objects, in filename order.
    """
    tasks: list[dict] = []
    for path in sorted(bucket_dir.glob("*.json")):
        try:
            # Decode explicitly as UTF-8 rather than relying on the platform
            # default encoding.
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and decode errors.
            logger.warning("Failed to read task file %s: %s", path, e)
            continue
        if not isinstance(data, dict):
            # Callers use .get() on each task, so anything else would crash.
            logger.warning(
                "Failed to read task file %s: %s",
                path,
                f"expected a JSON object, got {type(data).__name__}",
            )
            continue
        tasks.append(data)
    return tasks

112
cortex/tools/files.py Normal file
View File

@@ -0,0 +1,112 @@
"""
File read tool — restricted to known-safe directory roots.
Lets the orchestrator read local files (documentation, notes, config references)
without exposing arbitrary filesystem access. All paths are resolved and checked
against an allowlist of roots before any read is performed.
"""
import asyncio
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
# Directories the orchestrator is allowed to read from.
# Each root is resolved (symlinks followed) at import time. Request paths are
# fully resolved before the allowlist check, so the roots must be resolved as
# well; otherwise a symlinked root or home directory would never match.
_ALLOWED_ROOTS: list[Path] = [
    (Path.home() / name).resolve()
    for name in (
        "agents_sync",
        "OSIT_dev",
        "DgrZone_Nextcloud",
        "OSIT_Nextcloud",
    )
]
# Hard caps on returned content to prevent accidental context blowout.
# _MAX_BYTES is compared against the length of the decoded text.
_MAX_BYTES = 50_000  # ~50 KB
_MAX_LINES = 500
async def file_read(path: str, max_lines: int | None = None) -> str:
    """Read a local file and return its contents as a string.

    Only paths inside the allowed directories can be read:
    ~/agents_sync/, ~/OSIT_dev/, ~/DgrZone_Nextcloud/, ~/OSIT_Nextcloud/

    Args:
        path: Absolute or home-relative path (e.g. ~/agents_sync/CLAUDE.md).
        max_lines: Optional line limit (default 500, which is also the cap).

    Returns the file contents (truncated if over the limits), a directory
    listing when the path is a directory, or an error message. The
    filesystem work runs in a worker thread.
    """
    contents = await asyncio.to_thread(_sync_file_read, path, max_lines)
    return contents
def _sync_file_read(path: str, max_lines: int | None) -> str:
# Expand ~ and resolve to absolute path
try:
resolved = Path(path).expanduser().resolve()
except Exception as e:
return f"Invalid path: {e}"
# Security check — must be under an allowed root
if not _is_allowed(resolved):
allowed_str = ", ".join(str(r) for r in _ALLOWED_ROOTS)
return (
f"Access denied: {resolved}\n"
f"Allowed directories: {allowed_str}"
)
if not resolved.exists():
return f"File not found: {resolved}"
if not resolved.is_file():
# If it's a directory, list its contents instead
try:
entries = sorted(resolved.iterdir())
names = [e.name + ("/" if e.is_dir() else "") for e in entries[:100]]
return f"Directory listing for {resolved}:\n" + "\n".join(names)
except Exception as e:
return f"Cannot list directory: {e}"
# Read the file
try:
raw = resolved.read_bytes()
except Exception as e:
return f"Read error: {e}"
# Binary files
try:
text = raw.decode("utf-8")
except UnicodeDecodeError:
return f"Binary file (not readable as text): {resolved} [{len(raw)} bytes]"
# Apply line limit
limit = min(max_lines or _MAX_LINES, _MAX_LINES)
lines = text.splitlines()
truncated = False
if len(lines) > limit:
lines = lines[:limit]
truncated = True
# Apply byte cap as a final safety net
result = "\n".join(lines)
if len(result) > _MAX_BYTES:
result = result[:_MAX_BYTES]
truncated = True
if truncated:
result += f"\n\n… [truncated — file has {len(text.splitlines())} lines total]"
return result
def _is_allowed(resolved: Path) -> bool:
    """Return True when ``resolved`` is an allowed root or lies beneath one.

    The comparison is made on whole path components, so a sibling directory
    that merely shares a name prefix with a root does not match.
    """
    return any(resolved.is_relative_to(root) for root in _ALLOWED_ROOTS)

50
cortex/tools/web.py Normal file
View File

@@ -0,0 +1,50 @@
"""
Web search tool — DuckDuckGo backend.
Uses the duckduckgo-search library. Set DDG_API_KEY in .env for a paid account
(higher rate limits). The free unauthenticated tier works for moderate usage.
"""
import asyncio
import logging
from config import settings
logger = logging.getLogger(__name__)
async def search(query: str, max_results: int | None = None) -> str:
    """Search DuckDuckGo and return the results as a formatted string.

    Args:
        query: The search query.
        max_results: Number of results wanted. Falls back to
            ``settings.ddg_max_results`` when omitted or zero; the effective
            value is clamped to the range 1-10.

    Returns:
        A markdown-formatted numbered list (title, URL and snippet per
        result), or a "no results" message. The blocking search runs in a
        worker thread.
    """
    requested = max_results or settings.ddg_max_results
    # Clamp both ends: a negative count supplied by the model must not be
    # passed through to the search backend.
    n = max(1, min(requested, 10))

    results = await asyncio.to_thread(_sync_search, query, n)
    if not results:
        return f"No results found for: {query}"

    lines = [f"Search results for: **{query}**\n"]
    for i, r in enumerate(results, 1):
        # Results come from an external service; tolerate missing fields
        # instead of failing the whole tool call with a KeyError.
        title = r.get("title") or "(untitled)"
        href = r.get("href") or ""
        lines.append(f"{i}. [{title}]({href})" if href else f"{i}. {title}")
        if r.get("body"):
            lines.append(f" {r['body']}")
        lines.append("")
    return "\n".join(lines).strip()
def _sync_search(query: str, max_results: int) -> list[dict]:
    """Run a blocking DuckDuckGo text search; call via ``asyncio.to_thread``.

    When ``settings.ddg_api_key`` is set it is sent as a bearer token in the
    client headers. Any failure is logged and reported as an empty result
    list rather than raised.
    """
    from duckduckgo_search import DDGS

    client_options = (
        {"headers": {"Authorization": f"Bearer {settings.ddg_api_key}"}}
        if settings.ddg_api_key
        else {}
    )
    try:
        with DDGS(**client_options) as client:
            hits = client.text(query, max_results=max_results)
            return list(hits)
    except Exception as exc:
        logger.warning("DuckDuckGo search error: %s", exc)
        return []