From 71e472bebed1738d36e88862f998831f04662794 Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Thu, 30 Apr 2026 20:10:04 -0400 Subject: [PATCH] feat: improved ae_journal_search + AE integration docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Search improvements: - Switched from LIKE on default_qry_str to query_string path (fulltext MATCH/AGAINST IN BOOLEAN MODE — uses the index, supports +/- boolean ops) - Added tag filter (icontains on tags field) - Added date_from / date_to filters (created_on gte/lte) - Added type_code / topic_code exact-match filters - Added sort_by / sort_order control (updated, created, name, priority) - Added status / priority filters - Added page parameter for pagination - Richer output: updated date, tags, pagination hint - Updated Gemini tool declaration with all new params Docs: - documentation/ARCH__AE_INTEGRATION.md — journal_entry full schema, search operator reference, current tool inventory, planned phases (broader AE integration: tasks, people, calendar, knowledge import) Co-Authored-By: Claude Sonnet 4.6 --- cortex/tools/__init__.py | 56 ++++++- cortex/tools/ae_knowledge.py | 117 ++++++++++--- documentation/ARCH__AE_INTEGRATION.md | 227 ++++++++++++++++++++++++++ 3 files changed, 367 insertions(+), 33 deletions(-) create mode 100644 documentation/ARCH__AE_INTEGRATION.md diff --git a/cortex/tools/__init__.py b/cortex/tools/__init__.py index 168ccc1..361cea4 100644 --- a/cortex/tools/__init__.py +++ b/cortex/tools/__init__.py @@ -94,8 +94,9 @@ _ae_journal_list_declaration = types.FunctionDeclaration( _ae_journal_search_declaration = types.FunctionDeclaration( name="ae_journal_search", description=( - "Search the Aether Journals knowledge base by keyword. " - "Use this to look up notes, documentation, meeting summaries, or any saved knowledge. " + "Search Aether Journal entries. All parameters are optional — combine freely. 
" + "Use 'query' for fulltext keyword search (supports boolean: +required -excluded \"phrase\"). " + "Use 'tags' to filter by tag substring. Use 'date_from'/'date_to' for date ranges (YYYY-MM-DD). " "Always search before creating a new entry to avoid duplicates." ), parameters=types.Schema( @@ -103,21 +104,58 @@ _ae_journal_search_declaration = types.FunctionDeclaration( properties={ "query": types.Schema( type=types.Type.STRING, - description="Keyword or phrase to search for", + description="Fulltext keyword search. Supports boolean mode: +required -excluded \"exact phrase\".", ), "journal_id": types.Schema( type=types.Type.STRING, - description=( - "Optional: scope search to a specific journal by its id_random. " - "Omit to search all journals." - ), + description="Scope results to a specific journal by its id_random. Omit to search all journals.", + ), + "tags": types.Schema( + type=types.Type.STRING, + description="Filter by tag substring (e.g. 'networking' matches entries tagged 'networking' or 'home-networking').", + ), + "type_code": types.Schema( + type=types.Type.STRING, + description="Filter by exact type_code (e.g. 
'note', 'meeting', 'log').", + ), + "topic_code": types.Schema( + type=types.Type.STRING, + description="Filter by exact topic_code.", + ), + "date_from": types.Schema( + type=types.Type.STRING, + description="Return entries created on or after this date (YYYY-MM-DD).", + ), + "date_to": types.Schema( + type=types.Type.STRING, + description="Return entries created on or before this date (YYYY-MM-DD).", + ), + "sort_by": types.Schema( + type=types.Type.STRING, + description="Sort field: 'updated' (default), 'created', 'name', or 'priority'.", + ), + "sort_order": types.Schema( + type=types.Type.STRING, + description="Sort direction: 'desc' (default, newest first) or 'asc'.", + ), + "status": types.Schema( + type=types.Type.INTEGER, + description="Filter by exact status code.", + ), + "priority": types.Schema( + type=types.Type.INTEGER, + description="Filter by exact priority (1=low, 5=high).", ), "max_results": types.Schema( type=types.Type.INTEGER, - description="Maximum number of entries to return (default 10)", + description="Number of results per page (default 10).", + ), + "page": types.Schema( + type=types.Type.INTEGER, + description="Page number for pagination (default 1).", ), }, - required=["query"], + required=[], ), ) diff --git a/cortex/tools/ae_knowledge.py b/cortex/tools/ae_knowledge.py index b3a2a5c..abd2b06 100644 --- a/cortex/tools/ae_knowledge.py +++ b/cortex/tools/ae_knowledge.py @@ -41,36 +41,95 @@ def _check_config() -> str | None: # Tool: ae_journal_search # --------------------------------------------------------------------------- -async def journal_search(query: str, journal_id: str | None = None, max_results: int = 10) -> str: - """Search AE Journal entries by keyword. 
+async def journal_search( + query: str = "", + journal_id: str = "", + tags: str = "", + type_code: str = "", + topic_code: str = "", + date_from: str = "", + date_to: str = "", + sort_by: str = "updated", + sort_order: str = "desc", + status: int | None = None, + priority: int | None = None, + max_results: int = 10, + page: int = 1, +) -> str: + """Search AE Journal entries. - Searches across the default_qry_str field (title + content excerpt). - Optionally scoped to a specific journal by journal_id (id_random). - Returns a markdown-formatted list of matching entries. + At least one of query, tags, type_code, topic_code, date_from, or journal_id + should be provided. All filters combine with AND. """ err = _check_config() if err: return err - - return await asyncio.to_thread(_sync_journal_search, query, journal_id, max_results) + return await asyncio.to_thread( + _sync_journal_search, + query, journal_id, tags, type_code, topic_code, + date_from, date_to, sort_by, sort_order, + status, priority, max_results, page, + ) -def _sync_journal_search(query: str, journal_id: str | None, max_results: int) -> str: +def _sync_journal_search( + query: str, + journal_id: str, + tags: str, + type_code: str, + topic_code: str, + date_from: str, + date_to: str, + sort_by: str, + sort_order: str, + status: int | None, + priority: int | None, + max_results: int, + page: int, +) -> str: import requests - url = f"{settings.ae_api_url}/v3/crud/journal_entry/search" - search_body = { - "and_filters": [ - {"field": "default_qry_str", "op": "icontains", "value": query} - ], - "page_size": max_results, + # Build sort field + sort_field_map = { + "updated": "updated_on", + "created": "created_on", + "name": "name", + "priority": "priority", } + sort_field = sort_field_map.get(sort_by, "updated_on") + order_by = f"{'-' if sort_order == 'desc' else ''}{sort_field}" - params = {} + search_body: dict = {"page_size": max_results, "page": page, "order_by": order_by} + + # Fulltext keyword — 
uses MATCH/AGAINST index + if query: + search_body["query_string"] = query + + # Additional AND filters + and_filters: list[dict] = [] + if tags: + and_filters.append({"field": "tags", "op": "icontains", "value": tags}) + if type_code: + and_filters.append({"field": "type_code", "op": "eq", "value": type_code}) + if topic_code: + and_filters.append({"field": "topic_code", "op": "eq", "value": topic_code}) + if date_from: + and_filters.append({"field": "created_on", "op": "gte", "value": date_from}) + if date_to: + and_filters.append({"field": "created_on", "op": "lte", "value": date_to}) + if status is not None: + and_filters.append({"field": "status", "op": "eq", "value": status}) + if priority is not None: + and_filters.append({"field": "priority", "op": "eq", "value": priority}) + if and_filters: + search_body["and_filters"] = and_filters + + params: dict = {} if journal_id: params["for_obj_type"] = "journal" params["for_obj_id"] = journal_id + url = f"{settings.ae_api_url}/v3/crud/journal_entry/search" try: resp = requests.post( url, @@ -86,17 +145,23 @@ def _sync_journal_search(query: str, journal_id: str | None, max_results: int) - return f"Journal search error: {e}" entries = data.get("data", []) - if not entries: - return f"No journal entries found matching: {query}" + total = data.get("total") or data.get("count") or len(entries) + + if not entries: + desc = query or tags or type_code or topic_code or f"journal {journal_id}" + return f"No journal entries found for: {desc}" + + label = query or tags or f"{len(entries)} entries" + lines = [f"Journal entries — **{label}** ({total} total, page {page}):\n"] - lines = [f"Journal entries matching **{query}** ({len(entries)} result(s)):\n"] for entry in entries: title = entry.get("name") or "(untitled)" entry_id = entry.get("id_random", "") journal_name = entry.get("journal_name") or entry.get("parent_name") or "" summary = entry.get("summary") or "" - tags = entry.get("tags") or [] - updated = 
(entry.get("updated_at") or entry.get("created_at") or "")[:10] + entry_tags = entry.get("tags") or [] + updated = (entry.get("updated_on") or entry.get("updated_at") or + entry.get("created_on") or entry.get("created_at") or "")[:10] content_preview = (entry.get("content") or "")[:400].replace("\n", " ") header = f"**{title}**" @@ -106,14 +171,18 @@ def _sync_journal_search(query: str, journal_id: str | None, max_results: int) - if updated: header += f" [{updated}]" lines.append(header) - if tags: - lines.append(f" Tags: {', '.join(tags)}") + if entry_tags: + tag_list = entry_tags if isinstance(entry_tags, list) else [t.strip() for t in str(entry_tags).split(",")] + lines.append(f" Tags: {', '.join(tag_list)}") if summary: - lines.append(f" Summary: {summary}") + lines.append(f" {summary}") elif content_preview: - lines.append(f" {content_preview}{'…' if len(entry.get('content','')) > 400 else ''}") + lines.append(f" {content_preview}{'…' if len(entry.get('content', '')) > 400 else ''}") lines.append("") + if total > page * max_results: + lines.append(f"(More results — call again with page={page + 1})") + return "\n".join(lines).strip() diff --git a/documentation/ARCH__AE_INTEGRATION.md b/documentation/ARCH__AE_INTEGRATION.md new file mode 100644 index 0000000..4fbfd80 --- /dev/null +++ b/documentation/ARCH__AE_INTEGRATION.md @@ -0,0 +1,227 @@ +# Aether Platform Integration — Cortex Tool Layer + +> Last updated: 2026-04-30 +> Status: Active development — Journal toolset being expanded + +This doc covers how Cortex/Inara integrates with the Aether Platform API, what's +implemented, what the data model looks like, and what's planned next. + +--- + +## Overview + +Cortex connects to the Aether Platform V3 API to give the orchestrator read/write +access to the user's knowledge base (Journals) and task data. Auth uses the same +`x-aether-api-key` + `x-account-id` headers as every other Aether client. 
+ +Config lives in `.env`: +``` +AE_API_URL=https://dev-api.oneskyit.com +AE_API_KEY=... +AE_ACCOUNT_ID=... +AE_API_TIMEOUT=15 +``` + +Tool implementation: `cortex/tools/ae_knowledge.py` +Tool registrations: `cortex/tools/__init__.py` + +--- + +## V3 Search Engine + +### Endpoint +``` +POST /v3/crud/{obj_type}/search +``` +For nested objects (journal_entry scoped to a journal): +``` +POST /v3/crud/journal_entry/search + ?for_obj_type=journal&for_obj_id={journal_id} +``` + +### Search body +```json +{ + "query_string": "fulltext search term", + "and_filters": [ + { "field": "tags", "op": "icontains", "value": "networking" }, + { "field": "created_on", "op": "gte", "value": "2026-01-01" } + ], + "or_filters": [...], + "page_size": 20, + "page": 1, + "order_by": "-updated_on" +} +``` + +**`query_string` vs `and_filters` on `default_qry_str`:** +- `query_string` → triggers `MATCH(default_qry_str) AGAINST(... IN BOOLEAN MODE)` — uses the + FULLTEXT index. Faster and supports boolean operators (`+word`, `-word`, `"phrase"`). +- `and_filters` with `icontains` on `default_qry_str` → plain `LIKE '%term%'`. Slower, + no index. `ae_journal_search` originally used this path; it now sends `query_string` instead. + +### Supported operators +| Operator | SQL | Notes | +|---|---|---| +| `eq` | `=` | exact match | +| `ne` | `!=` | not equal | +| `gt` / `gte` | `>` / `>=` | numeric, dates | +| `lt` / `lte` | `<` / `<=` | numeric, dates | +| `contains` / `icontains` | `LIKE '%v%'` | substring; both case-insensitive on MariaDB | +| `startswith` / `istartswith` | `LIKE 'v%'` | | +| `endswith` / `iendswith` | `LIKE '%v'` | | +| `like` | `LIKE` | raw LIKE pattern | +| `in` | `IN (...)` | value is a list | +| `is_null` / `is_not_null` | `IS NULL` / `IS NOT NULL` | no value needed | + +### Sorting +`order_by` accepts any indexed field name. 
Prefix with `-` for descending: +- `-updated_on` (default for listing) +- `-created_on` +- `name` +- `-priority` + +### Pagination +`page_size` (default 10, max ~100) + `page` (1-based). +Response includes `total` count for pagination UI. + +--- + +## journal_entry Schema + +Full table schema from `ae_describe journal_entry --detailed`: + +| Field | Type | Indexed | Notes | +|---|---|---|---| +| `id_random` | varchar(22) | UNI | Public ID — use for all API calls | +| `journal_id` | int | MUL | FK — use `for_obj_id` param in search | +| `name` | varchar(250) | MUL | Entry title | +| `short_name` | varchar(25) | | | +| `summary` | text | | Short summary (1–2 sentences) | +| `content` | text | | Full markdown content | +| `content_html` | text | | HTML version | +| `content_json` | longtext | | Structured content (editor format) | +| `content_encrypted` | longtext | | Optional encrypted content | +| `tags` | varchar(255) | MUL | Comma-separated string — filter with `icontains` | +| `type` / `type_code` | varchar | | Classification: type | +| `topic` / `topic_code` | varchar | | Classification: topic | +| `activity` / `activity_code` | varchar | | Classification: activity | +| `category_code` | varchar(25) | | Classification: category | +| `code` | varchar(20) | | Short entry code | +| `start_datetime` | datetime | MUL | Optional event start | +| `end_datetime` | datetime | | Optional event end | +| `seconds` / `hours` | int/decimal | | Duration | +| `priority` | tinyint | MUL | 1=low → 5=high | +| `status` | int | MUL | Status code (domain-specific) | +| `private` / `public` / `personal` / `professional` | tinyint | MUL | Visibility flags | +| `billable` | tinyint | | Billing flag | +| `enable` | tinyint NOT NULL | MUL | Soft-delete flag (default 1) | +| `hide` | tinyint | MUL | UI hide flag | +| `archive` | tinyint | MUL | Archived flag | +| `default_qry_str` | text | FULLTEXT | Auto-generated search target (name + content) | +| `data_json` | longtext | | Arbitrary 
structured data | +| `notes` | text | | Internal notes | +| `created_on` | timestamp NOT NULL | MUL | Auto-set on create | +| `updated_on` | timestamp | MUL | Auto-updated on change | + +### journal Schema (top-level) + +| Field | Type | Notes | +|---|---|---| +| `id_random` | varchar(22) | Public ID | +| `name` | varchar(250) | Journal name | +| `summary` / `description` | text | | +| `type_code` | varchar(25) | Journal type | +| `enable` | tinyint | | +| `created_on` / `updated_on` | timestamp | | + +--- + +## Current Tool Inventory + +| Tool | Status | Notes | +|---|---|---| +| `ae_journal_list` | ✅ | Lists journals with id + name | +| `ae_journal_search` | ✅ improved | Fulltext keyword search (`query_string`) plus tag, date, type/topic, status/priority filters; sortable, paginated | +| `ae_journal_entry_read` | ✅ | Full content by entry_id; configurable truncation | +| `ae_journal_entries_list` | ✅ | Browse a journal newest-first; paginated | +| `ae_journal_entry_create` | ✅ | Create with title, content, tags, summary | +| `ae_journal_entry_update` | ✅ | Patch any fields (title, content, tags, summary, enable) | +| `ae_journal_entry_disable` | ✅ | Soft-delete (enable=false) | +| `ae_journal_entry_append` | ✅ | Timestamped append to bottom | +| `ae_journal_entry_prepend` | ✅ | Timestamped prepend to top | +| `ae_task_list` | ✅ | agents_sync Kanban (admin only) | + +--- + +## Planned: Search Improvements + +Search originally used plain `LIKE` on `default_qry_str`. Migration plan (most items are now implemented in `ae_journal_search`; `category_code` and `updated_on` range filters remain open): + +1. **Switch to `query_string`** — fulltext MATCH/AGAINST; supports boolean operators +2. **Tag filter** — `icontains` on `tags` field +3. **Date range** — `gte`/`lte` on `created_on` or `updated_on` +4. **Classification filters** — `type_code`, `topic_code`, `category_code` (exact match) +5. **Sort control** — expose `order_by` (updated, created, name, priority) +6. **Status / priority filters** — `eq` on `status`, `priority` +7. 
**Combined search** — keyword + any filter combination in one call + +Target signature: +```python +ae_journal_search( + query: str = "", # fulltext (query_string path) — optional + journal_id: str = "", # scope to journal + tags: str = "", # icontains on tags field + type_code: str = "", # eq + topic_code: str = "", # eq + date_from: str = "", # created_on gte (YYYY-MM-DD) + date_to: str = "", # created_on lte + sort_by: str = "updated", # updated | created | name | priority + sort_order: str = "desc", + status: int | None = None, + priority: int | None = None, + max_results: int = 10, + page: int = 1, +) +``` + +--- + +## Planned: Broader AE Platform Integration + +### Phase 1 — Journal Toolset (current) +Complete read/write/search for Journals and Journal Entries. + +### Phase 2 — Tasks & Projects +- `ae_task_create` / `ae_task_update` / `ae_task_complete` on Aether tasks (not just agents_sync Kanban) +- Read project/task hierarchy + +### Phase 3 — Knowledge Import Pipeline +- Script to walk markdown dirs, chunk by H2, create Journal entries +- Dedup via search-before-create pattern +- Tag and classify entries automatically via orchestrator + +### Phase 4 — People & Contacts +- Read contact records (person, organization) +- Link journal entries to contacts + +### Phase 5 — Calendar / Events +- `start_datetime` / `end_datetime` already on journal_entry +- Could expose time-scoped journal queries as a calendar view + +--- + +## Notes on `tags` field + +`tags` is stored as a raw comma-separated varchar(255), not a JSON array. +The API accepts a Python list on write (the `tags` PATCH key takes a list and the backend joins it). +On read, it comes back as a list in the API response. +For filtering: use `icontains` on `tags` — e.g. `{"field": "tags", "op": "icontains", "value": "networking"}`. +This means a tag search for "net" would match "networking" AND "subnet" — acceptable for now. +True per-tag filtering would require a tags junction table. 
+ +## Notes on `default_qry_str` + +Auto-populated by the backend from `name` + content fields. Do not write to it directly. +FULLTEXT index supports boolean mode: `+required -excluded "exact phrase"`. +The `query_string` key in the search body triggers this path automatically.