feat: usage tracking + knowledge import script

- usage_tracker.py: daily token/call buckets per user (home/{user}/usage.json)
- Hook into local backend (OpenAI usage field) and Gemini API (usage_metadata)
- Claude/Gemini CLI backends produce no structured token data and are not tracked
- Fix CLAUDE.md stale tool count (27 → 39) and refresh tool list
- scripts/import_knowledge.py: walk markdown dirs, chunk by H2, call local LLM for summaries, create AE journal entries with path-derived tags; resumable via state file; --dry-run and --limit flags for safe testing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 20:38:31 -04:00
parent eab92d876d
commit 8d4aa4094c
5 changed files with 524 additions and 3 deletions
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -218,6 +218,19 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
    text = data["choices"][0]["message"]["content"]
    if not text or not text.strip():
        raise RuntimeError("Local model returned an empty response")
+
+    usage = data.get("usage") or {}
+    if usage.get("prompt_tokens") is not None:
+        import usage_tracker
+        from persona import _user
+        asyncio.create_task(usage_tracker.record(
+            username=_user.get(),
+            backend="local",
+            model_name=model,
+            prompt_tokens=usage.get("prompt_tokens", 0),
+            completion_tokens=usage.get("completion_tokens", 0),
+        ))
+
    return text.strip()


--- a/cortex/orchestrator_engine.py
+++ b/cortex/orchestrator_engine.py
@@ -26,6 +26,8 @@ from google.genai import types
 from config import settings
 from llm_client import complete
 from tools import TOOL_DECLARATIONS, call_tool, get_tools_for_role, CONFIRM_REQUIRED
+import usage_tracker
+from persona import _user

 logger = logging.getLogger(__name__)

@@ -44,6 +46,25 @@ Keep your summary factual and complete. Include relevant URLs, data, and specifi
 If no tools are needed, return an empty summary."""


+# Strong references to in-flight usage-recording tasks. The event loop keeps
+# only weak references to tasks, so a fire-and-forget task can be garbage
+# collected before it completes unless something else holds on to it.
+_USAGE_TASKS: set = set()
+
+
+def _track_gemini_usage(response, model_name: str | None) -> None:
+    """Schedule a best-effort usage record for one Gemini API response.
+
+    Reads ``usage_metadata`` from *response* and, when it reports any prompt
+    or candidate tokens, schedules ``usage_tracker.record`` as a background
+    task. Tracking must never break orchestration, so every failure is logged
+    at debug level and swallowed.
+
+    Args:
+        response: Gemini response object; responses without a truthy
+            ``usage_metadata`` attribute are ignored.
+        model_name: Model used for the call. Falls back to
+            ``settings.orchestrator_model`` when falsy.
+    """
+    meta = getattr(response, "usage_metadata", None)
+    if not meta:
+        return
+    prompt_tokens = getattr(meta, "prompt_token_count", 0) or 0
+    completion_tokens = getattr(meta, "candidates_token_count", 0) or 0
+    if not (prompt_tokens or completion_tokens):
+        return
+
+    # Look up the loop before building the coroutine: creating the coroutine
+    # and then failing to schedule it would leave it un-awaited.
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        logger.debug("No running event loop; skipping Gemini usage tracking")
+        return
+
+    try:
+        task = loop.create_task(usage_tracker.record(
+            username=_user.get(),
+            backend="gemini_api",
+            model_name=model_name or settings.orchestrator_model,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        ))
+    except Exception:
+        logger.debug("Could not schedule Gemini usage tracking", exc_info=True)
+        return
+
+    # Keep the task alive until it finishes, then drop the reference.
+    _USAGE_TASKS.add(task)
+    task.add_done_callback(_USAGE_TASKS.discard)
+
+
@dataclass
 class OrchestrateCheckpoint:
    """Saved execution state for a job paused at a confirmation gate."""
@@ -285,6 +306,7 @@ async def _run_from_contents(
                system_instruction=_ORCHESTRATOR_SYSTEM,
            ),
        )
+        _track_gemini_usage(response, model_name)

        candidate = response.candidates[0]
        parts = candidate.content.parts if candidate.content else []
@@ -348,6 +370,7 @@ async def _run_from_contents(
                    system_instruction=_ORCHESTRATOR_SYSTEM,
                ),
            )
+            _track_gemini_usage(conf_response, model_name)
            conf_parts = (
                conf_response.candidates[0].content.parts
                if conf_response.candidates and conf_response.candidates[0].content
--- a/cortex/usage_tracker.py
+++ b/cortex/usage_tracker.py
@@ -0,0 +1,75 @@
+"""
+API usage and token tracking.
+
+Writes daily buckets to home/{username}/usage.json:
+
+  {
+    "2026-05-01": {
+      "gemini_api/gemini-2.0-flash": {"calls": 3, "prompt_tokens": 8400, "completion_tokens": 520},
+      "local/llama3.2:latest":       {"calls": 2, "prompt_tokens": 1200, "completion_tokens": 310}
+    }
+  }
+
+Claude CLI and Gemini CLI backends produce no structured token data and are not tracked.
+"""
+
+import asyncio
+import json
+import logging
+from datetime import date
+from pathlib import Path
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# record() holds this lock around its read-modify-write of the usage file,
+# serializing those updates. A single lock is shared by every user's file.
+_LOCK = asyncio.Lock()
+
+
+def _usage_path(username: str) -> Path:
+    """Return the location of *username*'s usage file under the home root."""
+    user_dir = settings.home_root() / username
+    return user_dir / "usage.json"
+
+
+async def record(
+    username: str,
+    backend: str,
+    model_name: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+) -> None:
+    """Add one call's token counts to today's usage bucket for this user.
+
+    Best-effort: read and write failures are logged as warnings and never
+    raised, so a tracking problem cannot break the calling request.
+
+    Args:
+        username: Owner of the usage file (home/{username}/usage.json).
+        backend: "gemini_api" | "local".
+        model_name: The exact model string (e.g. "gemini-2.0-flash",
+            "llama3.2:latest").
+        prompt_tokens: Prompt tokens consumed by the call; ``None`` counts as 0.
+        completion_tokens: Completion tokens produced by the call; ``None``
+            counts as 0.
+    """
+    path = _usage_path(username)
+    today = date.today().isoformat()
+    key = f"{backend}/{model_name}"
+
+    async with _LOCK:
+        data: dict = {}
+        try:
+            if path.exists():
+                loaded = json.loads(path.read_text(encoding="utf-8"))
+                if isinstance(loaded, dict):
+                    data = loaded
+                else:
+                    # Valid JSON of the wrong shape would crash the update below.
+                    logger.warning(
+                        "Usage data in %s is not a JSON object; starting fresh", path
+                    )
+        except Exception as e:
+            # Make the reset visible: the file is about to be overwritten.
+            logger.warning(
+                "Failed to read usage data from %s, starting fresh: %s", path, e
+            )
+
+        # Tolerate hand-edited or partially written buckets instead of raising.
+        day = data.get(today)
+        if not isinstance(day, dict):
+            day = data[today] = {}
+        entry = day.get(key)
+        if not isinstance(entry, dict):
+            entry = day[key] = {}
+        entry["calls"] = entry.get("calls", 0) + 1
+        # Backends may report a null count; treat it as zero.
+        entry["prompt_tokens"] = entry.get("prompt_tokens", 0) + (prompt_tokens or 0)
+        entry["completion_tokens"] = (
+            entry.get("completion_tokens", 0) + (completion_tokens or 0)
+        )
+
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            # Write to a sibling temp file and rename it into place so a crash
+            # mid-write cannot leave a truncated usage.json behind.
+            tmp_path = path.with_name(path.name + ".tmp")
+            tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
+            tmp_path.replace(path)
+        except Exception as e:
+            logger.warning("Failed to write usage data to %s: %s", path, e)
+
+
+def read_usage(username: str) -> dict:
+    """Load every recorded usage bucket for *username*.
+
+    Yields an empty dict when the usage file is missing, or when reading or
+    parsing it fails for any reason.
+    """
+    usage_file = _usage_path(username)
+    try:
+        if not usage_file.exists():
+            return {}
+        return json.loads(usage_file.read_text())
+    except Exception:
+        return {}