feat: usage tracking + knowledge import script

- usage_tracker.py: daily token/call buckets per user (home/{user}/usage.json)
- Hook into local backend (OpenAI usage field) and Gemini API (usage_metadata)
- Claude/Gemini CLI backends produce no structured token data and are not tracked
- Fix CLAUDE.md stale tool count (27 → 39) and refresh tool list
- scripts/import_knowledge.py: walk markdown dirs, chunk by H2, call local LLM for summaries, create AE journal entries with path-derived tags; resumable via state file; --dry-run and --limit flags for safe testing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 20:38:31 -04:00
parent eab92d876d
commit 8d4aa4094c
5 changed files with 524 additions and 3 deletions
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -218,6 +218,19 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
    text = data["choices"][0]["message"]["content"]
    if not text or not text.strip():
        raise RuntimeError("Local model returned an empty response")
+
+    usage = data.get("usage") or {}
+    if usage.get("prompt_tokens") is not None:
+        import usage_tracker
+        from persona import _user
+        asyncio.create_task(usage_tracker.record(
+            username=_user.get(),
+            backend="local",
+            model_name=model,
+            prompt_tokens=usage.get("prompt_tokens", 0),
+            completion_tokens=usage.get("completion_tokens", 0),
+        ))
+
    return text.strip()