feat: usage tracking + knowledge import script

- usage_tracker.py: daily token/call buckets per user (home/{user}/usage.json)
- Hook into local backend (OpenAI usage field) and Gemini API (usage_metadata)
- Claude/Gemini CLI backends produce no structured token data and are not tracked
- Fix CLAUDE.md stale tool count (27 → 39) and refresh tool list
- scripts/import_knowledge.py: walk markdown dirs, chunk by H2, call local LLM
  for summaries, create AE journal entries with path-derived tags; resumable via
  state file; --dry-run and --limit flags for safe testing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-05-02 20:38:31 -04:00
parent eab92d876d
commit 8d4aa4094c
5 changed files with 524 additions and 3 deletions

View File

@@ -218,6 +218,19 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
text = data["choices"][0]["message"]["content"]
if not text or not text.strip():
raise RuntimeError("Local model returned an empty response")
usage = data.get("usage") or {}
if usage.get("prompt_tokens") is not None:
import usage_tracker
from persona import _user
asyncio.create_task(usage_tracker.record(
username=_user.get(),
backend="local",
model_name=model,
prompt_tokens=usage.get("prompt_tokens", 0),
completion_tokens=usage.get("completion_tokens", 0),
))
return text.strip()

View File

@@ -26,6 +26,8 @@ from google.genai import types
from config import settings
from llm_client import complete
from tools import TOOL_DECLARATIONS, call_tool, get_tools_for_role, CONFIRM_REQUIRED
import usage_tracker
from persona import _user
logger = logging.getLogger(__name__)
@@ -44,6 +46,25 @@ Keep your summary factual and complete. Include relevant URLs, data, and specifi
If no tools are needed, return an empty summary."""
def _track_gemini_usage(response, model_name: str | None) -> None:
meta = getattr(response, "usage_metadata", None)
if not meta:
return
prompt_tokens = getattr(meta, "prompt_token_count", 0) or 0
completion_tokens = getattr(meta, "candidates_token_count", 0) or 0
if prompt_tokens or completion_tokens:
try:
asyncio.create_task(usage_tracker.record(
username=_user.get(),
backend="gemini_api",
model_name=model_name or settings.orchestrator_model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
))
except Exception:
pass
@dataclass
class OrchestrateCheckpoint:
"""Saved execution state for a job paused at a confirmation gate."""
@@ -285,6 +306,7 @@ async def _run_from_contents(
system_instruction=_ORCHESTRATOR_SYSTEM,
),
)
_track_gemini_usage(response, model_name)
candidate = response.candidates[0]
parts = candidate.content.parts if candidate.content else []
@@ -348,6 +370,7 @@ async def _run_from_contents(
system_instruction=_ORCHESTRATOR_SYSTEM,
),
)
_track_gemini_usage(conf_response, model_name)
conf_parts = (
conf_response.candidates[0].content.parts
if conf_response.candidates and conf_response.candidates[0].content

75
cortex/usage_tracker.py Normal file
View File

@@ -0,0 +1,75 @@
"""
API usage and token tracking.
Writes daily buckets to home/{username}/usage.json:
{
"2026-05-01": {
"gemini_api/gemini-2.0-flash": {"calls": 3, "prompt_tokens": 8400, "completion_tokens": 520},
"local/llama3.2:latest": {"calls": 2, "prompt_tokens": 1200, "completion_tokens": 310}
}
}
Claude CLI and Gemini CLI backends produce no structured token data and are not tracked.
"""
import asyncio
import json
import logging
from datetime import date
from pathlib import Path
from config import settings
logger = logging.getLogger(__name__)
# One module-wide lock: record() holds it across its whole read-modify-write
# of a usage file, so overlapping record() tasks do not interleave.
_LOCK = asyncio.Lock()
def _usage_path(username: str) -> Path:
    """Return the path of the usage log for *username*.

    The file is ``usage.json`` inside the user's directory under
    ``settings.home_root()``.
    """
    user_home = settings.home_root() / username
    return user_home / "usage.json"
async def record(
    username: str,
    backend: str,
    model_name: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> None:
    """Add one call's token counts to today's usage bucket for this user.

    backend — "gemini_api" | "local"
    model_name — the exact model string (e.g. "gemini-2.0-flash", "llama3.2:latest")

    The bucket is keyed by today's ISO date and then by "{backend}/{model_name}".
    Failures to read or write the usage file are logged rather than raised.
    """
    path = _usage_path(username)
    today = date.today().isoformat()
    key = f"{backend}/{model_name}"

    async with _LOCK:
        data: dict = {}
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            pass  # No usage recorded for this user yet.
        except Exception as e:
            # Start fresh, but leave a trace that history was discarded.
            logger.warning("Ignoring unreadable usage data in %s: %s", path, e)
        else:
            if isinstance(loaded, dict):
                data = loaded
            else:
                logger.warning(
                    "Ignoring usage data in %s: expected a JSON object", path
                )

        # Tolerate a hand-edited or damaged file whose buckets are not objects.
        day = data.get(today)
        if not isinstance(day, dict):
            day = data[today] = {}
        entry = day.get(key)
        if not isinstance(entry, dict):
            entry = day[key] = {}

        # `or 0` guards against a backend reporting a token count of None.
        entry["calls"] = entry.get("calls", 0) + 1
        entry["prompt_tokens"] = entry.get("prompt_tokens", 0) + (prompt_tokens or 0)
        entry["completion_tokens"] = (
            entry.get("completion_tokens", 0) + (completion_tokens or 0)
        )

        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            # Write to a sibling temp file and rename it over the target so an
            # interrupted write cannot leave a truncated usage.json behind.
            tmp_path = path.with_name(path.name + ".tmp")
            tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp_path.replace(path)
        except Exception as e:
            logger.warning("Failed to write usage data to %s: %s", path, e)
def read_usage(username: str) -> dict:
    """Return the full usage dict for this user.

    Returns an empty dict when the file does not exist yet, cannot be read or
    parsed, or does not contain a JSON object. Read and parse failures other
    than a missing file are logged as warnings.
    """
    path = _usage_path(username)
    try:
        loaded = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except Exception as e:
        logger.warning("Could not read usage data from %s: %s", path, e)
        return {}
    # Honor the declared return type even if the file holds e.g. a list or null.
    return loaded if isinstance(loaded, dict) else {}