feat: usage tracking + knowledge import script
- usage_tracker.py: daily token/call buckets per user (home/{user}/usage.json)
- Hook into local backend (OpenAI usage field) and Gemini API (usage_metadata)
- Claude/Gemini CLI backends produce no structured token data and are not tracked
- Fix CLAUDE.md stale tool count (27 → 39) and refresh tool list
- scripts/import_knowledge.py: walk markdown dirs, chunk by H2, call local LLM
for summaries, create AE journal entries with path-derived tags; resumable via
state file; --dry-run and --limit flags for safe testing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -218,6 +218,19 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
|
||||
text = data["choices"][0]["message"]["content"]
|
||||
if not text or not text.strip():
|
||||
raise RuntimeError("Local model returned an empty response")
|
||||
|
||||
usage = data.get("usage") or {}
|
||||
if usage.get("prompt_tokens") is not None:
|
||||
import usage_tracker
|
||||
from persona import _user
|
||||
asyncio.create_task(usage_tracker.record(
|
||||
username=_user.get(),
|
||||
backend="local",
|
||||
model_name=model,
|
||||
prompt_tokens=usage.get("prompt_tokens", 0),
|
||||
completion_tokens=usage.get("completion_tokens", 0),
|
||||
))
|
||||
|
||||
return text.strip()
|
||||
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@ from google.genai import types
|
||||
from config import settings
|
||||
from llm_client import complete
|
||||
from tools import TOOL_DECLARATIONS, call_tool, get_tools_for_role, CONFIRM_REQUIRED
|
||||
import usage_tracker
|
||||
from persona import _user
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -44,6 +46,25 @@ Keep your summary factual and complete. Include relevant URLs, data, and specifi
|
||||
If no tools are needed, return an empty summary."""
|
||||
|
||||
|
||||
# Strong references to in-flight usage-recording tasks. The event loop only
# keeps weak references to tasks, so a fire-and-forget task with no other
# reference may be garbage-collected before it finishes.
_PENDING_USAGE_TASKS: set[asyncio.Task] = set()


def _track_gemini_usage(response, model_name: str | None) -> None:
    """Schedule a background write of the token usage reported by *response*.

    Reads ``response.usage_metadata`` and, when it reports a non-zero prompt
    or completion token count, schedules ``usage_tracker.record`` as a
    background task for the current user with backend ``"gemini_api"``.

    Args:
        response: object returned by the Gemini call; only its optional
            ``usage_metadata`` attribute (``prompt_token_count`` and
            ``candidates_token_count``) is read.
        model_name: model string to record; falls back to
            ``settings.orchestrator_model`` when falsy.

    Usage tracking is best-effort: any failure to schedule the write is
    logged at debug level and otherwise ignored so it can never break the
    orchestration flow.
    """
    meta = getattr(response, "usage_metadata", None)
    if not meta:
        return

    # Missing attributes and explicit None both count as zero tokens.
    prompt_tokens = getattr(meta, "prompt_token_count", 0) or 0
    completion_tokens = getattr(meta, "candidates_token_count", 0) or 0
    if not (prompt_tokens or completion_tokens):
        return

    coro = None
    try:
        coro = usage_tracker.record(
            username=_user.get(),
            backend="gemini_api",
            model_name=model_name or settings.orchestrator_model,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
        task = asyncio.create_task(coro)
    except Exception:
        # e.g. no running event loop, or no user bound to the context.
        if coro is not None:
            # Avoid a "coroutine was never awaited" warning for the orphan.
            coro.close()
        logger.debug("Skipping Gemini usage tracking", exc_info=True)
        return

    # Hold the task until it completes, then drop the reference.
    _PENDING_USAGE_TASKS.add(task)
    task.add_done_callback(_PENDING_USAGE_TASKS.discard)
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrchestrateCheckpoint:
|
||||
"""Saved execution state for a job paused at a confirmation gate."""
|
||||
@@ -285,6 +306,7 @@ async def _run_from_contents(
|
||||
system_instruction=_ORCHESTRATOR_SYSTEM,
|
||||
),
|
||||
)
|
||||
_track_gemini_usage(response, model_name)
|
||||
|
||||
candidate = response.candidates[0]
|
||||
parts = candidate.content.parts if candidate.content else []
|
||||
@@ -348,6 +370,7 @@ async def _run_from_contents(
|
||||
system_instruction=_ORCHESTRATOR_SYSTEM,
|
||||
),
|
||||
)
|
||||
_track_gemini_usage(conf_response, model_name)
|
||||
conf_parts = (
|
||||
conf_response.candidates[0].content.parts
|
||||
if conf_response.candidates and conf_response.candidates[0].content
|
||||
|
||||
75
cortex/usage_tracker.py
Normal file
75
cortex/usage_tracker.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
API usage and token tracking.
|
||||
|
||||
Writes daily buckets to home/{username}/usage.json:
|
||||
|
||||
{
|
||||
"2026-05-01": {
|
||||
"gemini_api/gemini-2.0-flash": {"calls": 3, "prompt_tokens": 8400, "completion_tokens": 520},
|
||||
"local/llama3.2:latest": {"calls": 2, "prompt_tokens": 1200, "completion_tokens": 310}
|
||||
}
|
||||
}
|
||||
|
||||
Claude CLI and Gemini CLI backends produce no structured token data and are not tracked.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Serialises the read-modify-write cycle that record() performs on usage
# files. One lock is shared by all users' files. It is an asyncio lock, so it
# only coordinates coroutines, not other threads or processes.
_LOCK = asyncio.Lock()
|
||||
|
||||
|
||||
def _usage_path(username: str) -> Path:
    """Return the usage log location for *username*: <home_root>/<username>/usage.json."""
    user_home = settings.home_root() / username
    return user_home / "usage.json"
|
||||
|
||||
|
||||
async def record(
    username: str,
    backend: str,
    model_name: str,
    prompt_tokens: int,
    completion_tokens: int,
) -> None:
    """Add one call's token counts to today's usage bucket for this user.

    Args:
        username: user whose usage file (see ``_usage_path``) is updated.
        backend: "gemini_api" | "local"
        model_name: the exact model string
            (e.g. "gemini-2.0-flash", "llama3.2:latest").
        prompt_tokens: prompt tokens consumed by this call.
        completion_tokens: completion tokens produced by this call.

    File problems never propagate to the caller; they are logged as
    warnings instead:

    * missing file: a new one is created;
    * unreadable file (I/O error): the update is skipped so existing
      history is not overwritten with an empty document;
    * invalid JSON or a non-object document: a fresh document is started;
    * write failure: the on-disk file is left as it was.
    """
    path = _usage_path(username)
    today = date.today().isoformat()
    key = f"{backend}/{model_name}"

    async with _LOCK:
        data: dict = {}
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            pass  # first call for this user
        except OSError as e:
            logger.warning(
                "Could not read usage data from %s; skipping update: %s", path, e
            )
            return
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning(
                "Usage file %s is not valid JSON; starting fresh: %s", path, e
            )
        else:
            if isinstance(loaded, dict):
                data = loaded
            else:
                logger.warning(
                    "Usage file %s does not contain a JSON object; starting fresh",
                    path,
                )

        # Tolerate hand-edited or partially written buckets instead of
        # raising inside a fire-and-forget task.
        day = data.get(today)
        if not isinstance(day, dict):
            day = data[today] = {}
        entry = day.get(key)
        if not isinstance(entry, dict):
            entry = day[key] = {}

        entry["calls"] = entry.get("calls", 0) + 1
        entry["prompt_tokens"] = entry.get("prompt_tokens", 0) + prompt_tokens
        entry["completion_tokens"] = (
            entry.get("completion_tokens", 0) + completion_tokens
        )

        # Write to a sibling temp file and rename it over the target so a
        # crash mid-write cannot leave a truncated usage.json behind.
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp_path.replace(path)
        except (OSError, TypeError, ValueError) as e:
            logger.warning("Failed to write usage data to %s: %s", path, e)
|
||||
|
||||
|
||||
def read_usage(username: str) -> dict:
    """Return the full usage dict for this user. Empty dict if no file yet.

    An unreadable file, invalid JSON, or a JSON document that is not an
    object also yields an empty dict; those cases are logged as warnings so
    a damaged usage file does not go unnoticed.
    """
    path = _usage_path(username)
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Failed to read usage data from %s: %s", path, e)
        return {}

    if not isinstance(data, dict):
        logger.warning("Usage file %s does not contain a JSON object", path)
        return {}
    return data
|
||||
Reference in New Issue
Block a user