"""File read/write/search tools — two access scopes.

Project scope (no admin required):
  project_file_read  — read a file with optional line-range (offset)
  project_file_list  — list a directory with sizes + timestamps
  file_stat          — size, modified time, line count for a path
  file_grep          — regex search with context lines; up to 50 matches
  file_syntax_check  — py_compile (.py) or json.loads (.json) check

System scope (admin-only):
  file_read   — read a file from ~/agents_sync/, ~/OSIT_dev/, etc.
  file_list   — list a directory (same roots)
  file_write  — write/append (~/agents_sync/ + Cortex home/)

Session tools (user-level, persona-isolated):
  session_read    — read a session log by date
  session_search  — keyword search across session logs

All project-scope tools are restricted to the Cortex project root:
  ~/agents_sync/projects/Cortex_and_Inara_dev/
"""

import asyncio
import json
import logging
import re
import subprocess
import sys
from collections.abc import Callable
from datetime import datetime
from pathlib import Path

from google.genai import types

logger = logging.getLogger(__name__)

# ── Access roots ──────────────────────────────────────────────────────────────

# Project root: two levels up from cortex/tools/files.py → Cortex_and_Inara_dev/
_PROJECT_ROOT: Path = Path(__file__).parent.parent.parent.resolve()


# System-wide read roots
def _build_allowed_roots() -> list[Path]:
    """Return the directories that system-scope read/list tools may access.

    The four home-relative roots are always present. The root returned by
    ``config.settings.home_root()`` is appended when it can be obtained;
    failure to import or call it is tolerated (best effort).
    """
    roots = [
        Path.home() / "agents_sync",
        Path.home() / "OSIT_dev",
        Path.home() / "DgrZone_Nextcloud",
        Path.home() / "OSIT_Nextcloud",
    ]
    try:
        from config import settings

        roots.append(settings.home_root())
    except Exception as e:
        # Deliberately non-fatal: the tools still work with the static roots.
        logger.debug("home_root unavailable; using static read roots only: %s", e)
    return roots


_ALLOWED_ROOTS: list[Path] = _build_allowed_roots()

# Write is tighter
_WRITE_ROOTS: list[Path] = [Path.home() / "agents_sync"]

# Size limits
_MAX_BYTES = 50_000  # NOTE: compared against len(str), i.e. characters
_MAX_LINES = 500
_MAX_GREP_MATCHES = 50


def _is_within(path: Path, root: Path) -> bool:
    """Return True if *path* is *root* or lies beneath it (pure path comparison)."""
    try:
        path.relative_to(root)
    except ValueError:
        return False
    return True


def _is_project_allowed(resolved: Path) -> bool:
    """Return True if *resolved* is inside the project root."""
    return _is_within(resolved, _PROJECT_ROOT)


def _is_allowed(resolved: Path) -> bool:
    """Return True if *resolved* is inside any system-scope read root."""
    return any(_is_within(resolved, root) for root in _ALLOWED_ROOTS)


def _is_write_allowed(resolved: Path) -> bool:
    """Return True if *resolved* is inside a write root or the configured home root."""
    if any(_is_within(resolved, root) for root in _WRITE_ROOTS):
        return True
    try:
        from config import settings

        return _is_within(resolved, settings.home_root())
    except Exception:
        # No usable home root (import or call failed) → not writable there.
        return False


# ── Shared implementations ────────────────────────────────────────────────────

def _read_impl(
    path_str: str,
    offset: int | None,
    max_lines: int | None,
    is_allowed_fn: Callable[[Path], bool],
) -> str:
    """Read a text file (or list a directory) subject to an access predicate.

    Args:
        path_str: Path to read; ``~`` is expanded and the path is resolved.
        offset: 1-based first line to return; ``None``/``0``/negative mean line 1.
        max_lines: Maximum number of lines to return; ``None`` or a non-positive
            value means ``_MAX_LINES``. Values above ``_MAX_LINES`` are clamped.
        is_allowed_fn: Predicate applied to the resolved path before any access.

    Returns:
        The requested text, preceded by a ``[Lines a–b of n]`` header when the
        read does not start at line 1 or was truncated, and followed by a
        paging hint when truncated. Errors are reported as a message string;
        nothing is raised.
    """
    try:
        resolved = Path(path_str).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not is_allowed_fn(resolved):
        return f"Access denied: {resolved}"
    if not resolved.exists():
        return f"File not found: {resolved}"

    if not resolved.is_file():
        # Convenience: a directory path yields a short name-only listing.
        try:
            entries = sorted(resolved.iterdir())
            names = [e.name + ("/" if e.is_dir() else "") for e in entries[:100]]
            return f"Directory listing for {resolved}:\n" + "\n".join(names)
        except Exception as e:
            return f"Cannot list directory: {e}"

    try:
        raw = resolved.read_bytes()
    except Exception as e:
        return f"Read error: {e}"

    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        return f"Binary file (not readable as text): {resolved} [{len(raw)} bytes]"

    all_lines = text.splitlines()
    total = len(all_lines)

    # offset is 1-based; default = start of file
    start = max(0, (offset or 1) - 1)
    if total and start >= total:
        return f"[No lines at offset {start + 1} — file has {total} lines]"
    working = all_lines[start:]

    # A non-positive max_lines would otherwise become a negative slice bound
    # and silently drop lines from the END of the file.
    if max_lines is None or max_lines < 1:
        limit = _MAX_LINES
    else:
        limit = min(max_lines, _MAX_LINES)

    truncated = False
    if len(working) > limit:
        working = working[:limit]
        truncated = True

    result = "\n".join(working)
    if len(result) > _MAX_BYTES:
        # Trim to whole lines so end_line (and the offset hint in the trailer)
        # describe exactly what was returned.
        truncated = True
        kept: list[str] = []
        used = 0
        for line in working:
            cost = len(line) + (1 if kept else 0)  # +1 for the joining newline
            if used + cost > _MAX_BYTES:
                break
            kept.append(line)
            used += cost
        if not kept:
            # A single over-long line: hard-cut it but count it as consumed so
            # that paging with the suggested offset still advances.
            kept = [working[0][:_MAX_BYTES]]
        working = kept
        result = "\n".join(working)

    end_line = start + len(working)
    header = f"[Lines {start + 1}–{end_line} of {total}]\n" if (start > 0 or truncated) else ""
    trailer = (
        f"\n\n… [truncated — file has {total} lines; use offset={end_line + 1} to read more]"
        if truncated
        else ""
    )
    return header + result + trailer


def _list_impl(path_str: str, is_allowed_fn: Callable[[Path], bool]) -> str:
    """List a directory (directories first, then files, case-insensitive by name).

    Files are annotated with size and modification time. At most 200 entries
    are shown; the remainder is summarised as a count. Errors are returned as
    a message string.
    """
    try:
        resolved = Path(path_str).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not is_allowed_fn(resolved):
        return f"Access denied: {resolved}"
    if not resolved.exists():
        return f"Path not found: {resolved}"
    if resolved.is_file():
        return f"{resolved} is a file. Use file_read / project_file_read to read it."

    try:
        # is_file() is False for directories, so they sort first.
        entries = sorted(resolved.iterdir(), key=lambda e: (e.is_file(), e.name.lower()))
        lines = []
        for e in entries[:200]:
            if e.is_dir():
                suffix = "/"
            else:
                try:
                    st = e.stat()
                    mtime = datetime.fromtimestamp(st.st_mtime).strftime("%Y-%m-%d %H:%M")
                    suffix = f" ({st.st_size:,} B, {mtime})"
                except Exception:
                    # e.g. dangling symlink — still show the name.
                    suffix = ""
            lines.append(f"{e.name}{suffix}")
        result = "\n".join(lines)
        if len(entries) > 200:
            result += f"\n… ({len(entries) - 200} more not shown)"
        return f"Contents of {resolved}:\n\n{result}"
    except Exception as e:
        return f"Cannot list directory: {e}"


# ── Project-scoped tools ──────────────────────────────────────────────────────

async def project_file_read(path: str, offset: int | None = None, max_lines: int | None = None) -> str:
    """Read a file within the Cortex project directory, with optional line range."""
    return await asyncio.to_thread(_read_impl, path, offset, max_lines, _is_project_allowed)


async def project_file_list(path: str) -> str:
    """List directory contents within the Cortex project directory, with sizes and timestamps."""
    return await asyncio.to_thread(_list_impl, path, _is_project_allowed)


async def file_stat(path: str) -> str:
    """Return metadata for a file or directory: type, size, modified time, line count."""
    return await asyncio.to_thread(_sync_file_stat, path)


def _sync_file_stat(path_str: str) -> str:
    """Blocking implementation of :func:`file_stat` (project scope only).

    Reports path, type, size and modification time. For files without a NUL
    byte in the first 1024 bytes a line count is added; for directories the
    number of immediate files and subdirectories is added. Both extras are
    best effort and are omitted on error.
    """
    try:
        resolved = Path(path_str).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not _is_project_allowed(resolved):
        return f"Access denied: {resolved}\nProject root: {_PROJECT_ROOT}"
    if not resolved.exists():
        return f"Path not found: {resolved}"

    try:
        st = resolved.stat()
    except Exception as e:
        return f"Cannot stat: {e}"

    modified = datetime.fromtimestamp(st.st_mtime).strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        f"Path: {resolved}",
        f"Type: {'directory' if resolved.is_dir() else 'file'}",
        f"Size: {st.st_size:,} bytes",
        f"Modified: {modified}",
    ]

    if resolved.is_file():
        try:
            raw = resolved.read_bytes()
            if b'\x00' not in raw[:1024]:  # heuristic: NUL byte → binary, skip
                lines.append(f"Lines: {len(raw.decode('utf-8', errors='replace').splitlines())}")
        except Exception:
            pass  # line count is optional
    elif resolved.is_dir():
        try:
            entries = list(resolved.iterdir())
            n_files = sum(1 for e in entries if e.is_file())
            n_dirs = sum(1 for e in entries if e.is_dir())
            lines.append(f"Contents: {n_files} file(s), {n_dirs} subdirector{'y' if n_dirs == 1 else 'ies'}")
        except Exception:
            pass  # entry counts are optional

    return "\n".join(lines)


async def file_grep(path: str, pattern: str, context_lines: int = 2, recursive: bool = True) -> str:
    """Search for a regex pattern in a file or directory, returning matching lines with context."""
    return await asyncio.to_thread(_sync_file_grep, path, pattern, context_lines, recursive)


def _sync_file_grep(path_str: str, pattern: str, context_lines: int, recursive: bool) -> str:
    """Blocking implementation of :func:`file_grep` (project scope only).

    Args:
        path_str: File or directory to search.
        pattern: Regular expression, compiled case-insensitively.
        context_lines: Lines of context around each match, clamped to 0–5.
        recursive: For a directory, whether to descend into subdirectories.

    Returns:
        A header with the match count followed by one section per file, or a
        message string for errors / no matches. At most ``_MAX_GREP_MATCHES``
        matches are reported in total, including within a single file.
    """
    try:
        resolved = Path(path_str).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not _is_project_allowed(resolved):
        return f"Access denied: {resolved}\nProject root: {_PROJECT_ROOT}"
    if not resolved.exists():
        return f"Path not found: {resolved}"

    try:
        regex = re.compile(pattern, re.IGNORECASE)
    except re.error as e:
        return f"Invalid regex pattern: {e}"

    ctx = max(0, min(context_lines, 5))

    if resolved.is_file():
        files_to_search = [resolved]
    elif recursive:
        files_to_search = sorted(f for f in resolved.rglob("*") if f.is_file())
    else:
        files_to_search = sorted(f for f in resolved.iterdir() if f.is_file())

    total_matches = 0
    sections: list[str] = []
    capped = False

    for fp in files_to_search:
        if total_matches >= _MAX_GREP_MATCHES:
            capped = True
            break
        try:
            raw = fp.read_bytes()
        except OSError:
            continue
        if b'\x00' in raw[:1024]:
            continue  # skip binary

        file_lines = raw.decode("utf-8", errors="replace").splitlines()
        match_indices = [i for i, line in enumerate(file_lines) if regex.search(line)]
        if not match_indices:
            continue

        # Enforce the global cap inside a file too; otherwise one file with
        # thousands of hits would blow straight past it.
        remaining = _MAX_GREP_MATCHES - total_matches
        if len(match_indices) > remaining:
            match_indices = match_indices[:remaining]
            capped = True
        total_matches += len(match_indices)

        try:
            label = str(fp.relative_to(_PROJECT_ROOT))
        except ValueError:
            label = str(fp)

        file_output = [f"── {label} ──"]
        match_set = set(match_indices)
        last_printed = -1  # index of the last emitted line; -1 = none yet
        for mi in match_indices:
            start = max(0, mi - ctx)
            end = min(len(file_lines), mi + ctx + 1)
            if last_printed >= 0 and start > last_printed + 1:
                file_output.append(" ···")  # gap between non-adjacent windows
            for j in range(max(start, last_printed + 1), end):
                # Mark by membership, not `j == mi`: a match that falls inside
                # the previous match's context window must still be flagged.
                marker = "►" if j in match_set else " "
                file_output.append(f" {j + 1:4d}{marker} {file_lines[j]}")
            last_printed = max(last_printed, end - 1)

        sections.append("\n".join(file_output))

    if not sections:
        return f"No matches for '{pattern}' in {resolved}"

    cap_note = f" (capped at {_MAX_GREP_MATCHES})" if capped else ""
    header = f"grep '{pattern}' — {total_matches} match(es){cap_note}:"
    return header + "\n\n" + "\n\n".join(sections)


async def file_syntax_check(path: str) -> str:
    """Check syntax of a Python (.py) or JSON (.json) file."""
    return await asyncio.to_thread(_sync_file_syntax_check, path)


def _sync_file_syntax_check(path_str: str) -> str:
    """Blocking implementation of :func:`file_syntax_check` (project scope only).

    ``.py`` files are checked by running ``py_compile`` in a subprocess with a
    15-second timeout; ``.json`` files are parsed with ``json.loads``. Any
    other suffix is reported as unsupported. The result is always a message
    string.
    """
    try:
        resolved = Path(path_str).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not _is_project_allowed(resolved):
        return f"Access denied: {resolved}\nProject root: {_PROJECT_ROOT}"
    if not resolved.exists():
        return f"File not found: {resolved}"
    if not resolved.is_file():
        return f"Not a file: {resolved}"

    suffix = resolved.suffix.lower()

    if suffix == ".py":
        # Use the interpreter running this process so the file is checked
        # against the same grammar it will be executed with; fall back to
        # "python3" when sys.executable is unavailable (empty/None).
        interpreter = sys.executable or "python3"
        try:
            result = subprocess.run(
                [interpreter, "-m", "py_compile", str(resolved)],
                capture_output=True,
                text=True,
                timeout=15,
            )
            if result.returncode == 0:
                return f"OK — {resolved.name}: syntax valid"
            err = (result.stderr or result.stdout).strip()
            return f"Syntax error in {resolved.name}:\n{err}"
        except subprocess.TimeoutExpired:
            return f"Timeout running py_compile on {resolved.name}"
        except Exception as e:
            return f"Error: {e}"

    elif suffix == ".json":
        try:
            text = resolved.read_text(encoding="utf-8")
            json.loads(text)
            return f"OK — {resolved.name}: valid JSON"
        except json.JSONDecodeError as e:
            return f"JSON error in {resolved.name}: {e}"
        except Exception as e:
            return f"Error reading {resolved.name}: {e}"

    else:
        return f"Syntax check not supported for '{suffix}' files. Supported: .py, .json"


# ── System-scoped tools ───────────────────────────────────────────────────────

async def file_read(path: str, offset: int | None = None, max_lines: int | None = None) -> str:
    """Read a local file from the broader system. Allowed: ~/agents_sync/, ~/OSIT_dev/, etc. ADMIN ONLY."""
    return await asyncio.to_thread(_read_impl, path, offset, max_lines, _is_allowed)


async def file_list(path: str) -> str:
    """List directory contents from the broader system. ADMIN ONLY."""
    return await asyncio.to_thread(_list_impl, path, _is_allowed)


async def file_write(path: str, content: str, mode: str = "overwrite") -> str:
    """Write or append content to a file. Write roots: ~/agents_sync/ and Cortex home/. ADMIN ONLY."""
    return await asyncio.to_thread(_sync_file_write, path, content, mode)


def _sync_file_write(path: str, content: str, mode: str) -> str:
    """Blocking implementation of :func:`file_write`.

    Args:
        path: Destination; ``~`` is expanded and the path is resolved.
        content: Text written as UTF-8.
        mode: ``"overwrite"`` replaces the file, ``"append"`` adds to its end.

    Returns:
        A confirmation with the number of characters written, or an error
        message. Missing parent directories are created. Failures are logged
        and reported, not raised.
    """
    try:
        resolved = Path(path).expanduser().resolve()
    except Exception as e:
        return f"Invalid path: {e}"

    if not _is_write_allowed(resolved):
        return (
            f"Write access denied: {resolved}\n"
            f"Allowed write roots: ~/agents_sync/ and the Cortex home/ directory."
        )

    if mode not in ("overwrite", "append"):
        return f"Invalid mode '{mode}' — use 'overwrite' or 'append'."

    try:
        resolved.parent.mkdir(parents=True, exist_ok=True)
        if mode == "append":
            with resolved.open("a", encoding="utf-8") as f:
                f.write(content)
            return f"Appended {len(content)} chars to {resolved}"
        else:
            resolved.write_text(content, encoding="utf-8")
            return f"Wrote {len(content)} chars to {resolved}"
    except Exception as e:
        logger.error("file_write error for %s: %s", resolved, e)
        return f"Write error: {e}"


# ── Session tools ─────────────────────────────────────────────────────────────

_SEARCH_EXCERPT_CHARS = 150


async def session_read(date: str) -> str:
    """Read a full session log by date (YYYY-MM-DD)."""
    return await asyncio.to_thread(_sync_session_read, date.strip())


def _sync_session_read(date: str) -> str:
    """Blocking implementation of :func:`session_read`.

    Looks for ``<date>.md`` in the ``sessions`` directory under
    ``persona_path()``. If it is missing, the 15 most recent available log
    names are listed instead.
    """
    from persona import persona_path

    sessions_dir = persona_path() / "sessions"
    if not sessions_dir.exists():
        return "No session logs found."

    # `date` comes straight from the tool caller. Accept it only if it forms a
    # plain filename: separators, "..", or an absolute path would otherwise
    # let the read escape the sessions directory.
    filename = f"{date}.md"
    is_plain_name = Path(filename).name == filename
    target = sessions_dir / filename

    if is_plain_name and target.exists():
        content = target.read_text()
        return f"Session log for {date} ({len(content)} chars):\n\n{content}"

    available = sorted([f.stem for f in sessions_dir.glob("*.md")], reverse=True)
    if not available:
        return "No session logs found."
    recent = "\n".join(f" {d}" for d in available[:15])
    return f"No session log found for '{date}'. Available dates (most recent first):\n{recent}"


async def session_search(query: str, limit: int = 5) -> str:
    """Search past session logs for a keyword or phrase."""
    return await asyncio.to_thread(_sync_session_search, query, limit)


def _sync_session_search(query: str, limit: int) -> str:
    """Blocking implementation of :func:`session_search`.

    Performs a case-insensitive literal search (the query is regex-escaped)
    over ``*.md`` files in the sessions directory, in reverse filename order.
    Each hit is returned with up to ``_SEARCH_EXCERPT_CHARS`` characters of
    context on either side. ``limit`` is clamped to 1–20.
    """
    from persona import persona_path

    sessions_dir = persona_path() / "sessions"
    if not sessions_dir.exists():
        return "No session logs found."

    if not query.strip():
        # An empty pattern matches at every position and would return
        # `limit` meaningless excerpts.
        return "Empty search query — provide a keyword or phrase."

    limit = max(1, min(limit, 20))
    pattern = re.compile(re.escape(query), re.IGNORECASE)
    session_files = sorted(sessions_dir.glob("*.md"), reverse=True)
    matches = []

    for sf in session_files:
        if len(matches) >= limit:
            break
        try:
            text = sf.read_text()
        except (OSError, UnicodeDecodeError):
            # One unreadable/undecodable log must not abort the whole search.
            continue
        for m in pattern.finditer(text):
            if len(matches) >= limit:
                break
            start = max(0, m.start() - _SEARCH_EXCERPT_CHARS)
            end = min(len(text), m.end() + _SEARCH_EXCERPT_CHARS)
            excerpt = text[start:end].strip()
            if start > 0:
                excerpt = "…" + excerpt
            if end < len(text):
                excerpt = excerpt + "…"
            matches.append(f"[{sf.stem}] {excerpt}")

    if not matches:
        return f"No matches for '{query}' across {len(session_files)} session logs."

    header = f"Session search: '{query}' — {len(matches)} match(es) across {len(session_files)} logs\n"
    return header + "\n\n".join(matches)


# ── Declarations ──────────────────────────────────────────────────────────────

DECLARATIONS = [
    # Project-scoped
    types.FunctionDeclaration(
        name="project_file_read",
        description=(
            "Read a file within the Cortex project directory (source code, docs, config, persona files). "
            "Supports reading a specific line range via offset — use to page through large files "
            "without re-reading from the top. If given a directory path, returns a listing instead. "
            "Project root: ~/agents_sync/projects/Cortex_and_Inara_dev/"
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to the file",
                ),
                "offset": types.Schema(
                    type=types.Type.INTEGER,
                    description="Start reading from this line number (1-based). Omit to read from the top.",
                ),
                "max_lines": types.Schema(
                    type=types.Type.INTEGER,
                    description="Maximum lines to return (default 500)",
                ),
            },
            required=["path"],
        ),
    ),
    types.FunctionDeclaration(
        name="project_file_list",
        description=(
            "List files and subdirectories within the Cortex project directory. "
            "Shows file sizes and modified timestamps. "
            "Project root: ~/agents_sync/projects/Cortex_and_Inara_dev/"
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to the directory",
                ),
            },
            required=["path"],
        ),
    ),
    types.FunctionDeclaration(
        name="file_stat",
        description=(
            "Get metadata for a file or directory: type, size, modified timestamp, line count (for text files) "
            "or entry counts (for directories). Use before reading to check recency or size. "
            "Restricted to the Cortex project directory."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to the file or directory",
                ),
            },
            required=["path"],
        ),
    ),
    types.FunctionDeclaration(
        name="file_grep",
        description=(
            "Search for a regex pattern in a file or directory, returning matching lines with surrounding "
            "context. Much more efficient than reading an entire source file — use this to find function "
            "definitions, variable names, TODO comments, imports, error strings, etc. "
            "Searches recursively by default. Capped at 50 matches. Skips binary files. "
            "Case-insensitive. Restricted to the Cortex project directory."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="File or directory to search (e.g. ~/agents_sync/projects/Cortex_and_Inara_dev/cortex/)",
                ),
                "pattern": types.Schema(
                    type=types.Type.STRING,
                    description="Regex pattern to search for (case-insensitive). Examples: 'def ha_', 'import httpx', 'TODO'",
                ),
                "context_lines": types.Schema(
                    type=types.Type.INTEGER,
                    description="Lines of context before/after each match (default 2, max 5)",
                ),
                "recursive": types.Schema(
                    type=types.Type.BOOLEAN,
                    description="Search subdirectories recursively (default true)",
                ),
            },
            required=["path", "pattern"],
        ),
    ),
    types.FunctionDeclaration(
        name="file_syntax_check",
        description=(
            "Check the syntax of a Python (.py) or JSON (.json) file without executing it. "
            "Returns OK or the error with line number. "
            "Use after editing a file before restarting Cortex. "
            "Restricted to the Cortex project directory."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Path to the .py or .json file to check",
                ),
            },
            required=["path"],
        ),
    ),
    # System-scoped
    types.FunctionDeclaration(
        name="file_read",
        description=(
            "Read a local file from the broader system (~/agents_sync/, ~/OSIT_dev/, ~/DgrZone_Nextcloud/, "
            "~/OSIT_Nextcloud/, Cortex home/). Supports offset for reading specific line ranges. "
            "For files within the Cortex project, prefer project_file_read instead. "
            "ADMIN ONLY."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to the file",
                ),
                "offset": types.Schema(
                    type=types.Type.INTEGER,
                    description="Start reading from this line number (1-based)",
                ),
                "max_lines": types.Schema(
                    type=types.Type.INTEGER,
                    description="Maximum lines to return (default 500)",
                ),
            },
            required=["path"],
        ),
    ),
    types.FunctionDeclaration(
        name="file_list",
        description=(
            "List files and subdirectories from the broader system. "
            "Shows sizes and modified timestamps. "
            "Allowed: ~/agents_sync/, ~/OSIT_dev/, ~/DgrZone_Nextcloud/, ~/OSIT_Nextcloud/. "
            "ADMIN ONLY."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to the directory",
                ),
            },
            required=["path"],
        ),
    ),
    types.FunctionDeclaration(
        name="file_write",
        description=(
            "Write or append content to a file. "
            "Write-allowed paths: ~/agents_sync/ and the Cortex home/ directory. "
            "Creates parent directories if needed. "
            "ADMIN ONLY. Requires user confirmation before executing."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "path": types.Schema(
                    type=types.Type.STRING,
                    description="Absolute or ~/... path to write to",
                ),
                "content": types.Schema(
                    type=types.Type.STRING,
                    description="Content to write",
                ),
                "mode": types.Schema(
                    type=types.Type.STRING,
                    description="'overwrite' (default, replaces file) or 'append' (adds to end)",
                ),
            },
            required=["path", "content"],
        ),
    ),
    types.FunctionDeclaration(
        name="session_read",
        description=(
            "Read a full conversation session log by date (YYYY-MM-DD). "
            "Useful for continuity and recalling past decisions. "
            "If the date is not found, lists available dates. "
            "Only reads this user's own sessions."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "date": types.Schema(
                    type=types.Type.STRING,
                    description="Date in YYYY-MM-DD format (e.g. '2026-05-08')",
                ),
            },
            required=["date"],
        ),
    ),
    types.FunctionDeclaration(
        name="session_search",
        description=(
            "Search past conversation session logs for a keyword or phrase. "
            "Returns matching excerpts with session dates, newest first. "
            "Only searches this user's own sessions."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "query": types.Schema(
                    type=types.Type.STRING,
                    description="Keyword or phrase to search for",
                ),
                "limit": types.Schema(
                    type=types.Type.INTEGER,
                    description="Max results to return (default 5, max 20)",
                ),
            },
            required=["query"],
        ),
    ),
]