diff --git a/cortex/requirements.txt b/cortex/requirements.txt
index 816a287..fd2be4a 100644
--- a/cortex/requirements.txt
+++ b/cortex/requirements.txt
@@ -19,6 +19,9 @@ python-multipart>=0.0.9   # required by FastAPI for Form() data
 # Async HTTP client — used for local OpenAI-compatible backend (Open WebUI / Ollama)
 httpx>=0.27.0
 
+# Web content extraction — strips ads/nav/boilerplate, returns clean article text
+trafilatura>=1.6.0
+
 # OpenAI-compatible client — tool calling for OpenRouter / LiteLLM / any OAI-compat host
 openai>=1.0.0
 
diff --git a/cortex/tools/__init__.py b/cortex/tools/__init__.py
index aa9ac00..e4d1e0a 100644
--- a/cortex/tools/__init__.py
+++ b/cortex/tools/__init__.py
@@ -17,7 +17,7 @@ from google.genai import types
 
 # ── Callable imports ──────────────────────────────────────────────────────────
 
-from tools.web import search as _web_search, http_fetch as _http_fetch
+from tools.web import search as _web_search, http_fetch as _http_fetch, web_read as _web_read
 from tools.ae_knowledge import (
     journal_list         as _ae_journal_list,
     journal_search       as _ae_journal_search,
@@ -30,7 +30,7 @@ from tools.ae_knowledge import (
     journal_entry_prepend as _ae_journal_entry_prepend,
 )
 from tools.ae_tasks import task_list as _ae_task_list
-from tools.files import file_read as _file_read, file_list as _file_list, file_write as _file_write, session_search as _session_search
+from tools.files import file_read as _file_read, file_list as _file_list, file_write as _file_write, session_search as _session_search, session_read as _session_read
 from tools.system import (
     shell_exec      as _shell_exec,
     claude_allow_dir as _claude_allow_dir,
@@ -90,8 +90,8 @@ import tools.agents       as _mod_agents
 # ── Tool categories — used by the Model Registry UI for grouped checkboxes ───
 
 TOOL_CATEGORIES: dict[str, list[str]] = {
-    "Web":              ["web_search", "http_fetch"],
-    "Files":            ["file_read", "file_list", "file_write", "session_search"],
+    "Web":              ["web_search", "http_fetch", "web_read"],
+    "Files":            ["file_read", "file_list", "file_write", "session_read", "session_search"],
     "Shell":            ["shell_exec", "claude_allow_dir"],
     "System":           ["cortex_restart", "cortex_logs", "cortex_status", "cortex_update"],
     "Tasks":            ["task_list", "task_create", "task_update", "task_complete"],
@@ -116,6 +116,7 @@ TOOL_CATEGORIES: dict[str, list[str]] = {
 _CALLABLES: dict[str, callable] = {
     "web_search":                _web_search,
     "http_fetch":                _http_fetch,
+    "web_read":                  _web_read,
     "ae_journal_list":           _ae_journal_list,
     "ae_journal_search":         _ae_journal_search,
     "ae_journal_entry_read":     _ae_journal_entry_read,
@@ -129,6 +130,7 @@ _CALLABLES: dict[str, callable] = {
     "file_read":                 _file_read,
     "file_list":                 _file_list,
     "file_write":                _file_write,
+    "session_read":              _session_read,
     "session_search":            _session_search,
     "shell_exec":                _shell_exec,
     "claude_allow_dir":          _claude_allow_dir,
diff --git a/cortex/tools/files.py b/cortex/tools/files.py
index 198c613..6493ac1 100644
--- a/cortex/tools/files.py
+++ b/cortex/tools/files.py
@@ -230,6 +230,34 @@ def _sync_file_write(path: str, content: str, mode: str) -> str:
 _SEARCH_EXCERPT_CHARS = 150
 
 
+async def session_read(date: str) -> str:
+    """Return the full session log for one day, given as YYYY-MM-DD.
+
+    Whitespace around ``date`` is stripped, then the blocking _sync_session_read lookup
+    runs on a worker thread; it lists recent dates when that day has no log.
+    """
+    wanted = date.strip()
+    return await asyncio.to_thread(_sync_session_read, wanted)
+
+
+def _sync_session_read(date: str) -> str:
+    """Return the persona's session log named ``date``, or a list of recent dates (blocking).
+
+    ``date`` is only compared with stems of ``*.md`` files in the sessions dir, so ``../x`` cannot escape it.
+    """
+    from persona import persona_path
+    sessions_dir = persona_path() / "sessions"
+    if not sessions_dir.exists():
+        return "No session logs found."
+    available = sorted((f.stem for f in sessions_dir.glob("*.md") if f.is_file()), reverse=True)
+    if not available:
+        return "No session logs found."
+    if date in available:
+        content = (sessions_dir / f"{date}.md").read_text()
+        return f"Session log for {date} ({len(content)} chars):\n\n{content}"
+    return f"No session log found for '{date}'. Available dates (most recent first):\n" + "\n".join(f"  {d}" for d in available[:15])
+
+
 async def session_search(query: str, limit: int = 5) -> str:
     """Search past session logs for a keyword or phrase.
 
@@ -329,6 +357,22 @@ DECLARATIONS = [
             required=["path", "content"],
         ),
     ),
+    types.FunctionDeclaration(
+        name="session_read",
+        description=(
+            "Read a full session log by date (YYYY-MM-DD). Returns the complete conversation "
+            "from that session — useful for continuity, recalling decisions, or reviewing "
+            "what was discussed on a specific day. If the date is not found, lists available dates. "
+            "Only reads this user's own sessions."
+        ),
+        parameters=types.Schema(
+            type=types.Type.OBJECT,
+            properties={
+                "date": types.Schema(type=types.Type.STRING, description="Date in YYYY-MM-DD format (e.g. '2026-05-08')"),
+            },
+            required=["date"],
+        ),
+    ),
     types.FunctionDeclaration(
         name="session_search",
         description=(
diff --git a/cortex/tools/web.py b/cortex/tools/web.py
index d9c78db..a553620 100644
--- a/cortex/tools/web.py
+++ b/cortex/tools/web.py
@@ -1,5 +1,5 @@
 """
-Web tools — search (DuckDuckGo) and direct HTTP fetch.
+Web tools — search (DuckDuckGo), direct HTTP fetch, and clean content extraction.
 """
 
 import asyncio
@@ -56,20 +56,25 @@ async def http_fetch(
     method: str = "GET",
     body: str | None = None,
     timeout: int = 15,
+    max_chars: int = 8192,
 ) -> str:
-    """Fetch a URL directly and return the response body.
+    """Fetch a URL directly and return the raw response body.
 
     Unlike web_search, this hits a specific URL — useful for health checks,
-    API probing, JSON endpoints, webhook testing, etc.
-    Response body is capped at 8 KB.
+    API probing, JSON endpoints, webhook testing, or reading raw page source.
+    For readable article content, use web_read instead.
+    Response body is capped at max_chars (default 8192, max 32768).
     """
     method = method.upper()
     timeout = min(max(int(timeout), 1), 60)
+    max_chars = min(max(int(max_chars), 100), 32768)
     try:
         async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
             resp = await client.request(method, url, content=body)
-            body_text = resp.text[:8192]
-            return f"HTTP {resp.status_code} {resp.url}\n\n{body_text}"
+            body_text = resp.text[:max_chars]
+            truncated = len(resp.text) > max_chars
+            suffix = f"\n\n[… truncated at {max_chars} chars]" if truncated else ""
+            return f"HTTP {resp.status_code} {resp.url}\n\n{body_text}{suffix}"
     except httpx.HTTPError as e:
         return f"HTTP error: {e}"
     except Exception as e:
@@ -77,6 +82,39 @@ async def http_fetch(
         return f"Error: {e}"
 
 
+async def web_read(url: str, max_chars: int = 16000) -> str:
+    """Download ``url`` and return its main readable text without page boilerplate.
+
+    ``max_chars`` is coerced to int and clamped to 1000-32000, then the blocking
+    trafilatura work (_sync_web_read) is run on a worker thread via asyncio.to_thread.
+    For raw responses (JSON APIs, health checks), use http_fetch instead.
+    """
+    limit = int(max_chars)
+    limit = 32000 if limit > 32000 else max(limit, 1000)
+    return await asyncio.to_thread(_sync_web_read, url, limit)
+
+
+def _sync_web_read(url: str, max_chars: int) -> str:
+    """Blocking half of web_read: download ``url``, extract its main text, cap it at ``max_chars``.
+
+    Tries markdown output with links first, then trafilatura's default extraction; failures come back as
+    message strings. NOTE(review): confirm the pinned trafilatura supports output_format="markdown".
+    """
+    try:
+        import trafilatura  # imported lazily so a missing package yields a message, not a crash
+    except ImportError:
+        return "web_read requires trafilatura — run: pip install trafilatura"
+    page = trafilatura.fetch_url(url)
+    if page is None:
+        return f"Failed to download content from: {url}"
+    extracted = trafilatura.extract(page, output_format="markdown", include_links=True, url=url)
+    extracted = extracted or trafilatura.extract(page, url=url)
+    if not extracted:
+        return f"Could not extract readable content from: {url}"
+    notice = f"\n\n[… truncated at {max_chars} chars — pass a larger max_chars to see more]" if len(extracted) > max_chars else ""
+    return f"Content from {url}:\n\n{extracted[:max_chars]}{notice}"
+
+
 DECLARATIONS = [
     types.FunctionDeclaration(
         name="web_search",
@@ -96,10 +134,10 @@ DECLARATIONS = [
     types.FunctionDeclaration(
         name="http_fetch",
         description=(
-            "Fetch a specific URL and return the response. Unlike web_search, this hits "
+            "Fetch a specific URL and return the raw response body. Unlike web_search, this hits "
             "a direct URL — useful for health checks, JSON API endpoints, webhook testing, "
-            "or reading a specific page when you already know the URL. "
-            "Response body is capped at 8 KB."
+            "or inspecting raw page source. For readable article/doc content, use web_read instead. "
+            "Response body is capped at max_chars (default 8192, max 32768)."
         ),
         parameters=types.Schema(
             type=types.Type.OBJECT,
@@ -108,6 +146,25 @@ DECLARATIONS = [
                 "method": types.Schema(type=types.Type.STRING, description="HTTP method: GET (default), POST, HEAD"),
                 "body": types.Schema(type=types.Type.STRING, description="Optional request body (for POST requests)"),
                 "timeout": types.Schema(type=types.Type.INTEGER, description="Request timeout in seconds (default 15, max 60)"),
+                "max_chars": types.Schema(type=types.Type.INTEGER, description="Max characters to return (default 8192, max 32768)"),
+            },
+            required=["url"],
+        ),
+    ),
+    types.FunctionDeclaration(
+        name="web_read",
+        description=(
+            "Fetch a URL and extract clean readable text, stripping ads, navigation, sidebars, "
+            "and other boilerplate. Returns the main article/document content as markdown. "
+            "Use this for blog posts, documentation, news articles, GitHub READMEs, or any page "
+            "where you want the content without surrounding noise. "
+            "For raw HTTP responses (JSON APIs, health checks, source inspection), use http_fetch."
+        ),
+        parameters=types.Schema(
+            type=types.Type.OBJECT,
+            properties={
+                "url": types.Schema(type=types.Type.STRING, description="Full URL to fetch and extract"),
+                "max_chars": types.Schema(type=types.Type.INTEGER, description="Max characters to return (default 16000, max 32000)"),
             },
             required=["url"],
         ),