Cortex-Inara/cortex/tools/web.py

"""
Web tools — search (DuckDuckGo) and direct HTTP fetch.
"""

import asyncio
import logging

import httpx
from google.genai import types

from config import settings

logger = logging.getLogger(__name__)


async def search(query: str, max_results: int | None = None) -> str:
    """Search DuckDuckGo and return the results as a markdown-formatted string.

    Args:
        query: The search query string.
        max_results: Desired number of results. ``None`` or ``0`` falls back
            to ``settings.ddg_max_results``. The effective value is coerced
            to ``int`` and clamped to the range 1-10.

    Returns:
        A header line followed by a numbered markdown list (linked title,
        then an indented snippet when the result has one), or a
        "No results found" message when the search yields nothing.
    """
    # Clamp on both sides: with only an upper bound, a negative value would
    # be handed to the search backend unchanged.
    requested = int(max_results or settings.ddg_max_results)
    n = max(1, min(requested, 10))

    # The search helper is synchronous; run it in a worker thread so the
    # event loop is not blocked while it waits on the network.
    results = await asyncio.to_thread(_sync_search, query, n)
    if not results:
        return f"No results found for: {query}"

    lines = [f"Search results for: **{query}**\n"]
    for i, r in enumerate(results, 1):
        # Tolerate a result that lacks a key instead of failing the whole
        # search with KeyError; output is unchanged when both are present.
        href = r.get("href") or ""
        title = r.get("title") or href
        lines.append(f"{i}. [{title}]({href})")
        if r.get("body"):
            lines.append(f"   {r['body']}")
        lines.append("")

    return "\n".join(lines).strip()


def _sync_search(query: str, max_results: int) -> list[dict]:
    """Run a blocking DuckDuckGo text search and return the raw result dicts.

    Intended to be called through ``asyncio.to_thread``. Any exception raised
    while creating the client or searching is logged as a warning and
    reported as an empty list.
    """
    from ddgs import DDGS

    api_key = settings.ddg_api_key
    # Paid account — pass token for higher rate limits
    client_options = (
        {"headers": {"Authorization": f"Bearer {api_key}"}} if api_key else {}
    )

    try:
        with DDGS(**client_options) as client:
            hits = client.text(query, max_results=max_results)
            return list(hits)
    except Exception as exc:
        logger.warning("DuckDuckGo search error: %s", exc)
        return []


async def http_fetch(
    url: str,
    method: str = "GET",
    body: str | None = None,
    timeout: int = 15,
) -> str:
    """Fetch a URL directly and return the status line plus response body.

    Unlike web_search, this hits a specific URL — useful for health checks,
    API probing, JSON endpoints, webhook testing, etc.

    Args:
        url: Full URL to request.
        method: HTTP method name; upper-cased before use.
        body: Optional request body, sent as the raw request content.
        timeout: Timeout in seconds; coerced to ``int`` and clamped to 1-60.

    Returns:
        ``"HTTP <status> <final-url>"``, a blank line, then at most the first
        8192 characters of the decoded response text. Redirects are followed.
        Request failures are returned as text instead of raised:
        ``"HTTP error: ..."`` for ``httpx.HTTPError`` and ``"Error: ..."``
        for any other exception (the latter is also logged as a warning).
    """
    method = method.upper()
    timeout = min(max(int(timeout), 1), 60)
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            resp = await client.request(method, url, content=body)
            # NOTE: the whole body is downloaded before slicing; the cap only
            # bounds the size of the returned string, not the transfer.
            body_text = resp.text[:8192]
            return f"HTTP {resp.status_code} {resp.url}\n\n{body_text}"
    except httpx.HTTPError as e:
        # Some httpx errors (timeouts in particular) can stringify to an
        # empty message; fall back to the class name so the caller still
        # learns what went wrong.
        return f"HTTP error: {str(e) or type(e).__name__}"
    except Exception as e:
        logger.warning("http_fetch error for %s: %s", url, e)
        return f"Error: {str(e) or type(e).__name__}"


# Function declarations (google-genai schema objects) describing the tools in
# this module. The names "web_search" and "http_fetch" are presumably mapped
# to search() and http_fetch() by a dispatcher outside this file — confirm.
DECLARATIONS = [
    # web_search: required query, optional result count.
    types.FunctionDeclaration(
        name="web_search",
        description=(
            "Search the web for current information. Use this when you need up-to-date "
            "facts, news, documentation, or anything not in your training data."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "query": types.Schema(
                    type=types.Type.STRING,
                    description="The search query string",
                ),
                "max_results": types.Schema(
                    type=types.Type.INTEGER,
                    description="Number of results to return (default 5, max 10)",
                ),
            },
            required=["query"],
        ),
    ),
    # http_fetch: required URL; method, body and timeout are optional.
    types.FunctionDeclaration(
        name="http_fetch",
        description=(
            "Fetch a specific URL and return the response. Unlike web_search, this hits "
            "a direct URL — useful for health checks, JSON API endpoints, webhook testing, "
            "or reading a specific page when you already know the URL. "
            "Response body is capped at 8 KB."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "url": types.Schema(
                    type=types.Type.STRING,
                    description="Full URL to fetch",
                ),
                "method": types.Schema(
                    type=types.Type.STRING,
                    description="HTTP method: GET (default), POST, HEAD",
                ),
                "body": types.Schema(
                    type=types.Type.STRING,
                    description="Optional request body (for POST requests)",
                ),
                "timeout": types.Schema(
                    type=types.Type.INTEGER,
                    description="Request timeout in seconds (default 15, max 60)",
                ),
            },
            required=["url"],
        ),
    ),
]