Building Tool-Augmented Agents

Production Tool-Agent Patterns

15m read

Production Tool-Agent Patterns

Building a tool-augmented agent that works in a demo is straightforward. Building one that performs reliably at scale in production requires additional patterns: proper tool orchestration, defensive architecture, observability, and performance optimization. This lesson catalogs the patterns used in production tool-agent systems.

Pattern 1: Tool Grouping and Namespacing

In complex agents with many tools, group tools by domain and use namespacing to prevent confusion:

from langchain_core.tools import BaseTool, tool
from typing import Sequence

def create_tool_group(namespace: str, tools: list) -> list:
    """Prefix each tool's name with ``namespace`` (idempotent).

    Tools whose names already carry the prefix are left untouched, so the
    function can safely be applied more than once. The list is mutated in
    place and returned for convenient chaining.
    """
    prefix = f"{namespace}_"
    for candidate in tools:
        if not candidate.name.startswith(prefix):
            candidate.name = prefix + candidate.name
    return tools

# Database tools group
# Read-only retrieval tool. NOTE: the docstring below doubles as the tool
# description the LLM sees, so it is part of the tool's behavior — do not edit casually.
@tool
def db_query(sql: str) -> str:
    """Execute a read-only SQL query. Use for data retrieval."""
    ...  # implementation elided in this lesson

# Cheap aggregate alternative to a full SELECT; docstring is the LLM-visible description.
@tool
def db_count(table: str, where: str = "") -> str:
    """Count rows in a table. Faster than SELECT * for large tables."""
    ...  # implementation elided in this lesson

# Email tools group
# Side-effecting tool — the docstring warns the model that external sends need approval.
@tool
def email_send(to: str, subject: str, body: str) -> str:
    """Send an email. Requires approval for external addresses."""
    ...  # implementation elided in this lesson

# Read-only inbox search; docstring is the LLM-visible description.
@tool
def email_search(query: str, max_results: int = 10) -> str:
    """Search the email inbox for messages matching the query."""
    ...  # implementation elided in this lesson

# Group and namespace
# Prefixing makes each tool's domain obvious to the model ("db_query" vs
# "email_search") and prevents name collisions when merging tool lists.
database_tools = create_tool_group("db", [db_query, db_count])
email_tools = create_tool_group("email", [email_send, email_search])

all_tools = database_tools + email_tools

Pattern 2: Tiered Tool Selection

Not every task needs every tool. Use a tiered approach to limit tool availability based on context:

from enum import Enum

class AgentTier(Enum):
    """Capability tiers controlling which tools an agent may invoke."""
    READ_ONLY = "read_only"          # Query, search, analyze
    STANDARD = "standard"            # + create, update
    PRIVILEGED = "privileged"        # + delete, admin operations

# Allow-list of tool names per tier. A value of None means "no restriction";
# get_tools_for_tier treats it as "return every available tool".
TOOL_TIERS = {
    AgentTier.READ_ONLY: ["db_query", "db_count", "search_web", "email_search"],
    AgentTier.STANDARD: ["db_query", "db_count", "search_web", "email_search", "db_insert", "email_send"],
    AgentTier.PRIVILEGED: None,  # All tools
}

def get_tools_for_tier(tier: AgentTier, all_tools: list[BaseTool]) -> list[BaseTool]:
    """Return the subset of ``all_tools`` the given tier is allowed to use.

    A tier mapped to None in TOOL_TIERS (e.g. PRIVILEGED) — or a tier not
    present in the mapping at all — receives the full tool list.
    """
    permitted = TOOL_TIERS.get(tier)
    if permitted is None:
        return all_tools
    return [candidate for candidate in all_tools if candidate.name in permitted]

# Create a read-only agent for untrusted users
# (only query/search tools are exposed — nothing that mutates state).
read_only_tools = get_tools_for_tier(AgentTier.READ_ONLY, all_tools)

Pattern 3: Tool Call Audit Logging

Every tool call in production should be logged with full context for compliance and debugging:

import logging
import time
import json
from functools import wraps
from typing import Callable, Any

logger = logging.getLogger("tool_audit")

def audit_tool(func: Callable) -> Callable:
    """Decorator that logs every call to ``func`` as structured JSON events.

    Emits a ``tool_call_start`` record before invocation, then either a
    ``tool_call_success`` or ``tool_call_error`` record with wall-clock
    duration. Arguments and results are truncated so oversized payloads
    cannot flood the audit log. Exceptions are re-raised unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        tool_name = func.__name__
        started = time.monotonic()

        logger.info(json.dumps({
            "event": "tool_call_start",
            "tool": tool_name,
            "args": str(args)[:200],
            "kwargs": {key: str(value)[:100] for key, value in kwargs.items()},
        }))

        def _elapsed_ms() -> float:
            # Milliseconds since the call began, rounded for readable logs.
            return round((time.monotonic() - started) * 1000, 2)

        try:
            outcome = func(*args, **kwargs)
        except Exception as exc:
            logger.error(json.dumps({
                "event": "tool_call_error",
                "tool": tool_name,
                "duration_ms": _elapsed_ms(),
                "error_type": type(exc).__name__,
                "error_msg": str(exc)[:500],
            }))
            raise

        logger.info(json.dumps({
            "event": "tool_call_success",
            "tool": tool_name,
            "duration_ms": _elapsed_ms(),
            "result_preview": str(outcome)[:200],
        }))
        return outcome

    return wrapper

Pattern 4: Human-in-the-Loop for High-Stakes Actions

Some tool calls should require human confirmation before execution:

from langchain_core.tools import tool
from langchain_core.callbacks import CallbackManagerForToolRun
import asyncio

# Tool names whose execution must be approved by a human before running.
REQUIRES_CONFIRMATION = {"email_send", "db_delete", "file_delete", "payment_process"}

class ConfirmationRequired(Exception):
    """Raised when a tool requires human confirmation before proceeding."""
    def __init__(self, tool_name: str, args: dict, preview: str):
        self.tool_name = tool_name
        self.args = args
        self.preview = preview
        super().__init__(f"Action requires confirmation: {preview}")

async def ask_for_confirmation(action_preview: str) -> bool:
    """In production: send to UI for human approval. Here: prompt via CLI.

    Returns True only for an exact "yes" (case-insensitive, trimmed).

    The blocking ``input()`` call is dispatched to a worker thread via
    ``asyncio.to_thread`` — calling it directly inside a coroutine would
    freeze the entire event loop (and every concurrent agent run) while
    waiting for the human.
    """
    print(f"\n⚠️  CONFIRMATION REQUIRED:\n{action_preview}\n")
    response = await asyncio.to_thread(input, "Approve? (yes/no): ")
    return response.strip().lower() == "yes"

def guarded_tool(func):
    """Wrap a tool to require human confirmation before destructive operations.

    The wrapper shows the approver a preview of the pending call, awaits
    approval, and either executes the tool or returns a cancellation
    message. Two fixes over the naive version:

    * positional arguments are included in the preview (previously only
      ``kwargs`` were shown, so unreviewed values could slip through);
    * ``default=str`` keeps the preview renderable even when an argument
      is not JSON-serializable (previously ``json.dumps`` would raise).
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        rendered_args = json.dumps(
            {"args": args, "kwargs": kwargs}, indent=2, default=str
        )
        preview = f"Tool: {func.__name__}\nArgs: {rendered_args}"
        approved = await ask_for_confirmation(preview)
        if not approved:
            return f"Action cancelled by user: {func.__name__}"
        return func(*args, **kwargs)
    return wrapper

Pattern 5: Tool Result Caching

Cache tool results to avoid redundant API calls and reduce costs:

import hashlib
import json
from datetime import datetime, timedelta, timezone

class ToolResultCache:
    """LRU cache for tool results with TTL."""
    
    def __init__(self, ttl_seconds: int = 300, max_size: int = 100):
        self._cache: dict[str, dict] = {}
        self.ttl = ttl_seconds
        self.max_size = max_size
    
    def _make_key(self, tool_name: str, args: dict) -> str:
        content = json.dumps({"tool": tool_name, "args": args}, sort_keys=True)
        return hashlib.sha256(content.encode()).hexdigest()
    
    def get(self, tool_name: str, args: dict) -> str | None:
        key = self._make_key(tool_name, args)
        entry = self._cache.get(key)
        if not entry:
            return None
        if datetime.utcnow() > entry["expires_at"]:
            del self._cache[key]
            return None
        return entry["result"]
    
    def set(self, tool_name: str, args: dict, result: str) -> None:
        if len(self._cache) >= self.max_size:
            # Evict oldest entry
            oldest_key = min(self._cache, key=lambda k: self._cache[k]["created_at"])
            del self._cache[oldest_key]
        
        key = self._make_key(tool_name, args)
        self._cache[key] = {
            "result": result,
            "created_at": datetime.utcnow(),
            "expires_at": datetime.utcnow() + timedelta(seconds=self.ttl),
        }

# Shared cache instance; 5-minute TTL bounds how stale a reused result can be.
cache = ToolResultCache(ttl_seconds=300)

# Cacheable tools: read-only, deterministic operations
# (never cache side-effecting tools like email_send — replays would re-fire them).
CACHEABLE_TOOLS = {"db_query", "search_web", "get_weather", "fetch_documentation"}

Pattern 6: Agent Observability Dashboard

Track tool call patterns for optimization:

from collections import defaultdict, Counter

class AgentMetrics:
    """Aggregates per-tool call statistics for an observability dashboard."""

    def __init__(self):
        self.call_counts = Counter()
        self.error_counts = Counter()
        self.total_duration_ms = defaultdict(float)
        self.cache_hits = Counter()

    def record(self, tool_name: str, duration_ms: float, success: bool, from_cache: bool = False) -> None:
        """Fold one tool invocation into the running totals."""
        self.call_counts[tool_name] += 1
        self.total_duration_ms[tool_name] += duration_ms
        if not success:
            self.error_counts[tool_name] += 1
        if from_cache:
            self.cache_hits[tool_name] += 1

    def _summarize(self, tool: str) -> dict:
        """Build the stats row for one tool; callers guarantee calls >= 1."""
        calls = self.call_counts[tool]
        errors = self.error_counts[tool]
        return {
            "calls": calls,
            "errors": errors,
            "error_rate": f"{errors / calls:.1%}",
            "avg_duration_ms": round(self.total_duration_ms[tool] / calls, 1),
            "cache_hit_rate": f"{self.cache_hits[tool] / calls:.1%}",
        }

    def report(self) -> dict:
        """Return {tool_name: summary dict} for every tool recorded so far."""
        return {tool: self._summarize(tool) for tool in self.call_counts}

These patterns aren't necessary for prototypes but become critical once you're running thousands of agent invocations per day. Implement them incrementally as your scale grows.

Tool-Augmented Agents — Check Your Understanding

3 questions · passing score 70%

  1. In a RAG pipeline, what is the purpose of the 'retriever' component?

  2. What problem does 'semantic chunking' solve compared to fixed-size chunking?

  3. What is 'hybrid search' in the context of RAG?

Questions remaining: 3