Production Tool-Agent Patterns
Building a tool-augmented agent that works in a demo is straightforward. Building one that performs reliably at scale in production requires additional patterns: proper tool orchestration, defensive architecture, observability, and performance optimization. This lesson catalogs the patterns used in production tool-agent systems.
Pattern 1: Tool Grouping and Namespacing
In complex agents with many tools, group tools by domain and give every tool name a domain prefix (namespacing) so that names stay unique and unambiguous:
from langchain_core.tools import BaseTool, tool
from typing import Sequence
def create_tool_group(namespace: str, tools: list) -> list:
    """Prefix every tool's ``name`` with ``"<namespace>_"`` unless already prefixed.

    The tools are renamed in place and the very same list object is handed
    back, so the call is idempotent for a given namespace.
    """
    prefix = f"{namespace}_"
    for member in tools:
        current_name = member.name
        if current_name.startswith(prefix):
            # Already namespaced: leave untouched so repeated calls are no-ops.
            continue
        member.name = f"{prefix}{current_name}"
    return tools
# Database tools group
@tool
def db_query(sql: str) -> str:
    """Execute a read-only SQL query. Use for data retrieval."""
    # Placeholder body: the implementation is elided in this lesson.
    # NOTE(review): the "read-only" guarantee is stated only in the docstring;
    # no enforcement is visible here — confirm the real implementation rejects writes.
    # NOTE(review): with @tool the docstring presumably doubles as the tool
    # description, so treat it as runtime text — confirm before rewording.
    ...
@tool
def db_count(table: str, where: str = "") -> str:
    """Count rows in a table. Faster than SELECT * for large tables."""
    # Placeholder body: the implementation is elided in this lesson.
    # `where` defaults to the empty string, presumably meaning "no filter" — TODO confirm.
    # NOTE(review): `table` and `where` arrive as raw strings; how they reach SQL
    # is not visible here — verify they are validated/parameterized.
    ...
# Email tools group
@tool
def email_send(to: str, subject: str, body: str) -> str:
    """Send an email. Requires approval for external addresses."""
    # Placeholder body: the implementation is elided in this lesson.
    # NOTE(review): the approval requirement for external addresses is stated
    # only in the docstring; no check is visible in this stub.
    ...
@tool
def email_search(query: str, max_results: int = 10) -> str:
    """Search the email inbox for messages matching the query."""
    # Placeholder body: the implementation is elided in this lesson.
    # `max_results` defaults to 10; presumably an upper bound on returned
    # messages — TODO confirm against the real implementation.
    ...
# Build one namespaced list per domain, then concatenate them into the full
# tool set handed to the agent.
database_tools = create_tool_group(namespace="db", tools=[db_query, db_count])
email_tools = create_tool_group(namespace="email", tools=[email_send, email_search])
all_tools = [*database_tools, *email_tools]
Pattern 2: Tiered Tool Selection
Not every task needs every tool. Use a tiered approach to limit tool availability based on context:
from enum import Enum
class AgentTier(Enum):
    """Permission tiers that decide which tools an agent may be given."""

    READ_ONLY = "read_only"    # Query, search, analyze
    STANDARD = "standard"      # + create, update
    PRIVILEGED = "privileged"  # + delete, admin operations


# Tool names allowed per tier. ``None`` is an explicit "every tool" marker and
# must only ever be assigned on purpose; a tier that is absent from this table
# gets no tools at all (see get_tools_for_tier).
TOOL_TIERS = {
    AgentTier.READ_ONLY: ["db_query", "db_count", "search_web", "email_search"],
    AgentTier.STANDARD: ["db_query", "db_count", "search_web", "email_search", "db_insert", "email_send"],
    AgentTier.PRIVILEGED: None,  # All tools
}


def get_tools_for_tier(tier: AgentTier, all_tools: list[BaseTool]) -> list[BaseTool]:
    """Return the subset of ``all_tools`` that ``tier`` is allowed to use.

    Args:
        tier: The permission tier of the agent being built.
        all_tools: Every available tool; each item must expose a ``name``.

    Returns:
        ``all_tools`` itself when the tier is mapped to ``None`` (all tools),
        otherwise a new list with the allowed tools in their original order.
        A tier that is not present in ``TOOL_TIERS`` (for example the raw
        string ``"read_only"`` or a newly added enum member) yields an empty
        list.
    """
    # Fail closed: a missing mapping must not be confused with the explicit
    # ``None`` ("all tools") marker, otherwise unknown tiers become privileged.
    if tier not in TOOL_TIERS:
        return []

    allowed_names = TOOL_TIERS[tier]
    if allowed_names is None:
        return all_tools

    allowed = frozenset(allowed_names)  # O(1) membership per tool
    return [t for t in all_tools if t.name in allowed]
# Untrusted users only ever receive the read-only subset of the tool set.
read_only_tools = get_tools_for_tier(
    tier=AgentTier.READ_ONLY,
    all_tools=all_tools,
)
Pattern 3: Tool Call Audit Logging
Every tool call in production should be logged with full context for compliance and debugging:
import logging
import time
import json
from functools import wraps
from typing import Callable, Any
logger = logging.getLogger("tool_audit")


def _audit_preview(value: Any, limit: int) -> str:
    """Return at most ``limit`` characters of ``str(value)`` without ever raising."""
    try:
        return str(value)[:limit]
    except Exception:
        # A broken __str__/__repr__ must never take the audited tool call down.
        return f"<unprintable {type(value).__name__}>"


def audit_tool(func: Callable) -> Callable:
    """Decorator that logs all tool calls with timing and outcomes.

    Three JSON events are written to the ``tool_audit`` logger:
    ``tool_call_start`` (INFO), then either ``tool_call_success`` (INFO) or
    ``tool_call_error`` (ERROR). Argument, result and error texts are
    truncated previews. Exceptions raised by the tool are logged and
    re-raised unchanged.

    Both plain functions and ``async def`` functions are supported; for the
    latter the wrapper is itself a coroutine function and the outcome is
    logged once the awaited call finishes.

    Args:
        func: The tool callable to wrap.

    Returns:
        A wrapper with the same call signature and metadata as ``func``.
    """
    import inspect  # local import: only needed to detect coroutine functions

    tool_name = getattr(func, "__name__", type(func).__name__)

    def log_start(args: tuple, kwargs: dict) -> float:
        """Emit the start event and return the monotonic start timestamp."""
        started = time.monotonic()
        logger.info(json.dumps({
            "event": "tool_call_start",
            "tool": tool_name,
            "args": _audit_preview(args, 200),
            "kwargs": {k: _audit_preview(v, 100) for k, v in kwargs.items()},
        }))
        return started

    def elapsed_ms(started: float) -> float:
        return round((time.monotonic() - started) * 1000, 2)

    def log_success(started: float, result: Any) -> None:
        logger.info(json.dumps({
            "event": "tool_call_success",
            "tool": tool_name,
            "duration_ms": elapsed_ms(started),
            "result_preview": _audit_preview(result, 200),
        }))

    def log_error(started: float, exc: Exception) -> None:
        logger.error(json.dumps({
            "event": "tool_call_error",
            "tool": tool_name,
            "duration_ms": elapsed_ms(started),
            "error_type": type(exc).__name__,
            "error_msg": _audit_preview(exc, 500),
        }))

    if inspect.iscoroutinefunction(func):
        @wraps(func)
        async def async_wrapper(*args, **kwargs) -> Any:
            started = log_start(args, kwargs)
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                log_error(started, exc)
                raise
            log_success(started, result)
            return result

        return async_wrapper

    @wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        started = log_start(args, kwargs)
        # Only the tool call itself sits inside ``try`` so that a problem while
        # logging the success event is never reported as a tool failure.
        try:
            result = func(*args, **kwargs)
        except Exception as exc:
            log_error(started, exc)
            raise
        log_success(started, result)
        return result

    return wrapper
Pattern 4: Human-in-the-Loop for High-Stakes Actions
Some tool calls should require human confirmation before execution:
from langchain_core.tools import tool
from langchain_core.callbacks import CallbackManagerForToolRun
import asyncio
# Names of tools considered high-stakes enough to need a human sign-off.
# NOTE(review): nothing in the visible code consults this set yet — confirm
# where it is meant to be enforced.
REQUIRES_CONFIRMATION = {
    "db_delete",
    "email_send",
    "file_delete",
    "payment_process",
}
class ConfirmationRequired(Exception):
    """Raised when a tool requires human confirmation before proceeding.

    Attributes:
        tool_name: Name of the tool whose call is awaiting approval.
        tool_args: The arguments the tool was about to be called with.
        preview: Human-readable description of the pending action.

    The exception message (and therefore ``str(exc)`` and ``exc.args``) is
    ``"Action requires confirmation: <preview>"``.
    """

    def __init__(self, tool_name: str, args: dict, preview: str):
        super().__init__(f"Action requires confirmation: {preview}")
        self.tool_name = tool_name
        # ``args`` is a reserved attribute on BaseException: assigning the dict
        # to it coerces it to a tuple of its keys and ``super().__init__`` then
        # overwrites it with the message, so the tool arguments would be lost.
        # Keep them under a dedicated attribute instead.
        self.tool_args = args
        self.preview = preview
async def ask_for_confirmation(action_preview: str) -> bool:
    """In production: send to UI for human approval. Here: prompt via CLI.

    Args:
        action_preview: Human-readable description of the pending action.

    Returns:
        ``True`` only when the operator types ``yes`` (case-insensitive,
        surrounding whitespace ignored). Any other answer, or a closed stdin,
        counts as a refusal.
    """
    print(f"\n⚠️ CONFIRMATION REQUIRED:\n{action_preview}\n")
    try:
        # input() blocks; run it in a worker thread so the event loop (and any
        # other tasks on it) keeps running while we wait for the human.
        response = await asyncio.to_thread(input, "Approve? (yes/no): ")
    except EOFError:
        # No interactive stdin available: deny rather than crash the agent.
        return False
    return response.strip().lower() == "yes"
def guarded_tool(func):
    """Wrap a tool to require human confirmation before destructive operations.

    The returned wrapper is a coroutine function. It builds a preview of the
    pending call, awaits ``ask_for_confirmation`` and only runs ``func`` when
    the answer is positive. Both plain and ``async def`` tools are supported.

    Args:
        func: The tool callable to guard.

    Returns:
        An async wrapper that resolves to the tool's result, or to the string
        ``"Action cancelled by user: <tool name>"`` when approval is refused.
    """
    import inspect  # local import: only needed to detect awaitable results

    @wraps(func)
    async def wrapper(*args, **kwargs):
        # default=str: this text is only shown to a human, so values JSON
        # cannot encode are deliberately rendered with str() instead of
        # aborting the approval flow with a TypeError.
        preview = f"Tool: {func.__name__}\nArgs: {json.dumps(kwargs, indent=2, default=str)}"
        if args:
            # The approver must see everything the tool will receive, not
            # just the keyword arguments.
            preview += f"\nPositional args: {json.dumps(list(args), indent=2, default=str)}"

        approved = await ask_for_confirmation(preview)
        if not approved:
            return f"Action cancelled by user: {func.__name__}"

        result = func(*args, **kwargs)
        if inspect.isawaitable(result):
            # Async tool: hand back its result, not an un-awaited coroutine.
            result = await result
        return result

    return wrapper
Pattern 5: Tool Result Caching
Cache tool results to avoid redundant API calls and reduce costs:
import hashlib
import json
import time
from datetime import datetime, timedelta
class ToolResultCache:
"""LRU cache for tool results with TTL."""
def __init__(self, ttl_seconds: int = 300, max_size: int = 100):
self._cache: dict[str, dict] = {}
self.ttl = ttl_seconds
self.max_size = max_size
def _make_key(self, tool_name: str, args: dict) -> str:
content = json.dumps({"tool": tool_name, "args": args}, sort_keys=True)
return hashlib.sha256(content.encode()).hexdigest()
def get(self, tool_name: str, args: dict) -> str | None:
key = self._make_key(tool_name, args)
entry = self._cache.get(key)
if not entry:
return None
if datetime.utcnow() > entry["expires_at"]:
del self._cache[key]
return None
return entry["result"]
def set(self, tool_name: str, args: dict, result: str) -> None:
if len(self._cache) >= self.max_size:
# Evict oldest entry
oldest_key = min(self._cache, key=lambda k: self._cache[k]["created_at"])
del self._cache[oldest_key]
key = self._make_key(tool_name, args)
self._cache[key] = {
"result": result,
"created_at": datetime.utcnow(),
"expires_at": datetime.utcnow() + timedelta(seconds=self.ttl),
}
# Shared cache instance with a 300-second time-to-live.
cache = ToolResultCache(ttl_seconds=300)

# Cacheable tools: read-only, deterministic operations
CACHEABLE_TOOLS = {
    "db_query",
    "fetch_documentation",
    "get_weather",
    "search_web",
}
Pattern 6: Agent Observability Dashboard
Track tool call patterns for optimization:
from collections import defaultdict, Counter
class AgentMetrics:
    """In-memory, per-tool tallies of calls, errors, durations and cache hits."""

    def __init__(self):
        self.call_counts = Counter()
        self.error_counts = Counter()
        self.total_duration_ms = defaultdict(float)
        self.cache_hits = Counter()

    def record(self, tool_name: str, duration_ms: float, success: bool, from_cache: bool = False) -> None:
        """Account for one finished call of ``tool_name``."""
        self.call_counts.update([tool_name])
        if not success:
            self.error_counts.update([tool_name])
        self.total_duration_ms[tool_name] = self.total_duration_ms[tool_name] + duration_ms
        if from_cache:
            self.cache_hits.update([tool_name])

    def report(self) -> dict:
        """Return ``{tool name: stats}`` for every tool recorded so far.

        Rates are formatted as percentage strings with one decimal; the
        average duration is rounded to one decimal.
        """
        summary = {}
        for name, calls in self.call_counts.items():
            errors = self.error_counts[name]
            hits = self.cache_hits[name]
            summary[name] = {
                "calls": calls,
                "errors": errors,
                "error_rate": f"{errors / calls:.1%}",
                "avg_duration_ms": round(self.total_duration_ms[name] / calls, 1),
                "cache_hit_rate": f"{hits / calls:.1%}",
            }
        return summary
These patterns aren't necessary for prototypes but become critical once you're running thousands of agent invocations per day. Implement them incrementally as your scale grows.