1. PydanticAI Harness Overview
The PydanticAI Harness is an orchestration layer purpose-built for coding agents. It wraps a standard PydanticAI Agent with file system access, shell execution, project understanding, and structured tool sets — transforming a conversational agent into an autonomous coding assistant.
1.1 Architecture
1.2 Basic Harness Setup
from pydantic_ai import Agent
from pydantic_ai.harness import Harness, HarnessConfig
# Configure the harness for a Python project
config = HarnessConfig(
    workspace_dir="/path/to/project",
    allowed_commands=["python", "pip", "pytest", "ruff"],
    max_file_size_bytes=1_000_000,  # 1MB limit per file
    sandbox_mode="docker",  # or "local" for development
)

# Build the underlying agent first, then wrap it with the harness
coding_agent = Agent(
    "openai:gpt-4o",
    system_prompt="You are an expert Python developer. Write clean, tested code.",
)
harness = Harness(agent=coding_agent, config=config)

# Run a coding task
coding_task = (
    "Create a FastAPI endpoint that returns the current time in ISO format. "
    "Include a test file."
)
result = harness.run_sync(coding_task)

# Report what the run produced
print(f"Files created/modified: {result.files_changed}")
print(f"Commands executed: {result.commands_run}")
print(f"Agent output: {result.output}")
2. Code Mode
Code Mode enables file-aware agent operations — reading existing code, writing new files, editing specific sections, and executing shell commands in a sandboxed environment.
2.1 File Read/Write/Edit Tools
from pydantic_ai import Agent
from pydantic_ai.harness import Harness, HarnessConfig
from pydantic_ai.harness.tools import FileReadTool, FileWriteTool, FileEditTool
# Harness with explicit file tools: one tool each for reading, writing, editing
file_tools = [
    FileReadTool(allowed_extensions=[".py", ".txt", ".md", ".json"]),
    FileWriteTool(require_confirmation=False),
    FileEditTool(max_edit_lines=50),
]
config = HarnessConfig(workspace_dir="/path/to/project", tools=file_tools)

editor_agent = Agent("openai:gpt-4o", system_prompt="You are a code editor.")
harness = Harness(agent=editor_agent, config=config)

# Agent can now read, write, and edit files
edit_task = "Read main.py, add type hints to all functions, and write the result back."
result = harness.run_sync(edit_task)
print(f"Output: {result.output}")
print(f"Files modified: {result.files_changed}")
2.2 Shell Execution in Sandboxed Environment
from pydantic_ai import Agent
from pydantic_ai.harness import Harness, HarnessConfig
from pydantic_ai.harness.tools import ShellTool
# Shell tool with explicit allowlist
shell_tool = ShellTool(
    allowed_commands=["python", "pip", "pytest", "ruff", "mypy"],
    timeout_seconds=30,
    max_output_bytes=10_000,
)
config = HarnessConfig(
    workspace_dir="/path/to/project",
    tools=[shell_tool],
    sandbox_mode="docker",  # Isolate execution
)

testing_agent = Agent(
    "openai:gpt-4o",
    system_prompt="You are a testing assistant. Run tests and fix failures.",
)
harness = Harness(agent=testing_agent, config=config)

# Agent runs tests, reads failures, fixes code
fix_task = (
    "Run pytest on the project. If any tests fail, read the failing test, "
    "understand the issue, fix the source code, and re-run to confirm."
)
result = harness.run_sync(fix_task)
print(f"Commands run: {result.commands_run}")
print(f"Final test status: {result.output}")
Always use sandbox_mode="docker" in production. The "local" mode executes commands directly on your machine — use it only during development, with trusted prompts. Never allow rm, curl, or other network commands without an explicit approval workflow.
3. Coding Agent Skills
Skills are reusable capability modules that coding agents can activate based on context. Instead of loading all tools for every task, skills are selected dynamically based on the task type, project language, and available frameworks.
3.1 Defining & Composing Skills
from pydantic_ai.harness import Skill, SkillRegistry
from pydantic import BaseModel
class TestingSkill(Skill):
    """Skill for writing and running tests."""

    name = "testing"
    description = "Write unit tests, run test suites, analyze coverage"
    # Keywords for this skill; presumably matched against the user prompt by
    # SkillRegistry.select_skills — confirm against the registry.
    triggers = ["test", "coverage", "pytest", "unittest"]

    def get_system_prompt_addition(self) -> str:
        """Return the testing guidance text contributed by this skill."""
        return (
            "When writing tests: use pytest conventions, include edge cases, "
            "aim for >80% coverage, use fixtures for setup/teardown."
        )

    def get_tools(self) -> list:
        """Return the tool names this skill relies on, as a list of strings."""
        return [
            "file_read",
            "file_write",
            "shell_run_pytest",
            "shell_run_coverage",
        ]
class RefactoringSkill(Skill):
    """Skill for code refactoring operations."""

    name = "refactoring"
    description = "Refactor code for clarity, performance, and maintainability"
    # Keywords for this skill; presumably matched against the user prompt by
    # SkillRegistry.select_skills — confirm against the registry.
    triggers = ["refactor", "clean up", "simplify", "optimize"]

    def get_system_prompt_addition(self) -> str:
        """Return the refactoring guidance text contributed by this skill."""
        return (
            "When refactoring: preserve behavior, run tests before and after, "
            "prefer small incremental changes, explain each transformation."
        )

    def get_tools(self) -> list:
        """Return the tool names this skill relies on, as a list of strings."""
        return ["file_read", "file_edit", "shell_run_pytest", "shell_run_ruff"]
# Register one instance of each skill
registry = SkillRegistry()
for skill_cls in (TestingSkill, RefactoringSkill):
    registry.register(skill_cls())

# Skills are auto-selected based on user input.
# Expected output:
#   Selected skills: ['testing']
#   Selected skills: ['refactoring']
for user_request in (
    "Write tests for the authentication module",
    "Refactor the database layer for clarity",
):
    selected = registry.select_skills(user_request)
    print(f"Selected skills: {[s.name for s in selected]}")
from pydantic_ai import Agent
from pydantic_ai.harness import Harness, HarnessConfig, SkillRegistry
# Full harness with skill-based tool selection
registry = SkillRegistry()
# ... register skills ...

config = HarnessConfig(
    workspace_dir="/path/to/project",
    skill_registry=registry,
    auto_select_skills=True,  # Dynamically select based on prompt
)
senior_agent = Agent("openai:gpt-4o", system_prompt="You are a senior developer.")
harness = Harness(agent=senior_agent, config=config)

# Skills loaded dynamically per request
skill_task = "Add comprehensive tests for the user service module"
result = harness.run_sync(skill_task)
print(f"Skills activated: {result.skills_used}")
print(f"Output: {result.output}")
4. Gateway in Production
The PydanticAI Gateway provides a unified API layer in front of multiple model providers. Teams route all LLM traffic through the Gateway for centralized authentication, rate limiting, cost tracking, and model fallback.
4.1 Authentication & Rate Limiting
from pydantic_ai.gateway import Gateway, GatewayConfig, RateLimitPolicy
# Production gateway configuration, assembled piece by piece

# Each provider names the environment variable that holds its API key
provider_settings = {
    "openai": {"api_key_env": "OPENAI_API_KEY"},
    "anthropic": {"api_key_env": "ANTHROPIC_API_KEY"},
    "google": {"api_key_env": "GEMINI_API_KEY"},
}

auth_settings = {
    "type": "api_key",
    "header": "X-Gateway-Key",
    "keys_env": "GATEWAY_API_KEYS",  # Comma-separated valid keys
}

per_user_policy = RateLimitPolicy(
    scope="per_user",
    requests_per_minute=60,
    tokens_per_minute=100_000,
)
per_team_policy = RateLimitPolicy(
    scope="per_team",
    requests_per_minute=300,
    tokens_per_minute=500_000,
)

config = GatewayConfig(
    providers=provider_settings,
    auth=auth_settings,
    rate_limits=[per_user_policy, per_team_policy],
)
gateway = Gateway(config)

print(f"Gateway configured with {len(config.providers)} providers")
print(f"Rate limits: {len(config.rate_limits)} policies")
print("Start with: gateway.serve(host='0.0.0.0', port=8080)")
4.2 Cost Allocation & Usage Tracking
Zero-Downtime Agent Deployment
A fintech company deploys PydanticAI agents using blue-green deployment: new versions are deployed alongside old ones, traffic gradually shifts (10% → 50% → 100%) with automated quality checks at each stage. If evaluation scores drop, traffic automatically routes back to the old version. Result: zero customer-visible incidents in 8 months of weekly deployments.
from pydantic_ai.gateway import Gateway, UsageTracker
# Usage tracking for cost allocation
# NOTE(review): the connection string embeds placeholder credentials; a real
# deployment should load it from configuration instead of source code.
usage_db_url = "postgresql://user:pass@localhost/gateway"
tracker = UsageTracker(
    storage="postgresql",  # or "sqlite", "redis"
    connection_string=usage_db_url,
)
# Query usage by team/user
import asyncio
async def check_usage(team: str = "engineering", period: str = "2026-05") -> None:
    """Print a usage and cost summary for one team over one period.

    Reads from the module-level ``tracker``. The defaults reproduce the
    original hard-coded query, so ``check_usage()`` behaves as before.

    Args:
        team: Team name forwarded to ``tracker.get_usage``.
        period: Period forwarded to ``tracker.get_usage``; the default looks
            like a ``YYYY-MM`` month — confirm the accepted formats.
    """
    usage = await tracker.get_usage(team=team, period=period)

    print(f"Team: {team}")
    print(f" Total requests: {usage.total_requests}")
    print(f" Total tokens: {usage.total_tokens:,}")
    print(f" Estimated cost: ${usage.estimated_cost:.2f}")
    print(" By model:")
    # One line per model with its request count and cost
    for model, stats in usage.by_model.items():
        print(f" {model}: {stats.requests} requests, ${stats.cost:.2f}")


asyncio.run(check_usage())
5. Production Deployment Patterns
Deploying AI agents to production requires the same engineering rigor as any critical system: input validation, monitoring, graceful degradation, cost controls, and security hardening.
5.1 Security & Input Sanitization
from pydantic_ai import Agent
from pydantic import BaseModel, field_validator
import re
class SanitizedInput(BaseModel):
    """Validated and sanitized user input."""

    query: str
    user_id: str
    session_id: str

    @field_validator("query")
    @classmethod
    def sanitize_query(cls, v: str) -> str:
        """Trim the query and reject it when it is too long or looks like an injection.

        Nothing is removed from the text apart from surrounding whitespace;
        a query that fails a check is rejected outright.

        Args:
            v: Raw query string.

        Returns:
            The query with leading and trailing whitespace stripped.

        Raises:
            ValueError: If the stripped query exceeds 10,000 characters or
                matches one of the blocked patterns.
        """
        v = v.strip()
        if len(v) > 10_000:
            raise ValueError("Query too long (max 10,000 characters)")

        # Block common injection patterns (case-insensitive, matched anywhere).
        # NOTE(review): this is a heuristic deny-list. "system:" also matches
        # ordinary text such as "operating system: Linux" — confirm the
        # false-positive rate is acceptable.
        injection_patterns = [
            r"ignore\s+previous\s+instructions",
            r"system:\s*",
            r"<\|im_start\|>",
        ]
        if any(re.search(pattern, v, re.IGNORECASE) for pattern in injection_patterns):
            raise ValueError("Input contains disallowed patterns")
        return v
# Production agent with input validation
SUPPORT_SYSTEM_PROMPT = "You are a customer support agent for an e-commerce platform."
agent = Agent("openai:gpt-4o-mini", system_prompt=SUPPORT_SYSTEM_PROMPT)
def handle_request(raw_query: str, user_id: str, session_id: str) -> str:
    """Production request handler with validation.

    Builds a ``SanitizedInput`` from the raw values, then sends the validated
    query to the module-level ``agent``.

    Args:
        raw_query: Query text as received from the caller.
        user_id: Identifier of the requesting user.
        session_id: Identifier of the current session.

    Returns:
        The ``output`` of the agent run.

    Raises:
        pydantic.ValidationError: When ``SanitizedInput`` rejects the input;
            pydantic wraps a validator's ``ValueError`` in this type.
    """
    # Validate first so a rejected query never reaches the model
    validated = SanitizedInput(
        query=raw_query,
        user_id=user_id,
        session_id=session_id,
    )
    result = agent.run_sync(validated.query)
    return result.output
# Test: send one sample request through the handler
sample_request = {
    "raw_query": "What is your return policy?",
    "user_id": "user_123",
    "session_id": "sess_abc",
}
response = handle_request(**sample_request)
print(f"Response: {response}")
5.2 Circuit Breakers, Fallbacks & Graceful Degradation
from pydantic_ai import Agent
from pydantic import BaseModel
import asyncio
import time
class CircuitBreaker:
    """Simple circuit breaker for model provider failures.

    States:
        closed: calls are allowed.
        open: calls are refused until more than ``reset_timeout`` seconds
            have passed since the last recorded failure.
        half-open: calls are allowed again as a trial. A recorded success
            closes the breaker. ``failure_count`` is not reset on entering
            this state, so a single recorded failure re-opens it.

    Elapsed time is measured with ``time.monotonic()``, so system clock
    adjustments cannot shorten or lengthen the open period.
    """

    def __init__(self, failure_threshold: int = 5, reset_timeout: float = 60.0):
        """Create a closed breaker.

        Args:
            failure_threshold: Failures since the last success at which the
                breaker opens.
            reset_timeout: Seconds the breaker stays open before a trial call
                is allowed.
        """
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.failure_count = 0
        # Monotonic-clock reading taken at the most recent failure
        self.last_failure_time = 0.0
        self.state = "closed"  # closed, open, half-open

    def can_execute(self) -> bool:
        """Return True when a call may be attempted.

        An open breaker whose timeout has elapsed moves to half-open here.
        Every call made while half-open is allowed until an outcome is
        recorded; this method does not limit it to a single attempt.
        """
        if self.state == "open":
            elapsed = time.monotonic() - self.last_failure_time
            if elapsed > self.reset_timeout:
                self.state = "half-open"
                return True
            return False
        # closed and half-open both allow the call
        return True

    def record_success(self) -> None:
        """Reset the failure count and close the breaker."""
        self.failure_count = 0
        self.state = "closed"

    def record_failure(self) -> None:
        """Count a failure and open the breaker once the threshold is reached."""
        self.failure_count += 1
        self.last_failure_time = time.monotonic()
        if self.failure_count >= self.failure_threshold:
            self.state = "open"
class ProductionAgent:
    """Production-ready agent wrapper with fallbacks and circuit breakers."""

    def __init__(self):
        """Create the primary and fallback agents and the breaker guarding the primary."""
        self.primary = Agent("openai:gpt-4o-mini", system_prompt="You are helpful.")
        self.fallback = Agent("anthropic:claude-sonnet-4-20250514", system_prompt="You are helpful.")
        self.circuit_breaker = CircuitBreaker(failure_threshold=3, reset_timeout=30)

    async def run(self, prompt: str) -> str:
        """Execute with circuit breaker and fallback.

        The primary agent is tried only when the breaker allows it. If the
        primary is skipped or fails, the fallback agent is tried. If that
        fails too, a fixed apology message is returned instead of raising.

        Args:
            prompt: Prompt forwarded unchanged to whichever agent runs.

        Returns:
            The ``output`` of the first agent that succeeds, or the apology
            message when neither does.
        """
        # Try primary model
        if self.circuit_breaker.can_execute():
            try:
                result = await self.primary.run(prompt)
                self.circuit_breaker.record_success()
                return result.output
            except Exception as e:
                # Deliberately broad: any primary failure counts against the
                # breaker and falls through to the fallback below.
                self.circuit_breaker.record_failure()
                print(f"Primary failed: {e}")

        # Fallback to secondary model
        try:
            result = await self.fallback.run(prompt)
            return result.output
        except Exception as e:
            # Graceful degradation: report the failure and answer anyway
            print(f"Fallback also failed: {e}")
            return "I'm temporarily unable to process your request. Please try again shortly."
# Usage: build the wrapper and describe its configuration
agent = ProductionAgent()
breaker_state = agent.circuit_breaker.state
print(f"Circuit breaker state: {breaker_state}")
for summary_line in (
    "Primary: openai:gpt-4o-mini",
    "Fallback: anthropic:claude-sonnet-4-20250514",
    "Graceful degradation: returns friendly message if both fail",
):
    print(summary_line)
from pydantic_ai import Agent
import logfire
import time
# Production monitoring wrapper
# Logfire is configured once at module import, before the MonitoredAgent class
# below opens any spans.
# NOTE(review): called with no arguments — presumably settings are picked up
# from the environment or project config; confirm for your deployment.
logfire.configure()
class MonitoredAgent:
    """Agent with comprehensive production monitoring.

    Counts requests and errors and accumulates latency for every request,
    failed ones included, so ``avg_latency_ms`` and ``error_rate`` share the
    same denominator.
    """

    def __init__(self, model: str, name: str):
        """Create the wrapped agent and zero the counters.

        Args:
            model: Model identifier passed to ``Agent``.
            name: Label attached to each request span as ``agent_name``.
        """
        self.agent = Agent(model, system_prompt="You are a helpful assistant.")
        self.name = name
        self.request_count = 0
        self.error_count = 0
        self.total_latency_ms = 0.0

    def run(self, prompt: str) -> str:
        """Run with metrics collection.

        Args:
            prompt: Prompt forwarded to the wrapped agent.

        Returns:
            The ``output`` of the agent run.

        Raises:
            Exception: Whatever the wrapped agent raises, re-raised after the
                error has been counted and logged.
        """
        self.request_count += 1
        # perf_counter is a monotonic high-resolution clock meant for durations
        start = time.perf_counter()
        with logfire.span(
            "agent_request",
            agent_name=self.name,
            request_number=self.request_count,
        ):
            try:
                result = self.agent.run_sync(prompt)
            except Exception as e:
                self.error_count += 1
                # Failed calls take time too; without this the average divides
                # success-only latency by all requests and reads too low.
                self.total_latency_ms += (time.perf_counter() - start) * 1000
                logfire.error(
                    "Agent request failed",
                    error=str(e),
                    error_rate=self.error_rate,
                )
                raise

            latency_ms = (time.perf_counter() - start) * 1000
            self.total_latency_ms += latency_ms
            logfire.info(
                "Agent request succeeded",
                latency_ms=latency_ms,
                output_length=len(result.output),
            )
            return result.output

    @property
    def avg_latency_ms(self) -> float:
        """Mean latency in milliseconds over all requests; 0.0 before the first."""
        if self.request_count == 0:
            return 0.0
        return self.total_latency_ms / self.request_count

    @property
    def error_rate(self) -> float:
        """Fraction of requests that raised; 0.0 before the first request."""
        if self.request_count == 0:
            return 0.0
        return self.error_count / self.request_count
# Deploy monitored agent
agent = MonitoredAgent(model="openai:gpt-4o-mini", name="support-agent")
print(f"Monitored agent '{agent.name}' ready")
for status_line in (
    "Metrics: request_count, error_count, avg_latency_ms, error_rate",
    "All spans visible in Logfire dashboard",
):
    print(status_line)
PydanticAI SDK Track Complete!
Congratulations on completing the 14-part PydanticAI SDK track! You’ve mastered type-safe agents, dependency injection, multi-provider support, tools & toolsets, Pydantic Graph workflows, evaluation with Pydantic Evals, MCP integration, durable execution, and production deployment. Return to the AI App Dev Series Hub to explore other SDK tracks.