1. Structured Output via tool_use
Before Claude’s native JSON-output features were added, a common structured-output pattern was the tool_use trick: define a “tool” with the schema you want, force Claude to call it with tool_choice: {"type": "tool", "name": "..."}, then extract the structured arguments. This remains useful in tool-centric workflows, but for direct response formatting the current API also supports native JSON outputs via output_config.format.
import anthropic
import json
client = anthropic.Anthropic()
# Schema-as-tool: this "tool" only exists to describe the JSON object we want
# back. Claude fills in its arguments; nothing is ever executed.
extraction_tool = {
    "name": "extract_info",
    "description": "Extract structured information from the text.",
    "input_schema": {
        "type": "object",
        "properties": {
            "company_name": {
                "type": "string",
                "description": "Company mentioned",
            },
            "revenue": {
                "type": "number",
                "description": "Revenue in millions USD",
            },
            "growth_rate": {
                "type": "number",
                "description": "YoY growth as decimal",
            },
            "sentiment": {
                "type": "string",
                "enum": ["positive", "negative", "neutral"],
            },
            "key_products": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Main products mentioned",
            },
        },
        # Only these two fields are mandatory; the rest may be omitted.
        "required": ["company_name", "sentiment"],
    },
}
# Force a call to extract_info so the reply carries a tool_use block whose
# arguments follow the schema declared above.
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    tools=[extraction_tool],
    tool_choice={"type": "tool", "name": "extract_info"},
    messages=[
        {
            "role": "user",
            "content": "Analyze: TechCorp reported $2.4B revenue, up 18% YoY, driven by their cloud platform and AI suite.",
        }
    ],
)

# The structured payload is the `input` of the tool_use content block.
tool_block = next(block for block in response.content if block.type == "tool_use")
structured_data = tool_block.input
print(json.dumps(structured_data, indent=2))
# Example output:
# {"company_name": "TechCorp", "revenue": 2400, "growth_rate": 0.18, "sentiment": "positive", "key_products": ["cloud platform", "AI suite"]}
Key points: (1) define a tool whose input_schema is the output schema you want, (2) set tool_choice: {"type": "tool", "name": "..."} to force it, (3) the tool is never executed — just parse tool_block.input. This is typically more reliable than prompt-only formatting, but application-side validation is still the right safety net.
2. Pydantic Validation
from pydantic import BaseModel, Field, field_validator
from typing import Optional
import anthropic
import json
# Define your expected output as a Pydantic model
class TicketClassification(BaseModel):
    """Structured classification of a support ticket.

    Every field documented as "One of: ..." has a validator that rejects
    values outside that set, so an out-of-vocabulary answer from the model
    fails validation instead of flowing downstream.
    """

    category: str = Field(description="One of: billing, technical, account, shipping")
    priority: str = Field(description="One of: low, medium, high, critical")
    sentiment: str = Field(description="One of: positive, neutral, frustrated, angry")
    suggested_action: str = Field(description="Recommended next step")
    confidence: float = Field(ge=0.0, le=1.0, description="Model confidence 0-1")

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        """Reject categories outside the documented set."""
        allowed = {"billing", "technical", "account", "shipping"}
        if v not in allowed:
            raise ValueError(f"Category must be one of {allowed}")
        return v

    @field_validator("priority")
    @classmethod
    def validate_priority(cls, v: str) -> str:
        """Reject priorities outside the documented set."""
        allowed = {"low", "medium", "high", "critical"}
        if v not in allowed:
            raise ValueError(f"Priority must be one of {allowed}")
        return v

    @field_validator("sentiment")
    @classmethod
    def validate_sentiment(cls, v: str) -> str:
        """Reject sentiments outside the documented set."""
        allowed = {"positive", "neutral", "frustrated", "angry"}
        if v not in allowed:
            raise ValueError(f"Sentiment must be one of {allowed}")
        return v
def classify_with_validation(text: str, max_retries: int = 2) -> TicketClassification:
    """Classify a ticket, validating the result and retrying with feedback.

    Claude is forced to call the ``classify_ticket`` tool, whose input schema
    is generated from ``TicketClassification``. The tool arguments are then
    validated with Pydantic. When validation fails, the failed tool call and
    the validation error are appended to the conversation as an error
    ``tool_result`` so the next attempt can correct itself.

    Args:
        text: Raw ticket text to classify.
        max_retries: Number of additional attempts after the first one.

    Returns:
        A validated ``TicketClassification``.

    Raises:
        ValueError: If no attempt produced a valid classification. The last
            underlying failure is attached as ``__cause__``.
    """
    # pydantic is already a dependency of this snippet; imported locally so
    # the block stays self-contained.
    from pydantic import ValidationError

    client = anthropic.Anthropic()
    # Convert Pydantic schema to tool schema
    tool = {
        "name": "classify_ticket",
        "description": "Classify a support ticket",
        "input_schema": TicketClassification.model_json_schema(),
    }
    messages = [{"role": "user", "content": f"Classify this ticket: {text}"}]
    last_error = None

    for _attempt in range(max_retries + 1):
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=512,
            tools=[tool],
            tool_choice={"type": "tool", "name": "classify_ticket"},
            messages=messages,
        )
        tool_block = next(
            (b for b in response.content if b.type == "tool_use"), None
        )
        if tool_block is None:
            # Nothing to validate or give feedback on; just try again.
            last_error = ValueError("response contained no tool_use block")
            continue

        try:
            return TicketClassification.model_validate(tool_block.input)
        except ValidationError as e:
            last_error = e
            # Feed the failure back: replay the assistant turn, then answer
            # its tool call with an error result describing what was wrong.
            messages.append({"role": "assistant", "content": response.content})
            messages.append({
                "role": "user",
                "content": [{
                    "type": "tool_result",
                    "tool_use_id": tool_block.id,
                    "is_error": True,
                    "content": (
                        f"Validation failed: {e}. "
                        "Call classify_ticket again with corrected arguments."
                    ),
                }],
            })

    raise ValueError(
        f"Failed after {max_retries} retries: {last_error}"
    ) from last_error
Invoice Processing Pipeline
An accounting firm built an agent that extracts structured data from PDF invoices (vendor, amount, date, line items, tax) with 99.5% accuracy. Key pattern: defining Pydantic models for the expected output and using Claude’s tool_use to enforce the schema.
3. Message Batches API
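The Message Batches API processes up to 100,000 requests per batch asynchronously at 50% of the standard cost, with results available within 24 hours. It is ideal for classification, extraction, and evaluation tasks that do not need an immediate response: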
import anthropic
import json
client = anthropic.Anthropic()
import time

# Tool schema that every batched request is forced to call. Its name must
# match the "name" given in tool_choice below.
classify_tool = {
    "name": "classify_ticket",
    "description": "Classify a support ticket",
    "input_schema": {
        "type": "object",
        "properties": {
            "category": {
                "type": "string",
                "enum": ["billing", "technical", "account", "shipping"],
            },
            "priority": {
                "type": "string",
                "enum": ["low", "medium", "high", "critical"],
            },
        },
        "required": ["category", "priority"],
    },
}

# Prepare batch requests
tickets = [
    "My order #12345 hasn't arrived",
    "How do I reset my password?",
    "I was charged twice for my subscription",
    "Your app crashes on iPhone 15",
]

# One request per ticket; custom_id lets us match results back to inputs.
requests = [
    {
        "custom_id": f"ticket-{i}",
        "params": {
            "model": "claude-sonnet-4-6",
            "max_tokens": 256,
            "tools": [classify_tool],
            "tool_choice": {"type": "tool", "name": "classify_ticket"},
            "messages": [{"role": "user", "content": f"Classify: {ticket}"}],
        },
    }
    for i, ticket in enumerate(tickets)
]

# Submit batch
batch = client.messages.batches.create(requests=requests)
print(f"Batch ID: {batch.id}")
print(f"Status: {batch.processing_status}")

# Poll until the batch reports that processing has ended.
while batch.processing_status != "ended":
    time.sleep(60)
    batch = client.messages.batches.retrieve(batch.id)

# Stream results instead of materialising them all in memory.
for result in client.messages.batches.results(batch.id):
    if result.result.type != "succeeded":
        # Surface failed requests rather than silently dropping them.
        print(f"{result.custom_id}: {result.result.type}")
        continue
    msg = result.result.message
    tool_block = next((b for b in msg.content if b.type == "tool_use"), None)
    if tool_block is None:
        print(f"{result.custom_id}: no tool_use block in response")
        continue
    print(f"{result.custom_id}: {tool_block.input}")
4. Graceful Error Handling
import anthropic
import json
from typing import Optional
def safe_structured_extraction(text: str) -> Optional[dict]:
    """Extract structured data from ``text``, returning None on any failure.

    Forces a call to the ``extract_info`` tool and returns its arguments.
    Every failure path prints a short diagnostic before returning None, so
    callers can tell why nothing came back.

    Args:
        text: The user content to run the extraction on.

    Returns:
        The tool_use arguments, or None if the request failed, the response
        was truncated, or no tool_use block was present.
    """
    client = anthropic.Anthropic()
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            tools=[extraction_tool],
            tool_choice={"type": "tool", "name": "extract_info"},
            messages=[{"role": "user", "content": text}],
        )
    except anthropic.RateLimitError as e:
        # Must precede APIError, which would otherwise catch it as well.
        # NOTE(review): any retries configured on the client are presumably
        # already exhausted here; add caller-side backoff if needed.
        print(f"Rate limited: {e}")
        return None
    except anthropic.APIError as e:
        # Log and handle API errors
        print(f"API error: {e}")
        return None

    # Output was truncated, so the tool arguments may be incomplete.
    if response.stop_reason == "max_tokens":
        print("Warning: response truncated")
        return None

    # Unlikely on a successful forced-tool run, but still worth guarding
    # against in production.
    tool_block = next((b for b in response.content if b.type == "tool_use"), None)
    if tool_block is None:
        print("Warning: no tool_use block in response")
        return None

    return tool_block.input
5. tool_choice Strategies
The tool_choice parameter controls HOW Claude uses tools — from fully autonomous to completely forced. Understanding these modes is essential for reliable structured output in production and a frequent CCA exam topic.
5.1 Auto vs Any vs Forced
import anthropic
import json
client = anthropic.Anthropic()
# Intent classifier expressed as a tool: a closed set of intents plus a
# confidence score bounded to [0, 1]; both fields are mandatory.
classify_tool = {
    "name": "classify_intent",
    "description": "Classify user message intent",
    "input_schema": {
        "type": "object",
        "properties": {
            "intent": {
                "type": "string",
                "enum": ["billing", "technical", "sales", "general"],
            },
            "confidence": {
                "type": "number",
                "minimum": 0,
                "maximum": 1,
            },
        },
        "required": ["intent", "confidence"],
    },
}
# MODE 1: auto (the default). Claude decides for itself whether to call a
# tool or answer in plain text. Pick this when skipping the tool is a
# legitimate outcome.
response_auto = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=200,
    tools=[classify_tool],
    tool_choice={"type": "auto"},
    messages=[{"role": "user", "content": "Hi, how are you?"}],
)
# A greeting is not really classifiable, so a text-only reply is possible.

# MODE 2: any. Claude has to call a tool but may choose which one. Pick this
# when you need a tool-mediated answer and several tools would be acceptable.
response_any = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=200,
    tools=[classify_tool],
    tool_choice={"type": "any"},
    messages=[{"role": "user", "content": "I was charged twice"}],
)
# Only one tool is registered, so this ends up calling classify_intent.

# MODE 3: forced. Claude has to call the named tool. Pick this when you know
# exactly which schema should be populated.
response_forced = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=200,
    tools=[classify_tool],
    tool_choice={"type": "tool", "name": "classify_intent"},
    messages=[{"role": "user", "content": "Can you tell me about enterprise pricing?"}],
)
# A successful run yields a classify_intent call; validate its arguments
# before trusting them.
print(
    "Forced result:",
    next(block for block in response_forced.content if block.type == "tool_use").input,
)
5.2 Enum + “other” Pattern
A common pitfall: strict enums force Claude to pick an option even when none fit. The “enum + other” pattern adds escape hatches for edge cases:
import anthropic
import json
client = anthropic.Anthropic()
# Enum with an escape hatch: "other" is a legal category and a free-text
# field explains it, so an ill-fitting input need not be forced into a wrong
# bucket while the output stays structured.
robust_tool = {
    "name": "categorize_issue",
    "description": "Categorize a customer issue. Use 'other' if no category fits well.",
    "input_schema": {
        "type": "object",
        "properties": {
            "category": {
                "type": "string",
                "enum": ["billing", "technical", "account", "shipping", "other"],
                "description": "Best-fit category. Use 'other' if confidence < 0.7",
            },
            # Nullable and not listed under "required".
            "other_description": {
                "type": ["string", "null"],
                "description": "If category='other', describe the actual category needed. null otherwise.",
            },
            "confidence": {
                "type": "number",
                "minimum": 0,
                "maximum": 1,
            },
        },
        "required": ["category", "confidence"],
    },
}
# Force categorize_issue on a request that fits none of the named categories.
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=200,
    tools=[robust_tool],
    tool_choice={"type": "tool", "name": "categorize_issue"},
    messages=[
        {
            "role": "user",
            "content": "I want to donate my unused subscription credits to charity",
        }
    ],
)

result = next(block for block in response.content if block.type == "tool_use")
print(json.dumps(result.input, indent=2))
# Example output:
# {"category": "other", "other_description": "subscription credit donation/transfer", "confidence": 0.4}
6. Multi-Instance Review Architectures
A single Claude instance reviewing its own output is unreliable — it tends to confirm its own work (confirmation bias). Production systems use independent review instances with separate contexts to catch errors. Think of it like peer review in academia: the reviewer hasn’t seen the author’s thought process, so they evaluate the output fresh.
6.1 Why Self-Review Fails
import anthropic
import json
client = anthropic.Anthropic()
# ❌ ANTI-PATTERN: asking for a review inside the conversation that produced
# the summary. The reviewer shares the author's context window and tends to
# agree with itself.
messages = [
    {
        "role": "user",
        "content": "Summarize this document: [long doc]",
    },
    {
        "role": "assistant",
        "content": "Here's the summary: [summary with errors]",
    },
    {
        "role": "user",
        "content": "Review your summary for accuracy.",
    },
]
# Problem: Claude sees its own earlier reasoning and confirms its own mistakes.
# ✅ CORRECT: review in a separate request that never sees the generation turn
def generate_and_review(document: str) -> dict:
    """Summarize ``document``, then fact-check the summary in a fresh context.

    Two independent requests are made: the first writes the summary; the
    second receives only the source and the finished summary.

    Returns:
        A dict with the summary text ("summary"), the reviewer's text
        ("review"), and the combined input token count of both requests
        ("input_tokens").
    """
    reviewer_system = (
        "You are a fact-checker. Compare the summary against the source document. "
        "List any: (1) factual errors, (2) missing key information, (3) unsupported claims. "
        "If accurate, respond with: {\"errors\": [], \"verdict\": \"pass\"}"
    )

    # Request 1: the author. Produces the summary in its own context.
    generation = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1000,
        system="Summarize documents accurately. Include all key facts.",
        messages=[{"role": "user", "content": f"Summarize:\n{document}"}],
    )
    summary = generation.content[0].text

    # Request 2: the reviewer. Gets source and summary only, none of the
    # author's conversation.
    review_prompt = f"Source:\n{document}\n\nSummary to review:\n{summary}"
    review = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=500,
        temperature=0,
        system=reviewer_system,
        messages=[{"role": "user", "content": review_prompt}],
    )

    total_input_tokens = generation.usage.input_tokens + review.usage.input_tokens
    return {
        "summary": summary,
        "review": review.content[0].text,
        "input_tokens": total_input_tokens,
    }
# Run the pipeline on a sample document and show the first 100 characters of
# each output. The reviewer starts from a fresh context, so it has no
# confirmation bias.
result = generate_and_review("TechCorp reported $2.4B revenue in 2024, up 18% YoY...")
for line in (
    f"Summary: {result['summary'][:100]}...",
    f"Review: {result['review'][:100]}...",
):
    print(line)
6.2 Per-File vs Cross-File Review Passes
For code review or document processing, use two passes: a local pass (each file independently) and an integration pass (cross-file consistency):
import anthropic
import json
client = anthropic.Anthropic()
def multi_pass_code_review(files: dict) -> dict:
    """Two-pass review: local (per-file) then integration (cross-file).

    Args:
        files: Mapping of filename to file content.

    Returns:
        A dict with "local_reviews" (filename -> review text) and
        "integration_review" (text of the cross-file review). When ``files``
        is empty, no requests are made and both values are empty.
    """
    # Nothing to review: avoid sending an integration request whose user
    # message would be empty.
    if not files:
        return {"local_reviews": {}, "integration_review": ""}

    # Pass 1: local review. Each file gets its own request and context, so
    # the calls are independent of one another (run sequentially here).
    local_reviews = {}
    for filename, content in files.items():
        review = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=500,
            system="Review this file for bugs, security issues, and style problems.",
            messages=[{"role": "user", "content": f"File: {filename}\n```\n{content}\n```"}],
        )
        local_reviews[filename] = review.content[0].text

    # Pass 2: integration review. Sees every file plus its local findings.
    # Catches: broken imports, type mismatches across files, circular dependencies
    all_context = "\n\n".join(
        f"## {fname}\n```\n{content}\n```\nLocal findings: {local_reviews[fname]}"
        for fname, content in files.items()
    )
    integration = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1000,
        system=(
            "Review these files as a system. Focus on CROSS-FILE issues: "
            "broken imports, type mismatches, inconsistent interfaces, missing error handling "
            "at module boundaries. Ignore issues already caught in local findings."
        ),
        messages=[{"role": "user", "content": all_context}],
    )
    return {
        "local_reviews": local_reviews,
        "integration_review": integration.content[0].text,
    }
# Example: two small modules where api.py imports from auth.py, so the
# integration pass has a cross-file relationship to inspect.
files = {
    "auth.py": "def verify_token(token: str) -> dict: ...",
    "api.py": "from auth import verify_token\ndef get_user(token): ...",
}

result = multi_pass_code_review(files)
# Show only the first 200 characters of the cross-file findings.
print("Integration issues:", result["integration_review"][:200])
Next in the SDK Track
In Part 12: Context Preservation, we tackle one of the most critical challenges in agentic systems: managing the context window. Covers token counting, summarization strategies, sliding windows, and the 200K token limit. CCA Domain 4 Tasks 4.5 and 4.6.