1. Plan Mode
1.1 Plan vs Execute
Plan mode separates thinking from doing. In plan mode, Claude reads files and runs shell commands to explore, but does not edit source files. Permission prompts still apply as in default mode. This provides a safe exploration phase before committing to action:
# Start a Claude Code session in plan mode, passing the task as the initial prompt.
# Claude Code plan mode — use --permission-mode plan (NOT --plan)
claude --permission-mode plan "Refactor the auth module to use JWT instead of sessions"
# Alternative: prefix a single prompt with /plan
# /plan Refactor the auth module to use JWT instead of sessions
# Or press Shift+Tab in interactive mode to toggle into plan mode
# Set plan mode as the project default (.claude/settings.json):
# { "permissions": { "defaultMode": "plan" } }
flowchart LR
subgraph Phase1["Phase 1: Plan (Read-Only)"]
R["Read files"]
S["Search codebase"]
A["Analyze architecture"]
P["Propose changes"]
end
subgraph Gate["Human Gate"]
AP{"Approve?"}
end
subgraph Phase2["Phase 2: Execute (Full Access)"]
W["Write/Edit files"]
B["Run Bash commands"]
T["Run tests"]
end
Phase1 --> Gate
AP -->|"Yes"| Phase2
AP -->|"No"| REV["Revise Plan"]
REV --> Phase1
style Phase1 fill:#f8f9fa,stroke:#3B9797
style Gate fill:#f8f9fa,stroke:#BF092F
style Phase2 fill:#f8f9fa,stroke:#16476A
import json
from pathlib import Path
import anthropic
# Shared Anthropic API client used for every model call in this example.
client = anthropic.Anthropic()
# Directory that tool paths are resolved against. Defaults to the current
# working directory; plan_mode_agent() reassigns it per run.
WORKSPACE_ROOT = Path.cwd().resolve()
def _object_schema(properties: dict, required: list) -> dict:
    """Wrap property definitions in a JSON Schema ``object`` envelope."""
    return {"type": "object", "properties": properties, "required": required}


def _max_results_property() -> dict:
    """Return a fresh schema for the optional result cap (default 20)."""
    return {
        "type": "integer",
        "description": "Maximum number of matches to return",
        "default": 20,
    }


# Tool definitions offered to the model in plan mode: read, glob and grep only.
READ_ONLY_TOOLS = [
    {
        "name": "read_file",
        "description": "Read a UTF-8 text file from the workspace.",
        "input_schema": _object_schema(
            {
                "path": {
                    "type": "string",
                    "description": "Relative path from the workspace root",
                },
            },
            ["path"],
        ),
    },
    {
        "name": "glob_search",
        "description": "Find files by glob pattern, for example src/**/*.py.",
        "input_schema": _object_schema(
            {
                "pattern": {
                    "type": "string",
                    "description": "A glob pattern relative to the workspace root",
                },
                "max_results": _max_results_property(),
            },
            ["pattern"],
        ),
    },
    {
        "name": "grep_search",
        "description": "Search text files for an exact substring and return matching lines.",
        "input_schema": _object_schema(
            {
                "query": {
                    "type": "string",
                    "description": "Exact substring to search for",
                },
                "max_results": _max_results_property(),
            },
            ["query"],
        ),
    },
]
def _resolve_workspace_path(relative_path: str) -> Path:
    """Resolve *relative_path* against the workspace root, refusing escapes.

    Returns the fully resolved path. Raises ValueError when the resolved
    location is neither the workspace root itself nor somewhere beneath it.
    """
    resolved = (WORKSPACE_ROOT / relative_path).resolve()
    stays_inside = resolved == WORKSPACE_ROOT or WORKSPACE_ROOT in resolved.parents
    if stays_inside:
        return resolved
    raise ValueError(f"Path escapes workspace root: {relative_path}")
def _clamp_limit(value, default: int = 20) -> int:
    """Coerce a model-supplied ``max_results`` into a non-negative integer."""
    if value is None:
        return default
    return max(0, int(value))


def _is_inside_workspace(path: Path) -> bool:
    """Return True when *path*, with symlinks resolved, lies within the workspace root."""
    resolved = path.resolve()
    return resolved == WORKSPACE_ROOT or WORKSPACE_ROOT in resolved.parents


def execute_read_only_tool(name: str, input_data: dict) -> dict:
    """Execute a read-only tool locally and return JSON-serializable output.

    Supported tools:
      * ``read_file``   -- returns the first 12000 characters of a UTF-8 file.
      * ``glob_search`` -- returns workspace-relative paths of files matching a
        glob pattern; ``count`` is the total number of matches before capping.
      * ``grep_search`` -- returns lines containing an exact substring, stopping
        as soon as ``max_results`` hits were collected.

    Every tool is confined to the workspace: results whose resolved location
    falls outside ``WORKSPACE_ROOT`` (via ``..`` in a glob pattern or via a
    symlink) are dropped, matching the restriction ``read_file`` enforces.

    Raises:
        ValueError: for an unknown tool name or a path escaping the workspace.
        KeyError: when a required input field is missing.
        OSError / UnicodeDecodeError: when ``read_file`` cannot read the file.
    """
    if name == "read_file":
        file_path = _resolve_workspace_path(input_data["path"])
        # Read only the prefix that is returned instead of loading the whole file.
        with file_path.open(encoding="utf-8") as handle:
            content = handle.read(12000)
        return {
            "path": str(file_path.relative_to(WORKSPACE_ROOT)),
            "content": content,
        }

    if name == "glob_search":
        pattern = input_data["pattern"]
        limit = _clamp_limit(input_data.get("max_results", 20))
        matches = [
            str(path.relative_to(WORKSPACE_ROOT)).replace("\\", "/")
            for path in WORKSPACE_ROOT.glob(pattern)
            # A pattern such as "../*" would otherwise list files outside the root.
            if path.is_file() and _is_inside_workspace(path)
        ]
        return {"matches": matches[:limit], "count": len(matches)}

    if name == "grep_search":
        query = input_data["query"]
        limit = _clamp_limit(input_data.get("max_results", 20))
        matches = []
        if limit == 0:
            # Nothing may be returned, so skip the filesystem walk entirely.
            return {"matches": matches, "count": 0}
        for path in WORKSPACE_ROOT.rglob("*"):
            if not path.is_file() or not _is_inside_workspace(path):
                continue
            try:
                text = path.read_text(encoding="utf-8")
            except (UnicodeDecodeError, OSError):
                # Binary or unreadable files are skipped on purpose (best effort).
                continue
            relative = str(path.relative_to(WORKSPACE_ROOT)).replace("\\", "/")
            for line_number, line in enumerate(text.splitlines(), start=1):
                if query not in line:
                    continue
                matches.append({
                    "path": relative,
                    "line_number": line_number,
                    "line": line.strip(),
                })
                if len(matches) >= limit:
                    return {"matches": matches, "count": len(matches)}
        return {"matches": matches, "count": len(matches)}

    raise ValueError(f"Unknown tool: {name}")
def _tool_result_for(block) -> dict:
    """Run one ``tool_use`` block and wrap the outcome as a ``tool_result`` entry."""
    try:
        tool_output = execute_read_only_tool(block.name, block.input)
    except Exception as exc:
        # Tool-dispatch boundary: a bad path, missing file or unknown tool is
        # the model's mistake to correct, so report it back instead of
        # aborting the whole planning run.
        return {
            "type": "tool_result",
            "tool_use_id": block.id,
            "content": f"{type(exc).__name__}: {exc}",
            "is_error": True,
        }
    return {
        "type": "tool_result",
        "tool_use_id": block.id,
        "content": json.dumps(tool_output, indent=2),
    }


def run_read_only_loop(task: str, read_only_tools: list, system: str, max_iterations: int = 8) -> str:
    """Run a bounded read-only agentic loop until Claude returns the final plan.

    Each iteration sends the conversation to the model. If the reply contains
    tool calls, they are executed locally and their results (or error reports)
    are appended as the next user turn. The first reply without tool calls ends
    the loop.

    Args:
        task: Initial user message describing what to plan.
        read_only_tools: Tool definitions passed to the API.
        system: System prompt for every request.
        max_iterations: Upper bound on model round-trips.

    Returns:
        The text blocks of the final reply, joined by newlines and stripped.

    Raises:
        RuntimeError: if the model is still calling tools after
            ``max_iterations`` round-trips.
    """
    messages = [{"role": "user", "content": task}]
    for _ in range(max_iterations):
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=8192,
            system=system,
            tools=read_only_tools,
            messages=messages,
        )
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        text_blocks = []
        for block in response.content:
            if block.type == "tool_use":
                tool_results.append(_tool_result_for(block))
            elif block.type == "text":
                text_blocks.append(block.text)
        if tool_results:
            messages.append({"role": "user", "content": tool_results})
            continue
        return "\n".join(text_blocks).strip()
    raise RuntimeError("Plan mode exceeded the maximum number of iterations")
def plan_mode_agent(task: str, read_only_tools: list, workspace_root: str = ".") -> dict:
    """Produce an implementation plan for *task* using only the given tools.

    Side effect: rebinds the module-level ``WORKSPACE_ROOT`` to the resolved
    *workspace_root* before the planning loop runs.

    Returns a dict with the plan text under ``"plan"``, the fixed status
    ``"awaiting_approval"``, and the resolved workspace root as a string.
    """
    global WORKSPACE_ROOT
    WORKSPACE_ROOT = Path(workspace_root).resolve()

    planning_prompt = f"""You are in PLAN MODE. Your job is to analyze the codebase and produce
a detailed implementation plan. You can READ files and SEARCH, but you CANNOT write
or execute commands.
Task: {task}
Output a structured plan with:
1. Files to modify (with specific line ranges)
2. New files to create (with purpose)
3. Tests to add/modify
4. Execution order (dependencies between changes)
5. Risks and rollback strategy"""

    return {
        "plan": run_read_only_loop(task, read_only_tools, planning_prompt),
        "status": "awaiting_approval",
        "workspace_root": str(WORKSPACE_ROOT),
    }
if __name__ == "__main__":
    # Demo entry point: plan a sample refactor in the current directory and
    # print the resulting plan record as JSON.
    demo_request = {
        "task": "Refactor the auth module to use JWT instead of sessions.",
        "read_only_tools": READ_ONLY_TOOLS,
        "workspace_root": ".",
    }
    print(json.dumps(plan_mode_agent(**demo_request), indent=2))
1.2 Plan Approval Flow
import anthropic
import json
client = anthropic.Anthropic()
def plan_then_execute(task: str, all_tools: list, read_only_tools: list) -> str:
    """Two-phase approach: plan first, execute only after explicit approval.

    The approval gate fails closed. Execution starts only on an explicit
    "yes"/"y". "no"/"n" rejects the plan. "modify" collects the requested
    changes, re-plans with them, and asks again, so the plan that gets executed
    is always the one the user last saw. Any other answer re-prompts.

    Args:
        task: Description of the change to make.
        all_tools: Tool definitions available during execution.
        read_only_tools: Tool definitions available during planning.

    Returns:
        "Plan rejected by user." on rejection, otherwise whatever
        ``run_full_loop`` returns for the execution phase.
    """
    # Phase 1: Plan (read-only)
    plan_result = plan_mode_agent(task, read_only_tools)
    print("=== PROPOSED PLAN ===")
    print(plan_result["plan"])

    # Phase 2: Human approval gate
    while True:
        approval = input("\nApprove this plan? (yes/no/modify): ").strip().lower()
        if approval in ("yes", "y"):
            break
        if approval in ("no", "n"):
            return "Plan rejected by user."
        if approval == "modify":
            modifications = input("What should be changed? ")
            task = f"{task}\n\nUser modifications to plan: {modifications}"
            # Re-plan so the requested changes are actually reflected in the
            # plan that will be shown and, if approved, executed.
            plan_result = plan_mode_agent(task, read_only_tools)
            print("=== PROPOSED PLAN ===")
            print(plan_result["plan"])
            continue
        # A typo or empty answer must never count as approval.
        print("Please answer 'yes', 'no', or 'modify'.")

    # Phase 3: Execute with full tool access
    execute_system = f"""Execute the following plan. You now have full tool access.
Make changes file-by-file, running tests after each significant change.
Plan to execute:
{plan_result['plan']}"""
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=8192,
        system=execute_system,
        tools=all_tools,  # Full access: Read, Write, Edit, Bash
        messages=[{"role": "user", "content": "Execute the approved plan."}],
    )
    # NOTE(review): run_full_loop is not defined in this snippet; it is assumed
    # to be provided by the surrounding module.
    return run_full_loop(response, all_tools, execute_system)
2. Iterative Refinement
2.1 Test-Driven Refinement Loops
The most reliable pattern for code changes is a test-driven loop: write/update code → run tests → if tests fail, analyze and fix → repeat until green. This provides an objective completion signal (tests pass) rather than relying on the model’s judgment:
import anthropic
import json
import subprocess
client = anthropic.Anthropic()
def test_driven_refinement(task: str, test_command: str, max_iterations: int = 5) -> dict:
    """Iterative refinement loop driven by test results.

    Each iteration asks the model for changes, applies its tool calls, then
    runs *test_command*. A zero exit code ends the loop successfully; otherwise
    the test output is fed back to the model for the next attempt. A test run
    that exceeds the 60-second timeout counts as a failed iteration and is
    reported to the model rather than aborting the loop.

    Args:
        task: Description of what to implement.
        test_command: Shell-style command line for the test suite. It is split
            with shell quoting rules, so quoted arguments stay intact.
        max_iterations: Maximum number of change/test rounds.

    Returns:
        ``{"success": True, "iterations": [...]}`` once tests pass, otherwise
        ``{"success": False, "iterations": [...], "message": "Max iterations reached"}``.
    """
    import shlex

    system = f"""You are implementing: {task}
Your workflow:
1. Make the code change
2. I will run tests and show you the results
3. If tests pass: you're done
4. If tests fail: analyze the failure, fix, and we'll test again
Rules:
- Make minimal, focused changes
- Fix the root cause, not symptoms
- If you're stuck after 3 attempts, explain the blocker"""
    messages = [{"role": "user", "content": f"Implement: {task}"}]
    iterations = []
    # Quote-aware split: plain str.split() would break arguments such as
    # -k "login and not slow" into separate words.
    test_argv = shlex.split(test_command)

    for i in range(max_iterations):
        # Agent makes changes
        # NOTE(review): `all_tools` and `run_tool_calls` are not defined in this
        # snippet; they are assumed to be provided by the surrounding module.
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=4096,
            system=system,
            tools=all_tools,
            messages=messages,
        )
        # Run agent's tool calls (edits)
        messages = run_tool_calls(response, messages)

        # Run tests
        try:
            test_result = subprocess.run(
                test_argv,
                capture_output=True, text=True, timeout=60,
            )
        except subprocess.TimeoutExpired:
            # A hanging test suite is a failure the agent should hear about,
            # not a reason to lose the iteration history.
            tests_passed = False
            test_output = "Exit code: timeout\nTest command timed out after 60 seconds."
        else:
            tests_passed = test_result.returncode == 0
            test_output = f"Exit code: {test_result.returncode}\n"
            test_output += f"STDOUT:\n{test_result.stdout[-2000:]}\n"
            if test_result.stderr:
                test_output += f"STDERR:\n{test_result.stderr[-1000:]}"

        iterations.append({
            "iteration": i + 1,
            "tests_passed": tests_passed,
            "output_preview": test_output[:500],
        })
        if tests_passed:
            return {"success": True, "iterations": iterations}

        # Feed test results back to agent
        messages.append({
            "role": "user",
            "content": f"Tests failed (iteration {i+1}/{max_iterations}):\n\n{test_output}\n\nAnalyze the failure and fix it.",
        })

    return {"success": False, "iterations": iterations, "message": "Max iterations reached"}
2.2 Quality Gates
import subprocess
def run_quality_gates(project_dir: str, gates=None) -> dict:
    """Run all quality gates — tests, linting, type checking, security scan.

    Args:
        project_dir: Directory in which every gate command is executed.
        gates: Optional list of ``{"name": str, "cmd": str | list[str]}``
            entries. When omitted, the default pytest / ruff / mypy / bandit
            gates are used. A string ``cmd`` is split with shell quoting rules.

    Returns:
        ``{"all_passed": bool, "gates": {name: {"passed": bool, "output": str}}}``.
        ``output`` is "OK" for a passing gate, otherwise the last 1000
        characters of the command's combined stdout and stderr, or a short
        explanation when the command could not be started or timed out.
    """
    import shlex

    if gates is None:
        gates = [
            {"name": "unit_tests", "cmd": "pytest tests/ -v --tb=short"},
            {"name": "linting", "cmd": "ruff check ."},
            {"name": "type_check", "cmd": "mypy src/ --strict"},
            {"name": "security", "cmd": "bandit -r src/ -ll"},
        ]

    results = {}
    all_passed = True
    for gate in gates:
        cmd = gate["cmd"]
        argv = shlex.split(cmd) if isinstance(cmd, str) else list(cmd)
        try:
            result = subprocess.run(
                argv,
                capture_output=True, text=True,
                cwd=project_dir, timeout=120,
            )
        except FileNotFoundError as exc:
            # A tool that is not installed fails its own gate; the remaining
            # gates still run so the report is complete.
            passed = False
            output = f"Could not start gate command: {exc}"
        except subprocess.TimeoutExpired:
            passed = False
            output = "Gate timed out after 120 seconds"
        else:
            passed = result.returncode == 0
            # Several tools report failures on stderr, so keep both streams.
            output = "OK" if passed else (result.stdout + result.stderr)[-1000:]

        results[gate["name"]] = {"passed": passed, "output": output}
        if not passed:
            all_passed = False

    return {"all_passed": all_passed, "gates": results}
Startup’s Journey to Agent CI/CD
A YC startup went from deploying agents via copy-paste to a full CI/CD pipeline in 2 weeks. Key insight: treating prompts as versioned artifacts (not code comments) and running regression tests on every PR caught 3 production-breaking prompt changes before they shipped.
3. Headless CI/CD Integration
3.1 GitHub Actions with Claude Code
Claude Code can run headlessly in CI/CD pipelines. The --print flag outputs the result to stdout without interactive prompts, and --allowedTools controls permissions programmatically:
# GitHub Actions workflow using Claude Code headless
# .github/workflows/ai-review.yml
name: AI Code Review
on:
  pull_request:
    types: [opened, synchronize]
# Least privilege: read the code, write only PR comments.
permissions:
  contents: read
  pull-requests: write
jobs:
  review:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # The claude CLI is not preinstalled on the runner image.
      - name: Install Claude Code
        run: npm install -g @anthropic-ai/claude-code
      - name: Get PR diff
        env:
          # Diff against the PR's actual base branch instead of assuming "main".
          BASE_REF: ${{ github.base_ref }}
        run: |
          git diff "origin/${BASE_REF}...HEAD" > pr_diff.patch
      - name: Claude Code Review
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        # Review is read-only: no Bash for an agent that reads untrusted PR content.
        # The output is captured in review.md so the next step can post it.
        run: |
          claude --print \
            --allowedTools "Read,Grep,Glob" \
            --model claude-sonnet-4-6 \
            "Review the changes in pr_diff.patch. Focus on:
          1. Security vulnerabilities
          2. Performance issues
          3. Missing error handling
          Output as structured markdown." > review.md
      - name: Post review comment
        uses: actions/github-script@v7
        with:
          script: |
            // Post Claude's review as a PR comment
            const fs = require('fs');
            const body = fs.readFileSync('review.md', 'utf8');
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body,
            });
3.2 Automated Code Fixes
# Headless agent that fixes linting issues and commits
# Used in CI when linting fails
claude --print \
  --allowedTools "Read,Write,Edit,Bash" \
  --model claude-sonnet-4-6 \
  "Run 'ruff check . --output-format json' to find linting issues.
Fix each issue by editing the source files.
After fixing, run 'ruff check .' again to verify.
Only fix auto-fixable issues (don't refactor logic)."
# Then commit the fixes — but only when the agent actually changed something:
# `git commit` exits non-zero on an empty index, which would fail the CI job.
git add -A
if ! git diff --cached --quiet; then
  git commit -m "fix: auto-fix linting issues [ci]"
fi
Key takeaways: restrict the agent's tools with --allowedTools for security, and give CI/CD agents minimal permissions (principle of least privilege).
4. Claude Code CLI for CI/CD (CCA 11.4)
Claude Code can run non-interactively in CI/CD pipelines using the -p (print) flag. This transforms it from an interactive coding assistant into an automated agent that reads a task, executes it, and outputs structured results — perfect for automated code review, test generation, and PR feedback.
4.1 Non-Interactive Mode (-p / --print)
# -p / --print: Run Claude Code non-interactively (no human input)
# The agent reads the prompt, executes using available tools, prints result, exits
# Basic usage:
claude -p "Review this PR for security issues"
# With specific output format:
claude -p "Review src/auth.py" --output-format json
# Pipe input:
git diff main...HEAD | claude -p "Review this diff for bugs"
# Key behaviors in -p mode:
# - Tool calls still need explicit approval via --allowedTools (NOT auto-approved)
# - Without --allowedTools, tool calls that require permission abort the session
# - Reads CLAUDE.md + project context (same as interactive, unless --bare is set)
# - Exits with code 0 on success, non-zero on failure
# - PermissionRequest hooks do NOT fire in -p mode (use PreToolUse hooks instead)
# RECOMMENDED for CI: add --bare to skip auto-discovery of hooks/skills/MCP/CLAUDE.md
# This ensures reproducible behavior regardless of local machine config:
claude --bare -p "Review src/auth.py" --allowedTools "Read,Grep,Glob"
# --permission-mode dontAsk: auto-denies any tool not in --allowedTools (locked-down CI)
claude --bare -p "Summarize the API" --permission-mode dontAsk --allowedTools "Read,Grep"
# Continue the most recent session:
claude -p "Now check those files for SQL injection" --continue
# Resume a specific session by ID:
# (the first call emits JSON; jq -r extracts session_id as a raw, unquoted string)
session_id=$(claude -p "Start review" --output-format json | jq -r '.session_id')
claude -p "Continue that review" --resume "$session_id"
4.2 Structured Output (--output-format json)
# --output-format json: Machine-parseable output for CI pipelines
# Combined with --json-schema: enforce specific output structure
# Example: PR review with structured JSON output
# Per the schema below, every issue must carry file, severity and description;
# line and suggestion are optional. issues, summary and approve are required at the top level.
claude -p "Review this PR" --output-format json --json-schema '{
"type": "object",
"properties": {
"issues": {
"type": "array",
"items": {
"type": "object",
"properties": {
"file": {"type": "string"},
"line": {"type": "integer"},
"severity": {"type": "string", "enum": ["critical", "warning", "info"]},
"description": {"type": "string"},
"suggestion": {"type": "string"}
},
"required": ["file", "severity", "description"]
}
},
"summary": {"type": "string"},
"approve": {"type": "boolean"}
},
"required": ["issues", "summary", "approve"]
}'
# Output (stdout) — structured_output field holds the schema-validated object:
# {
# "session_id": "sess_01XYZ...",
# "result": "I found 1 critical security issue...",
# "total_cost_usd": 0.0042,
# "structured_output": {
# "issues": [
# {"file": "src/auth.py", "line": 45, "severity": "critical",
# "description": "SQL injection via string formatting",
# "suggestion": "Use parameterized queries"}
# ],
# "summary": "1 critical security issue found",
# "approve": false
# }
# }
# Parse with jq:
# ('{...}' is a placeholder — pass the full JSON schema from the example above)
claude -p "Review this PR" \
--output-format json \
--json-schema '{...}' | jq '.structured_output'
# Check cost per invocation:
claude -p "Summarize the project" --output-format json | jq '.total_cost_usd'
# This JSON output can be parsed by GitHub Actions, GitLab CI, etc.
# to automatically post PR comments, block merges, or trigger alerts
4.3 Session Isolation & CLAUDE.md for CI
# Session isolation in CI — each run is independent (no state leakage)
# Important: CI runs should NOT share sessions with each other
# BEST PRACTICE: use --bare for consistent, reproducible CI runs
# --bare skips CLAUDE.md, hooks, skills, MCP servers, and plugins
# so local developer config never bleeds into CI results
git diff origin/main...HEAD > diff.txt
# Feed the diff on stdin instead of expanding "$(cat diff.txt)" into the prompt
# argument: a large diff passed via argv can exceed the OS argument-length limit.
claude --bare -p "Review this diff for security issues" \
  --output-format json \
  --allowedTools "Read,Grep,Glob" < diff.txt > review.json
# Prevent session transcript storage in CI (no sensitive data on disk):
claude --bare -p "Review PR" --no-session-persistence --output-format json
# CLAUDE.md for CI — special instructions for automated runs:
# Use --append-system-prompt to add CI-specific instructions WITHOUT --bare:
# gh pr diff "$1" | claude -p \
# --append-system-prompt "You are a security engineer. Review for vulnerabilities only." \
# --output-format json > review.json
#
# Or add a CI/CD section to your CLAUDE.md (read unless --bare is set):
# ## CI/CD Context
# When running in CI (non-interactive):
# - Do NOT make changes to files (review only)
# - Focus on: bugs, security, test coverage gaps
# - Output structured JSON for automated processing
# Duplicate PR comment avoidance:
# Problem: Every push to a PR triggers a new review → floods with comments
# Solution: Use unique identifiers in comments + check before posting
flowchart TD
GEN["Generate Review<br/>claude -p ... > review.json"] --> HASH["Hash Content<br/>sha256sum review.json"]
HASH --> CHECK{"Hash already<br/>posted?"}
CHECK -->|"Yes"| SKIP["Skip — no duplicate"]
CHECK -->|"No"| POST["Post PR Comment<br/>+ Store hash"]
4.4 Routines API (Programmatic Invocation)
Routines are saved Claude Code configurations (a prompt + repositories + connectors) that run on Anthropic-managed cloud infrastructure. The API trigger gives a routine a dedicated HTTP endpoint — POSTing to it starts a new session and returns a session URL. This is how you wire Claude Code into alerting systems, deploy pipelines, and webhooks:
# Routines API trigger — fire a saved routine via HTTP POST
# Prerequisites: Create the routine at claude.ai/code/routines, add an API trigger,
# then generate a bearer token from the trigger modal.
# Fire a routine (shell):
# Keep the bearer token out of scripts and shell history: export it from your
# secret store first, e.g. ROUTINE_TOKEN="sk-ant-oat01-xxxxx"
curl -X POST https://api.anthropic.com/v1/claude_code/routines/trig_01ABCDEFGHJKLMNOPQRSTUVW/fire \
  -H "Authorization: Bearer ${ROUTINE_TOKEN}" \
  -H "anthropic-beta: experimental-cc-routine-2026-04-01" \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{"text": "Sentry alert SEN-4521 fired in prod. Stack trace attached."}'
# Response — returns the new session ID and URL:
# {
# "type": "routine_fire",
# "claude_code_session_id": "session_01HJKLMNOPQRSTUVWXYZ",
# "claude_code_session_url": "https://claude.ai/code/session_01HJKLMNOPQRSTUVWXYZ"
# }
# The 'text' field passes run-specific context alongside the routine's saved prompt.
# Open session_url in browser to watch the run in real time.
# Note: requires a claude.ai subscription login (NOT a Console API key)
import requests
# Fire a Routine via API from backend Python code
# Use cases:
# - Monitoring alert fires → trigger incident analysis routine
# - CD pipeline completes → trigger smoke-test routine
# - PR opened → trigger code-review routine
# - Slack bot receives message → trigger task-implementation routine
# Fire URL of the routine's API trigger; "trig_01XXXX" is a placeholder trigger ID.
ROUTINE_ENDPOINT = "https://api.anthropic.com/v1/claude_code/routines/trig_01XXXX/fire"
# Placeholder bearer token sent in the Authorization header by fire_routine().
ROUTINE_TOKEN = "sk-ant-oat01-xxxxx" # Store in secrets manager, not in code
def fire_routine(context_text: str) -> dict:
    """POST *context_text* to the routine's fire endpoint and return the parsed reply.

    The request carries the bearer token plus the beta and version headers and
    times out after 30 seconds. An HTTP error status raises via
    ``raise_for_status()``. The returned dict is the decoded JSON body; callers
    read ``claude_code_session_id`` (for tracking) and
    ``claude_code_session_url`` (to watch the run) from it.
    """
    request_headers = {
        "Authorization": f"Bearer {ROUTINE_TOKEN}",
        "anthropic-beta": "experimental-cc-routine-2026-04-01",
        "anthropic-version": "2023-06-01",
        "Content-Type": "application/json",
    }
    # Run-specific context sent alongside the routine's saved prompt.
    payload = {"text": context_text}
    reply = requests.post(
        ROUTINE_ENDPOINT,
        headers=request_headers,
        json=payload,
        timeout=30,
    )
    reply.raise_for_status()
    return reply.json()
# Example: fire routine when a test suite fails
def on_test_failure(test_output: str):
    """Fire the routine with the first 2000 characters of *test_output* and print the session URL."""
    excerpt = test_output[:2000]
    session = fire_routine(f"Test suite failed:\n{excerpt}")
    session_url = session['claude_code_session_url']
    print(f"Debugging session started: {session_url}")
# Note: the /fire endpoint is experimental (beta header required).
# API token is scoped to triggering THAT routine only — one token per routine.
# Rotate or revoke from the routine's edit modal at claude.ai/code/routines.
Routines API vs. -p mode: Use -p for ephemeral, scripted tasks that run in your own CI infrastructure. Use the Routines API when you need cloud-managed execution (works when your laptop is closed), recurring schedules, GitHub event triggers, or webhook-driven automation from alerting tools.
Key takeaways: (1) The -p / --print flag enables non-interactive mode. (2) Tool calls in -p mode still require explicit --allowedTools — they are NOT auto-approved. (3) --bare skips CLAUDE.md/hooks/MCP for reproducible CI runs. (4) --output-format json + --json-schema puts structured output in the structured_output field. (5) --permission-mode dontAsk auto-denies any non-allowed tool (locked-down CI). (6) The Routines API fires cloud-managed sessions via HTTP POST with a bearer token.
Next in the SDK Track
In Part 10: Prompt Precision & Few-Shot, we begin CCA Domain 4 — system prompt engineering, few-shot examples for consistent output, role-based prompts, and techniques for steering model behavior. Covers CCA Tasks 4.1 and 4.2.