1. Knowledge System
CrewAI’s knowledge system allows agents to access and reason over external information sources. Knowledge content is split into chunks, embedded, and stored in a vector database for semantic retrieval during task execution.
1.1 Agent with Knowledge Sources
from crewai import Agent, Task, Crew
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
# Knowledge sources, one per file format.
# NOTE(review): file-based sources are presumably resolved relative to the
# project's knowledge directory — confirm where these paths must live.
company_docs = PDFKnowledgeSource(
    file_paths=[
        "./docs/company-handbook.pdf",
        "./docs/product-guide.pdf",
    ],
)
customer_data = CSVKnowledgeSource(
    metadata={"type": "customer_data", "updated": "2026-05"},
    file_paths=["./data/customers.csv"],
)
policies = TextFileKnowledgeSource(
    file_paths=[
        "./docs/return-policy.txt",
        "./docs/shipping-policy.txt",
    ],
)

# The support agent is handed all three sources through knowledge_sources.
support_agent = Agent(
    role="Customer Support Specialist",
    goal="Provide accurate answers using company knowledge base",
    backstory=(
        "Expert customer support agent with deep knowledge of\n"
        "company products, policies, and procedures. Always references\n"
        "official documentation when answering questions."
    ),
    knowledge_sources=[company_docs, customer_data, policies],
    verbose=True,
)

# One customer question for the agent to answer.
task = Task(
    agent=support_agent,
    description=(
        "A customer asks: 'What is your return policy for electronics "
        "purchased more than 30 days ago?'"
    ),
    expected_output="Accurate answer citing the specific policy with relevant details",
)

# Single-agent, single-task crew: run it and print the raw text of the result.
crew = Crew(agents=[support_agent], tasks=[task])
result = crew.kickoff()
print(result.raw)
1.2 String and Crew-Level Knowledge Sources
from crewai import Agent, Task, Crew
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
# String-based knowledge: the FAQ text is supplied inline instead of from a file.
faq_knowledge = StringKnowledgeSource(
    content="""
Q: What are your business hours?
A: Monday-Friday 9am-6pm EST, Saturday 10am-4pm EST, closed Sundays.
Q: How long does shipping take?
A: Standard shipping: 5-7 business days. Express: 2-3 business days.
Overnight available for orders placed before 2pm EST.
Q: What payment methods do you accept?
A: Visa, Mastercard, American Express, PayPal, Apple Pay, Google Pay.
""",
    metadata={"source": "FAQ", "version": "2026-05"},
)

# Build the agent first so the task can reference it at construction time.
# A Crew using the default sequential process validates that every task
# already has an agent, so a Task created with agent=None is rejected when
# the Crew is constructed — before it could be patched afterwards.
faq_agent = Agent(
    role="FAQ Bot",
    goal="Answer customer questions from the FAQ",
    backstory="Friendly customer service representative",
    verbose=True,
)

faq_task = Task(
    description="Answer: What payment methods do you accept and what are your hours?",
    expected_output="Concise answer covering both questions",
    agent=faq_agent,
)

# Crew-level knowledge: the source is attached to the crew rather than to an
# individual agent, so it is shared across all agents in the crew.
crew = Crew(
    agents=[faq_agent],
    tasks=[faq_task],
    knowledge_sources=[faq_knowledge],
)

result = crew.kickoff()
print(result.raw)
2. Memory Systems
CrewAI provides three types of memory that give agents context awareness and learning capabilities:
| Memory Type | Scope | Persistence | Use Case |
|---|---|---|---|
| Short-Term | Current execution | Cleared after kickoff | Share context between agents in a run |
| Long-Term | Across executions | Persistent (database) | Learn from past runs, improve over time |
| Entity | Across executions | Persistent (database) | Track people, orgs, concepts mentioned |
2.1 Short-Term Memory
from crewai import Agent, Task, Crew
# Passing memory=True to the Crew switches the memory system on; short-term
# memory is the part that shares context within a single crew execution.

# Two agents: one gathers facts, the other turns them into prose.
researcher = Agent(
    role="Researcher",
    goal="Find key facts about a topic",
    backstory="Expert at discovering information",
    verbose=True,
)
writer = Agent(
    role="Writer",
    goal="Write compelling content based on research",
    backstory="Creative writer who transforms research into articles",
    verbose=True,
)

research_task = Task(
    agent=researcher,
    description="Research the top 3 benefits of remote work in 2026",
    expected_output="List of 3 benefits with supporting data",
)

# context names the task(s) whose output this task should see, so the writer
# works from the researcher's findings. This is explicit task chaining and is
# set independently of the memory flag on the Crew below.
writing_task = Task(
    agent=writer,
    description="Write a short blog post based on the research findings",
    expected_output="A 200-word blog post about remote work benefits",
    context=[research_task],
)

crew = Crew(
    agents=[researcher, writer],
    tasks=[research_task, writing_task],
    memory=True,  # Enable memory system
)

result = crew.kickoff()
print(result.raw)
2.2 Long-Term Memory
from crewai import Agent, Task, Crew
# Long-term memory is the persistent tier: it carries over between separate
# crew executions so agents can learn from earlier successes and failures.
analyst = Agent(
    role="Market Analyst",
    goal="Provide increasingly accurate market analysis over time",
    backstory=(
        "Senior market analyst who improves predictions by\n"
        "learning from past analyses and their outcomes."
    ),
    verbose=True,
)

analysis_task = Task(
    agent=analyst,
    description="Analyze the current AI market trends and predict Q3 growth",
    expected_output="Market analysis with growth prediction and confidence level",
)

crew = Crew(
    agents=[analyst],
    tasks=[analysis_task],
    verbose=True,
    memory=True,  # Enable all memory types
)

# Run 1: nothing has been stored yet, so the agent starts without history.
result1 = crew.kickoff()
print("Run 1:", result1.raw[:200])

# Run 2 onwards: task results and agent reasoning saved by earlier runs are
# available for the agent to reference.
result2 = crew.kickoff()
print("Run 2:", result2.raw[:200])
2.3 Entity Memory
from crewai import Agent, Task, Crew
# Entity memory keeps track of the people, organizations, and concepts that
# come up, across conversations and executions.
account_manager = Agent(
    role="Account Manager",
    goal="Maintain detailed knowledge of client relationships",
    backstory=(
        "Experienced account manager who remembers client\n"
        "preferences, past interactions, and relationship history."
    ),
    verbose=True,
)

# First interaction: the meeting note introduces the entities to be recorded.
task1 = Task(
    agent=account_manager,
    description=(
        "Process this meeting note: 'Met with Sarah Chen, CTO of TechFlow Inc.\n"
        "She's interested in our Enterprise plan. Budget is $50K/year.\n"
        "Follow up next Tuesday about API integration requirements.'"
    ),
    expected_output="Structured summary with action items",
)

crew = Crew(
    agents=[account_manager],
    tasks=[task1],
    memory=True,  # Entity memory tracks Sarah Chen, TechFlow Inc
)

result = crew.kickoff()
print(result.raw)

# After this run, entity memory is expected to hold:
#   Sarah Chen (CTO), TechFlow Inc (company), Enterprise plan (interest),
#   $50K budget, API integration (need)
Knowledge-Powered Legal Assistant
A law firm loaded 10,000 case files as knowledge sources for their CrewAI agent. Combined with long-term memory (which remembers each attorney’s preferences and past research), the agent provides increasingly relevant case recommendations over time. After 3 months of use, recommendation acceptance rate grew from 60% to 88%.
3. File Handling
CrewAI tasks can read input files and write output files. This enables document processing pipelines where each stage produces artifacts for the next.
3.1 File-Processing Crew
from crewai import Agent, Task, Crew
# Two-stage pipeline: a processing agent produces a cleaned summary, then a
# writing agent turns it into a report. Each task saves its result to a file.
data_processor = Agent(
    role="Data Processing Engineer",
    goal="Transform raw data into structured reports",
    backstory="Expert at data cleaning, transformation, and report generation",
    verbose=True,
)
report_writer = Agent(
    role="Report Writer",
    goal="Create polished reports from processed data",
    backstory="Technical writer specializing in data-driven reports",
    verbose=True,
)

# Stage 1: output_file makes the task write its result to disk automatically.
processing_task = Task(
    agent=data_processor,
    description=(
        "Process the sales data:\n"
        "1. Clean and validate the entries\n"
        "2. Calculate monthly totals\n"
        "3. Identify top performers\n"
        "Save the processed results."
    ),
    expected_output="Cleaned data summary with monthly totals and top performers",
    output_file="output/processed_data.md",
)

# Stage 2: receives stage 1's output through context and saves the final report.
report_task = Task(
    agent=report_writer,
    description="Create an executive summary report from the processed data",
    expected_output="Executive summary with key metrics and recommendations",
    context=[processing_task],
    output_file="output/executive_report.md",
)

crew = Crew(
    agents=[data_processor, report_writer],
    tasks=[processing_task, report_task],
)

result = crew.kickoff()
print(f"Final report:\n{result.raw}")
# Files written by this run:
#   output/processed_data.md
#   output/executive_report.md
(1) output_file auto-creates directories if they don’t exist. (2) Files are overwritten on each run. (3) Use timestamps in filenames for versioning, e.g. output_file=f"reports/report_{date}.md", where date is a string you compute before creating the task. (4) Combine with output_json or output_pydantic for structured file output.
4. Agent Training
CrewAI supports training crews to improve performance over time. Training iterates on agent behavior using human feedback to produce better results.
4.1 Training Workflow
# Shell command (run from the terminal, not from Python):
# train a crew for N iterations with human feedback; -n sets N (here, 5).
crewai train -n 5
# According to this guide, each training session will:
# 1. Run the crew
# 2. Show you the results
# 3. Ask for feedback (score 1-10 + comments)
# 4. Store feedback in training data
# 5. Repeat N times
# 6. Agents improve based on accumulated feedback
from crewai import Agent, Task, Crew
# A small one-agent crew used as the training target.
email_writer = Agent(
    role="Email Copywriter",
    goal="Write compelling marketing emails that drive conversions",
    backstory=(
        "Expert email marketer with 10 years experience.\n"
        "Writes subject lines with 40%+ open rates and CTAs with 5%+ click rates."
    ),
    verbose=True,
)

email_task = Task(
    agent=email_writer,
    description=(
        "Write a marketing email for a SaaS product launch:\n"
        "- Product: AI-powered project management tool\n"
        "- Target: Engineering managers at mid-size companies\n"
        "- Goal: Get them to sign up for a free trial\n"
        "- Tone: Professional but approachable"
    ),
    expected_output="Complete email with subject line, body, and CTA",
)

crew = Crew(
    agents=[email_writer],
    tasks=[email_task],
)

# Training can also be started from Python instead of the CLI; left commented
# out so that running this snippet performs a normal execution only:
# crew.train(n_iterations=5, filename="training_data.pkl")

# Regular run (uses training data if available).
result = crew.kickoff()
print(result.raw)
.crewai/.
5. Skills System
Skills inject domain expertise into agents without modifying their core configuration. They’re packaged knowledge modules that agents can reference during execution.
5.1 Agent with Custom Skill
from crewai import Agent, Task, Crew
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource

# Skills are typically defined in YAML/configuration and loaded automatically.
# The two agents below show ways to inject domain knowledge directly instead.

# Method 1: backstory as an inline skill (simple approach).
# The checklist lives in the agent's backstory text.
security_expert = Agent(
    role="Security Code Reviewer",
    goal="Identify security vulnerabilities in code",
    backstory=(
        "You are a OWASP Top 10 security expert. You check for:\n"
        "- SQL Injection (parameterized queries required)\n"
        "- XSS (input sanitization, output encoding)\n"
        "- CSRF (token validation)\n"
        "- Authentication flaws (bcrypt, rate limiting)\n"
        "- Sensitive data exposure (encryption at rest/transit)\n"
        "- Broken access control (RBAC checks)\n"
        "Always cite the specific OWASP category for each finding."
    ),
    verbose=True,
)

# Method 2: knowledge source as a skill (for extensive domain knowledge).
# The guidance is loaded from text files and attached via knowledge_sources.
security_guidelines = TextFileKnowledgeSource(
    file_paths=[
        "./skills/owasp-top-10.txt",
        "./skills/secure-coding-standards.txt",
    ],
)
security_expert_v2 = Agent(
    role="Security Code Reviewer",
    goal="Identify and remediate security vulnerabilities",
    backstory="OWASP-certified security expert",
    knowledge_sources=[security_guidelines],
    verbose=True,
)

# The review prompt embeds a deliberately vulnerable login function.
# This task and the crew below use the Method 1 agent; security_expert_v2 is
# defined for comparison only and is not run here.
review_task = Task(
    agent=security_expert,
    description="""Review this code for security issues:
```python
def login(username, password):
query = f"SELECT * FROM users WHERE name='{username}' AND pass='{password}'"
user = db.execute(query)
return user
```""",
    expected_output="Security findings with OWASP category, severity, and fix",
)

crew = Crew(agents=[security_expert], tasks=[review_task])
result = crew.kickoff()
print(result.raw)
Next in the CrewAI SDK Track
In Part 10: Planning, Reasoning & Testing, we’ll enable agent planning for step-by-step task decomposition, implement reasoning capabilities, write systematic tests for crews, and use replay for debugging.