What You’ll Learn: RAG (Retrieval-Augmented Generation) is the #1 pattern in production LLM apps — it grounds AI responses in your actual data instead of relying on training knowledge. LangChain provides the complete RAG toolkit: document loaders, text splitters, embedding models, vector stores, and retrieval chains. This article builds a production-quality RAG system step by step.
1. Document Loaders
SDK Track Note: This is the LangChain SDK Track — a hands-on companion to Foundation Track Part 5 (RAG). Read that article first for concepts, then come here for LangChain-specific implementation.
1.1 PDF & Office Documents
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_community.document_loaders import UnstructuredExcelLoader
# PDF: the loader returns one Document per page.
pdf_loader = PyPDFLoader("report.pdf")
pdf_docs = pdf_loader.load()
first_page = pdf_docs[0]
print(f"Loaded {len(pdf_docs)} pages")
print(first_page.page_content[:200])  # preview of the first page's text
print(first_page.metadata)  # {'source': 'report.pdf', 'page': 0}

# Word document (.docx)
docx_loader = Docx2txtLoader("proposal.docx")
docx_docs = docx_loader.load()

# Excel workbook (.xlsx)
excel_loader = UnstructuredExcelLoader("data.xlsx")
excel_docs = excel_loader.load()
1.2 Web & API Sources
from langchain_community.document_loaders import WebBaseLoader, RecursiveUrlLoader
from langchain_community.document_loaders import GitHubIssuesLoader
from html.parser import HTMLParser


class _VisibleTextParser(HTMLParser):
    """Collect the text nodes of an HTML page, skipping <script>/<style> bodies."""

    _SKIPPED_TAGS = frozenset({"script", "style"})

    def __init__(self):
        super().__init__()
        self._chunks = []
        self._skip_depth = 0

    def handle_starttag(self, tag, attrs):
        if tag in self._SKIPPED_TAGS:
            self._skip_depth += 1

    def handle_endtag(self, tag):
        if tag in self._SKIPPED_TAGS and self._skip_depth:
            self._skip_depth -= 1

    def handle_data(self, data):
        if not self._skip_depth:
            self._chunks.append(data)

    def text(self):
        return "".join(self._chunks)


def html_to_text(raw_html):
    """Return the visible text of ``raw_html`` (a str of HTML markup)."""
    parser = _VisibleTextParser()
    parser.feed(raw_html)
    parser.close()
    return parser.text()


# Single page
web_loader = WebBaseLoader("https://docs.python.org/3/tutorial/index.html")
web_docs = web_loader.load()

# Recursive crawl (with depth limit).
# RecursiveUrlLoader hands the extractor the raw HTML *string* of each page,
# not a parsed soup object, so calling `.get_text()` on it raises
# AttributeError. The extractor must do its own HTML -> text conversion.
recursive_loader = RecursiveUrlLoader(
    url="https://docs.example.com/",
    max_depth=2,
    extractor=html_to_text,
)
crawled_docs = recursive_loader.load()
print(f"Crawled {len(crawled_docs)} pages")
1.3 Directory Loader
from langchain_community.document_loaders import DirectoryLoader, TextLoader
# Load all .md files from a directory (the glob recurses into subfolders).
dir_loader = DirectoryLoader(
    "./docs/",
    glob="**/*.md",
    loader_cls=TextLoader,
    # TextLoader otherwise opens files with the platform's default encoding,
    # which breaks on UTF-8 markdown on systems whose default is not UTF-8.
    loader_kwargs={"encoding": "utf-8"},
    show_progress=True,  # NOTE(review): progress bar presumably needs tqdm installed
)
all_docs = dir_loader.load()
print(f"Loaded {len(all_docs)} documents from ./docs/")
2. Text Splitters
2.1 RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
# chunk_size / chunk_overlap are measured with length_function, i.e. in
# characters here. Separators are tried in order, from paragraph breaks down
# to single characters.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", ". ", " ", ""],
)
chunks = splitter.split_documents(pdf_docs)
print(f"Split {len(pdf_docs)} docs into {len(chunks)} chunks")

# Guard the average: an empty input (or pages with no extractable text)
# yields zero chunks, and dividing by len(chunks) would raise ZeroDivisionError.
if chunks:
    average_size = sum(len(c.page_content) for c in chunks) // len(chunks)
    print(f"Average chunk size: {average_size} chars")
else:
    print("Average chunk size: n/a (no chunks produced)")
2.2 SemanticChunker
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings
# Chunk boundaries are chosen from the semantic similarity between sentences,
# using a percentile breakpoint threshold of 95.
sentence_embedder = OpenAIEmbeddings()
semantic_splitter = SemanticChunker(
    sentence_embedder,
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=95,
)
semantic_chunks = semantic_splitter.split_documents(pdf_docs)
chunk_total = len(semantic_chunks)
print(f"Semantic split: {chunk_total} chunks")
2.3 Code Splitters
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
from pathlib import Path

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=2000,
    chunk_overlap=200,
)

# The text to split must be supplied explicitly; here it is read from a Python
# file on disk (replace the path with your own module).
source_code = Path("example.py").read_text(encoding="utf-8")

# Respects Python syntax boundaries (functions, classes)
code_chunks = python_splitter.split_text(source_code)
Real-World Application
Internal Developer Documentation
A 200-engineer company built a RAG system over their internal docs (Confluence, GitHub READMEs, Slack threads). Engineers ask questions like “How do I deploy to staging?” and get answers sourced from the latest documentation. Result: 50% reduction in repetitive questions in #help channels, and answers stay current as docs are updated.
RAG Pipeline · Knowledge Base · Source Citations
4. Vectorstore Integrations
4.1 Chroma (Local Development)
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Build the store from the chunks; it is persisted under ./chroma_db in the
# "my_docs" collection.
chroma_settings = {
    "persist_directory": "./chroma_db",
    "collection_name": "my_docs",
}
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    **chroma_settings,
)

# Similarity search: print the source and a 100-character preview of each hit.
results = vectorstore.similarity_search("What is RAG?", k=4)
for doc in results:
    origin = doc.metadata.get('source', '?')
    preview = doc.page_content[:100]
    print(f"[{origin}] {preview}...")
4.2 FAISS (High Performance)
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Create FAISS index
vectorstore = FAISS.from_documents(chunks, embeddings)

# Save / Load
vectorstore.save_local("faiss_index")
# SECURITY: load_local unpickles data stored next to the index, and
# unpickling can execute arbitrary code. Only pass
# allow_dangerous_deserialization=True for index files you created yourself,
# never for files obtained from an untrusted source.
loaded_vs = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Search with scores. Query the reloaded index so the save/load round trip is
# actually exercised instead of leaving `loaded_vs` unused.
# NOTE(review): with the default FAISS index the score is a distance, so
# lower means more similar - confirm if a different distance strategy is set.
results_with_scores = loaded_vs.similarity_search_with_score("query", k=5)
for doc, score in results_with_scores:
    print(f"Score: {score:.4f} | {doc.page_content[:80]}...")
4.3 Pinecone (Cloud-Native)
import os  # needed for os.environ below; the snippet raised NameError without it

from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

# Initialize Pinecone client. The key is read from the environment; a missing
# PINECONE_API_KEY fails fast with KeyError rather than sending an empty key.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("my-index")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Create vectorstore from existing index
vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings,
    text_key="text",
)

# Add documents
vectorstore.add_documents(chunks)

# Search with metadata filtering: only vectors whose "source" metadata
# equals "textbook.pdf" are considered.
results = vectorstore.similarity_search(
    "machine learning",
    k=5,
    filter={"source": "textbook.pdf"},
)
6. RAG Chain Patterns
6.1 Basic RAG Chain (LCEL)
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Setup
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Reopen the persisted store. collection_name must match the collection the
# documents were written to ("my_docs" in section 4.1); without it Chroma
# opens its default collection, which is empty, and retrieval returns nothing.
vectorstore = Chroma(
    persist_directory="./chroma_db",
    collection_name="my_docs",
    embedding_function=embeddings,
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# RAG prompt: {context} receives the retrieved text, {question} the user query.
rag_prompt = ChatPromptTemplate.from_template("""Answer based on the context below.
If the answer is not in the context, say "I don't know."
Context: {context}
Question: {question}
Answer:""")
# Format retrieved docs
def format_docs(docs) -> str:
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# RAG chain: the incoming question is sent to the retriever (whose documents
# are flattened by format_docs) and also passed through unchanged; both values
# fill the prompt, and the model's reply is parsed to a plain string.
question = "What are the benefits of RAG over fine-tuning?"
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | rag_prompt | model | StrOutputParser()
answer = rag_chain.invoke(question)
print(answer)
6.2 Conversational RAG (with History)
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Reopen the persisted store. collection_name must match the collection the
# documents were written to ("my_docs" in section 4.1); without it Chroma
# opens its default collection, which is empty, and retrieval returns nothing.
vectorstore = Chroma(
    persist_directory="./chroma_db",
    collection_name="my_docs",
    embedding_function=embeddings,
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Contextualize question based on chat history
contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system", "Reformulate the question to be standalone given the chat history."),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# RAG answer prompt: {context} is filled with the retrieved documents.
answer_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer based on the context:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
def format_docs(docs) -> str:
    """Join the text of retrieved documents into a single context string.

    NOTE(review): nothing in this section's chain references this helper;
    the prompt's ``{context}`` appears to be filled by the documents chain
    itself. Kept for parity with the basic RAG example.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# Conversational RAG chain
# NOTE(review): newer LangChain releases appear to ship these helpers in the
# separate langchain-classic package - confirm against your installed version.
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Step 1: retriever that receives the chat history and contextualize_prompt.
history_aware_retriever = create_history_aware_retriever(
    model,
    retriever,
    contextualize_prompt,
)
# Step 2: chain that answers from the retrieved documents via answer_prompt.
qa_chain = create_stuff_documents_chain(model, answer_prompt)
# Step 3: wire retrieval and answering together.
conversational_rag = create_retrieval_chain(history_aware_retriever, qa_chain)

# Use with history: record each turn so the follow-up can refer back to it.
chat_history = []
first_question = "What is RAG?"
result = conversational_rag.invoke(
    {"input": first_question, "chat_history": chat_history}
)
chat_history.append(HumanMessage(content=first_question))
chat_history.append(AIMessage(content=result["answer"]))

follow_up = "How does it compare to fine-tuning?"
result2 = conversational_rag.invoke(
    {"input": follow_up, "chat_history": chat_history}
)
print(result2["answer"])
Summary & Next Steps
This completes the LangChain SDK implementation for the concepts covered in Part 5: Retrieval-Augmented Generation.
Try It Yourself: Build a ‘company knowledge base’ RAG system: (1) load 5+ documents (mix of PDF, markdown, web pages), (2) split into chunks with proper overlap, (3) embed and store in Chroma, (4) build a retrieval chain that answers questions with source citations, (5) test with 10 questions and manually evaluate whether the retrieved context supports the answer.
Related Articles
Foundation: Part 5: Retrieval-Augmented Generation
The framework-agnostic concepts behind this article.
Read Article
LC Part 1: Setup, Chains & LCEL
Previous article in the LangChain SDK Track.
Read Article