RAG Systems with LangChain
Building retrieval-augmented generation (RAG) systems that combine LLMs with proprietary documents to produce accurate, contextual responses grounded in specific knowledge.
Overview
RAG systems reduce LLM hallucinations by retrieving relevant documents and grounding generation in them. LangChain simplifies the pipeline with document loaders, text splitters, vector stores, and retrieval chains.
Basic RAG Pipeline
Python - Simple RAG Setup
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
# 1. Load documents
loader = PyPDFLoader("documents/company_policies.pdf")
documents = loader.load()
# 2. Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
splits = text_splitter.split_documents(documents)
# 3. Create embeddings and vector store
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embeddings)
# 4. Create retrieval chain
llm = ChatOpenAI(model="gpt-4")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
)
# 5. Query
result = qa_chain.invoke({"query": "What is our vacation policy?"})
print(result["result"])
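RetrievalQA belongs to LangChain's legacy chain API. On LangChain 0.1+, the same stuff-documents pipeline can be expressed with create_retrieval_chain; a minimal sketch, with prompt wording that is only illustrative:
Python - Modern Retrieval Chain (LangChain 0.1+)
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
# The retrieval chain expects an "input" key and fills in {context} itself
prompt = ChatPromptTemplate.from_template(
    "Answer the question using only this context:\n\n{context}\n\nQuestion: {input}"
)
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(vectorstore.as_retriever(), combine_docs_chain)
result = rag_chain.invoke({"input": "What is our vacation policy?"})
print(result["answer"])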
Multi-Document RAG with Metadata
Python - Advanced RAG System
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
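# extract_department (used below) is not a LangChain utility -- it is a
# hypothetical helper. One possible sketch, assuming the department name
# is encoded in the file path (e.g. "documents/hr/leave.pdf" -> "hr"):
def extract_department(source_path: str) -> str:
    parts = source_path.replace("\\", "/").split("/")
    return parts[-2] if len(parts) >= 2 else "general"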
# Load multiple documents with metadata
loader = DirectoryLoader(
    path="documents/",
    glob="**/*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()
# Add metadata (extract_department is the sketch defined above)
for doc in documents:
    doc.metadata["source_type"] = "company_policy"
    doc.metadata["department"] = extract_department(doc.metadata["source"])
# Split and embed
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
splits = text_splitter.split_documents(documents)
# Use persistent vector store (Chroma)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)
# Create RAG chain with sources
llm = ChatOpenAI(model="gpt-4")
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
# Query with source tracking
result = qa_chain.invoke({"question": "What is the policy on remote work?"})
print(f"Answer: {result['answer']}")
print(f"Sources: {result['sources']}")
# result["source_documents"] holds the full retrieved Document objects
Custom Retrieval Chain
Python - Custom Retrieval Logic
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from typing import List
# Custom retrieval with filtering
def retrieve_with_filter(query: str, vectorstore, filters: dict = None) -> List[Document]:
    """Retrieve documents with optional metadata filtering"""
    # Over-fetch from the vector store (k=10), then post-filter down to 5
    docs = vectorstore.similarity_search_with_score(query, k=10)
    # Post-filter by metadata
    if filters:
        docs = [
            (doc, score) for doc, score in docs
            if all(doc.metadata.get(k) == v for k, v in filters.items())
        ]
    return [doc for doc, _ in docs[:5]]
# Custom prompt
qa_prompt = PromptTemplate(
    template="""Use the following context to answer the question.
If you don't know the answer, say "I don't have information about this."
Include relevant policy numbers when applicable.
Context:
{context}
Question: {question}
Answer:""",
    input_variables=["context", "question"],
)
# Create custom chain
llm = ChatOpenAI(model="gpt-4")
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
# Execute with custom retrieval
query = "What is the professional development budget?"
docs = retrieve_with_filter(
    query,
    vectorstore,
    filters={"department": "HR", "topic": "benefits"},
)
context = "\n".join(doc.page_content for doc in docs)
answer = qa_chain.run(context=context, question=query)
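Post-filtering like this can starve the result set when few of the top-10 hits match the filter. Most vector stores can filter at query time instead; with Chroma, for example, a metadata filter can be passed directly to the search (a sketch reusing the hypothetical department metadata from earlier):
# Filter inside the vector store query rather than after it
docs = vectorstore.similarity_search(query, k=5, filter={"department": "HR"})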
Hybrid Search with Re-ranking
Python - Hybrid RAG with Re-ranking
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_cohere import CohereRerank
# Create multiple retrievers
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
bm25_retriever = BM25Retriever.from_documents(splits)
# Ensemble retriever (combines multiple sources)
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, bm25_retriever],
    weights=[0.5, 0.5],
)
# Add re-ranking to compress results
compressor = CohereRerank(model="rerank-english-v2.0", top_n=5)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=ensemble_retriever,
)
# Use compressed retriever in QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
)
# Query
result = qa_chain.invoke({"query": "Tell me about benefits"})
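Note that BM25Retriever requires the rank_bm25 package, and CohereRerank needs a Cohere API key (read from the COHERE_API_KEY environment variable). The 0.5/0.5 ensemble weights are only a starting point; tune them against your own retrieval evaluations.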
Dynamic Context Window
Python - Adaptive Retrieval
from langchain.chains.question_answering import load_qa_chain
# Multi-step RAG for complex questions
class AdaptiveRAG:
    def __init__(self, vectorstore, llm):
        self.vectorstore = vectorstore
        self.llm = llm
        self.retrieval_chain = load_qa_chain(llm, chain_type="stuff")

    def answer_question(self, question: str) -> dict:
        # Step 1: Retrieve initial documents
        docs = self.vectorstore.similarity_search(question, k=5)
        # Step 2: Generate answer with initial context
        answer = self.retrieval_chain.run(input_documents=docs, question=question)
        # Step 3: If the answer signals uncertainty, widen the search and retry
        if "uncertain" in answer.lower() or "unclear" in answer.lower():
            docs = self.vectorstore.similarity_search(question, k=10)
            answer = self.retrieval_chain.run(input_documents=docs, question=question)
        return {
            "answer": answer,
            "docs_used": len(docs),  # reflects the expanded set if a retry happened
            "confidence": self.estimate_confidence(answer),
        }

    def estimate_confidence(self, answer: str) -> float:
        """Rough confidence estimate based on hedging language in the answer"""
        uncertainty_markers = ["possibly", "might", "unclear", "insufficient"]
        marker_count = sum(1 for marker in uncertainty_markers if marker in answer.lower())
        return max(0.0, 1.0 - marker_count * 0.1)
# Usage
rag = AdaptiveRAG(vectorstore, llm)
result = rag.answer_question("What benefits are available for employees?")
Document Ingestion Pipeline
Python - Batch Document Processing
from langchain_community.document_loaders import (
    PyPDFLoader,
    CSVLoader,
    JSONLoader,
    WebBaseLoader,
)
import os
class DocumentIngestionPipeline:
    def __init__(self, vectorstore):
        self.vectorstore = vectorstore
        self.loaders = {
            ".pdf": PyPDFLoader,
            ".csv": CSVLoader,
            ".json": JSONLoader,  # JSONLoader also requires a jq_schema kwarg
        }

    def ingest_directory(self, directory: str, **loader_kwargs):
        """Ingest all supported documents from a directory"""
        documents = []
        for filename in os.listdir(directory):
            filepath = os.path.join(directory, filename)
            ext = os.path.splitext(filename)[1].lower()
            if ext in self.loaders:
                loader = self.loaders[ext](filepath, **loader_kwargs)
                documents.extend(loader.load())
        # Process and add to vectorstore
        self.add_documents(documents)
        return len(documents)

    def ingest_urls(self, urls: list):
        """Ingest web documents"""
        documents = []
        for url in urls:
            documents.extend(WebBaseLoader(url).load())
        self.add_documents(documents)
        return len(documents)

    def add_documents(self, documents: list):
        """Split documents and add them to the vector store"""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        splits = text_splitter.split_documents(documents)
        self.vectorstore.add_documents(splits)
# Usage
pipeline = DocumentIngestionPipeline(vectorstore)
count = pipeline.ingest_directory("data/policies/")
print(f"Ingested {count} documents")
Best Practices
- Chunk documents appropriately (roughly 500-1000 tokens; see the token-based splitter sketch after this list)
- Include metadata for filtering and source tracking
- Use hybrid retrieval (vector + BM25) for better results
- Implement re-ranking for best document ordering
- Add confidence scoring and uncertainty handling
- Use persistent vector stores for scalability
- Monitor retrieval quality and iterate on chunking strategy
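The examples above size chunks in characters (length_function=len). To size them in tokens instead, as the first practice suggests, RecursiveCharacterTextSplitter can be built from a tiktoken encoder; a minimal sketch, assuming the tiktoken package is installed and with chunk numbers that are illustrative:
Python - Token-Based Chunking
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Count chunk length in tokens (cl100k_base is the encoding used by
# GPT-4-era OpenAI models) instead of characters
token_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=800,
    chunk_overlap=100,
)
splits = token_splitter.split_documents(documents)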