How to use LangSmith to trace RAG pipeline
LangSmithTracer
Setup
Install: pip install langchain langchain-openai langchain-community langsmith openai faiss-cpu
Environment variables: OPENAI_API_KEY, LANGSMITH_API_KEY
import os
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
# The langsmith SDK does not export a `LangSmithTracer` class; LangChain runs
# are sent to LangSmith through the LangChainTracer callback handler, which
# takes a langsmith Client.
from langchain_core.tracers import LangChainTracer
from langsmith import Client
from langchain.schema import Document
Examples
Integration steps
- Install LangSmith and LangChain SDKs and set environment variables for API keys.
- Initialize your retriever (e.g., FAISS) and LLM (e.g., OpenAI) for the RAG pipeline.
- Create the RetrievalQA chain with your retriever and LLM.
- Instantiate LangSmithTracer with your LangSmith API key.
- Attach the LangSmithTracer to your LangChain pipeline using the tracer parameter.
- Run your query through the pipeline and observe the trace in the LangSmith dashboard.
Full code
import os

from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain_community.vectorstores import FAISS
from langchain_core.tracers import LangChainTracer
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith import Client

# Read credentials up front so a missing variable fails immediately with a
# KeyError instead of deep inside a network call.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
LANGSMITH_API_KEY = os.environ["LANGSMITH_API_KEY"]

# Initialize embeddings and an in-memory vector store
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
docs = [
    Document(page_content="LangSmith is a platform for tracing LangChain workflows."),
    Document(page_content="Retrieval-augmented generation combines retrieval with LLMs."),
]
vectorstore = FAISS.from_documents(docs, embeddings)

# Initialize retriever
retriever = vectorstore.as_retriever()

# Initialize LLM. gpt-4o is a chat model, so use ChatOpenAI (the class that is
# actually imported); the bare name `OpenAI` was never imported.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-4o")

# Create RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

# Initialize the LangSmith tracer. LangChain reports runs to LangSmith through
# the LangChainTracer callback handler, backed by a langsmith Client.
tracer = LangChainTracer(client=Client(api_key=LANGSMITH_API_KEY))

# Query
query = "What is LangSmith?"

# Attach the tracer per call via callbacks; assigning an attribute on the chain
# does not enable tracing. invoke() is used instead of run() because run()
# only supports chains with a single output key, and return_source_documents=True
# adds a second one ("source_documents").
response = qa_chain.invoke({"query": query}, config={"callbacks": [tracer]})
result = response["result"]
print("Answer:", result)
Answer: LangSmith is a platform for tracing LangChain workflows.
API trace
{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is LangSmith?"}], "retriever": {"type": "faiss", "documents": [...]}}
{"choices": [{"message": {"content": "LangSmith is a platform for tracing LangChain workflows."}}], "usage": {"total_tokens": 120}}
response.choices[0].message.content
Variants
Streaming RAG with LangSmith Tracing ›
Use streaming to get partial answers in real-time while still tracing the RAG pipeline.
import os

from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.tracers import LangChainTracer
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith import Client


def _format_docs(documents):
    """Join the retrieved documents' text into one context string."""
    return "\n\n".join(doc.page_content for doc in documents)


embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# SECURITY: load_local() unpickles the stored docstore, so recent
# langchain-community releases refuse to load without this explicit opt-in.
# Only load index directories you created yourself.
vectorstore = FAISS.load_local(
    "my_faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = vectorstore.as_retriever()

# gpt-4o is a chat model, so use ChatOpenAI; `OpenAI` was never imported.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="gpt-4o",
    streaming=True,
)

# LangChain reports runs to LangSmith through the LangChainTracer callback.
tracer = LangChainTracer(client=Client(api_key=os.environ["LANGSMITH_API_KEY"]))

prompt = ChatPromptTemplate.from_template(
    "Answer the question using only the context below.\n\n"
    "Context:\n{context}\n\n"
    "Question: {question}"
)

# RetrievalQA.stream() yields the finished output dict in a single chunk, not
# tokens. An LCEL pipeline ending in StrOutputParser yields string chunks as
# the model produces them, which is what the loop below prints.
# NOTE: unlike RetrievalQA(return_source_documents=True), this pipeline emits
# only the answer text; the retrieved documents are visible in the trace.
qa_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
).with_config({"callbacks": [tracer]})  # bind the tracer to every call

query = "Explain retrieval-augmented generation."
for token in qa_chain.stream(query):
    print(token, end="", flush=True)
Async RAG Pipeline with LangSmith ›
Use async when integrating RAG pipelines in asynchronous applications or servers.
import asyncio
import os

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_core.tracers import LangChainTracer
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith import Client


async def main():
    """Answer one question with a traced RetrievalQA chain and print the answer.

    Reads OPENAI_API_KEY and LANGSMITH_API_KEY from the environment (KeyError
    if either is missing) and loads the FAISS index stored in "my_faiss_index".
    """
    openai_key = os.environ["OPENAI_API_KEY"]
    embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

    # SECURITY: load_local() unpickles the stored docstore, so recent
    # langchain-community releases refuse to load without this explicit
    # opt-in. Only load index directories you created yourself.
    vectorstore = FAISS.load_local(
        "my_faiss_index",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    retriever = vectorstore.as_retriever()

    # gpt-4o is a chat model, so use ChatOpenAI; `OpenAI` was never imported.
    llm = ChatOpenAI(openai_api_key=openai_key, model_name="gpt-4o")
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )

    # LangChain reports runs to LangSmith through the LangChainTracer callback.
    tracer = LangChainTracer(client=Client(api_key=os.environ["LANGSMITH_API_KEY"]))

    # ainvoke() instead of arun(): arun() only supports chains with a single
    # output key, and return_source_documents=True adds "source_documents".
    # The tracer is attached per call through callbacks.
    response = await qa_chain.ainvoke(
        {"query": "How does LangSmith help with RAG?"},
        config={"callbacks": [tracer]},
    )
    result = response["result"]
    print("Answer:", result)
asyncio.run(main())
Using Claude 3.5 Sonnet with LangSmith for RAG ›
Use Claude 3.5 Sonnet (or another alternative LLM) with LangSmith tracing when you need higher coding accuracy.
import os

import anthropic
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_core.tracers import LangChainTracer
from langchain_openai import OpenAIEmbeddings
from langsmith import Client

client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# SECURITY: load_local() unpickles the stored docstore, so recent
# langchain-community releases refuse to load without this explicit opt-in.
# Only load index directories you created yourself.
vectorstore = FAISS.load_local(
    "my_faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = vectorstore.as_retriever()

# Custom Claude wrapper omitted for brevity
# Assume llm is a LangChain-compatible wrapper around Claude 3.5 Sonnet
# NOTE: `llm` must be defined before this point (for example a chat-model
# wrapper built on `client`); otherwise the next statement raises NameError.

# LangChain reports runs to LangSmith through the LangChainTracer callback.
tracer = LangChainTracer(client=Client(api_key=os.environ["LANGSMITH_API_KEY"]))
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

query = "What is retrieval-augmented generation?"

# invoke() instead of run(): run() only supports chains with a single output
# key, and return_source_documents=True adds "source_documents". The tracer is
# attached per call through callbacks rather than as a chain attribute.
response = qa_chain.invoke({"query": query}, config={"callbacks": [tracer]})
result = response["result"]
print("Answer:", result)
Performance
- Use concise prompts to reduce token usage.
- Cache vector store embeddings to avoid recomputation.
- Limit source documents returned to reduce token count.
| Approach | Latency | Cost/call | Best for |
|---|---|---|---|
| Standard RAG with LangSmith | ~800ms | ~$0.0025 | Reliable tracing and debugging |
| Streaming RAG with LangSmith | ~600ms initial + streaming | ~$0.0025 | Real-time partial answers with trace |
| Async RAG with LangSmith | ~800ms async | ~$0.0025 | Concurrent calls in async apps |
Quick tip
Always attach <code>LangSmithTracer</code> to your LangChain pipeline before running queries to capture full trace data automatically.
Common mistake
Beginners often forget to set the <code>tracer</code> attribute on their LangChain pipeline, resulting in no trace data being captured in LangSmith.