How to summarize multiple documents with Python
Direct answer
Use the OpenAI or Anthropic Python SDK to send multiple documents as concatenated or chunked messages to a chat completion model like gpt-4o or claude-3-5-sonnet-20241022 for summarization.
Setup
Install
pip install openai anthropic
Env vars
OPENAI_API_KEY, ANTHROPIC_API_KEY
Imports
from openai import OpenAI
import anthropic
import os
import json
Examples
Input: ['Document 1 text about AI.', 'Document 2 text about Python.']
Output: Summary: Document 1 discusses AI concepts; Document 2 covers Python programming basics.
Input: ['Annual report Q1', 'Annual report Q2', 'Annual report Q3']
Output: Summary: The reports show steady revenue growth and improved operational efficiency across Q1 to Q3.
Input: []
Output: Summary: No documents provided to summarize.
Integration steps
- Initialize the AI client with API key from os.environ
- Concatenate or chunk multiple documents into a single prompt or message list
- Call the chat completions endpoint with the combined input
- Extract the summary text from the response
- Handle edge cases like empty document lists or very long inputs
Full code
import os
from openai import OpenAI
import anthropic
import json
def summarize_documents_openai(documents):
    """Summarize a list of documents with the OpenAI chat completions API.

    Args:
        documents: List of document texts to summarize together.

    Returns:
        The summary text produced by the model, or a fixed fallback message
        when *documents* is empty.

    Raises:
        KeyError: If OPENAI_API_KEY is not set in the environment.
    """
    # Guard first: an empty call should not require an API key or a client.
    if not documents:
        return "Summary: No documents provided to summarize."
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    # A visible delimiter helps the model keep the documents apart
    # inside a single prompt.
    combined_text = "\n\n---\n\n".join(documents)
    messages = [
        {"role": "system", "content": "You are a helpful assistant that summarizes multiple documents."},
        {"role": "user", "content": f"Summarize the following documents:\n{combined_text}"}
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )
    return response.choices[0].message.content
def summarize_documents_anthropic(documents):
    """Summarize a list of documents with the Anthropic Messages API.

    Args:
        documents: List of document texts to summarize together.

    Returns:
        The summary text produced by the model, or a fixed fallback message
        when *documents* is empty.

    Raises:
        KeyError: If ANTHROPIC_API_KEY is not set in the environment.
    """
    # Guard first: an empty call should not require an API key or a client.
    if not documents:
        return "Summary: No documents provided to summarize."
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    combined_text = "\n\n---\n\n".join(documents)
    system_prompt = "You are a helpful assistant that summarizes multiple documents."
    user_message = f"Summarize the following documents:\n{combined_text}"
    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        system=system_prompt,
        messages=[{"role": "user", "content": user_message}]
    )
    # message.content is a list of content blocks, not a plain string;
    # join each block's text so callers get a str like the OpenAI variant.
    return "".join(block.text for block in message.content)
if __name__ == "__main__":
    # Demo: summarize the same two short documents with both providers.
    docs = [
        "Artificial intelligence is transforming industries.",
        "Python is a versatile programming language widely used in AI development."
    ]
    print("OpenAI summary:")
    print(summarize_documents_openai(docs))
    print("\nAnthropic summary:")
print(summarize_documents_anthropic(docs))
API trace
Request
{"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a helpful assistant that summarizes multiple documents."}, {"role": "user", "content": "Summarize the following documents:\n<Document texts concatenated>"}]}
Response
{"choices": [{"message": {"content": "<summary text>"}}], "usage": {"total_tokens": 500}}
Extract
response.choices[0].message.content
Variants
Streaming summarization with OpenAI ›
Use streaming when you want to display the summary progressively for better user experience with long documents.
import os
from openai import OpenAI
def stream_summarize(documents):
    """Stream a combined summary of *documents* to stdout via OpenAI.

    Prints summary tokens as they arrive and returns None. For an empty
    document list, a fixed message is printed and no API call is made.

    Args:
        documents: List of document texts to summarize together.
    """
    # Guard first: an empty call should not require an API key or a client.
    if not documents:
        print("Summary: No documents provided to summarize.")
        return
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    combined_text = "\n\n---\n\n".join(documents)
    messages = [
        {"role": "system", "content": "You are a helpful assistant that summarizes multiple documents."},
        {"role": "user", "content": f"Summarize the following documents:\n{combined_text}"}
    ]
    stream = client.chat.completions.create(model="gpt-4o", messages=messages, stream=True)
    print("Streaming summary:")
    for chunk in stream:
        # Some chunks carry delta.content=None (e.g. the final chunk);
        # coalesce to "" so printing never fails.
        delta = chunk.choices[0].delta.content or ""
        print(delta, end="", flush=True)
    # Terminate the streamed line so subsequent output starts cleanly.
    print()
if __name__ == "__main__":
    # Demo: stream a summary of two tiny documents to stdout.
    docs = ["Doc 1 text.", "Doc 2 text."]
stream_summarize(docs)
Async summarization with Anthropic ›
Use async when integrating summarization into an async application or when making concurrent calls.
import os
import asyncio
import anthropic
async def async_summarize(documents):
    """Asynchronously summarize documents with the Anthropic Messages API.

    Args:
        documents: List of document texts to summarize together.

    Returns:
        The summary text produced by the model, or a fixed fallback message
        when *documents* is empty.

    Raises:
        KeyError: If ANTHROPIC_API_KEY is not set in the environment.
    """
    # Guard first: an empty call should not require an API key or a client.
    if not documents:
        return "Summary: No documents provided to summarize."
    # AsyncAnthropic is required here: the synchronous Anthropic client's
    # messages.create() returns a Message, not an awaitable, so awaiting
    # it would raise a TypeError.
    client = anthropic.AsyncAnthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    combined_text = "\n\n---\n\n".join(documents)
    system_prompt = "You are a helpful assistant that summarizes multiple documents."
    user_message = f"Summarize the following documents:\n{combined_text}"
    message = await client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        system=system_prompt,
        messages=[{"role": "user", "content": user_message}]
    )
    # message.content is a list of content blocks; join their text into a str.
    return "".join(block.text for block in message.content)
if __name__ == "__main__":
    # Demo: drive the async summarizer to completion from synchronous code.
    docs = ["Doc 1 text.", "Doc 2 text."]
    summary = asyncio.run(async_summarize(docs))
print(summary)
Summarization with chunking for very large documents ›
Use chunking when documents exceed token limits to summarize large inputs effectively.
import os
from openai import OpenAI
def chunk_text(text, max_len=2000):
    """Split *text* into whitespace-delimited chunks of roughly max_len chars.

    Words are never broken apart; the running length counts one joining
    space per word, and the current chunk is flushed as soon as that
    running length would exceed max_len.
    """
    pieces = []
    buffer = []
    size = 0
    for token in text.split():
        size += len(token) + 1  # +1 for the space that will join tokens
        if size > max_len:
            # Flush whatever has accumulated and start over with this token.
            pieces.append(" ".join(buffer))
            buffer = [token]
            size = len(token) + 1
        else:
            buffer.append(token)
    if buffer:
        pieces.append(" ".join(buffer))
    return pieces
def summarize_large_documents(documents):
    """Map-reduce summarization for inputs that may exceed token limits.

    Each chunk of the combined text is summarized independently (map step),
    then the per-chunk summaries are summarized together (reduce step).

    Args:
        documents: List of document texts to summarize together.

    Returns:
        The final summary text, or a fixed fallback message when
        *documents* is empty.

    Raises:
        KeyError: If OPENAI_API_KEY is not set in the environment.
    """
    # Guard first: an empty call should not require an API key or a client.
    if not documents:
        return "Summary: No documents provided to summarize."
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    combined_text = "\n\n---\n\n".join(documents)
    chunks = chunk_text(combined_text)
    # Map step: one cheap (gpt-4o-mini) summarization call per chunk.
    summaries = []
    for chunk in chunks:
        messages = [
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": f"Summarize the following text chunk:\n{chunk}"}
        ]
        response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
        summaries.append(response.choices[0].message.content)
    # Reduce step: fold the per-chunk summaries into one final summary.
    final_prompt = "\n\n---\n\n".join(summaries)
    final_messages = [
        {"role": "system", "content": "You are a helpful assistant that summarizes multiple summaries."},
        {"role": "user", "content": f"Summarize the following summaries:\n{final_prompt}"}
    ]
    final_response = client.chat.completions.create(model="gpt-4o-mini", messages=final_messages)
    return final_response.choices[0].message.content
if __name__ == "__main__":
    # Demo: summarize one (notionally very long) document via chunking.
    docs = ["Very long document text ..."]
print(summarize_large_documents(docs))
Performance
Latency: ~800ms to 2s for typical multi-document summarization with gpt-4o
Cost: ~$0.002 to $0.01 per 500 tokens depending on model and input size
Rate limits: Tier 1: 500 RPM / 30K TPM for OpenAI; Anthropic has similar limits
- Summarize documents individually if very large, then combine summaries
- Use concise prompts to reduce token usage
- Avoid sending unnecessary metadata or formatting in the input
| Approach | Latency | Cost/call | Best for |
|---|---|---|---|
| Single concatenated prompt | ~800ms | ~$0.002 per 500 tokens | Small to medium document sets |
| Streaming response | ~800ms + streaming time | ~$0.002 per 500 tokens | Better UX for long summaries |
| Chunked summarization | 1-3s depending on chunks | ~$0.005-$0.01 | Very large documents exceeding token limits |
Quick tip
Concatenate multiple documents with clear separators before sending to the chat completion API to get a coherent summary.
Common mistake
Sending very large documents without chunking causes token limit errors or truncated summaries.