How to build a multi-agent system in python
Direct answer
Build a multi-agent system in Python by creating multiple agent classes that communicate via messages and coordinate tasks, often using LLMs like
gpt-4o for reasoning and interaction.

Setup
Install
pip install openai

Env vars
OPENAI_API_KEY

Imports
import os
from openai import OpenAI
import threading
import queue

Examples
In: Start two agents: one to generate ideas, another to critique them.
Out: Agent1: 'Idea: Build a chatbot.'\nAgent2: 'Critique: Needs more personalization.'
In: Three agents collaborate: planner, executor, and evaluator.
Out: Planner: 'Plan task steps.'\nExecutor: 'Executing step 1.'\nEvaluator: 'Step 1 successful.'
In: Edge case: Agents handle empty messages gracefully.
Out: Agent1: 'No input received.'\nAgent2: 'Waiting for valid message.'
Integration steps
- Set up environment with API key in os.environ
- Define agent classes with message queues for communication
- Implement each agent's logic using OpenAI client calls
- Use threading or async to run agents concurrently
- Exchange messages between agents via queues
- Collect and print final outputs from agents
Full code
import os
from openai import OpenAI
import threading
import queue
# Module-level OpenAI client shared by every agent thread below.
# Reads the key from the environment; raises KeyError at import time
# if OPENAI_API_KEY is unset, which fails fast before any thread starts.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
class Agent(threading.Thread):
    """Worker thread that relays messages between queues via an LLM.

    The agent repeatedly pulls a message from ``inbox``, asks the model
    for a concise reply, and pushes that reply onto ``outbox`` (when one
    is configured). Sending the literal string "STOP" through the inbox
    shuts the agent down.
    """

    def __init__(self, name, inbox, outbox):
        super().__init__()
        self.name = name        # appears in every prompt this agent builds
        self.inbox = inbox      # queue.Queue the agent consumes from
        self.outbox = outbox    # queue.Queue replies go to (may be None)
        self.running = True     # cleared when a STOP message arrives

    def run(self):
        # Poll with a short timeout so the loop stays responsive even
        # when the inbox is idle.
        while self.running:
            try:
                incoming = self.inbox.get(timeout=1)
            except queue.Empty:
                continue  # nothing arrived in this poll window
            if incoming == "STOP":
                self.running = False
                break
            reply = self.process_message(incoming)
            if self.outbox:
                self.outbox.put(reply)

    def process_message(self, message):
        """Send *message* to the model and return the reply text."""
        prompt = f"Agent {self.name} received: {message}\nRespond concisely."
        result = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return result.choices[0].message.content
# Wire up the two directional channels between the agents.
queue1 = queue.Queue()
queue2 = queue.Queue()

# Generator reads from queue1 and writes to queue2; Critic does the reverse,
# so each agent's output becomes the other's input.
agent_generator = Agent("Generator", queue1, queue2)
agent_critic = Agent("Critic", queue2, queue1)

agent_generator.start()
agent_critic.start()

# Seed the conversation with an opening prompt for the Generator.
queue1.put("Generate an innovative product idea.")

import time
# Observe a few exchanges, printing whatever has landed on each channel.
for _ in range(3):
    time.sleep(2)
    try:
        msg = queue1.get_nowait()
    except queue.Empty:
        pass
    else:
        print(f"Generator received critique: {msg}")
    try:
        msg = queue2.get_nowait()
    except queue.Empty:
        pass
    else:
        print(f"Critic received idea: {msg}")

# Ask both workers to shut down and wait for their threads to exit.
queue1.put("STOP")
queue2.put("STOP")
agent_generator.join()
agent_critic.join()
print("Multi-agent system run complete.")

Output
Critic received idea: Idea: Build a chatbot that adapts to user emotions. Generator received critique: Critique: Consider adding voice interaction for accessibility. Critic received idea: Idea: Develop a virtual assistant for scheduling. Multi-agent system run complete.
API trace
Request
{"model": "gpt-4o", "messages": [{"role": "user", "content": "Agent Generator received: Generate an innovative product idea.\nRespond concisely."}]}

Response
{"choices": [{"message": {"content": "Idea: Build a chatbot that adapts to user emotions."}}], "usage": {"total_tokens": 50}}

Extract
response.choices[0].message.content

Variants
Streaming multi-agent system ›
Use streaming when you want real-time partial responses from agents for better UX.
import os
from openai import OpenAI
import threading
import queue
# Shared OpenAI client for the streaming agents below.
# Reads the key from the environment; raises KeyError at import time
# if OPENAI_API_KEY is unset.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
class StreamingAgent(threading.Thread):
    """Queue-driven agent that streams model output token-by-token.

    Pulls messages from ``inbox``; for each one it opens a streaming
    chat completion, echoes partial text to stdout as it arrives, and
    finally puts the assembled full reply onto ``outbox`` (when one is
    configured). The literal message "STOP" shuts the agent down.
    """

    def __init__(self, name, inbox, outbox):
        super().__init__()
        self.name = name        # used in the prompt and console output
        self.inbox = inbox      # queue.Queue of incoming messages
        self.outbox = outbox    # queue.Queue for replies (may be None)
        self.running = True

    def run(self):
        while self.running:
            try:
                message = self.inbox.get(timeout=1)
            except queue.Empty:
                continue  # poll again; keeps the loop responsive
            if message == "STOP":
                self.running = False
                break
            prompt = f"Agent {self.name} received: {message}\nRespond concisely."
            response_stream = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                stream=True
            )
            collected = []
            for chunk in response_stream:
                # openai>=1.0 yields pydantic objects: the delta is an
                # attribute, not a dict, so `.delta.get('content', '')`
                # raises AttributeError. Read `.content` directly; it is
                # None on role/finish chunks, hence the `or ''`.
                if not chunk.choices:
                    continue  # e.g. a usage-only chunk carries no choices
                delta = chunk.choices[0].delta.content or ''
                print(f"{self.name} streaming: {delta}", end='', flush=True)
                collected.append(delta)
            print()
            if self.outbox:
                self.outbox.put(''.join(collected))
# Two channels: queue1 feeds the Generator, queue2 feeds the Critic,
# and each agent writes into the other's inbox.
queue1 = queue.Queue()
queue2 = queue.Queue()

agent_generator = StreamingAgent("Generator", queue1, queue2)
agent_critic = StreamingAgent("Critic", queue2, queue1)
agent_generator.start()
agent_critic.start()

# Seed the loop; the agents stream their replies directly to stdout.
queue1.put("Generate an innovative product idea.")

import time
# Give the pair a few exchange windows before shutting down.
rounds = 3
for _ in range(rounds):
    time.sleep(2)

# Signal both agents to exit and wait for their threads to finish.
queue1.put("STOP")
queue2.put("STOP")
agent_generator.join()
agent_critic.join()
print("Streaming multi-agent system run complete.")

Async multi-agent system ›
Use async for scalable, non-blocking multi-agent interactions in Python applications.
import os
import asyncio
from openai import OpenAI
# Synchronous OpenAI client; its blocking calls must not be awaited
# directly (see AsyncAgent below). Raises KeyError at import time if
# OPENAI_API_KEY is unset.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
class AsyncAgent:
    """Agent whose LLM call is awaitable, for use inside asyncio code."""

    def __init__(self, name):
        self.name = name  # identifies the agent in its prompts

    async def process_message(self, message):
        """Ask the model to reply to *message*; return the reply text.

        The openai>=1.0 sync client has no ``acreate`` method (awaiting
        ``client.chat.completions.acreate`` raises AttributeError), so
        run the blocking ``create`` call in a worker thread to keep the
        event loop free. (Alternatively, construct the module-level
        client with ``AsyncOpenAI`` and await ``create`` directly.)
        """
        prompt = f"Agent {self.name} received: {message}\nRespond concisely."
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
async def main():
    """Run one Generator -> Critic exchange and print both replies."""
    generator = AsyncAgent("Generator")
    critic = AsyncAgent("Critic")
    # The critic's input is whatever the generator just produced.
    idea = await generator.process_message("Generate an innovative product idea.")
    print(f"Generator: {idea}")
    critique = await critic.process_message(idea)
    print(f"Critic: {critique}")
asyncio.run(main())

Multi-agent system with Claude 3.5 Sonnet ›
Use Claude 3.5 Sonnet for higher coding and reasoning quality in multi-agent setups.
import os
import anthropic
# Anthropic client for the Claude variant; raises KeyError at import
# time if ANTHROPIC_API_KEY is unset.
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
class ClaudeAgent:
    """Minimal agent backed by the Anthropic Messages API."""

    def __init__(self, name):
        self.name = name  # woven into every prompt this agent sends

    def process_message(self, message):
        """Return Claude's concise reply to *message*."""
        request = f"You are Agent {self.name}. Respond concisely to: {message}"
        reply = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=200,
            system="You are a helpful assistant.",
            messages=[{"role": "user", "content": request}],
        )
        # The Messages API returns a list of content blocks; the text of
        # the first block is the reply.
        return reply.content[0].text
# Pipeline: the Generator proposes an idea, the Critic reviews it.
agent1, agent2 = ClaudeAgent("Generator"), ClaudeAgent("Critic")
idea = agent1.process_message("Generate an innovative product idea.")
print(f"Generator: {idea}")
# Feed the generator's idea straight to the critic.
critique = agent2.process_message(idea)
print(f"Critic: {critique}")

Performance
Latency: ~800ms per call for gpt-4o non-streaming
Cost: ~$0.002 per 500 tokens exchanged on gpt-4o
Rate limits: Tier 1: 500 requests per minute / 30,000 tokens per minute
- Keep prompts concise to reduce token usage
- Reuse context between agent messages to avoid repeating info
- Limit max_tokens parameter to control response length
| Approach | Latency | Cost/call | Best for |
|---|---|---|---|
| Synchronous threading | ~800ms | ~$0.002 | Simple multi-agent demos |
| Streaming | Starts ~300ms, ongoing | ~$0.002 | Real-time partial responses |
| Async | ~800ms | ~$0.002 | Scalable concurrent agents |
| Claude 3.5 Sonnet | ~900ms | ~$0.0025 | High-quality reasoning and code |
Quick tip
Design agents with clear input/output message formats and use queues or async calls for smooth communication.
Common mistake
Beginners often block the main thread by running agents synchronously instead of using threading or async.