How to build a multi-agent system in Python
Direct answer
Build a multi-agent system in Python by creating multiple agent classes that communicate via messages and coordinate tasks, often using LLMs like
gpt-4o for reasoning and interaction.
Setup
Install
pip install openai
Env vars
OPENAI_API_KEY
Imports
import os
from openai import OpenAI
import threading
import queue
Examples
Input: Start two agents: one to generate ideas, another to critique them.
Output: Agent1: 'Idea: Build a chatbot.'\nAgent2: 'Critique: Needs more personalization.'
Input: Three agents collaborate: planner, executor, and evaluator.
Output: Planner: 'Plan task steps.'\nExecutor: 'Executing step 1.'\nEvaluator: 'Step 1 successful.'
Input: Edge case: Agents handle empty messages gracefully.
Output: Agent1: 'No input received.'\nAgent2: 'Waiting for valid message.'
Integration steps
- Set up environment with API key in os.environ
- Define agent classes with message queues for communication
- Implement each agent's logic using OpenAI client calls
- Use threading or async to run agents concurrently
- Exchange messages between agents via queues
- Collect and print final outputs from agents
Full code
import os
from openai import OpenAI
import threading
import queue
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
class Agent(threading.Thread):
    """Worker thread that answers each inbox message with a chat-model reply.

    Every message taken from ``inbox`` is passed to ``process_message`` and the
    reply is put on ``outbox``. Putting the string ``"STOP"`` on the inbox ends
    the thread.

    Args:
        name: Agent label; stored as the thread name and used in the prompt.
        inbox: Queue this agent reads messages from.
        outbox: Queue that receives each reply, or ``None`` to discard replies.
    """

    def __init__(self, name, inbox, outbox):
        super().__init__()
        self.name = name
        self.inbox = inbox
        self.outbox = outbox
        self.running = True

    def run(self):
        """Poll the inbox until ``"STOP"`` arrives, replying to other messages."""
        while self.running:
            try:
                # The timeout lets the loop re-check ``self.running`` regularly.
                message = self.inbox.get(timeout=1)
            except queue.Empty:
                continue
            if message == "STOP":
                self.running = False
                break
            response = self.process_message(message)
            # Test for None explicitly so a sink object that happens to be
            # falsy (e.g. an empty container type) is not silently skipped.
            if self.outbox is not None:
                self.outbox.put(response)

    def process_message(self, message):
        """Send *message* to the chat model and return the reply text.

        Relies on the module-level ``client`` created next to the imports.
        """
        prompt = f"Agent {self.name} received: {message}\nRespond concisely."
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
import time


def _report_pending(mailbox, label):
    """Print the message currently waiting in *mailbox*, then hand it back.

    ``get_nowait`` removes the item from the queue. The queues polled here are
    the agents' own inboxes, so the message is re-queued; otherwise the agent
    it was addressed to would never receive it and the exchange would stop.
    """
    try:
        msg = mailbox.get_nowait()
    except queue.Empty:
        return
    print(f"{label}: {msg}")
    mailbox.put(msg)


# Setup communication queues: each agent's outbox is the other agent's inbox.
queue1 = queue.Queue()
queue2 = queue.Queue()

# Create two agents: Generator and Critic
agent_generator = Agent("Generator", queue1, queue2)
agent_critic = Agent("Critic", queue2, queue1)

# Start agents
agent_generator.start()
agent_critic.start()

try:
    # Send initial message to generator
    queue1.put("Generate an innovative product idea.")

    # Run for a few message exchanges, reporting whatever is in flight.
    for _ in range(3):
        time.sleep(2)
        _report_pending(queue1, "Generator received critique")
        _report_pending(queue2, "Critic received idea")
finally:
    # Stop agents even if the loop above is interrupted; the threads are not
    # daemons and would otherwise keep exchanging messages indefinitely.
    queue1.put("STOP")
    queue2.put("STOP")
    agent_generator.join()
    agent_critic.join()
print("Multi-agent system run complete.")
Output
Critic received idea: Idea: Build a chatbot that adapts to user emotions.
Generator received critique: Critique: Consider adding voice interaction for accessibility.
Critic received idea: Idea: Develop a virtual assistant for scheduling.
Multi-agent system run complete.
API trace
Request
{"model": "gpt-4o", "messages": [{"role": "user", "content": "Agent Generator received: Generate an innovative product idea.\nRespond concisely."}]}
Response
{"choices": [{"message": {"content": "Idea: Build a chatbot that adapts to user emotions."}}], "usage": {"total_tokens": 50}}
Extract
response.choices[0].message.content
Variants
Streaming multi-agent system ›
Use streaming when you want real-time partial responses from agents for better UX.
import os
from openai import OpenAI
import threading
import queue
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
class StreamingAgent(threading.Thread):
    """Agent thread that echoes each model reply to stdout as it streams in.

    Every message taken from ``inbox`` is sent to the chat model with
    ``stream=True``; the text is printed chunk by chunk and the full reply is
    put on ``outbox``. Putting the string ``"STOP"`` on the inbox ends the
    thread.

    Args:
        name: Agent label; stored as the thread name and used in the prompt.
        inbox: Queue this agent reads messages from.
        outbox: Queue that receives each full reply, or ``None`` to discard it.
    """

    def __init__(self, name, inbox, outbox):
        super().__init__()
        self.name = name
        self.inbox = inbox
        self.outbox = outbox
        self.running = True

    def run(self):
        """Poll the inbox until ``"STOP"`` arrives, streaming a reply to each message."""
        while self.running:
            try:
                # The timeout lets the loop re-check ``self.running`` regularly.
                message = self.inbox.get(timeout=1)
            except queue.Empty:
                continue
            if message == "STOP":
                self.running = False
                break
            prompt = f"Agent {self.name} received: {message}\nRespond concisely."
            response_stream = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                stream=True,
            )
            reply = self._collect_stream(response_stream)
            if self.outbox is not None:
                self.outbox.put(reply)

    def _collect_stream(self, response_stream):
        """Print streamed text as it arrives and return the concatenated reply."""
        # Print the label once; repeating it per chunk interleaves it with the text.
        print(f"{self.name} streaming: ", end="", flush=True)
        collected = []
        for chunk in response_stream:
            # Some chunks carry no choices; there is no text to read from them.
            if not chunk.choices:
                continue
            # ``delta`` is an object, not a dict, and ``content`` may be None.
            delta = chunk.choices[0].delta.content or ""
            print(delta, end="", flush=True)
            collected.append(delta)
        print()
        return "".join(collected)
import time

# Each agent's outbox is the other agent's inbox, so replies bounce between them.
queue1 = queue.Queue()
queue2 = queue.Queue()
agent_generator = StreamingAgent("Generator", queue1, queue2)
agent_critic = StreamingAgent("Critic", queue2, queue1)
agent_generator.start()
agent_critic.start()

try:
    queue1.put("Generate an innovative product idea.")
    # Let the agents exchange messages; they print their own streamed output.
    for _ in range(3):
        time.sleep(2)
finally:
    # Stop agents even if the wait is interrupted; the threads are not daemons
    # and would otherwise keep exchanging messages indefinitely.
    queue1.put("STOP")
    queue2.put("STOP")
    agent_generator.join()
    agent_critic.join()
print("Streaming multi-agent system run complete.")
Async multi-agent system ›
Use async for scalable, non-blocking multi-agent interactions in Python applications.
import asyncio
import os

from openai import AsyncOpenAI

# The awaitable API lives on the async client; the synchronous ``OpenAI``
# client has no ``acreate`` method to await.
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])


class AsyncAgent:
    """Agent that answers a message with one awaited chat-completion call.

    Args:
        name: Agent label inserted into the prompt.
    """

    def __init__(self, name):
        self.name = name

    async def process_message(self, message):
        """Send *message* to the chat model and return the reply text."""
        prompt = f"Agent {self.name} received: {message}\nRespond concisely."
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
async def main():
    """Run one generate-then-critique round and print both replies."""
    generator = AsyncAgent("Generator")
    critic = AsyncAgent("Critic")

    idea = await generator.process_message("Generate an innovative product idea.")
    print(f"Generator: {idea}")

    # The critic's input is the generator's reply.
    critique = await critic.process_message(idea)
    print(f"Critic: {critique}")
asyncio.run(main())
Multi-agent system with Claude 3.5 Sonnet ›
Use Claude 3.5 Sonnet for higher coding and reasoning quality in multi-agent setups.
import os
import anthropic
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
class ClaudeAgent:
    """Agent that answers a message with a single Anthropic Messages API call.

    Args:
        name: Agent label inserted into the prompt.
        model: Model identifier passed to the API.
        max_tokens: Value passed as ``max_tokens`` for each reply.
    """

    def __init__(self, name, model="claude-3-5-sonnet-20241022", max_tokens=200):
        self.name = name
        self.model = model
        self.max_tokens = max_tokens

    def process_message(self, message):
        """Send *message* to the model and return the reply text.

        Relies on the module-level ``client`` created next to the imports.
        """
        prompt = f"You are Agent {self.name}. Respond concisely to: {message}"
        response = client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system="You are a helpful assistant.",
            messages=[{"role": "user", "content": prompt}],
        )
        # The reply text is read from the first content block of the response.
        return response.content[0].text
# One generate-then-critique round: the critic's input is the generator's reply.
agent1, agent2 = ClaudeAgent("Generator"), ClaudeAgent("Critic")

idea = agent1.process_message("Generate an innovative product idea.")
print(f"Generator: {idea}")

critique = agent2.process_message(idea)
print(f"Critic: {critique}")
Performance
Latency: ~800ms per call for gpt-4o non-streaming
Cost: ~$0.002 per 500 tokens exchanged on gpt-4o
Rate limits: Tier 1: 500 requests per minute / 30,000 tokens per minute
- Keep prompts concise to reduce token usage
- Reuse context between agent messages to avoid repeating info
- Limit max_tokens parameter to control response length
| Approach | Latency | Cost/call | Best for |
|---|---|---|---|
| Synchronous threading | ~800ms | ~$0.002 | Simple multi-agent demos |
| Streaming | Starts ~300ms, ongoing | ~$0.002 | Real-time partial responses |
| Async | ~800ms | ~$0.002 | Scalable concurrent agents |
| Claude 3.5 Sonnet | ~900ms | ~$0.0025 | High-quality reasoning and code |
Quick tip
Design agents with clear input/output message formats and use queues or async calls for smooth communication.
Common mistake
Beginners often block the main thread by running agents synchronously instead of using threading or async.
Community Notes
No notes yet. Be the first to share a version-specific fix or tip.