Research & Analysis with CrewAI
Building intelligent research pipelines where specialized agents gather, analyze, and synthesize information from diverse sources to produce comprehensive reports.
Overview
Multi-agent research systems leverage different agent specializations: researchers gather data, analysts evaluate quality, and writers synthesize findings. This separation of concerns ensures thorough, accurate research outputs.
Agent Specializations
- Research Agent: Searches and collects information from APIs, databases, web sources
- Analysis Agent: Evaluates data quality, identifies patterns, and validates findings
- Synthesis Agent: Combines findings into cohesive narratives and actionable insights
- Review Agent: Quality checks and ensures completeness before delivery
Basic Research Crew Setup
Python - CrewAI Research Pipeline
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool

# Initialize tools shared by the agents below.
# NOTE(review): the original example also created a FileReadTool that was
# never attached to any agent; it has been dropped. Re-add it to an agent's
# `tools` list if file reading is actually needed.
search_tool = SerperDevTool()

# --- Agents ---------------------------------------------------------------
# Each agent has a narrow specialization; only the researcher gets the
# search tool so the analyst and writer work purely from upstream output.
researcher = Agent(
    role="Research Analyst",
    goal="Find factual, current information on the given topic",
    backstory="Expert researcher with 10 years of experience in data gathering",
    tools=[search_tool],
    llm="gpt-4"
)
analyst = Agent(
    role="Data Analyst",
    goal="Evaluate research findings and identify patterns",
    backstory="Skilled data analyst who ensures quality and relevance",
    tools=[],
    llm="gpt-4"
)
writer = Agent(
    role="Technical Writer",
    goal="Create clear, comprehensive reports from analyzed data",
    backstory="Professional writer specializing in technical topics",
    tools=[],
    llm="gpt-4"
)

# --- Tasks ----------------------------------------------------------------
# {topic} placeholders are interpolated from the `inputs` dict at kickoff.
research_task = Task(
    description="Research the latest developments in {topic}",
    agent=researcher,
    expected_output="Detailed list of findings with sources"
)
analysis_task = Task(
    description="Analyze the research findings and extract key insights",
    agent=analyst,
    expected_output="Summary of patterns and key findings"
)
writing_task = Task(
    description="Write a comprehensive report based on analysis",
    agent=writer,
    expected_output="Well-structured technical report"
)

# --- Crew -----------------------------------------------------------------
# Sequential process: each task's output feeds the next task in list order.
# Uses the Process enum for consistency with the hierarchical example below.
crew = Crew(
    agents=[researcher, analyst, writer],
    tasks=[research_task, analysis_task, writing_task],
    verbose=True,
    process=Process.sequential
)
result = crew.kickoff(inputs={"topic": "Autonomous AI Agents 2025"})
Advanced Crew with Memory
Python - Crew with Long-Term Memory
from crewai import Agent, Task, Crew, Memory
from crewai_tools import SerperDevTool  # fixed: DatabaseTool was imported but never used

# Long-term memory store so findings persist across kickoff() sessions.
# NOTE(review): recent CrewAI releases configure memory via `Crew(memory=True)`
# plus embedder settings rather than a standalone Memory object — confirm
# this API against the installed crewai version.
memory = Memory(
    memory_type="long_term",
    max_memory_items=100,
    embedding_model="openai",
)

# Agent with context awareness: shares the memory store with the crew,
# may delegate subtasks, and retries failed calls up to 3 times.
researcher = Agent(
    role="Research Analyst",
    goal="Find and track research across multiple sessions",
    backstory="Expert with institutional knowledge",
    tools=[SerperDevTool()],
    memory=memory,
    allow_delegation=True,
    max_retries=3,
    llm="gpt-4"
)

# Task that explicitly instructs the agent to consult memory, so repeated
# runs focus on what is new since the last session.
research_task = Task(
    description="""Research {topic}.
Remember previous findings on related topics from memory.
Focus on new developments since last research.""",
    agent=researcher,
    expected_output="Updated findings with novelty assessment"
)

crew = Crew(
    agents=[researcher],
    tasks=[research_task],
    memory=memory,
    verbose=True
)

# Context from previous research sessions is available via the shared memory.
result = crew.kickoff(inputs={"topic": "Machine Learning Trends"})
Hierarchical Research Crew
Python - Hierarchical Process with Manager
from crewai import Agent, Task, Crew, Process

# Manager agent delegates and coordinates the worker agents.
# Per CrewAI's hierarchical-process documentation, the agent passed as
# `manager_agent` must NOT also appear in the crew's `agents` list.
manager = Agent(
    role="Research Manager",
    goal="Coordinate research efforts and ensure quality",
    backstory="Senior research director with 20 years experience",
    tools=[],
    llm="gpt-4"
)

# Specialized worker researchers the manager delegates to.
market_researcher = Agent(
    role="Market Researcher",
    goal="Research market trends and competitive landscape",
    backstory="Specializes in market analysis",
    tools=[SerperDevTool()],
    llm="gpt-4"
)
technical_researcher = Agent(
    role="Technical Researcher",
    goal="Research technical specifications and capabilities",
    backstory="Deep technical expertise",
    tools=[SerperDevTool()],
    llm="gpt-4"
)

# Research tasks; {company} is interpolated from `inputs` at kickoff.
market_task = Task(
    description="Analyze market position of {company}",
    agent=market_researcher,
    expected_output="Market analysis report"
)
tech_task = Task(
    description="Analyze technical capabilities of {company}",
    agent=technical_researcher,
    expected_output="Technical capabilities report"
)
# fixed: the synthesis task was assigned to the manager itself; in a
# hierarchical process tasks are delegated BY the manager, so leave the
# task unassigned and let the manager route it.
synthesis_task = Task(
    description="Combine market and technical insights into executive summary",
    expected_output="Executive summary report"
)

# Hierarchical process: the manager assigns tasks and reviews outputs.
crew = Crew(
    agents=[market_researcher, technical_researcher],  # manager excluded by design
    tasks=[market_task, tech_task, synthesis_task],
    manager_agent=manager,
    process=Process.hierarchical,
    verbose=True
)
result = crew.kickoff(inputs={"company": "TechCorp"})
Custom Research Tools
Python - Tool Definition
from crewai_tools import BaseTool
from typing import Type
from pydantic import BaseModel, Field
class ApiSearchInput(BaseModel):
    """Argument schema for CustomApiSearchTool; CrewAI validates tool calls against it."""
    query: str = Field(..., description="Search query")
    max_results: int = Field(default=10, description="Maximum results to return")
class CustomApiSearchTool(BaseTool):
    """Search a proprietary internal API and return agent-readable results.

    Subclasses CrewAI's BaseTool; calls are validated against ApiSearchInput
    before the framework invokes `_run`.
    """
    name: str = "custom_api_search"
    description: str = "Search custom internal APIs for research data"
    args_schema: Type[BaseModel] = ApiSearchInput

    def _run(self, query: str, max_results: int = 10) -> str:
        """Entry point invoked by the agent framework for each tool call."""
        results = self.query_internal_api(query, max_results)
        return self.format_results(results)

    def query_internal_api(self, query: str, limit: int) -> list:
        """Fetch up to `limit` raw records matching `query` from the internal source.

        Raises NotImplementedError until wired to the real database/API —
        the original `pass` stub silently returned None, which would have
        propagated a confusing None result to the agent.
        """
        raise NotImplementedError("connect this to your internal database or API")

    def format_results(self, results: list) -> str:
        """Render raw records as a text summary the LLM agent can consume."""
        raise NotImplementedError("format results for agent consumption")
# Use the custom tool in an agent.
# fixed: `backstory` is a required Agent field in CrewAI; omitting it fails
# pydantic validation at Agent construction time.
researcher = Agent(
    role="Internal Researcher",
    goal="Research using company data",
    backstory="Veteran analyst with deep knowledge of internal data systems",
    tools=[CustomApiSearchTool()],
    llm="gpt-4"
)
Error Handling & Retries
Python - Robust Research Workflow
from crewai import Agent, Task, Crew  # fixed: `Task` was imported twice
from crewai_tools import SerperDevTool
import asyncio

# Agent with retry configuration.
# fixed: `backstory` is a required Agent field and was missing.
researcher = Agent(
    role="Research Analyst",
    goal="Research the topic thoroughly",
    backstory="Persistent researcher who retries and delegates when blocked",
    tools=[SerperDevTool()],
    max_retries=3,                 # retry failed LLM/tool calls up to 3 times
    allow_delegation=True,         # may hand subtasks to other agents
    function_calling_llm="gpt-4"   # dedicated LLM for formatting tool calls
)

# Task with a completion callback for monitoring.
research_task = Task(
    description="Research {topic} comprehensively",
    agent=researcher,
    expected_output="Detailed research report",
    callback=lambda result: on_task_complete(result),
)

def on_task_complete(result):
    """Callback fired when the task completes.

    `result` is a CrewAI task output object.
    NOTE(review): attribute names vary by crewai version — newer releases
    expose `result.raw` / `result.pydantic` rather than `pydantic_object`;
    confirm against the installed version.
    """
    print(f"Task completed: {result.description}")
    print(f"Output: {result.pydantic_object}")

async def run_research():
    """Run the crew asynchronously, returning None on failure."""
    crew = Crew(
        agents=[researcher],
        tasks=[research_task],
        verbose=True
    )
    try:
        result = await crew.kickoff_async(
            inputs={"topic": "Quantum Computing"}
        )
        return result
    except Exception as e:
        # Best-effort: report the failure and return None instead of
        # crashing the caller; slot real fallback logic in here.
        print(f"Research failed: {e}")
        return None

# Execute the async pipeline from synchronous code.
result = asyncio.run(run_research())
Best Practices
- Clearly define agent roles and specialized expertise
- Use sequential process for dependent research phases
- Implement tool-use constraints to prevent hallucinations
- Add memory systems for cross-session learning
- Monitor agent callbacks for debugging
- Set appropriate max_retries for fault tolerance
- Test crew workflows with realistic inputs