|
from config import OPENAI_MODELS, COHERE_MODELS, GROQ_MODELS, MAX_TOKENS_BASE, MAX_TOKENS_ADVANCED |
|
|
|
import os |
|
import gradio as gr |
|
import requests |
|
import logging |
|
|
|
from openai import AzureOpenAI, OpenAI |
|
from cohere import ClientV2 |
|
|
|
from crewai import Agent, Task, Crew, Process, LLM |
|
from crewai_tools import tool |
|
from duckduckgo_search import DDGS |
|
from newspaper import Article |
|
import fitz |
|
from io import BytesIO, StringIO |
|
import sys |
|
|
|
import threading |
|
import queue |
|
import time |
|
|
|
|
|
# Module-level logger used throughout this file. basicConfig installs a
# default console handler so records are visible even if setup_logging()
# is never called.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
def setup_logging():
    """Configure and return the module logger for console output.

    Clears any pre-existing handlers, then attaches a single stdout
    StreamHandler with a compact timestamped format.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    # Drop stale handlers so repeated calls don't duplicate log lines.
    if log.hasHandlers():
        log.handlers.clear()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s', datefmt='%H:%M:%S')
    )
    log.addHandler(stream_handler)

    return log
|
|
|
|
|
# Global mutable state shared with the tool functions below.
# TOKENS_SUMMARIZATION accumulates tokens billed by the summarization calls
# made inside web_scrapper; MODEL_CHOICE mirrors the provider selected in
# the UI (set by kickoff_crew) so the tools use the same backend.
TOKENS_SUMMARIZATION = 0
MODEL_CHOICE = "openai"
|
|
|
def export_to_markdown(result):
    """Persist the final report text to outputs/output.md.

    - result: markdown string to write.

    Returns the written file path on success, or an error message string
    on failure (the Gradio File component displays either).
    """
    try:
        # BUG FIX: create the target directory if missing — previously the
        # open() call failed outright when ./outputs did not exist.
        os.makedirs("outputs", exist_ok=True)
        # Explicit encoding avoids platform-dependent default codecs.
        with open("outputs/output.md", "w", encoding="utf-8") as file:
            file.write(result)
        return "outputs/output.md"
    except Exception as e:
        logger.error("Error exporting to markdown: %s", str(e))
        return f"Error exporting: {e}"
|
|
|
def fetch_content(url):
    """
    Retrieve the textual content behind a URL.

    PDFs (detected via the Content-Type response header) are parsed page by
    page with PyMuPDF; anything else is treated as a web article and parsed
    with newspaper. On failure an error string is returned instead of raising.

    - url: The URL to fetch the content from.
    """
    try:
        # A HEAD request is enough to inspect the Content-Type cheaply.
        head = requests.head(url, allow_redirects=True, timeout=10)
        mime = head.headers.get('Content-Type', '').lower()

        if 'application/pdf' not in mime:
            # Regular web page: delegate download and parsing to newspaper.
            article = Article(url)
            article.download()
            article.parse()
            return article.text

        # PDF branch: download the full document and extract text per page.
        pdf_response = requests.get(url, stream=True, timeout=10)
        pdf_response.raise_for_status()

        extracted = []
        with fitz.open(stream=BytesIO(pdf_response.content), filetype="pdf") as doc:
            for page_num, page in enumerate(doc, start=1):
                page_text = page.get_text()
                if page_text:
                    extracted.append(page_text)
                else:
                    logger.warning(f"Unable to extract text from page {page_num} of the PDF.")
        return "".join(extracted).strip()

    except requests.exceptions.RequestException as req_err:
        logger.error("Error in the HTTP request: %s", str(req_err))
        return f"Error in the HTTP request: {req_err}"
    except Exception as e:
        logger.error("Error getting the content: %s", str(e))
        return f"Error getting the content: {e}"
|
|
|
|
|
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results with this structure:
    - title: The title of the search result.
    - snippet: A short snippet of the search result.
    - link: The link to the search result.
    """
    # NOTE: the docstring above doubles as the tool description shown to the
    # agent, so its wording is kept as-is.
    try:
        hits = DDGS().text(search_query, max_results=5, timelimit='m')
        formatted = []
        for hit in hits:
            formatted.append({
                "title": hit['title'],
                "snippet": hit['body'],
                "link": hit['href'],
            })
        return formatted
    except Exception as e:
        logger.error("Error performing search: %s", str(e))
        return []
|
|
|
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Extract and read the content of a specified link and generate a summary on a specific topic.
    - url: The URL to extract the content from.
    - topic: String with the topic to generate a summary on.
    """
    global TOKENS_SUMMARIZATION

    try:
        content = fetch_content(url)
        prompt = f"""
        # OBJECTIVE
        Generate an in-depth summary of the following CONTENT on the topic "{topic}"

        # INSTRUCTIONS
        - Provide in-depth insights based on the following CONTENT.
        - If the following CONTENT is not directly related to the topic "{topic}", you MUST respond with INVALID CONTENT.
        - Include insights about why the content is important for the topic, possible challenges and advances...
        - The format will be markdown.
        - Avoid making up anything. Every insight MUST be based on the content.

        # CONTENT:
        "{content}"
        """

        context_messages = [
            {
                "role": "system",
                "content": "You are an expert summarizing content for use as context. Focus on the main points."
            },
            {
                "role": "user",
                "content": str(prompt)
            }
        ]

        # Each provider has a different client AND a different response
        # shape, so the summary/token extraction happens per branch.
        if MODEL_CHOICE == "azure":
            client = AzureOpenAI(
                azure_endpoint=os.getenv('AZURE_API_BASE'),
                azure_deployment=os.getenv('AZURE_DEPLOYMENT_ID'),
                api_key=os.getenv('AZURE_OPENAI_KEY'),
                api_version=os.getenv('AZURE_API_VERSION')
            )
            response = client.chat.completions.create(
                model=os.getenv('AZURE_DEPLOYMENT_ID'),
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens

        elif MODEL_CHOICE == "openai":
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            response = client.chat.completions.create(
                model=OPENAI_MODELS['base'],
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens

        elif MODEL_CHOICE == "groq-cohere":
            client = ClientV2(api_key=os.getenv('COHERE_API_KEY'))
            response = client.chat(
                model=COHERE_MODELS['advanced'],
                messages=context_messages,
                max_tokens=1000
            )
            # BUG FIX: Cohere's ClientV2 chat response has no `.choices`
            # attribute; the text lives in `response.message.content[0].text`.
            # The previous OpenAI-style access raised AttributeError and made
            # this branch always return the error summary.
            summary = response.message.content[0].text
            TOKENS_SUMMARIZATION += (response.usage.billed_units.input_tokens
                                     + response.usage.billed_units.output_tokens)
        else:
            # BUG FIX: the old message omitted 'groq-cohere' as a valid choice.
            return "Error: Invalid model choice. Please select 'azure', 'openai' or 'groq-cohere'."

        summary_response = f"""<article_summary>
# SUMMARY:
{summary}

# URL: {url}
</article_summary>
"""

        # Crude rate limiting: the free Groq/Cohere tiers throttle quickly.
        if MODEL_CHOICE == "groq-cohere":
            time.sleep(10)

        return summary_response

    except Exception as e:
        logger.error("Error generating summary: %s", str(e))
        # Return a well-formed placeholder so the agent can keep working
        # with the other sources instead of crashing the task.
        return f"""<article_summary>
# SUMMARY:
Error generating summary.
IGNORE THIS OUTPUT.

# URL: {url}
</article_summary>
"""
|
|
|
|
|
def capture_verbose_output(
    agent_input,
    model_choice,
    azure_openai_key,
    azure_deployment_id,
    azure_api_base,
    azure_api_version,
    openai_api_key,
    cohere_api_key,
    groq_api_key
):
    """
    Generator that streams stdout produced by the multi-agent process in
    real time, updating the Gradio interface with logs, then yields the
    final result once the run completes.

    Yields (result_markdown_update, verbose_log_update) tuples for the two
    output components wired up in main().
    """
    # Redirect stdout into a buffer so the crew's verbose prints can be
    # polled and mirrored into the UI. NOTE(review): this redirection is
    # process-global, so concurrent runs would interleave logs.
    old_stdout = sys.stdout
    mystdout = StringIO()
    sys.stdout = mystdout

    # Single-slot container lets the worker thread hand back its result.
    result_container = [None]

    def run_kickoff():
        # Runs in a worker thread so this generator can poll stdout meanwhile.
        result_container[0] = kickoff_crew(
            topic=agent_input,
            model_choice=model_choice,
            azure_openai_key=azure_openai_key,
            azure_deployment_id=azure_deployment_id,
            azure_api_base=azure_api_base,
            azure_api_version=azure_api_version,
            openai_api_key=openai_api_key,
            cohere_api_key=cohere_api_key,
            groq_api_key=groq_api_key
        )

    kickoff_thread = threading.Thread(target=run_kickoff)
    kickoff_thread.start()

    verbose_output = ""
    result_output = ""

    try:
        # Initial empty update so the UI clears previous content immediately.
        yield gr.update(value=result_output), gr.update(value=verbose_output)

        while kickoff_thread.is_alive():
            new_output = mystdout.getvalue()
            if new_output != verbose_output:
                verbose_output = new_output
                yield gr.update(value=result_output), gr.update(value=verbose_output)
            time.sleep(0.1)

        kickoff_thread.join()
    finally:
        # BUG FIX: always restore stdout, even when the generator is closed
        # early (e.g. the Gradio request is cancelled) or an error escapes;
        # previously a cancelled run left sys.stdout pointing at the buffer
        # for the rest of the process lifetime.
        sys.stdout = old_stdout

    result_output = result_container[0]
    verbose_output = mystdout.getvalue()
    yield gr.update(value=result_output), gr.update(value=verbose_output)
|
|
|
|
|
def kickoff_crew(
    topic: str,
    model_choice: str,
    azure_openai_key: str,
    azure_deployment_id: str,
    azure_api_base: str,
    azure_api_version: str,
    openai_api_key: str,
    cohere_api_key: str,
    groq_api_key: str
) -> str:
    """
    Kick off the multi-agent pipeline.

    Validates the topic and the credentials for the selected provider,
    builds a base/advanced LLM pair for that provider, wires up the
    Researcher and Editor agents with their tasks, runs them sequentially,
    and returns the final report with token-usage estimates appended.
    On any failure an error message string is returned instead of raising.
    """
    try:
        global TOKENS_SUMMARIZATION, MODEL_CHOICE

        # Reset the per-run token counter and publish the provider choice so
        # the web_scrapper tool (which reads these globals) uses the same
        # backend as the agents.
        TOKENS_SUMMARIZATION = 0
        MODEL_CHOICE = model_choice

        if not topic.strip():
            return "Error: The topic cannot be empty. Please provide a valid topic."

        # One base (cheap, tool-driving) and one advanced (editing) LLM per
        # provider; only the selected provider's pair gets instantiated.
        azure_llm_base = None
        azure_llm_advanced = None
        openai_llm_base = None
        openai_llm_advanced = None
        groq_llm_base = None
        groq_llm_advanced = None

        if model_choice == "azure":
            if not azure_openai_key or not azure_deployment_id or not azure_api_base or not azure_api_version:
                return "Error: Please provide all the required Azure OpenAI API details."
            else:
                # Exported to the environment because web_scrapper builds its
                # own AzureOpenAI client from these variables.
                os.environ['AZURE_API_BASE'] = azure_api_base
                os.environ['AZURE_API_VERSION'] = azure_api_version
                os.environ['AZURE_DEPLOYMENT_ID'] = azure_deployment_id
                os.environ['AZURE_OPENAI_KEY'] = azure_openai_key

                azure_llm_base = LLM(
                    temperature=0.3,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=4000
                )
                azure_llm_advanced = LLM(
                    temperature=0.6,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=10000
                )
        elif model_choice == "openai":
            if not openai_api_key:
                return "Error: Please provide the OpenAI API key."
            else:
                os.environ['OPENAI_API_KEY'] = openai_api_key

                openai_llm_base = LLM(
                    model=OPENAI_MODELS['base'],
                    api_key=openai_api_key,
                    max_completion_tokens=4000
                )
                openai_llm_advanced = LLM(
                    model=OPENAI_MODELS['advanced'],
                    api_key=openai_api_key,
                    temperature=0.4,
                    max_completion_tokens=10000
                )
        elif model_choice == "groq-cohere":
            if not cohere_api_key or not groq_api_key:
                return "Error: Please provide both the Cohere and GROQ API keys."
            else:
                # COHERE_API_KEY is consumed by web_scrapper's ClientV2.
                os.environ['COHERE_API_KEY'] = cohere_api_key
                os.environ['GROQ_API_KEY'] = groq_api_key

                # Lower max_tokens here — free-tier Groq limits are tight.
                groq_llm_base = LLM(
                    model=GROQ_MODELS['base'],
                    api_key=groq_api_key,
                    temperature=0.3,
                    max_tokens=1000
                )
                groq_llm_advanced = LLM(
                    model=GROQ_MODELS['advanced'],
                    api_key=groq_api_key,
                    temperature=0.6,
                    max_tokens=4000
                )

        llms = {
            "azure": {
                "base": azure_llm_base,
                "advanced": azure_llm_advanced
            },
            "openai": {
                "base": openai_llm_base,
                "advanced": openai_llm_advanced
            },
            "groq-cohere": {
                "base": groq_llm_base,
                "advanced": groq_llm_advanced
            }
        }

        if model_choice not in llms:
            return f"Error: Invalid model choice. Please select from {list(llms.keys())}."

        selected_llm = llms[model_choice]

        # Researcher: drives the search + scraping tools with the base model.
        researcher = Agent(
            role='Researcher',
            goal=f'Search and collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm["base"],
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract "
                "essential insights on any given topic. Your dedication to detail ensures the reliability and "
                "thoroughness of your findings."
            ),
            allow_delegation=False,
            max_iter=15,
            # Throttle requests hard on the rate-limited free tier.
            max_rpm=5 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # Editor: turns the research draft into the final report using the
        # advanced model; no tools needed.
        editor = Agent(
            role='Editor',
            goal=f'Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm["advanced"],
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys "
                "essential insights but is also easily understandable to diverse audiences."
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=10 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # NOTE: the literal "{topic}" placeholders in task descriptions are
        # interpolated by CrewAI from the kickoff inputs dict below.
        research_task = Task(
            description=(
                "Be sure to translate the topic into English first. "
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new searches related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to extract information from significant URLs, "
                "extracting further insights. Compile these findings into a preliminary draft, documenting all "
                "relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body, "
                "and a conclusion. Properly cite sources. Provide a thorough overview of the info gathered."
            ),
            agent=researcher
        )

        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically. "
                "Elaborate on each section to provide in-depth information and insights. "
                "Verify the accuracy of all data, correct discrepancies, update info to ensure currency, "
                "and maintain a consistent tone. Include a section listing all sources used, formatted as bullet points."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative. "
                "Include an introduction, an extensive discussion, a concise conclusion, and a source list with references."
            ),
            agent=editor,
            # The editor consumes the researcher's draft as context.
            context=[research_task]
        )

        # Sequential process: research first, then editing.
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential
        )

        result = crew.kickoff(inputs={'topic': topic})

        # Token tallies in thousands: agents' usage comes from the crew
        # output; summarization usage was accumulated by web_scrapper.
        tokens = result.token_usage.total_tokens / 1_000
        tokens_summ = TOKENS_SUMMARIZATION / 1_000

        # crew.kickoff returns a CrewOutput object; coerce to str for the UI.
        if not isinstance(result, str):
            result = str(result)

        result += f"\n\n**Estimated tokens (Agents):** {tokens:.5f} k"
        result += f"\n\n**Estimated tokens (Summarization):** {tokens_summ:.5f} k"

        return result
    except Exception as e:
        logger.error("Error in kickoff_crew: %s", str(e))
        return f"Error in kickoff_crew: {str(e)}"
|
|
|
def main():
    """Set up the Gradio interface for the CrewAI Research Tool.

    Builds a two-column Blocks layout (provider/credentials on the left,
    topic input and outputs on the right), wires the event handlers, and
    launches the app with a small request queue.
    """

    description_demo = """# Automatic Insights Generation with Multi-Agents (CrewAI)
    - **Multi-agent framework**: CrewAI
    - **Multi-agents**: Two agents, Researcher and Editor, working together to extract information from the internet and compile a report on the topic of choice.
    - **Search tool**: Duck-Duck-Go-Search
    - **Web Retrieval**: Newspaper4k and PDF

    *Note: Groq is currently disabled due to rate limiting issues. Please use Azure or OpenAI for now.*
    """

    with gr.Blocks() as demo:
        gr.Markdown(description_demo)

        with gr.Row():
            # Left column: provider selection, credential inputs, export.
            with gr.Column(scale=1):

                model_choice = gr.Radio(
                    choices=["azure", "openai", "groq-cohere"],
                    label="Choose Model",
                    value="openai",
                    interactive=True
                )

                # Credential fields for every provider are created up front;
                # update_model_choice toggles their visibility.
                azure_api_base_input = gr.Textbox(label="Azure API Base (url)", type="password", visible=False, interactive=True)
                azure_deployment_id_input = gr.Textbox(label="Azure Deployment ID (model)", type="password", visible=False, interactive=True)
                azure_openai_key_input = gr.Textbox(label="Azure API Key", type="password", visible=False, interactive=True)
                azure_api_version_input = gr.Textbox(label="Azure API Version", type="text", visible=False, interactive=True)

                openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password", visible=True, interactive=True)

                # NOTE(review): groq/cohere fields start non-interactive,
                # matching the "Groq disabled" note in the description.
                groq_api_key_input = gr.Textbox(label="GROQ API Key", type="password", visible=False, interactive=False)

                cohere_api_key_input = gr.Textbox(label="Cohere API Key", type="password", visible=False, interactive=False)

                export_button = gr.Button("Export to Markdown", interactive=True)
                file_output = gr.File(label="Download Markdown File")
                credits = gr.Markdown(
                    label="Credits",
                    show_label=True,
                    value="This tool is powered by [CrewAI](https://crewai.com), "
                          "[OpenAI](https://openai.com), "
                          "[Azure OpenAI Services](https://azure.microsoft.com/en-us/products/ai-services/openai-service), "
                          "[Cohere](https://dashboard.cohere.com), and [GROQ](https://console.groq.com/playground).",
                )

            # Right column: topic input, final report, and live logs.
            with gr.Column(scale=2):
                topic_input = gr.Textbox(
                    label="Enter Topic",
                    placeholder="Type here the topic of interest...",
                    interactive=True
                )
                submit_button = gr.Button("Start Research", interactive=True)
                # NOTE(review): "insighsts" typo in the user-facing default
                # text below — left untouched here; fix in a code change.
                output = gr.Markdown(
                    label="Result",
                    show_copy_button=True,
                    value="The generated insighsts will appear here...",
                    latex_delimiters=[
                        {"left": "\\[", "right": "\\]", "display": True},
                        {"left": "\\(", "right": "\\)", "display": False},
                    ]
                )

                verbose_output = gr.Textbox(
                    label="Verbose Output",
                    placeholder="Verbose logs will appear here...",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )

        def update_model_choice(model):
            """Update visibility of config inputs based on the selected LLM."""
            azure_visibility = False
            openai_visibility = False
            cohere_visibility = False
            groq_visibility = False

            # Exactly one provider's credential fields become visible.
            if model == "azure":
                azure_visibility = True
            elif model == "openai":
                openai_visibility = True
            elif model == "groq-cohere":
                cohere_visibility = True
                groq_visibility = True

            return {
                azure_openai_key_input: gr.update(visible=azure_visibility),
                azure_deployment_id_input: gr.update(visible=azure_visibility),
                azure_api_base_input: gr.update(visible=azure_visibility),
                azure_api_version_input: gr.update(visible=azure_visibility),
                openai_api_key_input: gr.update(visible=openai_visibility),
                cohere_api_key_input: gr.update(visible=cohere_visibility),
                groq_api_key_input: gr.update(visible=groq_visibility),
            }

        model_choice.change(
            fn=update_model_choice,
            inputs=[model_choice],
            outputs=[
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ]
        )

        # capture_verbose_output is a generator, so Gradio streams its
        # yields into the two output components as the crew runs.
        submit_button.click(
            fn=capture_verbose_output,
            inputs=[
                topic_input,
                model_choice,
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ],
            outputs=[output, verbose_output]
        )

        export_button.click(
            fn=export_to_markdown,
            inputs=output,
            outputs=file_output
        )

    # Small queue: at most 3 pending runs; the REST API is disabled.
    demo.queue(api_open=False, max_size=3).launch()
|
|
|
|
|
# Script entry point: launch the Gradio app only when run directly.
if __name__ == "__main__":
    main()