|
from config import OPENAI_MODELS, COHERE_MODELS, GROQ_MODELS, MAX_TOKENS_BASE, MAX_TOKENS_ADVANCED |
|
|
|
import os |
|
import gradio as gr |
|
import requests |
|
import logging |
|
|
|
from openai import AzureOpenAI, OpenAI |
|
from cohere import ClientV2 |
|
|
|
from crewai import Agent, Task, Crew, Process, LLM |
|
from crewai_tools import tool |
|
from duckduckgo_search import DDGS |
|
from newspaper import Article |
|
import fitz |
|
from io import BytesIO, StringIO |
|
import sys |
|
|
|
import threading |
|
import queue |
|
import time |
|
|
|
|
|
# Module-level logger used throughout this file. basicConfig installs a
# default console handler so records are visible even if setup_logging()
# is never called.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
def setup_logging():
    """Configure and return the module logger for console output.

    Clears any pre-existing handlers, then attaches a single stdout
    StreamHandler with a compact timestamped format.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    # Drop stale handlers so repeated calls don't duplicate log lines.
    if log.hasHandlers():
        log.handlers.clear()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s', datefmt='%H:%M:%S')
    )
    log.addHandler(stream_handler)

    return log
|
|
|
|
|
# Global mutable state shared with the tool functions below.
# TOKENS_SUMMARIZATION accumulates tokens billed by the summarization calls
# made inside web_scrapper; MODEL_CHOICE mirrors the provider selected in
# the UI (set by kickoff_crew) so the tools use the same backend.
TOKENS_SUMMARIZATION = 0
MODEL_CHOICE = "openai"
|
|
|
def export_to_markdown(result):
    """Persist the final report text to outputs/output.md.

    - result: markdown string to write.

    Returns the written file path on success, or an error message string
    on failure (the Gradio File component displays either).
    """
    try:
        # BUG FIX: create the target directory if missing — previously the
        # open() call failed outright when ./outputs did not exist.
        os.makedirs("outputs", exist_ok=True)
        # Explicit encoding avoids platform-dependent default codecs.
        with open("outputs/output.md", "w", encoding="utf-8") as file:
            file.write(result)
        return "outputs/output.md"
    except Exception as e:
        logger.error("Error exporting to markdown: %s", str(e))
        return f"Error exporting: {e}"
|
|
|
def fetch_content(url):
    """
    Retrieve the textual content behind a URL.

    PDFs (detected via the Content-Type response header) are parsed page by
    page with PyMuPDF; anything else is treated as a web article and parsed
    with newspaper. On failure an error string is returned instead of raising.

    - url: The URL to fetch the content from.
    """
    try:
        # A HEAD request is enough to inspect the Content-Type cheaply.
        head = requests.head(url, allow_redirects=True, timeout=10)
        mime = head.headers.get('Content-Type', '').lower()

        if 'application/pdf' not in mime:
            # Regular web page: delegate download and parsing to newspaper.
            article = Article(url)
            article.download()
            article.parse()
            return article.text

        # PDF branch: download the full document and extract text per page.
        pdf_response = requests.get(url, stream=True, timeout=10)
        pdf_response.raise_for_status()

        extracted = []
        with fitz.open(stream=BytesIO(pdf_response.content), filetype="pdf") as doc:
            for page_num, page in enumerate(doc, start=1):
                page_text = page.get_text()
                if page_text:
                    extracted.append(page_text)
                else:
                    logger.warning(f"Unable to extract text from page {page_num} of the PDF.")
        return "".join(extracted).strip()

    except requests.exceptions.RequestException as req_err:
        logger.error("Error in the HTTP request: %s", str(req_err))
        return f"Error in the HTTP request: {req_err}"
    except Exception as e:
        logger.error("Error getting the content: %s", str(e))
        return f"Error getting the content: {e}"
|
|
|
|
|
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results with this structure:
    - title: The title of the search result.
    - snippet: A short snippet of the search result.
    - link: The link to the search result.
    """
    # NOTE: the docstring above doubles as the tool description shown to the
    # agent, so its wording is kept as-is.
    try:
        hits = DDGS().text(search_query, max_results=5, timelimit='m')
        formatted = []
        for hit in hits:
            formatted.append({
                "title": hit['title'],
                "snippet": hit['body'],
                "link": hit['href'],
            })
        return formatted
    except Exception as e:
        logger.error("Error performing search: %s", str(e))
        return []
|
|
|
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Extract and read the content of a specified link and generate a summary on a specific topic.
    - url: The URL to extract the content from.
    - topic: String with the topic to generate a summary on.
    """
    global TOKENS_SUMMARIZATION

    try:
        content = fetch_content(url)
        prompt = f"""
        # OBJECTIVE
        Generate an in-depth summary of the following CONTENT on the topic "{topic}"

        # INSTRUCTIONS
        - Provide in-depth insights based on the following CONTENT.
        - If the following CONTENT is not directly related to the topic "{topic}", you MUST respond with INVALID CONTENT.
        - Include insights about why the content is important for the topic, possible challenges and advances...
        - The format will be markdown.
        - Avoid making up anything. Every insight MUST be based on the content.

        # CONTENT:
        "{content}"
        """

        context_messages = [
            {
                "role": "system",
                "content": "You are an expert summarizing content for use as context. Focus on the main points."
            },
            {
                "role": "user",
                "content": str(prompt)
            }
        ]

        # Each provider has a different client AND a different response
        # shape, so the summary/token extraction happens per branch.
        if MODEL_CHOICE == "azure":
            client = AzureOpenAI(
                azure_endpoint=os.getenv('AZURE_API_BASE'),
                azure_deployment=os.getenv('AZURE_DEPLOYMENT_ID'),
                api_key=os.getenv('AZURE_OPENAI_KEY'),
                api_version=os.getenv('AZURE_API_VERSION')
            )
            response = client.chat.completions.create(
                model=os.getenv('AZURE_DEPLOYMENT_ID'),
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens

        elif MODEL_CHOICE == "openai":
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            response = client.chat.completions.create(
                model=OPENAI_MODELS['base'],
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens

        elif MODEL_CHOICE == "groq-cohere":
            client = ClientV2(api_key=os.getenv('COHERE_API_KEY'))
            response = client.chat(
                model=COHERE_MODELS['advanced'],
                messages=context_messages,
                max_tokens=1000
            )
            # BUG FIX: Cohere's ClientV2 chat response has no `.choices`
            # attribute; the text lives in `response.message.content[0].text`.
            # The previous OpenAI-style access raised AttributeError and made
            # this branch always return the error summary.
            summary = response.message.content[0].text
            TOKENS_SUMMARIZATION += (response.usage.billed_units.input_tokens
                                     + response.usage.billed_units.output_tokens)
        else:
            # BUG FIX: the old message omitted 'groq-cohere' as a valid choice.
            return "Error: Invalid model choice. Please select 'azure', 'openai' or 'groq-cohere'."

        summary_response = f"""<article_summary>
# SUMMARY:
{summary}

# URL: {url}
</article_summary>
"""

        # Crude rate limiting: the free Groq/Cohere tiers throttle quickly.
        if MODEL_CHOICE == "groq-cohere":
            time.sleep(10)

        return summary_response

    except Exception as e:
        logger.error("Error generating summary: %s", str(e))
        # Return a well-formed placeholder so the agent can keep working
        # with the other sources instead of crashing the task.
        return f"""<article_summary>
# SUMMARY:
Error generating summary.
IGNORE THIS OUTPUT.

# URL: {url}
</article_summary>
"""
|
|
|
|
|
def capture_verbose_output(
    agent_input,
    model_choice,
    azure_openai_key,
    azure_deployment_id,
    azure_api_base,
    azure_api_version,
    openai_api_key,
    cohere_api_key,
    groq_api_key
):
    """
    Generator that streams stdout produced by the multi-agent process in
    real time, updating the Gradio interface with logs, then yields the
    final result once the run completes.

    Yields (result_markdown_update, verbose_log_update) tuples for the two
    output components wired up in main().
    """
    # Redirect stdout into a buffer so the crew's verbose prints can be
    # polled and mirrored into the UI. NOTE(review): this redirection is
    # process-global, so concurrent runs would interleave logs.
    old_stdout = sys.stdout
    mystdout = StringIO()
    sys.stdout = mystdout

    # Single-slot container lets the worker thread hand back its result.
    result_container = [None]

    def run_kickoff():
        # Runs in a worker thread so this generator can poll stdout meanwhile.
        result_container[0] = kickoff_crew(
            topic=agent_input,
            model_choice=model_choice,
            azure_openai_key=azure_openai_key,
            azure_deployment_id=azure_deployment_id,
            azure_api_base=azure_api_base,
            azure_api_version=azure_api_version,
            openai_api_key=openai_api_key,
            cohere_api_key=cohere_api_key,
            groq_api_key=groq_api_key
        )

    kickoff_thread = threading.Thread(target=run_kickoff)
    kickoff_thread.start()

    verbose_output = ""
    result_output = ""

    try:
        # Initial empty update so the UI clears previous content immediately.
        yield gr.update(value=result_output), gr.update(value=verbose_output)

        while kickoff_thread.is_alive():
            new_output = mystdout.getvalue()
            if new_output != verbose_output:
                verbose_output = new_output
                yield gr.update(value=result_output), gr.update(value=verbose_output)
            time.sleep(0.1)

        kickoff_thread.join()
    finally:
        # BUG FIX: always restore stdout, even when the generator is closed
        # early (e.g. the Gradio request is cancelled) or an error escapes;
        # previously a cancelled run left sys.stdout pointing at the buffer
        # for the rest of the process lifetime.
        sys.stdout = old_stdout

    result_output = result_container[0]
    verbose_output = mystdout.getvalue()
    yield gr.update(value=result_output), gr.update(value=verbose_output)
|
|
|
|
|
def kickoff_crew(
    topic: str,
    model_choice: str,
    azure_openai_key: str,
    azure_deployment_id: str,
    azure_api_base: str,
    azure_api_version: str,
    openai_api_key: str,
    cohere_api_key: str,
    groq_api_key: str
) -> str:
    """
    Kick off the multi-agent pipeline.

    Validates the topic and the credentials for the selected provider,
    builds a base/advanced LLM pair for that provider, wires up the
    Researcher and Editor agents with their tasks, runs them sequentially,
    and returns the final report with token-usage estimates appended.
    On any failure an error message string is returned instead of raising.
    """
    try:
        global TOKENS_SUMMARIZATION, MODEL_CHOICE

        # Reset the per-run token counter and publish the provider choice so
        # the web_scrapper tool (which reads these globals) uses the same
        # backend as the agents.
        TOKENS_SUMMARIZATION = 0
        MODEL_CHOICE = model_choice

        if not topic.strip():
            return "Error: The topic cannot be empty. Please provide a valid topic."

        # One base (cheap, tool-driving) and one advanced (editing) LLM per
        # provider; only the selected provider's pair gets instantiated.
        azure_llm_base = None
        azure_llm_advanced = None
        openai_llm_base = None
        openai_llm_advanced = None
        groq_llm_base = None
        groq_llm_advanced = None

        if model_choice == "azure":
            if not azure_openai_key or not azure_deployment_id or not azure_api_base or not azure_api_version:
                return "Error: Please provide all the required Azure OpenAI API details."
            else:
                # Exported to the environment because web_scrapper builds its
                # own AzureOpenAI client from these variables.
                os.environ['AZURE_API_BASE'] = azure_api_base
                os.environ['AZURE_API_VERSION'] = azure_api_version
                os.environ['AZURE_DEPLOYMENT_ID'] = azure_deployment_id
                os.environ['AZURE_OPENAI_KEY'] = azure_openai_key

                azure_llm_base = LLM(
                    temperature=0.3,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=4000
                )
                azure_llm_advanced = LLM(
                    temperature=0.6,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=10000
                )
        elif model_choice == "openai":
            if not openai_api_key:
                return "Error: Please provide the OpenAI API key."
            else:
                os.environ['OPENAI_API_KEY'] = openai_api_key

                openai_llm_base = LLM(
                    model=OPENAI_MODELS['base'],
                    api_key=openai_api_key,
                    max_completion_tokens=4000
                )
                openai_llm_advanced = LLM(
                    model=OPENAI_MODELS['advanced'],
                    api_key=openai_api_key,
                    temperature=0.4,
                    max_completion_tokens=10000
                )
        elif model_choice == "groq-cohere":
            if not cohere_api_key or not groq_api_key:
                return "Error: Please provide both the Cohere and GROQ API keys."
            else:
                # COHERE_API_KEY is consumed by web_scrapper's ClientV2.
                os.environ['COHERE_API_KEY'] = cohere_api_key
                os.environ['GROQ_API_KEY'] = groq_api_key

                # Lower max_tokens here — free-tier Groq limits are tight.
                groq_llm_base = LLM(
                    model=GROQ_MODELS['base'],
                    api_key=groq_api_key,
                    temperature=0.3,
                    max_tokens=1000
                )
                groq_llm_advanced = LLM(
                    model=GROQ_MODELS['advanced'],
                    api_key=groq_api_key,
                    temperature=0.6,
                    max_tokens=4000
                )

        llms = {
            "azure": {
                "base": azure_llm_base,
                "advanced": azure_llm_advanced
            },
            "openai": {
                "base": openai_llm_base,
                "advanced": openai_llm_advanced
            },
            "groq-cohere": {
                "base": groq_llm_base,
                "advanced": groq_llm_advanced
            }
        }

        if model_choice not in llms:
            return f"Error: Invalid model choice. Please select from {list(llms.keys())}."

        selected_llm = llms[model_choice]

        # Researcher: drives the search + scraping tools with the base model.
        researcher = Agent(
            role='Researcher',
            goal=f'Search and collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm["base"],
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract "
                "essential insights on any given topic. Your dedication to detail ensures the reliability and "
                "thoroughness of your findings."
            ),
            allow_delegation=False,
            max_iter=15,
            # Throttle requests hard on the rate-limited free tier.
            max_rpm=5 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # Editor: turns the research draft into the final report using the
        # advanced model; no tools needed.
        editor = Agent(
            role='Editor',
            goal=f'Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm["advanced"],
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys "
                "essential insights but is also easily understandable to diverse audiences."
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=10 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # NOTE: the literal "{topic}" placeholders in task descriptions are
        # interpolated by CrewAI from the kickoff inputs dict below.
        research_task = Task(
            description=(
                "Be sure to translate the topic into English first. "
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new searches related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to extract information from significant URLs, "
                "extracting further insights. Compile these findings into a preliminary draft, documenting all "
                "relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body, "
                "and a conclusion. Properly cite sources. Provide a thorough overview of the info gathered."
            ),
            agent=researcher
        )

        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically. "
                "Elaborate on each section to provide in-depth information and insights. "
                "Verify the accuracy of all data, correct discrepancies, update info to ensure currency, "
                "and maintain a consistent tone. Include a section listing all sources used, formatted as bullet points."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative. "
                "Include an introduction, an extensive discussion, a concise conclusion, and a source list with references."
            ),
            agent=editor,
            # The editor consumes the researcher's draft as context.
            context=[research_task]
        )

        # Sequential process: research first, then editing.
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential
        )

        result = crew.kickoff(inputs={'topic': topic})

        # Token tallies in thousands: agents' usage comes from the crew
        # output; summarization usage was accumulated by web_scrapper.
        tokens = result.token_usage.total_tokens / 1_000
        tokens_summ = TOKENS_SUMMARIZATION / 1_000

        # crew.kickoff returns a CrewOutput object; coerce to str for the UI.
        if not isinstance(result, str):
            result = str(result)

        result += f"\n\n**Estimated tokens (Agents):** {tokens:.5f} k"
        result += f"\n\n**Estimated tokens (Summarization):** {tokens_summ:.5f} k"

        return result
    except Exception as e:
        logger.error("Error in kickoff_crew: %s", str(e))
        return f"Error in kickoff_crew: {str(e)}"
|
|
|
def main():
    """Set up the Gradio interface for the CrewAI Research Tool.

    Builds a two-column Blocks layout (provider/credentials on the left,
    topic input and outputs on the right), wires the event handlers, and
    launches the app with a small request queue.
    """

    description_demo = """# Automatic Insights Generation with Multi-Agents (CrewAI)
    - **Multi-agent framework**: CrewAI
    - **Multi-agents**: Two agents, Researcher and Editor, working together to extract information from the internet and compile a report on the topic of choice.
    - **Search tool**: Duck-Duck-Go-Search
    - **Web Retrieval**: Newspaper4k and PDF

    *Note: Groq is currently disabled due to rate limiting issues. Please use Azure or OpenAI for now.*
    """

    with gr.Blocks() as demo:
        gr.Markdown(description_demo)

        with gr.Row():
            # Left column: provider selection, credential inputs, export.
            with gr.Column(scale=1):

                model_choice = gr.Radio(
                    choices=["azure", "openai", "groq-cohere"],
                    label="Choose Model",
                    value="openai",
                    interactive=True
                )

                # Credential fields for every provider are created up front;
                # update_model_choice toggles their visibility.
                azure_api_base_input = gr.Textbox(label="Azure API Base (url)", type="password", visible=False, interactive=True)
                azure_deployment_id_input = gr.Textbox(label="Azure Deployment ID (model)", type="password", visible=False, interactive=True)
                azure_openai_key_input = gr.Textbox(label="Azure API Key", type="password", visible=False, interactive=True)
                azure_api_version_input = gr.Textbox(label="Azure API Version", type="text", visible=False, interactive=True)

                openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password", visible=True, interactive=True)

                # NOTE(review): groq/cohere fields start non-interactive,
                # matching the "Groq disabled" note in the description.
                groq_api_key_input = gr.Textbox(label="GROQ API Key", type="password", visible=False, interactive=False)

                cohere_api_key_input = gr.Textbox(label="Cohere API Key", type="password", visible=False, interactive=False)

                export_button = gr.Button("Export to Markdown", interactive=True)
                file_output = gr.File(label="Download Markdown File")
                credits = gr.Markdown(
                    label="Credits",
                    show_label=True,
                    value="This tool is powered by [CrewAI](https://crewai.com), "
                          "[OpenAI](https://openai.com), "
                          "[Azure OpenAI Services](https://azure.microsoft.com/en-us/products/ai-services/openai-service), "
                          "[Cohere](https://dashboard.cohere.com), and [GROQ](https://console.groq.com/playground).",
                )

            # Right column: topic input, final report, and live logs.
            with gr.Column(scale=2):
                topic_input = gr.Textbox(
                    label="Enter Topic",
                    placeholder="Type here the topic of interest...",
                    interactive=True
                )
                submit_button = gr.Button("Start Research", interactive=True)
                # NOTE(review): "insighsts" typo in the user-facing default
                # text below — left untouched here; fix in a code change.
                output = gr.Markdown(
                    label="Result",
                    show_copy_button=True,
                    value="The generated insighsts will appear here...",
                    latex_delimiters=[
                        {"left": "\\[", "right": "\\]", "display": True},
                        {"left": "\\(", "right": "\\)", "display": False},
                    ]
                )

                verbose_output = gr.Textbox(
                    label="Verbose Output",
                    placeholder="Verbose logs will appear here...",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )

        def update_model_choice(model):
            """Update visibility of config inputs based on the selected LLM."""
            azure_visibility = False
            openai_visibility = False
            cohere_visibility = False
            groq_visibility = False

            # Exactly one provider's credential fields become visible.
            if model == "azure":
                azure_visibility = True
            elif model == "openai":
                openai_visibility = True
            elif model == "groq-cohere":
                cohere_visibility = True
                groq_visibility = True

            return {
                azure_openai_key_input: gr.update(visible=azure_visibility),
                azure_deployment_id_input: gr.update(visible=azure_visibility),
                azure_api_base_input: gr.update(visible=azure_visibility),
                azure_api_version_input: gr.update(visible=azure_visibility),
                openai_api_key_input: gr.update(visible=openai_visibility),
                cohere_api_key_input: gr.update(visible=cohere_visibility),
                groq_api_key_input: gr.update(visible=groq_visibility),
            }

        model_choice.change(
            fn=update_model_choice,
            inputs=[model_choice],
            outputs=[
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ]
        )

        # capture_verbose_output is a generator, so Gradio streams its
        # yields into the two output components as the crew runs.
        submit_button.click(
            fn=capture_verbose_output,
            inputs=[
                topic_input,
                model_choice,
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ],
            outputs=[output, verbose_output]
        )

        export_button.click(
            fn=export_to_markdown,
            inputs=output,
            outputs=file_output
        )

    # Small queue: at most 3 pending runs; the REST API is disabled.
    demo.queue(api_open=False, max_size=3).launch()
|
|
|
|
|
# Script entry point: launch the Gradio app only when run directly.
if __name__ == "__main__":
    main()