"""Automatic insights generation with a CrewAI multi-agent pipeline.

Two agents (Researcher and Editor) collaborate to search the web with
DuckDuckGo, summarize retrieved pages/PDFs with an LLM, and compile a
markdown report. The flow is driven from a Gradio interface supporting
Azure OpenAI, OpenAI and GROQ+Cohere back ends.
"""

from config import OPENAI_MODELS, COHERE_MODELS, GROQ_MODELS, MAX_TOKENS_BASE, MAX_TOKENS_ADVANCED
import os
import gradio as gr
import requests
import logging
from openai import AzureOpenAI, OpenAI
from cohere import ClientV2
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import tool
from duckduckgo_search import DDGS
from newspaper import Article
import fitz  # PyMuPDF
from io import BytesIO, StringIO
import sys
import threading
import queue
import time

# Basic logger configuration
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def setup_logging():
    """Set up logging for better error tracking.

    Returns:
        logging.Logger: module logger writing to stdout with a compact format.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Remove any existing handlers so repeated calls do not duplicate output
    if logger.hasHandlers():
        logger.handlers.clear()
    # Create a handler that writes to stdout
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s', datefmt='%H:%M:%S')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return logger


# Global variables shared with the tool callbacks (CrewAI tools are plain
# functions, so module-level state carries the UI selection along).
TOKENS_SUMMARIZATION = 0   # tokens consumed by the WebScrapper summarization calls
MODEL_CHOICE = "openai"    # provider selected in the UI: "azure" | "openai" | "groq-cohere"


def export_to_markdown(result):
    """Utility to export the final result to an output.md file.

    Args:
        result: Markdown text to persist.

    Returns:
        The path of the written file, or an error message string on failure.
    """
    try:
        # FIX: ensure the target folder exists first; the original open()
        # failed with FileNotFoundError when "outputs/" was missing.
        os.makedirs("outputs", exist_ok=True)
        with open("outputs/output.md", "w") as file:
            file.write(result)
        return "outputs/output.md"
    except Exception as e:
        logger.error("Error exporting to markdown: %s", str(e))
        return f"Error exporting: {e}"


def fetch_content(url):
    """
    Fetch the content from a URL, handling either PDFs or normal web articles.
    - url: The URL to fetch the content from.
    """
    try:
        # HEAD request to check content type before downloading the body
        response = requests.head(url, allow_redirects=True, timeout=10)
        content_type = response.headers.get('Content-Type', '').lower()

        if 'application/pdf' in content_type:
            # The URL points to a PDF; download and extract text
            pdf_response = requests.get(url, stream=True, timeout=10)
            pdf_response.raise_for_status()
            pdf_file = BytesIO(pdf_response.content)
            with fitz.open(stream=pdf_file, filetype="pdf") as doc:
                text = ""
                for page_num, page in enumerate(doc, start=1):
                    page_text = page.get_text()
                    if page_text:
                        text += page_text
                    else:
                        logger.warning(f"Unable to extract text from page {page_num} of the PDF.")
            return text.strip()
        else:
            # Not a PDF; use newspaper3k's Article to extract text
            article = Article(url)
            article.download()
            article.parse()
            return article.text
    except requests.exceptions.RequestException as req_err:
        logger.error("Error in the HTTP request: %s", str(req_err))
        return f"Error in the HTTP request: {req_err}"
    except Exception as e:
        logger.error("Error getting the content: %s", str(e))
        return f"Error getting the content: {e}"


# Tools
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results with this structure:
    - title: The title of the search result.
    - snippet: A short snippet of the search result.
    - link: The link to the search result.
    """
    try:
        # timelimit='m' restricts results to the last month
        results = DDGS().text(search_query, max_results=5, timelimit='m')
        results_list = [{"title": result['title'],
                         "snippet": result['body'],
                         "link": result['href']} for result in results]
        return results_list
    except Exception as e:
        logger.error("Error performing search: %s", str(e))
        return []


@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Extract and read the content of a specified link and generate a summary on a specific topic.
    - url: The URL to extract the content from.
    - topic: String with the topic to generate a summary on.
    """
    global TOKENS_SUMMARIZATION
    try:
        content = fetch_content(url)
        prompt = f"""
        # OBJECTIVE
        Generate an in-depth summary of the following CONTENT on the topic "{topic}"

        # INSTRUCTIONS
        - Provide in-depth insights based on the following CONTENT.
        - If the following CONTENT is not directly related to the topic "{topic}", you MUST respond with INVALID CONTENT.
        - Include insights about why the content is important for the topic, possible challenges and advances...
        - The format will be markdown.
        - Avoid making up anything. Every insight MUST be based on the content.

        # CONTENT:
        "{content}"
        """
        context_messages = [
            {
                "role": "system",
                "content": "You are an expert summarizing content for use as context. Focus on the main points."
            },
            {
                "role": "user",
                "content": str(prompt)
            }
        ]

        # Use AzureOpenAI, OpenAI or GROQ_COHERE based on model_choice
        if MODEL_CHOICE == "azure":
            client = AzureOpenAI(
                azure_endpoint=os.getenv('AZURE_API_BASE'),
                azure_deployment=os.getenv('AZURE_DEPLOYMENT_ID'),
                api_key=os.getenv('AZURE_OPENAI_KEY'),
                api_version=os.getenv('AZURE_API_VERSION')
            )
            response = client.chat.completions.create(
                model=os.getenv('AZURE_DEPLOYMENT_ID'),
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
        elif MODEL_CHOICE == "openai":
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            response = client.chat.completions.create(
                model=OPENAI_MODELS['base'],
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )
        elif MODEL_CHOICE == "groq-cohere":
            client = ClientV2(api_key=os.getenv('COHERE_API_KEY'))
            response = client.chat(
                model=COHERE_MODELS['advanced'],
                messages=context_messages,
                max_tokens=1000
            )
        else:
            return "Error: Invalid model choice. Please select 'azure' or 'openai'."

        # FIX: OpenAI-style and Cohere V2 responses are shaped differently;
        # the original read `response.choices[0]...` unconditionally, which
        # raises AttributeError on the Cohere ClientV2 chat response.
        if MODEL_CHOICE in ("azure", "openai"):
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens
        else:  # groq-cohere: text lives under message.content, usage under billed_units
            summary = response.message.content[0].text
            TOKENS_SUMMARIZATION += (response.usage.billed_units.input_tokens
                                     + response.usage.billed_units.output_tokens)

        summary_response = f"""
        # SUMMARY:
        {summary}

        # URL: {url}
        """

        # include a delay of 10 second to avoid rate limiting of GROQ-Cohere
        if MODEL_CHOICE == "groq-cohere":
            time.sleep(10)

        return summary_response
    except Exception as e:
        logger.error("Error generating summary: %s", str(e))
        return f"""
        # SUMMARY:
        Error generating summary. IGNORE THIS OUTPUT.

        # URL: {url}
        """


def capture_verbose_output(
    agent_input, model_choice,
    azure_openai_key, azure_deployment_id, azure_api_base, azure_api_version,
    openai_api_key, cohere_api_key, groq_api_key
):
    """
    This generator captures stdout produced by the multi-agent process in real time,
    updating the Gradio interface with logs, while returning the final result once done.
    """
    # Redirect stdout so CrewAI's verbose prints can be streamed to the UI
    old_stdout = sys.stdout
    mystdout = StringIO()
    sys.stdout = mystdout

    result_container = [None]

    def run_kickoff():
        # Runs in a worker thread; stores the pipeline result for later use
        result_container[0] = kickoff_crew(
            topic=agent_input,
            model_choice=model_choice,
            azure_openai_key=azure_openai_key,
            azure_deployment_id=azure_deployment_id,
            azure_api_base=azure_api_base,
            azure_api_version=azure_api_version,
            openai_api_key=openai_api_key,
            cohere_api_key=cohere_api_key,
            groq_api_key=groq_api_key
        )

    kickoff_thread = threading.Thread(target=run_kickoff)
    kickoff_thread.start()

    verbose_output = ""
    result_output = ""
    # Initialize outputs
    yield gr.update(value=result_output), gr.update(value=verbose_output)

    while kickoff_thread.is_alive():
        # Read new output from mystdout and push it to the UI when it changes
        new_output = mystdout.getvalue()
        if new_output != verbose_output:
            verbose_output = new_output
            yield gr.update(value=result_output), gr.update(value=verbose_output)
        time.sleep(0.1)

    # Once done, get final result and restore the real stdout
    kickoff_thread.join()
    sys.stdout = old_stdout
    result_output = result_container[0]
    verbose_output = mystdout.getvalue()
    yield gr.update(value=result_output), gr.update(value=verbose_output)


def kickoff_crew(
    topic: str,
    model_choice: str,
    azure_openai_key: str,
    azure_deployment_id: str,
    azure_api_base: str,
    azure_api_version: str,
    openai_api_key: str,
    cohere_api_key: str,
    groq_api_key: str
) -> str:
    """
    Kick off the multi-agent pipeline.

    Validates the credentials for the chosen provider, builds the base and
    advanced LLMs, wires up the Researcher/Editor agents and their tasks,
    runs the crew sequentially, and returns the final report (with token
    usage appended) or an error message string.
    """
    try:
        global TOKENS_SUMMARIZATION, MODEL_CHOICE
        TOKENS_SUMMARIZATION = 0
        MODEL_CHOICE = model_choice

        # Basic checks
        if not topic.strip():
            return "Error: The topic cannot be empty. Please provide a valid topic."

        # ---- Define LLMs based on the user-provided inputs ----
        # Initialize all model variables with None
        azure_llm_base = None
        azure_llm_advanced = None
        openai_llm_base = None
        openai_llm_advanced = None
        groq_llm_base = None
        groq_llm_advanced = None

        if model_choice == "azure":
            if not azure_openai_key or not azure_deployment_id or not azure_api_base or not azure_api_version:
                return "Error: Please provide all the required Azure OpenAI API details."
            else:
                # Export credentials so the WebScrapper tool can rebuild a client
                os.environ['AZURE_API_BASE'] = azure_api_base
                os.environ['AZURE_API_VERSION'] = azure_api_version
                os.environ['AZURE_DEPLOYMENT_ID'] = azure_deployment_id
                os.environ['AZURE_OPENAI_KEY'] = azure_openai_key
                # Azure
                azure_llm_base = LLM(
                    temperature=0.3,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=4000
                )
                azure_llm_advanced = LLM(
                    temperature=0.6,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=10000
                )
        elif model_choice == "openai":
            if not openai_api_key:
                return "Error: Please provide the OpenAI API key."
            else:
                os.environ['OPENAI_API_KEY'] = openai_api_key
                # OpenAI
                openai_llm_base = LLM(
                    model=OPENAI_MODELS['base'],
                    api_key=openai_api_key,
                    max_completion_tokens=4000
                )
                openai_llm_advanced = LLM(
                    model=OPENAI_MODELS['advanced'],
                    api_key=openai_api_key,
                    temperature=0.4,
                    max_completion_tokens=10000
                )
        elif model_choice == "groq-cohere":
            if not cohere_api_key or not groq_api_key:
                return "Error: Please provide both the Cohere and GROQ API keys."
            else:
                os.environ['COHERE_API_KEY'] = cohere_api_key
                os.environ['GROQ_API_KEY'] = groq_api_key
                # GROQ - placeholder examples
                groq_llm_base = LLM(
                    model=GROQ_MODELS['base'],
                    api_key=groq_api_key,
                    temperature=0.3,
                    max_tokens=1000
                )
                groq_llm_advanced = LLM(
                    model=GROQ_MODELS['advanced'],
                    api_key=groq_api_key,
                    temperature=0.6,
                    max_tokens=4000
                )

        # Dictionary grouping the LLMs by provider
        llms = {
            "azure": {
                "base": azure_llm_base,
                "advanced": azure_llm_advanced
            },
            "openai": {
                "base": openai_llm_base,
                "advanced": openai_llm_advanced
            },
            "groq-cohere": {
                "base": groq_llm_base,
                "advanced": groq_llm_advanced
            }
        }

        # Obtain the selected LLM set
        if model_choice not in llms:
            return f"Error: Invalid model choice. Please select from {list(llms.keys())}."
        selected_llm = llms[model_choice]

        # Define Agents
        researcher = Agent(
            role='Researcher',
            goal=f'Search and collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm["base"],
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract "
                "essential insights on any given topic. Your dedication to detail ensures the reliability and "
                "thoroughness of your findings."
            ),
            allow_delegation=False,
            max_iter=15,
            # groq-cohere is heavily rate limited, so throttle its requests
            max_rpm=5 if model_choice == "groq-cohere" else 120,
            verbose=True
        )
        editor = Agent(
            role='Editor',
            goal=f'Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm["advanced"],
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys "
                "essential insights but is also easily understandable to diverse audiences."
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=10 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # Define Tasks
        research_task = Task(
            description=(
                "Be sure to translate the topic into English first. "
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new searches related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to extract information from significant URLs, "
                "extracting further insights. Compile these findings into a preliminary draft, documenting all "
                "relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body, "
                "and a conclusion. Properly cite sources. Provide a thorough overview of the info gathered."
            ),
            agent=researcher
        )
        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically. "
                "Elaborate on each section to provide in-depth information and insights. "
                "Verify the accuracy of all data, correct discrepancies, update info to ensure currency, "
                "and maintain a consistent tone. Include a section listing all sources used, formatted as bullet points."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative. "
                "Include an introduction, an extensive discussion, a concise conclusion, and a source list with references."
            ),
            agent=editor,
            context=[research_task]
        )

        # Form the Crew
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential
        )

        # Kick off
        result = crew.kickoff(inputs={'topic': topic})

        # Compute token usage (CrewAI aggregator usage)
        tokens = result.token_usage.total_tokens / 1_000
        tokens_summ = TOKENS_SUMMARIZATION / 1_000
        if not isinstance(result, str):
            result = str(result)
        result += f"\n\n**Estimated tokens (Agents):** {tokens:.5f} k"
        result += f"\n\n**Estimated tokens (Summarization):** {tokens_summ:.5f} k"
        return result
    except Exception as e:
        logger.error("Error in kickoff_crew: %s", str(e))
        return f"Error in kickoff_crew: {str(e)}"


def main():
    """Set up the Gradio interface for the CrewAI Research Tool."""
    description_demo = """# Automatic Insights Generation with Multi-Agents (CrewAI)
- **Multi-agent framework**: CrewAI
- **Multi-agents**: Two agents, Researcher and Editor, working together to extract information from the internet and compile a report on the topic of choice.
- **Search tool**: Duck-Duck-Go-Search
- **Web Retrieval**: Newspaper4k and PDF

*Note: Groq is currently disabled due to rate limiting issues. Please use Azure or OpenAI for now.*
"""
    with gr.Blocks() as demo:
        gr.Markdown(description_demo)
        with gr.Row():
            with gr.Column(scale=1):
                # Radio: now includes azure / openai / groq / cohere
                model_choice = gr.Radio(
                    choices=["azure", "openai", "groq-cohere"],
                    label="Choose Model",
                    value="openai",
                    interactive=True
                )
                # ------------
                # LLM config inputs
                # ------------
                # Azure
                azure_api_base_input = gr.Textbox(label="Azure API Base (url)", type="password", visible=False, interactive=True)
                azure_deployment_id_input = gr.Textbox(label="Azure Deployment ID (model)", type="password", visible=False, interactive=True)
                azure_openai_key_input = gr.Textbox(label="Azure API Key", type="password", visible=False, interactive=True)
                azure_api_version_input = gr.Textbox(label="Azure API Version", type="text", visible=False, interactive=True)
                # OpenAI
                openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password", visible=True, interactive=True)
                # GROQ
                groq_api_key_input = gr.Textbox(label="GROQ API Key", type="password", visible=False, interactive=False)
                # Cohere
                cohere_api_key_input = gr.Textbox(label="Cohere API Key", type="password", visible=False, interactive=False)

                export_button = gr.Button("Export to Markdown", interactive=True)
                file_output = gr.File(label="Download Markdown File")
                credits = gr.Markdown(
                    label="Credits",
                    show_label=True,
                    value="This tool is powered by [CrewAI](https://crewai.com), "
                          "[OpenAI](https://openai.com), "
                          "[Azure OpenAI Services](https://azure.microsoft.com/en-us/products/ai-services/openai-service), "
                          "[Cohere](https://dashboard.cohere.com), and [GROQ](https://console.groq.com/playground).",
                )
            with gr.Column(scale=2):
                topic_input = gr.Textbox(
                    label="Enter Topic",
                    placeholder="Type here the topic of interest...",
                    interactive=True
                )
                submit_button = gr.Button("Start Research", interactive=True)
                output = gr.Markdown(
                    label="Result",
                    show_copy_button=True,
                    # FIX: typo "insighsts" -> "insights"
                    value="The generated insights will appear here...",
                    latex_delimiters=[
                        {"left": "\\[", "right": "\\]", "display": True},
                        {"left": "\\(", "right": "\\)", "display": False},
                    ]
                )
                verbose_output = gr.Textbox(
                    label="Verbose Output",
                    placeholder="Verbose logs will appear here...",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )

        # ---------------
        # Dynamic toggling of LLM config boxes
        # ---------------
        def update_model_choice(model):
            """Update visibility of config inputs based on the selected LLM."""
            azure_visibility = False
            openai_visibility = False
            cohere_visibility = False
            groq_visibility = False
            if model == "azure":
                azure_visibility = True
            elif model == "openai":
                openai_visibility = True
            elif model == "groq-cohere":
                cohere_visibility = True
                groq_visibility = True
            return {
                azure_openai_key_input: gr.update(visible=azure_visibility),
                azure_deployment_id_input: gr.update(visible=azure_visibility),
                azure_api_base_input: gr.update(visible=azure_visibility),
                azure_api_version_input: gr.update(visible=azure_visibility),
                openai_api_key_input: gr.update(visible=openai_visibility),
                cohere_api_key_input: gr.update(visible=cohere_visibility),
                groq_api_key_input: gr.update(visible=groq_visibility),
            }

        model_choice.change(
            fn=update_model_choice,
            inputs=[model_choice],
            outputs=[
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ]
        )

        submit_button.click(
            fn=capture_verbose_output,
            inputs=[
                topic_input,
                model_choice,
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ],
            outputs=[output, verbose_output]
        )

        export_button.click(
            fn=export_to_markdown,
            inputs=output,
            outputs=file_output
        )

    demo.queue(api_open=False, max_size=3).launch()


if __name__ == "__main__":
    main()