dromerosm committed on
Commit
7153d1f
·
1 Parent(s): b68285a

Update .gitignore, requirements, and README for project enhancements

Browse files
Files changed (7) hide show
  1. .gitignore +2 -0
  2. README.md +2 -2
  3. __pycache__/config.cpython-310.pyc +0 -0
  4. app.py +548 -114
  5. config.py +23 -0
  6. outputs/output.md +1 -0
  7. requirements.txt +2 -2
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .conda
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Crewai, Groq, Llama3 and Cohere for Research
3
  emoji: 📈
4
  colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.28.3
8
  app_file: app.py
9
  pinned: false
10
  license: cc-by-nc-nd-4.0
 
1
  ---
2
+ title: Crewai Multiagent Research Tool
3
  emoji: 📈
4
  colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: cc-by-nc-nd-4.0
__pycache__/config.cpython-310.pyc ADDED
Binary file (458 Bytes). View file
 
app.py CHANGED
@@ -1,201 +1,635 @@
 
 
1
  import os
2
  import gradio as gr
3
- import cohere
4
  import requests
5
- from crewai import Agent, Task, Crew, Process
6
 
7
- from langchain_groq import ChatGroq
8
- from langchain_cohere import ChatCohere
9
 
10
- from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
11
- from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
12
  from duckduckgo_search import DDGS
13
-
14
  from newspaper import Article
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Ensure essential environment variables are set
17
- cohere_api_key = os.getenv('COHERE_API_KEY')
18
- if not cohere_api_key:
19
- raise EnvironmentError("COHERE_API_KEY is not set in environment variables")
20
- groq_api_key = os.getenv("GROQ_API_KEY")
21
- if not groq_api_key:
22
- raise EnvironmentError("GROQ_API_KEY is not set in environment variables")
23
 
24
- # Initialize API clients
25
- co = cohere.Client(cohere_api_key)
26
- print("client ok")
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def fetch_content(url):
 
 
 
 
29
  try:
30
- article = Article(url)
31
- article.download()
32
- article.parse()
33
- return article.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  except Exception as e:
35
- print("ERROR: " + str(e))
36
- return f"Error fetching content: {e}"
37
 
38
- # Define the DuckDuckGoSearch tool
39
  @tool('DuckDuckGoSearchResults')
40
- def search_results(search_query: str) -> dict:
41
  """
42
- Performs a web search to gather and return a collection of search results.
43
- This tool automates the retrieval of web-based information related to a specified query.
44
- Args:
45
- - search_query (str): The query string that specifies the information to be searched on the web. This should be a clear and concise expression of the user's information needs.
46
- Returns:
47
- - list: A list of dictionaries, where each dictionary represents a search result. Each dictionary includes 'snippet' of the page and the 'link' with the url linking to it.
48
  """
49
- results = DDGS().text(search_query, max_results=5, timelimit='m')
50
- results_list = [{"title": result['title'], "snippet": result['body'], "link": result['href']} for result in results]
51
- return results_list
 
 
 
 
 
 
52
 
53
  @tool('WebScrapper')
54
  def web_scrapper(url: str, topic: str) -> str:
55
  """
56
- A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
57
- It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
58
- This tool is particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
59
-
60
- Args:
61
- - url (str): The URL from which to scrape content.
62
- - topic (str): The specific topic on which to generate a summary.
63
- Returns:
64
- - summary (str): summary of the url on the topic
65
  """
66
- # Scrape content from the specified URL
67
- content = fetch_content(url)
68
-
69
- # Prepare the prompt for generating the summary
70
- prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + content
71
-
72
- # Generate the summary using Cohere
73
- response = co.chat(
74
- model='command-r-plus',
75
- message=prompt,
76
- temperature=0.4,
77
- max_tokens=1000,
78
- chat_history=[],
79
- prompt_truncation='AUTO'
80
- )
81
-
82
- summary_response = f"""###
83
- Summary:
84
- {response.text}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- URL: {url}
87
- ###
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  """
89
-
90
- return summary_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- def kickoff_crew(topic: str, model_choice: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  try:
94
-
95
- # Initialize the large language models based on user selection
96
- groq_llm = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name="groq/" + model_choice)
97
-
98
- # Define Agents with Groq LLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  researcher = Agent(
100
  role='Researcher',
101
- goal='Search and Collect detailed information on topic ## {topic} ##',
102
  tools=[search_results, web_scrapper],
103
- llm=groq_llm, # Assigning the LLM here
104
  backstory=(
105
- "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
106
- "Your dedication to detail ensures the reliability and thoroughness of your findings. "
107
- "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
108
  ),
109
  allow_delegation=False,
110
  max_iter=15,
111
- max_rpm=20,
112
- memory=True,
113
  verbose=True
114
  )
115
 
116
-
117
  editor = Agent(
118
  role='Editor',
119
- goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
120
- llm=groq_llm, # Assigning the LLM here
121
  backstory=(
122
  "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
123
- "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
124
- "but is also easily understandable and appealing to diverse audiences. "
125
  ),
126
  allow_delegation=False,
127
  max_iter=5,
128
- max_rpm=15,
129
- memory=True,
130
  verbose=True
131
  )
132
 
133
  # Define Tasks
134
  research_task = Task(
135
  description=(
 
136
  "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
137
- "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
138
- "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
139
- "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
140
- "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
 
141
  ),
142
  expected_output=(
143
- "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
144
- "Each section should properly cite sources, providing a thorough overview of the information gathered."
145
  ),
146
  agent=researcher
147
  )
148
 
149
-
150
  edit_task = Task(
151
  description=(
152
- "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
153
- "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
154
- "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
155
- "Include a section listing all sources used, formatted as bullet points following this template: "
156
- "- title: url'."
157
  ),
158
  expected_output=(
159
- "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
160
- "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
161
- "Ensure the document is grammatically correct and ready for publication or presentation."
162
  ),
163
  agent=editor,
164
  context=[research_task]
165
  )
166
 
167
- # Forming the Crew
168
  crew = Crew(
169
  agents=[researcher, editor],
170
  tasks=[research_task, edit_task],
171
- process=Process.sequential,
172
  )
173
 
174
- # Kick-off the research process
175
  result = crew.kickoff(inputs={'topic': topic})
 
 
 
 
 
176
  if not isinstance(result, str):
177
  result = str(result)
 
 
 
 
178
  return result
179
  except Exception as e:
180
- return f"Error: {str(e)}"
 
181
 
182
  def main():
183
  """Set up the Gradio interface for the CrewAI Research Tool."""
 
 
 
 
 
 
 
 
 
 
184
  with gr.Blocks() as demo:
185
- gr.Markdown("## CrewAI Research Tool")
186
- topic_input = gr.Textbox(label="Enter Topic", placeholder="Type here...")
187
- model_choice = gr.Radio(choices=["mixtral-8x7b-32768", "llama-3.2-1b-preview", 'llama-3.2-3b-preview', 'llama-3.3-70b-versatile'], label="Choose Model")
188
- submit_button = gr.Button("Start Research")
189
- output = gr.Markdown(label="Result")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  submit_button.click(
192
- fn=kickoff_crew,
193
- inputs=[topic_input, model_choice],
194
- outputs=output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  )
196
 
197
- # demo.launch(debug=True)
198
  demo.queue(api_open=False, max_size=3).launch()
199
 
 
200
  if __name__ == "__main__":
201
  main()
 
1
+ from config import OPENAI_MODELS, COHERE_MODELS, GROQ_MODELS, MAX_TOKENS_BASE, MAX_TOKENS_ADVANCED
2
+
3
  import os
4
  import gradio as gr
 
5
  import requests
6
+ import logging
7
 
8
+ from openai import AzureOpenAI, OpenAI
9
+ from cohere import ClientV2
10
 
11
+ from crewai import Agent, Task, Crew, Process, LLM
12
+ from crewai_tools import tool
13
  from duckduckgo_search import DDGS
 
14
  from newspaper import Article
15
+ import fitz # PyMuPDF
16
+ from io import BytesIO, StringIO
17
+ import sys
18
+
19
+ import threading
20
+ import queue
21
+ import time
22
+
23
+ # Basic logger configuration. This module-level logger is used by the tool
+ # helpers below; setup_logging() (further down) builds a stdout-bound variant.
+ logger = logging.getLogger(__name__)
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
+
27
def setup_logging():
    """Configure and return the module logger, replacing any prior handlers.

    The logger is set to INFO level and writes timestamped records to
    standard output so they appear in the captured verbose stream.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    # Drop handlers left over from earlier calls so records are not duplicated.
    while log.handlers:
        log.handlers.pop()

    # Route records to stdout with a compact "time | LEVEL | message" layout.
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s', datefmt='%H:%M:%S')
    )
    log.addHandler(stream_handler)

    return log
43
+
44
+ # Global variables shared between the Gradio callbacks and the tools.
+ TOKENS_SUMMARIZATION = 0  # tokens consumed by WebScrapper summarization calls; reset per run
+ MODEL_CHOICE = "openai"  # provider selected in the UI; overwritten by kickoff_crew
47
+
48
def export_to_markdown(result):
    """Export the final report to ``outputs/output.md``.

    Args:
        result: Markdown text to persist (coerced to ``str`` if needed).

    Returns:
        The written file path on success, or an ``"Error exporting: ..."``
        message string on failure so the Gradio file output shows something
        meaningful instead of raising.
    """
    output_path = "outputs/output.md"
    try:
        # Ensure the target directory exists: the repository may be checked
        # out without the outputs/ folder, which made the original open() fail.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(str(result))
        return output_path
    except Exception as e:
        logger.error("Error exporting to markdown: %s", str(e))
        return f"Error exporting: {e}"
57
 
58
def fetch_content(url):
    """
    Fetch the content from a URL, handling either PDFs or normal web articles.

    - url: The URL to fetch the content from.

    Returns the extracted plain text on success, or an error-message string on
    failure (callers treat the return value as text either way).
    """
    try:
        # HEAD request to check content type without downloading the body.
        response = requests.head(url, allow_redirects=True, timeout=10)
        content_type = response.headers.get('Content-Type', '').lower()

        if 'application/pdf' in content_type:
            # The URL points to a PDF; download and extract text page by page.
            pdf_response = requests.get(url, stream=True, timeout=10)
            pdf_response.raise_for_status()

            pdf_file = BytesIO(pdf_response.content)
            with fitz.open(stream=pdf_file, filetype="pdf") as doc:
                text = ""
                for page_num, page in enumerate(doc, start=1):
                    page_text = page.get_text()
                    if page_text:
                        text += page_text
                    else:
                        # Likely a scanned/image-only page with no text layer.
                        logger.warning(f"Unable to extract text from page {page_num} of the PDF.")
                return text.strip()
        else:
            # Not a PDF; use newspaper3k’s Article to extract text
            article = Article(url)
            article.download()
            article.parse()
            return article.text
    except requests.exceptions.RequestException as req_err:
        logger.error("Error in the HTTP request: %s", str(req_err))
        return f"Error in the HTTP request: {req_err}"
    except Exception as e:
        logger.error("Error getting the content: %s", str(e))
        return f"Error getting the content: {e}"
95
 
96
# Tools
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results with this structure:
    - title: The title of the search result.
    - snippet: A short snippet of the search result.
    - link: The link to the search result.
    """
    try:
        # timelimit='m' restricts hits to roughly the last month; cap at 5 results.
        results = DDGS().text(search_query, max_results=5, timelimit='m')
        results_list = [{"title": result['title'],
                         "snippet": result['body'],
                         "link": result['href']} for result in results]
        return results_list
    except Exception as e:
        # Return an empty list instead of raising so the agent loop can continue.
        logger.error("Error performing search: %s", str(e))
        return []
114
 
115
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Extract and read the content of a specified link and generate a summary on a specific topic.

    - url: The URL to extract the content from.
    - topic: String with the topic to generate a summary on.

    Returns an ``<article_summary>`` block with the summary and source URL; on
    failure the block carries an explicit "IGNORE THIS OUTPUT" marker so the
    agent can discard it.
    """
    global TOKENS_SUMMARIZATION

    try:
        content = fetch_content(url)
        prompt = f"""
        # OBJECTIVE
        Generate an in-depth summary of the following CONTENT on the topic "{topic}"

        # INSTRUCTIONS
        - Provide in-depth insights based on the following CONTENT.
        - If the following CONTENT is not directly related to the topic "{topic}", you MUST respond with INVALID CONTENT.
        - Include insights about why the content is important for the topic, possible challenges and advances...
        - The format will be markdown.
        - Avoid making up anything. Every insight MUST be based on the content.

        # CONTENT:
        "{content}"
        """

        context_messages = [
            {
                "role": "system",
                "content": "You are an expert summarizing content for use as context. Focus on the main points."
            },
            {
                "role": "user",
                "content": str(prompt)
            }
        ]

        # Use AzureOpenAI, OpenAI or GROQ_COHERE based on model_choice
        if MODEL_CHOICE == "azure":
            client = AzureOpenAI(
                azure_endpoint=os.getenv('AZURE_API_BASE'),
                azure_deployment=os.getenv('AZURE_DEPLOYMENT_ID'),
                api_key=os.getenv('AZURE_OPENAI_KEY'),
                api_version=os.getenv('AZURE_API_VERSION')
            )
            response = client.chat.completions.create(
                model=os.getenv('AZURE_DEPLOYMENT_ID'),
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )

        elif MODEL_CHOICE == "openai":
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
            response = client.chat.completions.create(
                model=OPENAI_MODELS['base'],
                messages=context_messages,
                temperature=0.7,
                max_tokens=5000
            )

        elif MODEL_CHOICE == "groq-cohere":
            client = ClientV2(api_key=os.getenv('COHERE_API_KEY'))
            response = client.chat(
                model=COHERE_MODELS['advanced'],
                messages=context_messages,
                max_tokens=1000
            )
        else:
            return "Error: Invalid model choice. Please select 'azure' or 'openai'."

        # BUG FIX: the OpenAI and Cohere SDKs return differently shaped
        # responses. The previous code read response.choices[0].message.content
        # unconditionally, which raises AttributeError for Cohere ClientV2
        # responses and silently turned every groq-cohere summary into the
        # error fallback below.
        if MODEL_CHOICE in ("azure", "openai"):
            summary = response.choices[0].message.content
            TOKENS_SUMMARIZATION += response.usage.total_tokens
        else:  # groq-cohere (Cohere ClientV2 chat response)
            summary = response.message.content[0].text
            TOKENS_SUMMARIZATION += (response.usage.billed_units.input_tokens
                                     + response.usage.billed_units.output_tokens)

        summary_response = f"""<article_summary>
        # SUMMARY:
        {summary}

        # URL: {url}
        </article_summary>
        """
        # include a delay of 10 second to avoid rate limiting of GROQ-Cohere
        if MODEL_CHOICE == "groq-cohere":
            time.sleep(10)

        return summary_response

    except Exception as e:
        logger.error("Error generating summary: %s", str(e))
        return f"""<article_summary>
        # SUMMARY:
        Error generating summary.
        IGNORE THIS OUTPUT.

        # URL: {url}
        </article_summary>
        """
215
+
216
+
217
def capture_verbose_output(
    agent_input,
    model_choice,
    azure_openai_key,
    azure_deployment_id,
    azure_api_base,
    azure_api_version,
    openai_api_key,
    cohere_api_key,
    groq_api_key
):
    """
    This generator captures stdout produced by the multi-agent process in real time,
    updating the Gradio interface with logs, while returning the final result once done.

    Yields (result update, verbose-log update) pairs for the two Gradio outputs.

    NOTE(review): sys.stdout is replaced process-wide while the worker thread
    runs, so concurrent sessions would interleave their logs — looks acceptable
    for a single-user demo; confirm before scaling out.
    """
    # Swap stdout for an in-memory buffer that the polling loop below reads.
    old_stdout = sys.stdout
    mystdout = StringIO()
    sys.stdout = mystdout

    # One-slot container so the worker thread can hand its result back.
    result_container = [None]

    def run_kickoff():
        # Runs the full multi-agent pipeline in a background thread.
        result_container[0] = kickoff_crew(
            topic=agent_input,
            model_choice=model_choice,
            azure_openai_key=azure_openai_key,
            azure_deployment_id=azure_deployment_id,
            azure_api_base=azure_api_base,
            azure_api_version=azure_api_version,
            openai_api_key=openai_api_key,
            cohere_api_key=cohere_api_key,
            groq_api_key=groq_api_key
        )

    kickoff_thread = threading.Thread(target=run_kickoff)
    kickoff_thread.start()

    verbose_output = ""
    result_output = ""

    # Initialize outputs
    yield gr.update(value=result_output), gr.update(value=verbose_output)

    while kickoff_thread.is_alive():
        # Read new output from mystdout; only re-yield when something changed.
        new_output = mystdout.getvalue()
        if new_output != verbose_output:
            verbose_output = new_output
            yield gr.update(value=result_output), gr.update(value=verbose_output)
        time.sleep(0.1)  # polling interval for log updates

    # Once done, restore stdout and emit the final result plus the full log.
    kickoff_thread.join()
    sys.stdout = old_stdout
    result_output = result_container[0]

    verbose_output = mystdout.getvalue()
    yield gr.update(value=result_output), gr.update(value=verbose_output)
275
+
276
+
277
def kickoff_crew(
    topic: str,
    model_choice: str,
    azure_openai_key: str,
    azure_deployment_id: str,
    azure_api_base: str,
    azure_api_version: str,
    openai_api_key: str,
    cohere_api_key: str,
    groq_api_key: str
) -> str:
    """
    Kick off the multi-agent pipeline.

    Builds the provider-specific LLM pair ("base" for the researcher, "advanced"
    for the editor), wires the two agents and their tasks, runs the crew
    sequentially, and returns the final report string with token-usage
    estimates appended. On any failure an "Error ..." string is returned
    instead of raising, so the Gradio UI always gets displayable text.
    """
    try:
        global TOKENS_SUMMARIZATION, MODEL_CHOICE

        # Reset per-run counters and publish the provider for the tools to read.
        TOKENS_SUMMARIZATION = 0
        MODEL_CHOICE = model_choice

        # Basic checks
        if not topic.strip():
            return "Error: The topic cannot be empty. Please provide a valid topic."

        # ---- Define LLMs based on the user-provided inputs ----
        # Initialize the model variables with None
        azure_llm_base = None
        azure_llm_advanced = None
        openai_llm_base = None
        openai_llm_advanced = None
        groq_llm_base = None
        groq_llm_advanced = None

        if model_choice == "azure":
            if not azure_openai_key or not azure_deployment_id or not azure_api_base or not azure_api_version:
                return "Error: Please provide all the required Azure OpenAI API details."
            else:
                # Env vars are read back by the WebScrapper tool's Azure client.
                os.environ['AZURE_API_BASE']=azure_api_base
                os.environ['AZURE_API_VERSION']=azure_api_version
                os.environ['AZURE_DEPLOYMENT_ID']=azure_deployment_id
                os.environ['AZURE_OPENAI_KEY']=azure_openai_key
                # Azure
                azure_llm_base = LLM(
                    temperature=0.3,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=4000
                )
                azure_llm_advanced = LLM(
                    temperature=0.6,
                    model=f"azure/{azure_deployment_id}",
                    api_key=azure_openai_key,
                    base_url=azure_api_base,
                    api_version=azure_api_version,
                    max_tokens=10000
                )
        elif model_choice == "openai":
            if not openai_api_key:
                return "Error: Please provide the OpenAI API key."
            else:
                os.environ['OPENAI_API_KEY']=openai_api_key
                # OpenAI
                openai_llm_base = LLM(
                    model=OPENAI_MODELS['base'],
                    api_key=openai_api_key,
                    max_completion_tokens=4000
                )
                openai_llm_advanced = LLM(
                    model=OPENAI_MODELS['advanced'],
                    api_key=openai_api_key,
                    temperature=0.4,
                    max_completion_tokens=10000
                )
        elif model_choice == "groq-cohere":
            if not cohere_api_key or not groq_api_key:
                return "Error: Please provide both the Cohere and GROQ API keys."
            else:
                os.environ['COHERE_API_KEY']=cohere_api_key
                os.environ['GROQ_API_KEY']=groq_api_key
                # GROQ - placeholder examples
                groq_llm_base = LLM(
                    model=GROQ_MODELS['base'],
                    api_key=groq_api_key,
                    temperature=0.3,
                    max_tokens=1000
                )
                groq_llm_advanced = LLM(
                    model=GROQ_MODELS['advanced'],
                    api_key=groq_api_key,
                    temperature=0.6,
                    max_tokens=4000
                )


        # Dictionary grouping the LLMs per provider
        llms = {
            "azure": {
                "base": azure_llm_base,
                "advanced": azure_llm_advanced
            },
            "openai": {
                "base": openai_llm_base,
                "advanced": openai_llm_advanced
            },
            "groq-cohere": {
                "base": groq_llm_base,
                "advanced": groq_llm_advanced
            }
        }

        # Obtain the selected LLM set
        if model_choice not in llms:
            return f"Error: Invalid model choice. Please select from {list(llms.keys())}."

        selected_llm = llms[model_choice]

        # Define Agents
        researcher = Agent(
            role='Researcher',
            goal=f'Search and collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm["base"],
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract "
                "essential insights on any given topic. Your dedication to detail ensures the reliability and "
                "thoroughness of your findings."
            ),
            allow_delegation=False,
            max_iter=15,
            # Throttle hard on the free groq-cohere tier to avoid rate limits.
            max_rpm=5 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        editor = Agent(
            role='Editor',
            goal=f'Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm["advanced"],
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys "
                "essential insights but is also easily understandable to diverse audiences."
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=10 if model_choice == "groq-cohere" else 120,
            verbose=True
        )

        # Define Tasks
        research_task = Task(
            description=(
                "Be sure to translate the topic into English first. "
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new searches related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to extract information from significant URLs, "
                "extracting further insights. Compile these findings into a preliminary draft, documenting all "
                "relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body, "
                "and a conclusion. Properly cite sources. Provide a thorough overview of the info gathered."
            ),
            agent=researcher
        )

        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically. "
                "Elaborate on each section to provide in-depth information and insights. "
                "Verify the accuracy of all data, correct discrepancies, update info to ensure currency, "
                "and maintain a consistent tone. Include a section listing all sources used, formatted as bullet points."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative. "
                "Include an introduction, an extensive discussion, a concise conclusion, and a source list with references."
            ),
            agent=editor,
            context=[research_task]
        )

        # Form the Crew
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential
        )

        # Kick off
        result = crew.kickoff(inputs={'topic': topic})

        # Compute token usage (CrewAI aggregator usage)
        # NOTE(review): accessed before the str() fallback below — assumes
        # crew.kickoff always returns an object with .token_usage; confirm.
        tokens = result.token_usage.total_tokens / 1_000
        tokens_summ = TOKENS_SUMMARIZATION / 1_000

        if not isinstance(result, str):
            result = str(result)

        result += f"\n\n**Estimated tokens (Agents):** {tokens:.5f} k"
        result += f"\n\n**Estimated tokens (Summarization):** {tokens_summ:.5f} k"

        return result
    except Exception as e:
        logger.error("Error in kickoff_crew: %s", str(e))
        return f"Error in kickoff_crew: {str(e)}"
485
 
486
def main():
    """Set up and launch the Gradio interface for the CrewAI Research Tool.

    Builds a two-column layout: provider selection and API-key inputs on the
    left, topic input / result / verbose log on the right. Credential fields
    are shown or hidden dynamically based on the selected provider.
    """

    description_demo = """# Automatic Insights Generation with Multi-Agents (CrewAI)
    - **Multi-agent framework**: CrewAI
    - **Multi-agents**: Two agents, Researcher and Editor, working together to extract information from the internet and compile a report on the topic of choice.
    - **Search tool**: Duck-Duck-Go-Search
    - **Web Retrieval**: Newspaper4k and PDF

    *Note: Groq is currently disabled due to rate limiting issues. Please use Azure or OpenAI for now.*
    """

    with gr.Blocks() as demo:
        gr.Markdown(description_demo)

        with gr.Row():
            with gr.Column(scale=1):

                # Radio: now includes azure / openai / groq-cohere
                model_choice = gr.Radio(
                    choices=["azure", "openai", "groq-cohere"],
                    label="Choose Model",
                    value="openai",
                    interactive=True
                )

                # ------------
                # LLM config inputs (visibility toggled by update_model_choice)
                # ------------
                # Azure
                azure_api_base_input = gr.Textbox(label="Azure API Base (url)", type="password", visible=False, interactive=True)
                azure_deployment_id_input = gr.Textbox(label="Azure Deployment ID (model)", type="password", visible=False, interactive=True)
                azure_openai_key_input = gr.Textbox(label="Azure API Key", type="password", visible=False, interactive=True)
                azure_api_version_input = gr.Textbox(label="Azure API Version", type="text", visible=False, interactive=True)

                # OpenAI (default provider, so visible at start-up)
                openai_api_key_input = gr.Textbox(label="OpenAI API Key", type="password", visible=True, interactive=True)

                # GROQ
                groq_api_key_input = gr.Textbox(label="GROQ API Key", type="password", visible=False, interactive=False)

                # Cohere
                cohere_api_key_input = gr.Textbox(label="Cohere API Key", type="password", visible=False, interactive=False)

                export_button = gr.Button("Export to Markdown", interactive=True)
                file_output = gr.File(label="Download Markdown File")
                credits = gr.Markdown(
                    label="Credits",
                    show_label=True,
                    value="This tool is powered by [CrewAI](https://crewai.com), "
                          "[OpenAI](https://openai.com), "
                          "[Azure OpenAI Services](https://azure.microsoft.com/en-us/products/ai-services/openai-service), "
                          "[Cohere](https://dashboard.cohere.com), and [GROQ](https://console.groq.com/playground).",
                )

            with gr.Column(scale=2):
                topic_input = gr.Textbox(
                    label="Enter Topic",
                    placeholder="Type here the topic of interest...",
                    interactive=True
                )
                submit_button = gr.Button("Start Research", interactive=True)
                # BUG FIX: placeholder typo "insighsts" -> "insights".
                output = gr.Markdown(
                    label="Result",
                    show_copy_button=True,
                    value="The generated insights will appear here...",
                    latex_delimiters=[
                        {"left": "\\[", "right": "\\]", "display": True},
                        {"left": "\\(", "right": "\\)", "display": False},
                    ]
                )

                verbose_output = gr.Textbox(
                    label="Verbose Output",
                    placeholder="Verbose logs will appear here...",
                    lines=10,
                    interactive=False,
                    show_copy_button=True
                )

        # ---------------
        # Dynamic toggling of LLM config boxes
        # ---------------
        def update_model_choice(model):
            """Update visibility of config inputs based on the selected LLM."""
            azure_visibility = False
            openai_visibility = False
            cohere_visibility = False
            groq_visibility = False

            if model == "azure":
                azure_visibility = True
            elif model == "openai":
                openai_visibility = True
            elif model == "groq-cohere":
                cohere_visibility = True
                groq_visibility = True

            return {
                azure_openai_key_input: gr.update(visible=azure_visibility),
                azure_deployment_id_input: gr.update(visible=azure_visibility),
                azure_api_base_input: gr.update(visible=azure_visibility),
                azure_api_version_input: gr.update(visible=azure_visibility),
                openai_api_key_input: gr.update(visible=openai_visibility),
                cohere_api_key_input: gr.update(visible=cohere_visibility),
                groq_api_key_input: gr.update(visible=groq_visibility),
            }

        model_choice.change(
            fn=update_model_choice,
            inputs=[model_choice],
            outputs=[
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ]
        )

        # Streams (result, verbose log) updates while the crew runs.
        submit_button.click(
            fn=capture_verbose_output,
            inputs=[
                topic_input,
                model_choice,
                azure_openai_key_input,
                azure_deployment_id_input,
                azure_api_base_input,
                azure_api_version_input,
                openai_api_key_input,
                cohere_api_key_input,
                groq_api_key_input
            ],
            outputs=[output, verbose_output]
        )

        export_button.click(
            fn=export_to_markdown,
            inputs=output,
            outputs=file_output
        )

    demo.queue(api_open=False, max_size=3).launch()
632
 
633
+
634
  if __name__ == "__main__":
635
  main()
config.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# config.py
# Central model-name and token-limit configuration imported by app.py.

# OpenAI models
OPENAI_MODELS = {
    'base': 'gpt-4o-mini',  # OpenAI base (cheaper/faster) model
    'advanced': 'gpt-4o'  # OpenAI advanced model
}

# Cohere models
COHERE_MODELS = {
    'base': 'command-r',  # Cohere base model
    'advanced': 'command-r-plus'  # Cohere advanced model
}

# Groq models
GROQ_MODELS = {
    'base': 'groq/llama-3.2-3b-preview',  # Groq base model
    'advanced': 'groq/llama-3.3-70b-versatile'  # Groq advanced model
}

# Additional configuration
# NOTE(review): app.py imports these but currently hard-codes 4000/10000
# inline when building its LLMs — confirm which is intended to be the source
# of truth.
MAX_TOKENS_BASE = 4000
MAX_TOKENS_ADVANCED = 10000
outputs/output.md ADDED
@@ -0,0 +1 @@
 
 
1
+ The generated insights will appear here...
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 
 
1
  crewai
2
  crewai[tools]
3
- langchain-groq
4
- langchain-cohere
5
  duckduckgo-search
6
  cohere
7
  lxml_html_clean
 
1
+ gradio>=5.0
2
+ pymupdf
3
  crewai
4
  crewai[tools]
 
 
5
  duckduckgo-search
6
  cohere
7
  lxml_html_clean