VanguardAI committed on
Commit
8318c4a
·
verified ·
1 Parent(s): 5d11559

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -137,17 +137,9 @@ class DuckDuckGoSearchRun(BaseTool):
137
  answer = data["Abstract"]
138
  return answer
139
 
 
140
  # Function to handle different input types and choose the right tool
141
  def handle_input(user_prompt, image=None, audio=None, websearch=False, document=None):
142
- if audio:
143
- if isinstance(audio, str):
144
- audio = open(audio, "rb")
145
- transcription = client.audio.transcriptions.create(
146
- file=(audio.name, audio.read()),
147
- model="whisper-large-v3"
148
- )
149
- user_prompt = transcription.text
150
-
151
  # Initialize the search tool
152
  search = DuckDuckGoSearchRun()
153
 
@@ -189,20 +181,31 @@ def handle_input(user_prompt, image=None, audio=None, websearch=False, document=
189
 
190
  llm = ChatGroq(model=MODEL, api_key=os.environ.get("GROQ_API_KEY"))
191
 
192
- # Initialize the agent
193
- agent = initialize_agent(
194
- tools,
195
- llm,
196
- agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
197
- verbose=True
198
- )
199
-
200
- if image:
201
- image = Image.open(image).convert('RGB')
202
- messages = [{"role": "user", "content": [image, user_prompt]}]
203
- response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
 
 
 
 
 
 
 
 
 
 
204
  else:
205
- response = agent.run(user_prompt)
 
206
 
207
  return response
208
 
 
137
  answer = data["Abstract"]
138
  return answer
139
 
140
+ # Function to handle different input types and choose the right tool
141
  # Function to handle different input types and choose the right tool
142
  def handle_input(user_prompt, image=None, audio=None, websearch=False, document=None):
 
 
 
 
 
 
 
 
 
143
  # Initialize the search tool
144
  search = DuckDuckGoSearchRun()
145
 
 
181
 
182
  llm = ChatGroq(model=MODEL, api_key=os.environ.get("GROQ_API_KEY"))
183
 
184
+ # Check if the input requires any tools
185
+ requires_tool = False
186
+ for tool in tools:
187
+ if tool.name.lower() in user_prompt.lower():
188
+ requires_tool = True
189
+ break
190
+
191
+ if image or audio or requires_tool:
192
+ # Initialize the agent
193
+ agent = initialize_agent(
194
+ tools,
195
+ llm,
196
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
197
+ verbose=True
198
+ )
199
+
200
+ if image:
201
+ image = Image.open(image).convert('RGB')
202
+ messages = [{"role": "user", "content": [image, user_prompt]}]
203
+ response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
204
+ else:
205
+ response = agent.run(user_prompt)
206
  else:
207
+ # If no tools are required, use the LLM directly
208
+ response = llm.call(query=user_prompt)
209
 
210
  return response
211