VanguardAI committed (verified)
Commit 038f995 · 1 Parent(s): 03ff38e

Update app.py

Files changed (1):
  app.py  +5 -21
app.py CHANGED
@@ -11,8 +11,9 @@ import soundfile as sf
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains import RetrievalQA, LLMChain
-from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
+from langchain.chains import RetrievalQA
+from langchain.agents import AgentExecutor, Tool
+from langchain.schema import RunnableSequence
 from PIL import Image
 from decord import VideoReader, cpu
 from tavily import TavilyClient
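Note: RunnableSequence is not normally importable from langchain.schema; it lives in langchain_core.runnables (re-exported via langchain.schema.runnable in older releases). A minimal sketch of how a RunnableSequence is usually built, assuming LCEL-style composition and a langchain-openai chat model (both assumptions, not part of this commit):

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain_openai import ChatOpenAI

# Piping one runnable into another produces a RunnableSequence.
prompt = ChatPromptTemplate.from_template("{input}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini")  # hypothetical model id

assert isinstance(chain, RunnableSequence)
print(chain.invoke({"input": "What is 2 + 2?"}).content)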
@@ -56,7 +57,6 @@ def play_voice_output(response):
 
 # NumPy Code Calculator Tool
 def numpy_code_calculator(query):
-    """Generates and executes NumPy code for mathematical operations."""
     try:
         llm_response = client.chat.completions.create(
             model=MODEL,
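The hunk ends before the tool's execution step. A sketch of the common pattern for a tool like this — ask the model for NumPy code, extract it, run it — where the fence-stripping regex and the result-variable convention are assumptions, not necessarily what app.py does:

import re
import numpy as np

def run_generated_numpy_code(llm_output: str) -> str:
    # Take the first fenced code block if the model wrapped its answer in one.
    match = re.search(r"```(?:python)?\s*(.*?)```", llm_output, re.DOTALL)
    code = match.group(1) if match else llm_output
    # Execute in a scratch namespace exposing NumPy; the generated code is
    # expected to store its answer in a variable named result.
    namespace = {"np": np}
    exec(code, namespace)
    return str(namespace.get("result", "generated code set no result"))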
@@ -77,20 +77,17 @@ def numpy_code_calculator(query):
 
 # Web Search Tool
 def web_search(query):
-    """Performs a web search using Tavily."""
     answer = tavily_client.qna_search(query=query)
     return answer
 
 # Image Generation Tool
 def image_generation(query):
-    """Generates an image based on the given prompt."""
     image = image_pipe(prompt=query, num_inference_steps=20, guidance_scale=7.5).images[0]
     image.save("output.jpg")
     return "output.jpg"
 
 # Document Question Answering Tool
 def doc_question_answering(query, file_path):
-    """Answers questions based on the content of a document."""
     with open(file_path, 'r') as f:
         file_content = f.read()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
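The hunk is cut off right after the splitter, so the retrieval wiring is not visible here. Given the imports above, the conventional continuation looks roughly like this sketch; the ChatOpenAI stand-in is an assumption (the commit's llm_function is a bare Python function, which RetrievalQA cannot consume):

from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI

def doc_qa_sketch(query: str, file_content: str) -> str:
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = splitter.split_text(file_content)
    # Embed the chunks into an in-memory Chroma store and query it.
    vectordb = Chroma.from_texts(texts, OpenAIEmbeddings())
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-4o-mini"),  # hypothetical stand-in
        retriever=vectordb.as_retriever(),
    )
    return qa_chain.run(query)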
@@ -102,9 +99,7 @@ def doc_question_answering(query, file_path):
 
 # Function to handle different input types and choose the right tool
 def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
-    # Voice input handling
     if audio:
-        # Make sure 'audio' is a file object
         if isinstance(audio, str):
             audio = open(audio, "rb")
         transcription = client.audio.transcriptions.create(
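The transcription call is truncated by the hunk boundary. With the app's OpenAI-compatible client, the complete call usually looks like this; the model id is an assumption:

if isinstance(audio, str):
    audio = open(audio, "rb")
transcription = client.audio.transcriptions.create(
    file=audio,
    model="whisper-1",  # assumption: app.py may name a different model
)
user_prompt = transcription.text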
@@ -113,7 +108,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
         )
         user_prompt = transcription.text
 
-    # Initialize tools
     tools = [
         Tool(
             name="Numpy Code Calculator",
@@ -132,7 +126,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
         ),
     ]
 
-    # Add document Q&A tool if a document is provided
     if doc:
         tools.append(
             Tool(
@@ -142,7 +135,6 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
             )
         )
 
-    # Function for the agent's LLM
     def llm_function(query):
         response = client.chat.completions.create(
             model=MODEL,
@@ -150,22 +142,15 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
         )
         return response.choices[0].message.content
 
-    # Initialize agent with an empty string prompt for LLMChain
-    agent = ZeroShotAgent(llm_chain=LLMChain(llm=llm_function, prompt=""), tools=tools, verbose=True)
+    agent = ZeroShotAgent(llm_chain=RunnableSequence(prompt="", llm=llm_function), tools=tools, verbose=True)
     agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
 
-    # Initialize agent
-    agent = ZeroShotAgent(llm_chain=LLMChain(llm=llm_function, prompt=None), tools=tools, verbose=True)
-    agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
-
-    # If user uploaded an image and text, use MiniCPM model
     if image:
         image = Image.open(image).convert('RGB')
         messages = [{"role": "user", "content": [image, user_prompt]}]
         response = vqa_model.chat(image=None, msgs=messages, tokenizer=tokenizer)
         return response
 
-    # Use the agent to determine the best tool and get the response
     if websearch:
         response = agent_executor.run(f"{user_prompt} Use the Web Search tool if necessary.")
     else:
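As committed, the new agent line cannot run: RunnableSequence does not accept prompt=/llm= keyword arguments, llm_function is a plain function rather than a LangChain language model, and ZeroShotAgent is still referenced even though it was dropped from the imports. A sketch of an equivalent setup on the legacy agent API, with ChatOpenAI as an assumed stand-in for the raw client:

from langchain.agents import AgentExecutor, ZeroShotAgent
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # hypothetical stand-in for client
# from_llm_and_tools builds the ReAct prompt and LLMChain internally.
agent = ZeroShotAgent.from_llm_and_tools(llm=llm, tools=tools)
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True
)
response = agent_executor.run(user_prompt)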
@@ -198,7 +183,6 @@ def create_ui():
         outputs=[output_label, audio_output]
     )
 
-    # Voice-only mode UI
    voice_only_mode.change(
        lambda x: gr.update(visible=not x),
        inputs=voice_only_mode,
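The change() wiring is truncated; in Gradio this event needs an outputs list naming the components whose visibility is toggled. A sketch, with the component name assumed:

voice_only_mode.change(
    lambda x: gr.update(visible=not x),
    inputs=voice_only_mode,
    outputs=[user_prompt_box],  # assumption: the text components to hide
)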
@@ -230,4 +214,4 @@ def main_interface(user_prompt, image=None, audio=None, doc=None, voice_only=False):
 
 # Launch the app
 demo = create_ui()
-demo.launch(inline=False)
+demo.launch(inline=False)
 