Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -118,13 +118,13 @@ class ConversationBot:
|
|
118 |
audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
|
119 |
audio_load = whisper.load_audio(file.name)
|
120 |
soundfile.write(audio_filename, audio_load, samplerate = 16000)
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
# AI_prompt = "Received. "
|
125 |
-
# self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
126 |
AI_prompt = "Received. "
|
127 |
-
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
|
|
|
|
128 |
print("======>Current memory:\n %s" % self.agent.memory)
|
129 |
#state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
|
130 |
state = state + [(f"*{audio_filename}*", AI_prompt)]
|
@@ -146,8 +146,11 @@ class ConversationBot:
|
|
146 |
img = img.convert('RGB')
|
147 |
img.save(image_filename, "PNG")
|
148 |
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
|
|
|
|
|
|
|
149 |
AI_prompt = "Received. "
|
150 |
-
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
151 |
print("======>Current memory:\n %s" % self.agent.memory)
|
152 |
state = state + [(f"*{image_filename}*", AI_prompt)]
|
153 |
print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
|
@@ -159,7 +162,7 @@ class ConversationBot:
|
|
159 |
print("Inputs:", state)
|
160 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
161 |
# inpaint = Inpaint(device="cpu")
|
162 |
-
new_image_filename, new_audio_filename = self.
|
163 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"*{new_image_filename}*"
|
164 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
165 |
print("======>Current memory:\n %s" % self.agent.memory)
|
|
|
118 |
audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
|
119 |
audio_load = whisper.load_audio(file.name)
|
120 |
soundfile.write(audio_filename, audio_load, samplerate = 16000)
|
121 |
+
description = self.models['A2T'].inference(audio_filename)
|
122 |
+
Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
|
123 |
+
"rather than directly imagine from my description. If you understand, say \"Received\". \n".format(audio_filename, description)
|
|
|
|
|
124 |
AI_prompt = "Received. "
|
125 |
+
self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
126 |
+
# AI_prompt = "Received. "
|
127 |
+
# self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
128 |
print("======>Current memory:\n %s" % self.agent.memory)
|
129 |
#state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
|
130 |
state = state + [(f"*{audio_filename}*", AI_prompt)]
|
|
|
146 |
img = img.convert('RGB')
|
147 |
img.save(image_filename, "PNG")
|
148 |
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
|
149 |
+
description = self.models['ImageCaptioning'].inference(image_filename)
|
150 |
+
Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
|
151 |
+
"rather than directly imagine from my description. If you understand, say \"Received\". \n".format(image_filename, description)
|
152 |
AI_prompt = "Received. "
|
153 |
+
self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
|
154 |
print("======>Current memory:\n %s" % self.agent.memory)
|
155 |
state = state + [(f"*{image_filename}*", AI_prompt)]
|
156 |
print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
|
|
|
162 |
print("Inputs:", state)
|
163 |
print("======>Previous memory:\n %s" % self.agent.memory)
|
164 |
# inpaint = Inpaint(device="cpu")
|
165 |
+
new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
|
166 |
AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"*{new_image_filename}*"
|
167 |
self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
|
168 |
print("======>Current memory:\n %s" % self.agent.memory)
|