lmzjms committed on
Commit
88ae081
·
1 Parent(s): b79129e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -118,13 +118,13 @@ class ConversationBot:
118
  audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
119
  audio_load = whisper.load_audio(file.name)
120
  soundfile.write(audio_filename, audio_load, samplerate = 16000)
121
- # description = self.a2t.inference(audio_filename)
122
- # Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
123
- # "rather than directly imagine from my description. If you understand, say \"Received\". \n".format(audio_filename, description)
124
- # AI_prompt = "Received. "
125
- # self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
126
  AI_prompt = "Received. "
127
- self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
 
 
128
  print("======>Current memory:\n %s" % self.agent.memory)
129
  #state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
130
  state = state + [(f"*{audio_filename}*", AI_prompt)]
@@ -146,8 +146,11 @@ class ConversationBot:
146
  img = img.convert('RGB')
147
  img.save(image_filename, "PNG")
148
  print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
 
 
 
149
  AI_prompt = "Received. "
150
- self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
151
  print("======>Current memory:\n %s" % self.agent.memory)
152
  state = state + [(f"![](/file={image_filename})*{image_filename}*", AI_prompt)]
153
  print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
@@ -159,7 +162,7 @@ class ConversationBot:
159
  print("Inputs:", state)
160
  print("======>Previous memory:\n %s" % self.agent.memory)
161
  # inpaint = Inpaint(device="cpu")
162
- new_image_filename, new_audio_filename = self.inpaint.predict(audio_filename, image_filename)
163
  AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"![](/file={new_image_filename})*{new_image_filename}*"
164
  self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
165
  print("======>Current memory:\n %s" % self.agent.memory)
 
118
  audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
119
  audio_load = whisper.load_audio(file.name)
120
  soundfile.write(audio_filename, audio_load, samplerate = 16000)
121
+ description = self.models['A2T'].inference(audio_filename)
122
+ Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
123
+ "rather than directly imagine from my description. If you understand, say \"Received\". \n".format(audio_filename, description)
 
 
124
  AI_prompt = "Received. "
125
+ self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
126
+ # AI_prompt = "Received. "
127
+ # self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
128
  print("======>Current memory:\n %s" % self.agent.memory)
129
  #state = state + [(f"<audio src=audio_filename controls=controls></audio>*{audio_filename}*", AI_prompt)]
130
  state = state + [(f"*{audio_filename}*", AI_prompt)]
 
146
  img = img.convert('RGB')
147
  img.save(image_filename, "PNG")
148
  print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
149
+ description = self.models['ImageCaptioning'].inference(image_filename)
150
+ Human_prompt = "\nHuman: provide an audio named {}. The description is: {}. This information helps you to understand this audio, but you should use tools to finish following tasks, " \
151
+ "rather than directly imagine from my description. If you understand, say \"Received\". \n".format(image_filename, description)
152
  AI_prompt = "Received. "
153
+ self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
154
  print("======>Current memory:\n %s" % self.agent.memory)
155
  state = state + [(f"![](/file={image_filename})*{image_filename}*", AI_prompt)]
156
  print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
 
162
  print("Inputs:", state)
163
  print("======>Previous memory:\n %s" % self.agent.memory)
164
  # inpaint = Inpaint(device="cpu")
165
+ new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
166
  AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"![](/file={new_image_filename})*{new_image_filename}*"
167
  self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
168
  print("======>Current memory:\n %s" % self.agent.memory)