Ankitajadhav committed on
Commit
d14c9fe
·
verified ·
1 Parent(s): ddc268d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py CHANGED
@@ -86,3 +86,112 @@ collection_text.add(
86
  ids=loaded_ids
87
  )
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  ids=loaded_ids
87
  )
88
 
89
# Initialize the transcriber.
# Prefer the GPU when one is available, but fall back to the CPU so the app
# still starts on hosts without CUDA instead of failing at start-up.
import torch

asr_device = "cuda" if torch.cuda.is_available() else "cpu"
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=asr_device,
)

# Preload TTS models
preload_models()

# Background image; its absolute path is later passed to
# demo.launch(allowed_paths=...).
image_path = "dom_bremen.jpg"
absolute_path = os.path.abspath(image_path)
97
+
98
def transcribe(audio):
    """Transcribe a recorded audio clip to text.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a
            NumPy array.

    Returns:
        The ``"text"`` field of the transcriber's result.
    """
    sr, y = audio
    # The speech-recognition pipeline works on single-channel audio, so
    # average the channels of a multi-channel recording first.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    # Peak-normalise, but skip the division for an all-zero (silent) or empty
    # clip: dividing by a zero peak would fill the signal with NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
103
+
104
# Value passed as ``history_prompt`` to every ``generate_audio`` call.
fixed_prompt = "en_speaker_5"


def generate_audio_output(text):
    """Synthesise speech for ``text`` and return it as 16-bit samples.

    Args:
        text: The text to be spoken.

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple where ``samples`` is an ``int16``
        NumPy array.
    """
    audio_arr = generate_audio(text, history_prompt=fixed_prompt)
    # Clamp to [-1, 1] before scaling: a sample outside that range would
    # otherwise wrap around in the int16 cast and produce an audible click.
    audio_arr = np.clip(audio_arr, -1.0, 1.0)
    audio_arr = (audio_arr * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_arr)
110
+
111
# Function to retrieve and generate text based on input query
def generate_text(message, max_tokens=150, temperature=0.2, top_p=0.9):
    """Answer ``message`` using retrieved context and look up a matching image.

    Args:
        message: The user's question; used both as the retrieval query and in
            the LLM prompt.
        max_tokens: Forwarded to the LLM call as ``max_tokens``.
        temperature: Forwarded to the LLM call as ``temperature``.
        top_p: Forwarded to the LLM call as ``top_p``.

    Returns:
        A ``(answer_text, image_uri)`` tuple. ``image_uri`` is ``None`` when
        the image query returned no URI. If anything fails, the tuple is
        ``("Error: <message>", None)``; this function does not raise.
    """
    try:
        # Retrieve context and image from vector store. The URIs are requested
        # explicitly because that is the field read from the image result.
        retrieved_image = collection_images.query(query_texts=message, include=['uris'], n_results=1)
        context_text = collection_text.query(query_texts=message, n_results=1)

        # Result fields are indexed by query first, then by hit; only one
        # query is issued, so index 0 holds all hits.
        documents = (context_text or {}).get('documents') or [[]]
        first_documents = documents[0] or []
        # Join the hits into plain text so the prompt does not contain a
        # Python list repr.
        context = "\n".join(doc for doc in first_documents if doc) or "No relevant context found."

        uris = (retrieved_image or {}).get('uris') or [[]]
        first_uris = uris[0] or []
        # A missing image must not discard an otherwise valid answer.
        image_url = first_uris[0] if first_uris else None

        # Log the image URL for debugging
        print(f"Retrieved image URL: {image_url}")

        # Create prompt template for LLM
        prompt_template = (
            f"Context: {context}\n\n"
            f"Question: {message}\n\n"
            "You are a guide to city of Bremen from Germany, generate response based on context."
        )

        # Generate text using the language model
        output = llm(
            prompt_template,
            temperature=temperature,
            top_p=top_p,
            top_k=50,
            repeat_penalty=1.1,
            max_tokens=max_tokens,
        )

        # Process the output: drop surrounding list/quote characters and turn
        # literal "\n" sequences into real newlines.
        raw_answer = output['choices'][0]['text'].strip()
        answer = raw_answer.strip("[]'").replace('\\n', '\n')

        return answer, image_url
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}", None
150
+
151
# Function to load an image from a file path
def load_image_from_path(file_path):
    """Open and fully decode the image stored at ``file_path``.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The loaded image, or ``None`` if it could not be opened or decoded
        (the error is printed).
    """
    try:
        img = Image.open(file_path)
        # Image.open only identifies the file; decode it now so that a
        # truncated or corrupt file is reported here rather than later,
        # outside this error handling.
        img.load()
        return img
    except Exception as e:
        print(f"Error loading image: {str(e)}")
        return None
159
+
160
def process_audio(audio):
    """Run the full voice pipeline for one recorded question.

    The recording is transcribed, an answer and an image path are generated
    from the transcription, and the answer is converted to speech.

    Args:
        audio: The recorded ``(sampling_rate, samples)`` pair, or ``None``
            when nothing was recorded.

    Returns:
        A ``(text_output, audio_output, image_output)`` tuple matching the
        three interface outputs. ``image_output`` is ``None`` when no image
        was retrieved.
    """
    # Nothing was recorded: answer with a hint instead of failing while
    # unpacking the missing recording.
    if audio is None:
        return "No audio was recorded. Please record a question and try again.", None, None

    # Transcribe the audio
    transcribed_text = transcribe(audio)
    text_output, image_path = generate_text(transcribed_text)
    # Handle cases where no image is retrieved
    image_output = load_image_from_path(image_path) if image_path else None
    # Generate audio output
    audio_output = generate_audio_output(text_output)
    return text_output, audio_output, image_output
172
+
173
def gen_tts(text):
    """Synthesise speech for ``text``.

    Kept as a separate entry point, but delegates to
    ``generate_audio_output`` so the synthesis logic lives in one place.

    Args:
        text: The text to be spoken.

    Returns:
        The ``(SAMPLE_RATE, samples)`` tuple produced by
        ``generate_audio_output``.
    """
    return generate_audio_output(text)
177
+
178
# Define the Gradio interface
# Build the background rule from the same absolute path that is passed to
# launch(allowed_paths=...), so the image URL and the allowed path stay in
# sync wherever the app runs.
background_css = (
    ".gradio-container {\n"
    f"    background: url('file={absolute_path}') no-repeat center center fixed;\n"
    "    background-size: cover;\n"
    "}"
)

demo = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], label="Input Audio"),
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Audio"),
        gr.Image(label="Retrieved Image"),  # Output component for the image
    ],
    title="moinBremen - Your Personal Tour Guide for our City of Bremen",
    description="Ask your question about Bremen by speaking into the microphone. The system will transcribe your question, generate a response, and read it out loud.",
    css=background_css,
    cache_examples=False,
)
demo.launch(allowed_paths=[absolute_path])
197
+