Pijush2023 committed
Commit 192447d · verified · 1 Parent(s): feb7a13

Update app.py

Files changed (1):
  app.py +308 -190

app.py CHANGED
@@ -1,68 +1,260 @@
 import gradio as gr
-import torch
 import requests
 import tempfile
-import threading
-import numpy as np
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain_community.vectorstores import Neo4jVector
-from langchain_community.graphs import Neo4jGraph
-from langchain_experimental.graph_transformers import LLMGraphTransformer
-from langchain_core.prompts import ChatPromptTemplate
 import time
-import os
-from dataclasses import dataclass, field
-
-@dataclass
-class AppState:
-    stream: np.ndarray | None = None
-    sampling_rate: int = 0
-    pause_detected: bool = False
-    started_talking: bool = False
-    stopped: bool = False
-    conversation: list = field(default_factory=list)
-
-def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
-    """Take in the stream, determine if a pause happened"""
-    temp_audio = audio
-
-    dur_vad, _, time_vad = run_vad(temp_audio, sampling_rate)
-    duration = len(audio) / sampling_rate
-
-    if dur_vad > 0.5 and not state.started_talking:
-        print("started talking")
-        state.started_talking = True
-        return False
-
-    print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
-
-    return (duration - dur_vad) > 1
-
-def start_recording_user(state: AppState):
-    if not state.stopped:
-        return gr.Audio(recording=True)
-
-# Neo4j setup
-graph = Neo4jGraph(
-    url="neo4j+s://c62d0d35.databases.neo4j.io",
-    username="neo4j",
-    password="_x8f-_aAQvs2NB0x6s0ZHSh3W_y-HrENDbgStvsUCM0"
 )
-
-# Initialize the vector index with Neo4j
-vector_index = Neo4jVector.from_existing_graph(
-    OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY']),
-    graph=graph,
-    search_type="hybrid",
-    node_label="Document",
-    text_node_properties=["text"],
-    embedding_node_property="embedding",
 )

 # Define the ASR model with Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -83,13 +275,12 @@ pipe_asr = pipeline(
     return_timestamps=True
 )

-# Function to reset the state after 10 seconds
 def auto_reset_state():
-    time.sleep(2)
     return None, ""  # Reset the state and clear input text


-# Function to process audio input and transcribe it
 def transcribe_function(stream, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]
@@ -97,6 +288,7 @@ def transcribe_function(stream, new_chunk):
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None

     if y is None or len(y) == 0:
         return stream, "", None
@@ -105,168 +297,94 @@ def transcribe_function(stream, new_chunk):
     if max_abs_y > 0:
         y = y / max_abs_y

     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y

     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")

     threading.Thread(target=auto_reset_state).start()
-    return stream, full_text, full_text
-
-
-
-# Function to generate a full-text search query for Neo4j
-#def generate_full_text_query(input: str) -> str:
-    #full_text_query = ""
-    #words = [el for el in input.split() if el]
-    #for word in words[:-1]:
-        #full_text_query += f" {word}~2 AND"
-    #full_text_query += f" {words[-1]}~2"
-    #return full_text_query.strip()
-
-
-# Function to generate a full-text search query for Neo4j
-def generate_full_text_query(input: str) -> str:
-    # Split the input into words, ignoring any empty strings
-    words = [el for el in input.split() if el]
-
-    # Check if there are no words
-    if not words:
-        return ""  # Return an empty string or a default query if desired
-
-    # Create the full-text query with fuzziness (~2 for proximity search)
-    full_text_query = ""
-    for word in words[:-1]:
-        full_text_query += f" {word}~2 AND"
-    full_text_query += f" {words[-1]}~2"
-    return full_text_query.strip()
-


-# Function to generate audio with Eleven Labs TTS
-def generate_audio_elevenlabs(text):
-    XI_API_KEY = os.environ['ELEVENLABS_API']
-    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
-    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,
-            "use_speaker_boost": False
-        }
-    }
-    response = requests.post(tts_url, headers=headers, json=data, stream=True)
-    if response.ok:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-            audio_path = f.name
-        return audio_path  # Return audio path for automatic playback
-    else:
-        print(f"Error generating audio: {response.text}")
-        return None

-# Define the template for generating responses based on context
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational and straight-foreward way without any Greet.
-Context:
-{context}

-Question: {question}
-Answer concisely:"""

-# Create a prompt object using the template
-prompt = ChatPromptTemplate.from_template(template)

-# Function to generate a response using the prompt and the context
-def generate_response_with_prompt(context, question):
-    formatted_prompt = prompt.format(
-        context=context,
-        question=question
-    )
-    # Use the ChatOpenAI instance to generate a response directly from the formatted prompt
-    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-    response = llm(formatted_prompt)
-    return response.content.strip()
-
-# Define the function to generate a hybrid response using Neo4j and other retrieval methods
-def retriever(question: str):
-    # Structured data retrieval from Neo4j
-    structured_query = f"""
-    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
-    YIELD node, score
-    RETURN node.id AS entity, node.text AS context, score
-    ORDER BY score DESC
-    LIMIT 2
-    """
-    structured_data = graph.query(structured_query, {"query": generate_full_text_query(question)})
-    structured_response = "\n".join([f"{record['entity']}: {record['context']}" for record in structured_data])
-
-    # Unstructured data retrieval from vector store
-    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
-    unstructured_response = "\n".join(unstructured_data)
-
-    # Combine structured and unstructured responses
-    combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"
-
-    # Generate the final response using the prompt template
-    final_response = generate_response_with_prompt(combined_context, question)
-    return final_response

-def process_audio_query(audio_input, state):
-    stream = None
-    _, transcription, _ = transcribe_function(stream, audio_input)

-    if not transcription.strip():
-        print("No valid transcription detected.")
-        return None, state  # Avoid generating a response for empty transcriptions
-
-    # Retrieve a response based on the transcription
-    response_text = retriever(transcription)
-    print(f"Response: {response_text}")
-
-    # Generate audio from the response text
-    audio_path = generate_audio_elevenlabs(response_text)

-    # Update the conversation history in the state
-    state.conversation.append((transcription, response_text))

-    return audio_path, state
-
-
-with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
         with gr.Column():
-            output_audio = gr.Audio(label="Output Audio", streaming=True, autoplay=True)
-    state = gr.State(value=AppState())
-
-    stream = input_audio.stream(
-        process_audio_query,
-        [input_audio, state],
-        [output_audio, state],
-        every=0.50
     )
-    restart = output_audio.stop(
-        start_recording_user,
-        [state],
-        [input_audio]
     )
-    cancel = gr.Button("Stop Conversation", variant="stop")
-    cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None,
-                 [state, input_audio], cancels=[stream, restart])

-demo.launch()

 import gradio as gr
+import os
+import logging
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_openai import ChatOpenAI
+from langchain_community.graphs import Neo4jGraph
+from typing import List, Tuple
+from pydantic import BaseModel, Field
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import (
+    RunnableBranch,
+    RunnableLambda,
+    RunnablePassthrough,
+    RunnableParallel,
+)
+from langchain_core.prompts.prompt import PromptTemplate
 import requests
 import tempfile
+from langchain.memory import ConversationBufferWindowMemory
 import time
+import logging
+from langchain.chains import ConversationChain
+import torch
+import torchaudio
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+import numpy as np
+import threading


+#code for history
+conversational_memory = ConversationBufferWindowMemory(
+    memory_key='chat_history',
+    k=10,
+    return_messages=True
+)

+# Setup Neo4j
+graph = Neo4jGraph(
+    url="neo4j+s://c62d0d35.databases.neo4j.io",
+    username="neo4j",
+    password="_x8f-_aAQvs2NB0x6s0ZHSh3W_y-HrENDbgStvsUCM0"
+)

+# Define entity extraction and retrieval functions
+class Entities(BaseModel):
+    names: List[str] = Field(
+        ..., description="All the person, organization, or business entities that appear in the text"
+    )

+entity_prompt = ChatPromptTemplate.from_messages([
+    ("system", "You are extracting organization and person entities from the text."),
+    ("human", "Use the given format to extract information from the following input: {question}"),
+])

+chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
+entity_chain = entity_prompt | chat_model.with_structured_output(Entities)

+def remove_lucene_chars(input: str) -> str:
+    return input.translate(str.maketrans({
+        "\\": r"\\", "+": r"\+", "-": r"\-", "&": r"\&", "|": r"\|", "!": r"\!",
+        "(": r"\(", ")": r"\)", "{": r"\{", "}": r"\}", "[": r"\[", "]": r"\]",
+        "^": r"\^", "~": r"\~", "*": r"\*", "?": r"\?", ":": r"\:", '"': r'\"',
+        ";": r"\;", " ": r"\ "
+    }))

+def generate_full_text_query(input: str) -> str:
+    full_text_query = ""
+    words = [el for el in remove_lucene_chars(input).split() if el]
+    for word in words[:-1]:
+        full_text_query += f" {word}~2 AND"
+    full_text_query += f" {words[-1]}~2"
+    return full_text_query.strip()

+# Setup logging to a file to capture debug information
+logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def structured_retriever(question: str) -> str:
+    result = ""
+    entities = entity_chain.invoke({"question": question})
+    for entity in entities.names:
+        response = graph.query(
+            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
+            YIELD node,score
+            CALL {
+                WITH node
+                MATCH (node)-[r:!MENTIONS]->(neighbor)
+                RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
+                UNION ALL
+                WITH node
+                MATCH (node)<-[r:!MENTIONS]-(neighbor)
+                RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
+            }
+            RETURN output LIMIT 50
+            """,
+            {"query": generate_full_text_query(entity)},
+        )
+        result += "\n".join([el['output'] for el in response])
+    return result
+
+def retriever_neo4j(question: str):
+    structured_data = structured_retriever(question)
+    logging.debug(f"Structured data: {structured_data}")
+    return structured_data
+
+# Setup for condensing the follow-up questions
+_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
+in its original language.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
+    buffer = []
+    for human, ai in chat_history:
+        buffer.append(HumanMessage(content=human))
+        buffer.append(AIMessage(content=ai))
+    return buffer
+
+_search_query = RunnableBranch(
+    (
+        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
+            run_name="HasChatHistoryCheck"
+        ),
+        RunnablePassthrough.assign(
+            chat_history=lambda x: _format_chat_history(x["chat_history"])
+        )
+        | CONDENSE_QUESTION_PROMPT
+        | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
+        | StrOutputParser(),
+    ),
+    RunnableLambda(lambda x: x["question"]),
 )

+
+template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
+Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
+{context}
+Question: {question}
+Answer:"""
+
+
+qa_prompt = ChatPromptTemplate.from_template(template)
+
+# Define the chain for Neo4j-based retrieval and response generation
+chain_neo4j = (
+    RunnableParallel(
+        {
+            "context": _search_query | retriever_neo4j,
+            "question": RunnablePassthrough(),
+        }
+    )
+    | qa_prompt
+    | chat_model
+    | StrOutputParser()
 )

+# Define the function to get the response
+def get_response(question):
+    try:
+        return chain_neo4j.invoke({"question": question})
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Define the function to clear input and output
+def clear_fields():
+    return [], "", None
+
+# Function to generate audio with Eleven Labs TTS
+def generate_audio_elevenlabs(text):
+    XI_API_KEY = os.environ['ELEVENLABS_API']
+    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
+    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
+    headers = {
+        "Accept": "application/json",
+        "xi-api-key": XI_API_KEY
+    }
+    data = {
+        "text": str(text),
+        "model_id": "eleven_multilingual_v2",
+        "voice_settings": {
+            "stability": 1.0,
+            "similarity_boost": 0.0,
+            "style": 0.60,
+            "use_speaker_boost": False
+        }
+    }
+    response = requests.post(tts_url, headers=headers, json=data, stream=True)
+    if response.ok:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+            audio_path = f.name
+        logging.debug(f"Audio saved to {audio_path}")
+        return audio_path  # Return audio path for automatic playback
+    else:
+        logging.error(f"Error generating audio: {response.text}")
+        return None
+
+
+def handle_mode_selection(mode, chat_history, question):
+    if mode == "Normal Chatbot":
+        # Append the user's question to chat history first
+        chat_history.append((question, ""))  # Placeholder for the bot's response
+
+        # Stream the response and update chat history with each chunk
+        for response_chunk in chat_with_bot(chat_history):
+            chat_history[-1] = (question, response_chunk[-1][1])  # Update last entry with streamed response
+            yield chat_history, "", None  # Stream each chunk to display in the chatbot
+        yield chat_history, "", None  # Final yield to complete the response
+
+    elif mode == "Voice to Voice Conversation":
+        # Voice to Voice mode: Stream the response text and then convert it to audio
+        response_text = get_response(question)  # Retrieve response text
+        audio_path = generate_audio_elevenlabs(response_text)  # Convert response to audio
+        yield [], "", audio_path  # Only output the audio response without updating chatbot history
+
+
+# Function to add a user's message to the chat history and clear the input box
+def add_message(history, message):
+    if message.strip():
+        history.append((message, ""))  # Add the user's message to the chat history only if it's not empty
+    return history, ""  # Clear the input box
+
+# Define function to generate a streaming response
+def chat_with_bot(messages):
+    user_message = messages[-1][0]  # Get the last user message (input)
+    messages[-1] = (user_message, "")  # Prepare a placeholder for the bot's response
+
+    response = get_response(user_message)  # Assume `get_response` is a generator function
+
+    # Stream each character in the response and update the history progressively
+    for character in response:
+        messages[-1] = (user_message, messages[-1][1] + character)
+        yield messages  # Stream each updated chunk
+        time.sleep(0.05)  # Adjust delay as needed for real-time effect
+
+    yield messages  # Final yield to complete the response
+
+
+# Function to generate audio with Eleven Labs TTS from the last bot response
+def generate_audio_from_last_response(history):
+    # Get the most recent bot response from the chat history
+    if history and len(history) > 0:
+        recent_response = history[-1][1]  # The second item in the tuple is the bot response text
+        if recent_response:
+            return generate_audio_elevenlabs(recent_response)
+    return None
+
+
 # Define the ASR model with Whisper
 model_id = 'openai/whisper-large-v3'
 device = "cuda:0" if torch.cuda.is_available() else "cpu"

     return_timestamps=True
 )

+# Define the function to reset the state after 10 seconds
 def auto_reset_state():
+    time.sleep(5)
     return None, ""  # Reset the state and clear input text


 def transcribe_function(stream, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]

         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None

+    # Ensure y is not empty and is at least 1-dimensional
     if y is None or len(y) == 0:
         return stream, "", None

     if max_abs_y > 0:
         y = y / max_abs_y

+    # Ensure stream is also at least 1-dimensional before concatenation
     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y

+    # Process the audio data for transcription
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")

+    # Start a thread to reset the state after 10 seconds
     threading.Thread(target=auto_reset_state).start()

+    return stream, full_text, full_text



+# Define the function to clear the state and input text
+def clear_transcription_state():
+    return None, ""


+with gr.Blocks(theme="rawrsor1/Everforest") as demo:
+    # Hide the chatbot component by setting `visible=False`
+    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False, visible=False)
+
+    with gr.Row():
+        with gr.Column():
+            # Hide the "Normal Chatbot" radio button by removing it or setting `visible=False`
+            mode_selection = gr.Radio(
+                choices=["Voice to Voice Conversation"],  # Removed "Normal Chatbot" option
+                label="Mode Selection",
+                value="Voice to Voice Conversation",
+                visible=False  # Hide the mode selection entirely
+            )
+    # Remaining code unchanged
+    with gr.Row():
+        with gr.Column():
+            question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...", visible=False)
+            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
+            submit_voice_btn = gr.Button("Submit Voice")
+
+        with gr.Column():
+            audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
+
     with gr.Row():
+
         with gr.Column():
+            clear_state_btn = gr.Button("Clear State")
+
         with gr.Column():
+            clean_btn = gr.Button("Clean")
+
+
+    # Adjust the interactions for the Get Response button
+    submit_voice_btn.click(
+        fn=handle_mode_selection,
+        inputs=[mode_selection, chatbot, question_input],
+        outputs=[chatbot, question_input, audio_output],
+        api_name="api_voice_to_voice_translation"
     )
+
+    # Speech-to-Text functionality
+    state = gr.State()
+    audio_input.stream(
+        transcribe_function,
+        inputs=[state, audio_input],
+        outputs=[state, question_input],
+        api_name="api_voice_to_text"
+    )
+
+
+    clean_btn.click(
+        fn=clear_fields,
+        inputs=[],
+        outputs=[chatbot, question_input, audio_output],
+        api_name="api_clear_textbox"
     )

+    # Clear state interaction
+    clear_state_btn.click(
+        fn=clear_transcription_state,
+        outputs=[question_input, state],
+        api_name="api_clean_state_transcription"
+    )

+# Launch the Gradio interface
+demo.launch(show_error=True, share=True)