Update app.py
app.py CHANGED
Previous version (lines marked - were removed):

@@ -1,20 +1,40 @@
  1     import gradio as gr
  2     import os
  3     import logging
  4  -  import requests
  5  -  import tempfile
  6  -  import torch
  7  -  import numpy as np
  8  -  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
  9  -  from langchain_community.graphs import Neo4jGraph
 10     from langchain_core.prompts import ChatPromptTemplate
 11     from langchain_openai import ChatOpenAI
 12     from pydantic import BaseModel, Field
 13  -  from …
 14     import time
 15     import torchaudio
 16
 17  -  # …
 18     graph = Neo4jGraph(
 19         url="neo4j+s://6457770f.databases.neo4j.io",
 20         username="neo4j",

@@ -51,6 +71,9 @@ def generate_full_text_query(input: str) -> str:
 51         full_text_query += f" {words[-1]}~2"
 52         return full_text_query.strip()
 53
 54     def structured_retriever(question: str) -> str:
 55         result = ""
 56         entities = entity_chain.invoke({"question": question})

@@ -74,9 +97,38 @@ def structured_retriever(question: str) -> str:
 74         result += "\n".join([el['output'] for el in response])
 75         return result
 76
 77     # Function to generate audio with Eleven Labs TTS
 78     def generate_audio_elevenlabs(text):
 79  -      XI_API_KEY = os.environ …
 80         VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
 81         tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
 82         headers = {

@@ -93,118 +145,82 @@ def generate_audio_elevenlabs(text):
 93                 "use_speaker_boost": False
 94             }
 95         }
 96-106  -  … (removed lines truncated in this view)
107  -              audio_path = f.name
108  -              logging.debug(f"Audio successfully saved to {audio_path}")
109  -              return audio_path
110  -          else:
111  -              logging.error(f"Error generating audio: {response.status_code} - {response.text}")
112  -              return None
113  -      except Exception as e:
114  -          logging.error(f"Exception during audio generation: {str(e)}")
115            return None
116
117  -  # …
118-123  -  … (removed lines truncated in this view)
124  -  pipe_asr = pipeline(
125  -      "automatic-speech-recognition",
126  -      model=model,
127  -      tokenizer=processor.tokenizer,
128  -      feature_extractor=processor.feature_extractor,
129  -      max_new_tokens=128,
130  -      chunk_length_s=15,
131  -      batch_size=16,
132  -      torch_dtype=torch_dtype,
133  -      device=device,
134  -      return_timestamps=True
135  -  )
136
137  -  # Function to …
138  -  def …
139-154  -  … (removed lines truncated in this view)
155  -      question = result.get("text", "")
156  -
157  -      # Log the transcribed text for debugging
158  -      logging.debug(f"Transcribed text: {question}")
159  -
160  -      # Retrieve information from Neo4j
161  -      response_text = structured_retriever(question) if question else "I didn't understand the question."
162  -
163  -      # Convert the response to audio using Eleven Labs TTS
164  -      audio_path = generate_audio_elevenlabs(response_text) if response_text else None
165  -
166  -      # Ensure a valid audio path is returned
167  -      if audio_path and os.path.exists(audio_path):
168  -          logging.debug(f"Generated audio file path: {audio_path}")
169        else:
170-173  -  … (removed lines truncated in this view)
174
175  -
176  -  def clear_transcription_state():
177  -      return None, None
178
179  -
180     with gr.Blocks(theme="rawrsor1/Everforest") as demo:
181-197  -  … (removed lines truncated in this view)
198  -          inputs=audio_input,
199  -          outputs=[audio_output, gr.Textbox(label="Transcription")]
200     )
201
202  -      # …
203  -      gr. …
204-206  -  … (removed lines truncated in this view)
207     )
208
209  -
210  -  demo.launch(show_error=True, share=True)
Updated version (lines marked + were added):

  1     import gradio as gr
  2     import os
  3     import logging
  4     from langchain_core.prompts import ChatPromptTemplate
  5  +  from langchain_core.output_parsers import StrOutputParser
  6     from langchain_openai import ChatOpenAI
  7  +  from langchain_community.graphs import Neo4jGraph
  8  +  from typing import List, Tuple
  9     from pydantic import BaseModel, Field
 10  +  from langchain_core.messages import AIMessage, HumanMessage
 11  +  from langchain_core.runnables import (
 12  +      RunnableBranch,
 13  +      RunnableLambda,
 14  +      RunnablePassthrough,
 15  +      RunnableParallel,
 16  +  )
 17  +  from langchain_core.prompts.prompt import PromptTemplate
 18  +  import requests
 19  +  import tempfile
 20  +  from langchain.memory import ConversationBufferWindowMemory
 21     import time
 22  +  import logging
 23  +  from langchain.chains import ConversationChain
 24  +  import torch
 25     import torchaudio
 26  +  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 27  +  import numpy as np
 28  +  import threading
 29
 30  +  # Setup conversational memory
 31  +  conversational_memory = ConversationBufferWindowMemory(
 32  +      memory_key='chat_history',
 33  +      k=10,
 34  +      return_messages=True
 35  +  )
 36  +
 37  +  # Setup Neo4j connection
 38     graph = Neo4jGraph(
 39         url="neo4j+s://6457770f.databases.neo4j.io",
 40         username="neo4j",
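The Neo4jGraph(...) call above continues past the hunk boundary, so its credential argument is not visible in this diff. A minimal sketch of how such a connection is typically completed, assuming the password is read from an environment variable (the variable name is an assumption, not taken from the repo):

# Hypothetical completion of the connection shown above; the real password
# argument sits in unchanged lines of app.py that this diff does not display.
import os
from langchain_community.graphs import Neo4jGraph

graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password=os.environ["NEO4J_PASSWORD"],  # assumed environment variable name
)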
 71         full_text_query += f" {words[-1]}~2"
 72         return full_text_query.strip()
 73
 74  +  # Setup logging to a file to capture debug information
 75  +  logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 76  +
 77     def structured_retriever(question: str) -> str:
 78         result = ""
 79         entities = entity_chain.invoke({"question": question})
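structured_retriever calls entity_chain, which is defined in unchanged lines that this diff does not show. A minimal sketch of what such an entity-extraction chain commonly looks like with the imports above; the class name, prompt wording, and model settings are assumptions:

# Hypothetical sketch; the real entity_chain lives in an unchanged part of app.py.
from typing import List
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

class Entities(BaseModel):
    """Entities mentioned in the user's question (assumed schema)."""
    names: List[str] = Field(..., description="Person, organization, and location names in the text")

entity_prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract all person, organization, and location names from the input."),
    ("human", "{question}"),
])

# with_structured_output makes the model return a parsed Entities object.
entity_chain = entity_prompt | ChatOpenAI(temperature=0).with_structured_output(Entities)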
 97         result += "\n".join([el['output'] for el in response])
 98         return result
 99
100  +  def retriever_neo4j(question: str):
101  +      structured_data = structured_retriever(question)
102  +      logging.debug(f"Structured data: {structured_data}")
103  +      return structured_data
104  +
105  +  # Define the chain for Neo4j-based retrieval and response generation
106  +  chain_neo4j = (
107  +      RunnableParallel(
108  +          {
109  +              "context": RunnableLambda(lambda x: retriever_neo4j(x["question"])),
110  +              "question": RunnablePassthrough(),
111  +          }
112  +      )
113  +      | ChatPromptTemplate.from_template("Answer: {context} Question: {question}")
114  +      | chat_model
115  +      | StrOutputParser()
116  +  )
117  +
118  +  # Define the function to get the response
119  +  def get_response(question):
120  +      try:
121  +          return chain_neo4j.invoke({"question": question})
122  +      except Exception as e:
123  +          return f"Error: {str(e)}"
124  +
125  +  # Define the function to clear input and output
126  +  def clear_fields():
127  +      return [], "", None
128  +
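chain_neo4j pipes its prompt into chat_model, which none of the shown hunks define (it sits in unchanged lines of app.py). A minimal stand-in so the chain above can be read in isolation; the model name and temperature are assumptions:

# Hypothetical stand-in; the actual chat_model is defined elsewhere in app.py.
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Example use of the retrieval chain once chat_model exists:
# print(get_response("What does the graph contain about the Eiffel Tower?"))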
129     # Function to generate audio with Eleven Labs TTS
130     def generate_audio_elevenlabs(text):
131  +      XI_API_KEY = os.environ['ELEVENLABS_API']
132         VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
133         tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
134         headers = {
145                 "use_speaker_boost": False
146             }
147         }
148  +      response = requests.post(tts_url, headers=headers, json=data, stream=True)
149  +      if response.ok:
150  +          with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
151  +              for chunk in response.iter_content(chunk_size=1024):
152  +                  if chunk:
153  +                      f.write(chunk)
154  +              audio_path = f.name
155  +          logging.debug(f"Audio saved to {audio_path}")
156  +          return audio_path  # Return audio path for automatic playback
157  +      else:
158  +          logging.error(f"Error generating audio: {response.text}")
159             return None
160
161  +  # Function to handle voice to voice conversation
162  +  def handle_voice_to_voice(chat_history, question):
163  +      response = get_response(question)
164  +      audio_path = generate_audio_elevenlabs(response)
165  +      chat_history.append(("[Voice Input]", "[Voice Response]"))
166  +      return chat_history, "", audio_path
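The diff skips the middle of generate_audio_elevenlabs (original lines 135 to 144), so the headers dictionary is cut off and the data payload is not visible at all; only the trailing "use_speaker_boost": False key appears. A hedged sketch of what that section commonly looks like for the ElevenLabs streaming endpoint; the header names follow the public ElevenLabs API, while the model_id and voice_settings values are assumptions:

# Hypothetical reconstruction of the elided request section; these assignments
# would sit inside generate_audio_elevenlabs, before the requests.post call above.
headers = {
    "Accept": "application/json",
    "xi-api-key": XI_API_KEY,
    "Content-Type": "application/json",
}
data = {
    "text": text,
    "model_id": "eleven_multilingual_v2",  # assumed model
    "voice_settings": {
        "stability": 0.5,                  # assumed values
        "similarity_boost": 0.8,
        "style": 0.0,
        "use_speaker_boost": False
    }
}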
167
168  +  # Function to transcribe audio input
169  +  def transcribe_function(stream, new_chunk):
170  +      try:
171  +          sr, y = new_chunk[0], new_chunk[1]
172  +      except TypeError:
173  +          print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
174  +          return stream, "", None
175  +
176  +      if y is None or len(y) == 0:
177  +          return stream, "", None
178  +
179  +      y = y.astype(np.float32)
180  +      max_abs_y = np.max(np.abs(y))
181  +      if max_abs_y > 0:
182  +          y = y / max_abs_y
183  +
184  +      if stream is not None and len(stream) > 0:
185  +          stream = np.concatenate([stream, y])
186         else:
187  +          stream = y
188  +
189  +      result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
190  +      full_text = result.get("text", "")
191
192  +      threading.Thread(target=auto_reset_state).start()
193
194  +      return stream, full_text, full_text
195  +
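transcribe_function relies on pipe_asr and auto_reset_state, neither of which appears in the added lines; the first hunk even removes the old module-level pipe_asr block. A minimal sketch of definitions that would satisfy those references, mirroring the removed pipeline setup; the Whisper checkpoint and the auto_reset_state behaviour are assumptions:

# Hypothetical definitions; app.py must provide its own equivalents outside this diff.
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-small"  # assumed checkpoint
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
)

def auto_reset_state():
    # Placeholder: the real helper presumably clears the streaming state after a delay.
    pass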
196  +  # Define the Gradio interface
197     with gr.Blocks(theme="rawrsor1/Everforest") as demo:
198  +      chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False)
199  +      mode_selection = gr.Radio(
200  +          choices=["Normal Chatbot", "Voice to Voice Conversation"],
201  +          label="Mode Selection",
202  +          value="Normal Chatbot"
203  +      )
204  +      question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
205  +      audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
206  +      submit_voice_btn = gr.Button("Submit Voice")
207  +      audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
208  +
209  +      # Interactions for Submit Voice Button
210  +      submit_voice_btn.click(
211  +          fn=handle_voice_to_voice,
212  +          inputs=[chatbot, question_input],
213  +          outputs=[chatbot, question_input, audio_output],
214  +          api_name="api_voice_to_voice_translation"
215      )
216
217  +      # Speech-to-Text functionality
218  +      state = gr.State()
219  +      audio_input.stream(
220  +          transcribe_function,
221  +          inputs=[state, audio_input],
222  +          outputs=[state, question_input],
223  +          api_name="api_voice_to_text"
224      )
225
226  +  demo.launch(show_error=True, share=True)
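The added interface creates mode_selection, chatbot, and the clear_fields helper, but none of the shown hunks bind them to an event. A hedged sketch of wiring that would fit inside the with gr.Blocks(...) block above; the button labels and the handle_text_chat helper are assumptions, not code from this commit:

# Hypothetical wiring; these lines would sit inside the `with gr.Blocks(...)` block above.
def handle_text_chat(chat_history, question):
    # Reuse the Neo4j chain for the "Normal Chatbot" mode (assumed behaviour).
    answer = get_response(question)
    chat_history.append((question, answer))
    return chat_history, ""

submit_text_btn = gr.Button("Submit Question")  # assumed component
submit_text_btn.click(
    fn=handle_text_chat,
    inputs=[chatbot, question_input],
    outputs=[chatbot, question_input],
)

# clear_fields() returns ([], "", None), which maps onto (chatbot, question_input, audio_output).
clear_btn = gr.Button("Clear")  # assumed component
clear_btn.click(fn=clear_fields, outputs=[chatbot, question_input, audio_output])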