import gradio as gr
import os
import requests
import tempfile
import torch
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
# Setup Neo4j
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
)
# Define a concise prompt template for generating responses
template = """I am a guide for Birmingham, Alabama. I will provide a precise and short response based solely on the provided data.
Do not include any additional commentary or context.
Data:
{context}
User's question: {question}
Answer:"""
qa_prompt = ChatPromptTemplate.from_template(template)
# Chat model configuration
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
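# OPENAI_API_KEY is expected in the environment (e.g. as a Space secret); this line raises a KeyError if it is unset.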
# Function to generate a query for Neo4j and retrieve information
def generate_full_text_query(input: str) -> str:
    return " ".join([f"{word}~2" for word in input.split()])
def retrieve_from_neo4j(question: str) -> str:
    query = generate_full_text_query(question)
    response = graph.query(
        """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
        YIELD node, score
        RETURN node.name AS name, node.description AS description LIMIT 5""",
        {"query": query}
    )
    context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
    return context
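# This assumes a full-text index named 'entity' already exists in the database and that the
# matched nodes carry `name` and `description` properties; the index is not created here.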
# Function to generate the response using the prompt template and Neo4j data
def get_response(question):
    try:
        context = retrieve_from_neo4j(question)
        prompt = qa_prompt.format_prompt(context=context, question=question)
        # ChatOpenAI returns a message object, so invoke it and take .content to get the text
        response = chat_model.invoke(prompt.to_string()).content
        # Filter extraneous content, keeping only the answer part
        if "Answer:" in response:
            response = response.split("Answer:")[-1].strip()  # Extract the part after "Answer:" and strip extra spaces
        return response
    except Exception as e:
        return f"Error: {str(e)}"
# Function to generate audio with Eleven Labs TTS
def generate_audio_elevenlabs(text):
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 1.0, "similarity_boost": 0.0}
    }
    response = requests.post(tts_url, headers=headers, json=data, stream=True)
    if response.ok:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
        return audio_path
    else:
        return None
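# Note: delete=False leaves the temporary .mp3 on disk so Gradio can serve it after the
# function returns; the files are not cleaned up by this app.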
# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)
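# chunk_length_s and batch_size enable chunked long-form transcription; return_timestamps
# is overridden to False at call time below, so only the plain transcript text is used.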
# Define the function to transcribe audio and generate a response
def transcribe_and_respond(audio):
    # The .change event also fires with None when the recording is cleared
    if audio is None:
        return None
    sr, y = audio[0], audio[1]
    y = y.astype(np.float32)
    # Normalize the waveform to [-1, 1] before transcription
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    text = result.get("text", "")
    response = get_response(text)
    audio_path = generate_audio_elevenlabs(response)
    return audio_path
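# With type='numpy', Gradio delivers the recording as a (sample_rate, numpy_array) tuple,
# which is what transcribe_and_respond unpacks above.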
# Build the Gradio interface: speak a question, hear the answer
with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
    audio_input.change(
        fn=transcribe_and_respond,
        inputs=audio_input,
        outputs=audio_output,
    )

# Launch the Gradio interface
demo.launch(show_error=True, share=True)