import gradio as gr
import os
import requests
import tempfile
import torch
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
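# Required environment variables: OPENAI_API_KEY, ELEVENLABS_API, and
# NEO4J_PASSWORD (the last is an assumed name; see the Neo4j setup below)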
# Setup Neo4j (the password is read from the environment so the secret stays
# out of source control; NEO4J_PASSWORD is an assumed variable name)
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password=os.environ["NEO4J_PASSWORD"]
)
# Define a concise prompt template for generating responses
template = """I am a guide for Birmingham, Alabama. I will provide a precise and short response based solely on the provided data.
Do not include any additional commentary or context.
Data:
{context}
User's question: {question}
Answer:"""
qa_prompt = ChatPromptTemplate.from_template(template)
# Chat model configuration
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
# Function to generate a query for Neo4j and retrieve information
def generate_full_text_query(text: str) -> str:
    # Append Lucene fuzzy-match syntax (~2 = up to two character edits) to
    # each word, so near-misses like "Birmingam" still match "Birmingham"
    return " ".join([f"{word}~2" for word in text.split()])
def retrieve_from_neo4j(question: str) -> str:
query = generate_full_text_query(question)
    # Pull the top fuzzy matches from the 'entity' full-text index; the limit
    # is applied once in the index call rather than in a redundant LIMIT clause
    response = graph.query(
        """CALL db.index.fulltext.queryNodes('entity', $query, {limit: 5})
        YIELD node, score
        RETURN node.name AS name, node.description AS description""",
        {"query": query}
    )
context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
return context
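# Note: db.index.fulltext.queryNodes requires that the 'entity' full-text
# index already exists in the database; in Neo4j 5 it can be created with,
# for example,
#   CREATE FULLTEXT INDEX entity FOR (n:Place) ON EACH [n.name, n.description]
# where the Place label is a placeholder for whatever labels the graph uses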
# Function to generate the response using the prompt template and Neo4j data
def get_response(question):
    try:
        context = retrieve_from_neo4j(question)
        prompt = qa_prompt.format_prompt(context=context, question=question)
        # ChatOpenAI returns an AIMessage, so take .content for the text;
        # calling chat_model(...) directly is deprecated in recent LangChain
        response = chat_model.invoke(prompt).content
        # Keep only the part after "Answer:" if the model echoes the template
        if "Answer:" in response:
            response = response.split("Answer:")[-1].strip()
        return response
    except Exception as e:
        return f"Error: {str(e)}"
# Function to generate audio with Eleven Labs TTS
def generate_audio_elevenlabs(text):
XI_API_KEY = os.environ['ELEVENLABS_API']
VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
data = {
"text": str(text),
"model_id": "eleven_multilingual_v2",
"voice_settings": {"stability": 1.0, "similarity_boost": 0.0}
}
    # Stream the response so the audio is written to disk in chunks; the
    # timeout keeps a stalled request from hanging the app indefinitely
    response = requests.post(tts_url, headers=headers, json=data, stream=True, timeout=60)
if response.ok:
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
audio_path = f.name
return audio_path
else:
return None
# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
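# Half precision on GPU roughly halves memory use; CPU falls back to float32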
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe_asr = pipeline(
"automatic-speech-recognition",
model=model,
tokenizer=processor.tokenizer,
feature_extractor=processor.feature_extractor,
max_new_tokens=128,
chunk_length_s=15,
batch_size=16,
torch_dtype=torch_dtype,
device=device,
return_timestamps=True
)
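# chunk_length_s=15 keeps each chunk inside Whisper's 30-second context
# window, and batch_size=16 lets the pipeline decode several chunks at once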
# Define the function to transcribe audio and generate a response
def transcribe_and_respond(audio):
    if audio is None:  # the change event also fires when the input is cleared
        return None
    sr, y = audio
    y = y.astype(np.float32)
    if y.ndim > 1:  # downmix stereo to mono; Whisper expects a 1-D waveform
        y = y.mean(axis=1)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y  # peak-normalize to [-1, 1]
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
text = result.get("text", "")
response = get_response(text)
audio_path = generate_audio_elevenlabs(response)
return audio_path
with gr.Blocks() as demo:
audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
audio_input.change(
fn=transcribe_and_respond,
inputs=audio_input,
outputs=audio_output,
)
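    # The change event fires when a recording is finalized and also when the
    # input is cleared, hence the None guard in transcribe_and_respond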
# Launch the Gradio interface
demo.launch(show_error=True, share=True)