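"""Gradio voice assistant for Birmingham, Alabama.

Pipeline: microphone audio is transcribed with Whisper, the question is answered
by GPT-4o using context retrieved from a Neo4j graph, and the answer is spoken
back with ElevenLabs text-to-speech.
"""
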
import gradio as gr
import os
import requests
import tempfile
import torch
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph

# Setup the Neo4j graph connection
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
)
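
# The retrieval query below relies on a full-text index named 'entity' over the
# node properties returned by the Cypher query. It is assumed to exist already in
# the database; a hypothetical creation statement (the label is an assumption)
# would look like:
#   CREATE FULLTEXT INDEX entity IF NOT EXISTS
#   FOR (n:__Entity__) ON EACH [n.name, n.description]
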
# Define a concise prompt template for generating responses
template = """I am a guide for Birmingham, Alabama. I will provide a precise and short response based solely on the provided data.
Do not include any additional commentary or context.
Data:
{context}
User's question: {question}
Answer:"""
qa_prompt = ChatPromptTemplate.from_template(template)
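
# Note: the template ends with "Answer:", so get_response() below strips everything
# up to that marker from the model output before the text is sent to TTS.
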
# Chat model configuration
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])

# Build a Lucene full-text query for Neo4j entity retrieval: each word gets a
# fuzzy suffix (~2 allows up to two character edits).
def generate_full_text_query(input: str) -> str:
    return " ".join([f"{word}~2" for word in input.split()])
def retrieve_from_neo4j(question: str) -> str:
    query = generate_full_text_query(question)
    response = graph.query(
        """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
        YIELD node, score
        RETURN node.name AS name, node.description AS description LIMIT 5""",
        {"query": query}
    )
    context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
    return context

# Function to generate the response using the prompt template and Neo4j data
def get_response(question):
    try:
        context = retrieve_from_neo4j(question)
        prompt = qa_prompt.format_prompt(context=context, question=question)
        # invoke() returns an AIMessage; the answer text lives in .content
        response = chat_model.invoke(prompt.to_string()).content
        # Filter extraneous content, keeping only the part after "Answer:"
        if "Answer:" in response:
            response = response.split("Answer:")[-1].strip()
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Function to generate audio with ElevenLabs TTS
def generate_audio_elevenlabs(text):
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 1.0, "similarity_boost": 0.0}
    }
    response = requests.post(tts_url, headers=headers, json=data, stream=True)
    if response.ok:
        # Stream the MP3 bytes into a temp file (delete=False so Gradio can serve it by path)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
        return audio_path
    else:
        return None

# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)
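# chunk_length_s=15 splits long recordings into 15-second windows and batch_size=16
# lets the pipeline transcribe several windows in parallel; return_timestamps is
# enabled here but overridden per call below, where only the text is needed.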

# Define the function to transcribe audio and generate a spoken response
def transcribe_and_respond(audio):
    # Gradio passes None when the recording is cleared; skip processing in that case
    if audio is None:
        return None
    sr, y = audio
    y = y.astype(np.float32)
    if y.ndim > 1:
        # Downmix stereo input to mono for the ASR pipeline
        y = y.mean(axis=1)
    # Normalize the waveform to [-1, 1]
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    text = result.get("text", "")
    response = get_response(text)
    audio_path = generate_audio_elevenlabs(response)
    return audio_path

with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
    audio_input.change(
        fn=transcribe_and_respond,
        inputs=audio_input,
        outputs=audio_output,
    )
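
# The .change event fires once the microphone recording stops (and again with None
# when the input is cleared, which transcribe_and_respond handles above).
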
# Launch the Gradio interface
demo.launch(show_error=True, share=True)