radarbackend11262024v11

Runtime error

App Files Files Community

radarbackend11262024v11 / app.py

Pijush2023

Update app.py

c1009f8 verified 9 months ago

raw

history blame

6.08 kB

	import os
	import tempfile
	import threading
	import time

	import gradio as gr
	import numpy as np
	import requests
	import torch
	from langchain_community.graphs import Neo4jGraph
	from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_experimental.graph_transformers import LLMGraphTransformer
	from langchain_openai import ChatOpenAI
	from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor

	# Neo4j setup
	# Connection settings are read from the environment so that no credential
	# lives in source control (the ElevenLabs key in this file is read the same
	# way). The URL and username keep their previous values as defaults; the
	# password has no default on purpose.
	# NOTE(review): a password used to be committed here in plain text — it
	# should be rotated on the database side and supplied via NEO4J_PASSWORD.
	graph = Neo4jGraph(
	    url=os.environ.get("NEO4J_URI", "neo4j+s://c62d0d35.databases.neo4j.io"),
	    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
	    password=os.environ["NEO4J_PASSWORD"],
	)

	# Define the ASR model with Whisper
	model_id = 'openai/whisper-large-v3'

	# Half precision on the first CUDA device when one is available, otherwise
	# full precision on the CPU.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda:0", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype)
	model = model.to(device)
	processor = AutoProcessor.from_pretrained(model_id)

	# Options for the speech-recognition pipeline built from the model and
	# processor loaded above.
	_asr_options = dict(
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    max_new_tokens=128,
	    chunk_length_s=15,
	    batch_size=16,
	    torch_dtype=torch_dtype,
	    device=device,
	    return_timestamps=True,
	)
	pipe_asr = pipeline("automatic-speech-recognition", **_asr_options)

	# Function to reset the state after a short delay (5 seconds by default)
	def auto_reset_state(delay_seconds=5):
	    """Wait ``delay_seconds`` and return the values that clear the UI state.

	    Args:
	        delay_seconds: How long to sleep before returning. Defaults to 5,
	            the delay this function previously hard-coded.

	    Returns:
	        A ``(None, "")`` tuple: ``None`` to reset the state and an empty
	        string to clear the input text.

	    Note:
	        When this function is used as a ``threading.Thread`` target its
	        return value is discarded, so nothing is reset unless the caller
	        consumes the result itself.
	    """
	    time.sleep(delay_seconds)
	    return None, ""  # Reset the state and clear input text

	# Function to process audio input and transcribe it
	def transcribe_function(stream, new_chunk):
	    """Append one audio chunk to ``stream`` and transcribe the whole buffer.

	    Args:
	        stream: Samples accumulated so far, or ``None`` when there are none.
	        new_chunk: Indexable pair ``(sampling_rate, samples)``.

	    Returns:
	        ``(stream, text, text)`` where ``stream`` is the updated buffer and
	        ``text`` is the transcription of that buffer. When ``new_chunk`` is
	        malformed or carries no samples, ``(stream, "", None)`` is returned
	        and ``stream`` is left untouched.
	    """
	    try:
	        sr, y = new_chunk[0], new_chunk[1]
	    except (TypeError, IndexError, KeyError):
	        # Covers None, too-short sequences and mappings without 0/1 keys.
	        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
	        return stream, "", None

	    # Ensure y is not empty and is at least 1-dimensional
	    if y is None:
	        return stream, "", None
	    y = np.atleast_1d(np.asarray(y, dtype=np.float32))
	    if y.size == 0:
	        return stream, "", None

	    # The recognizer is fed a single 1-D waveform, so average the channels
	    # of multi-channel input.
	    # NOTE(review): assumes a (samples, channels) layout — confirm against
	    # what the Gradio Audio component delivers.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    # Scale to [-1, 1]; an all-zero chunk is left as is to avoid dividing by 0.
	    max_abs_y = np.max(np.abs(y))
	    if max_abs_y > 0:
	        y = y / max_abs_y

	    # Ensure stream is also at least 1-dimensional before concatenation
	    if stream is not None and len(stream) > 0:
	        stream = np.concatenate([stream, y])
	    else:
	        stream = y

	    # Process the audio data for transcription
	    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
	    full_text = result.get("text", "")

	    # Start the delayed-reset helper in the background.
	    # NOTE(review): the thread's return value is discarded, so this does not
	    # actually reset anything visible to the caller.
	    threading.Thread(target=auto_reset_state).start()

	    return stream, full_text, full_text

	# Function to generate audio with Eleven Labs TTS
	def generate_audio_elevenlabs(text, timeout=30):
	    """Synthesize ``text`` with the ElevenLabs streaming TTS endpoint.

	    Args:
	        text: Text to speak; it is converted with ``str()`` before sending.
	        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so a
	            stalled connection cannot block the caller indefinitely.

	    Returns:
	        Path of a temporary ``.mp3`` file holding the audio, or ``None`` when
	        the request fails or the API answers with an error status.

	    Raises:
	        KeyError: If the ``ELEVENLABS_API`` environment variable is not set.
	    """
	    XI_API_KEY = os.environ['ELEVENLABS_API']
	    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
	    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
	    headers = {
	        "Accept": "application/json",
	        "xi-api-key": XI_API_KEY
	    }
	    data = {
	        "text": str(text),
	        "model_id": "eleven_multilingual_v2",
	        "voice_settings": {
	            "stability": 1.0,
	            "similarity_boost": 0.0,
	            "style": 0.60,
	            "use_speaker_boost": False
	        }
	    }

	    audio_path = None
	    try:
	        # The context manager closes the streamed response on every path.
	        with requests.post(
	            tts_url, headers=headers, json=data, stream=True, timeout=timeout
	        ) as response:
	            if not response.ok:
	                print(f"Error generating audio: {response.text}")
	                return None
	            # delete=False: the file must outlive this function so the
	            # caller can play it back.
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
	                audio_path = f.name
	                for chunk in response.iter_content(chunk_size=1024):
	                    if chunk:
	                        f.write(chunk)
	    except requests.RequestException as exc:
	        # Treat transport failures like HTTP errors: report and return None.
	        print(f"Error generating audio: {exc}")
	        if audio_path is not None and os.path.exists(audio_path):
	            os.remove(audio_path)  # do not leave a truncated file behind
	        return None

	    return audio_path  # Return audio path for automatic playback

	# Define the template for generating responses based on context.
	# {context} and {question} are the only placeholders; they are filled in by
	# generate_response_with_prompt() further down in this file.
	template = """Use the following context to answer the question:
	Context:
	{context}

	Question: {question}
	Answer concisely:"""

	# Create a prompt object using the template. It is built once at import time
	# and reused for every request.
	prompt = ChatPromptTemplate.from_template(template)

	# Function to generate a response using the prompt and the context
	def generate_response_with_prompt(context, question):
	    """Fill the module-level ``prompt`` with ``context`` and ``question``.

	    Returns whatever ``prompt.format`` produces for those two values.

	    NOTE(review): no language model is invoked here, so the "response" is
	    the rendered prompt itself — confirm that this is intended.
	    """
	    fields = {"context": context, "question": question}
	    return prompt.format(**fields)

	# Define the function to generate a hybrid response using Neo4j and other retrieval methods
	def retriever(question: str):
	    """Collect graph and vector-store context for ``question`` and format it.

	    The two best full-text matches from the ``entity`` index and the
	    documents returned by ``vector_index.similarity_search`` are merged into
	    one context string, which is handed to ``generate_response_with_prompt``
	    together with the question. That function's result is returned.

	    NOTE(review): ``generate_full_text_query`` and ``vector_index`` are not
	    defined in the visible part of this file — confirm they are provided
	    elsewhere before relying on this function.
	    """
	    # Structured data retrieval from Neo4j
	    structured_query = f"""
	CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
	YIELD node, score
	RETURN node.id AS entity, node.text AS context, score
	ORDER BY score DESC
	LIMIT 2
	"""
	    lucene_query = generate_full_text_query(question)
	    rows = graph.query(structured_query, {"query": lucene_query})
	    structured_response = "\n".join(
	        f"{row['entity']}: {row['context']}" for row in rows
	    )

	    # Unstructured data retrieval from vector store
	    hits = vector_index.similarity_search(question)
	    unstructured_response = "\n".join(doc.page_content for doc in hits)

	    # Combine structured and unstructured responses
	    combined_context = (
	        f"Structured data:\n{structured_response}\n\n"
	        f"Unstructured data:\n{unstructured_response}"
	    )

	    # Generate the final response using the prompt template
	    return generate_response_with_prompt(combined_context, question)

	# Function to handle the entire audio query and response process
	def process_audio_query(audio_input):
	    """Turn one recorded question into a spoken answer.

	    Args:
	        audio_input: The value delivered by the audio input component; it is
	            forwarded to ``transcribe_function`` as the new chunk.

	    Returns:
	        Path of the generated audio file, or ``None`` when there is no input,
	        nothing could be transcribed, or speech synthesis failed.
	    """
	    # Nothing recorded yet: skip transcription, retrieval and the TTS call.
	    if audio_input is None:
	        return None

	    stream = None
	    _, transcription, _ = transcribe_function(stream, audio_input)
	    print(f"Transcription: {transcription}")

	    # An empty transcription gives retrieval nothing to search for and would
	    # only spend a TTS request on a question-less prompt.
	    if not transcription or not transcription.strip():
	        return None

	    # Retrieve hybrid response using Neo4j and other methods
	    response_text = retriever(transcription)
	    print(f"Response: {response_text}")

	    # Generate audio from the response text
	    return generate_audio_elevenlabs(response_text)

	# Create Gradio interface for audio input and output
	# Gradio 4 replaced the Audio ``source`` keyword with ``sources`` (a list)
	# and rejects the old spelling with a TypeError. Try the legacy keyword
	# first so Gradio 3.x keeps working, then fall back to the current one.
	try:
	    microphone_input = gr.Audio(source="microphone", type="numpy")
	except TypeError:
	    microphone_input = gr.Audio(sources=["microphone"], type="numpy")

	interface = gr.Interface(
	    fn=process_audio_query,
	    inputs=microphone_input,
	    outputs="audio",
	    live=True,
	    description="Ask questions via audio and receive audio responses."
	)

	# Launch the Gradio app
	interface.launch()