Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,75 +1,98 @@
(Removed by this commit: the previous version's import block and Neo4j connection, a get_response helper whose fulltext Cypher query ended with "YIELD node, score / RETURN node.name AS name, node.description AS description LIMIT 5" and built its answer as context = "\n".join([f"{el['name']}: {el['description']}" for el in response]), the earlier Whisper ASR pipeline setup, the old transcribe-and-respond handler, and the original Gradio interface wired with fn=transcribe_and_respond, inputs=audio_input, outputs=audio_output. The replacement app.py follows.)
import gradio as gr
import torch
import os          # used by generate_audio_elevenlabs (reads ELEVENLABS_API from the environment)
import time        # used by auto_reset_state
import requests
import tempfile
import threading
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.prompts import ChatPromptTemplate

# Neo4j setup
graph = Neo4jGraph(
    url="neo4j+s://c62d0d35.databases.neo4j.io",
    username="neo4j",
    password="_x8f-_aAQvs2NB0x6s0ZHSh3W_y-HrENDbgStvsUCM0"
)

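The retriever defined further down queries a Neo4j fulltext index named 'entity', but nothing in this file creates it. A sketch of ensuring the index exists up front (the __Entity__ label and the id/text properties are assumptions about how the graph was populated, e.g. via LLMGraphTransformer) could look like:

# Hypothetical: create the fulltext index used by retriever() if it is missing.
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS "
    "FOR (n:__Entity__) ON EACH [n.id, n.text]"
)
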
# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)

# Function to reset the state after a short delay
def auto_reset_state():
    time.sleep(5)
    return None, ""  # Reset the state and clear input text

# Function to process audio input and transcribe it
def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    # Ensure y is not empty and is at least 1-dimensional
    if y is None or len(y) == 0:
        return stream, "", None

    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    # Ensure stream is also at least 1-dimensional before concatenation
    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Process the audio data for transcription
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    # Start a background thread to reset the state; note that the thread's
    # return value is discarded, so any actual reset must happen in the UI callback.
    threading.Thread(target=auto_reset_state).start()

    return stream, full_text, full_text

# Function to generate audio with Eleven Labs TTS
|
79 |
def generate_audio_elevenlabs(text):
|
80 |
XI_API_KEY = os.environ['ELEVENLABS_API']
|
81 |
VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
|
82 |
tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
|
83 |
+
headers = {
|
84 |
+
"Accept": "application/json",
|
85 |
+
"xi-api-key": XI_API_KEY
|
86 |
+
}
|
87 |
data = {
|
88 |
"text": str(text),
|
89 |
"model_id": "eleven_multilingual_v2",
|
90 |
+
"voice_settings": {
|
91 |
+
"stability": 1.0,
|
92 |
+
"similarity_boost": 0.0,
|
93 |
+
"style": 0.60,
|
94 |
+
"use_speaker_boost": False
|
95 |
+
}
|
96 |
}
|
97 |
response = requests.post(tts_url, headers=headers, json=data, stream=True)
|
98 |
if response.ok:
|
|
|
101 |
if chunk:
|
102 |
f.write(chunk)
|
103 |
audio_path = f.name
|
104 |
+
return audio_path # Return audio path for automatic playback
|
105 |
else:
|
106 |
+
print(f"Error generating audio: {response.text}")
|
107 |
return None
|
108 |
|
# Define the template for generating responses based on context
template = """Use the following context to answer the question:
Context:
{context}

Question: {question}
Answer concisely:"""

# Create a prompt object using the template
prompt = ChatPromptTemplate.from_template(template)

# Function to generate a response using the prompt and the context
# (note: this only fills in the prompt template; the imported ChatOpenAI model is not invoked here)
def generate_response_with_prompt(context, question):
    response = prompt.format(
        context=context,
        question=question
    )
    return response

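As written, generate_response_with_prompt returns the filled-in prompt text rather than a model answer, even though ChatOpenAI is imported. A minimal sketch of actually invoking the chat model (the model name, temperature, and reliance on an OPENAI_API_KEY secret are assumptions, not something this commit shows) could be:

from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)   # hypothetical model choice
qa_chain = prompt | llm | StrOutputParser()

def generate_llm_response(context, question):
    # Sends the formatted prompt to the model and returns the text answer.
    return qa_chain.invoke({"context": context, "question": question})
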
# Define the function to generate a hybrid response using Neo4j and other retrieval methods
def retriever(question: str):
    # Structured data retrieval from Neo4j (requires a fulltext index named 'entity'
    # and a generate_full_text_query helper, neither of which is defined in this file)
    structured_query = f"""
    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
    YIELD node, score
    RETURN node.id AS entity, node.text AS context, score
    ORDER BY score DESC
    LIMIT 2
    """
    structured_data = graph.query(structured_query, {"query": generate_full_text_query(question)})
    structured_response = "\n".join([f"{record['entity']}: {record['context']}" for record in structured_data])

    # Unstructured data retrieval from the vector store (vector_index is also not defined in this file)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    unstructured_response = "\n".join(unstructured_data)

    # Combine structured and unstructured responses
    combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"

    # Generate the final response using the prompt template
    final_response = generate_response_with_prompt(combined_context, question)
    return final_response

+
# Function to handle the entire audio query and response process
|
153 |
+
def process_audio_query(audio_input):
|
154 |
+
stream = None
|
155 |
+
_, transcription, _ = transcribe_function(stream, audio_input)
|
156 |
+
print(f"Transcription: {transcription}")
|
157 |
+
|
158 |
+
# Retrieve hybrid response using Neo4j and other methods
|
159 |
+
response_text = retriever(transcription)
|
160 |
+
print(f"Response: {response_text}")
|
161 |
+
|
162 |
+
# Generate audio from the response text
|
163 |
+
audio_path = generate_audio_elevenlabs(response_text)
|
164 |
+
return audio_path
|
165 |
+
|
166 |
+
# Create Gradio interface for audio input and output
|
167 |
+
interface = gr.Interface(
|
168 |
+
fn=process_audio_query,
|
169 |
+
inputs=gr.Audio(source="microphone", type="numpy"),
|
170 |
+
outputs="audio",
|
171 |
+
live=True,
|
172 |
+
description="Ask questions via audio and receive audio responses."
|
173 |
+
)
|
174 |
|
175 |
+
# Launch the Gradio app
|
176 |
+
interface.launch()
|
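On Gradio 4.x the Audio component takes sources=["microphone"] rather than source="microphone", so if the Space runs a recent Gradio release the interface would need to be declared along these lines (the choice of output component is likewise an assumption):

interface = gr.Interface(
    fn=process_audio_query,
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs=gr.Audio(type="filepath"),
    live=True,
    description="Ask questions via audio and receive audio responses.",
)
interface.launch()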