Update app.py
app.py CHANGED
@@ -8,10 +8,21 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_community.vectorstores import Neo4jVector
 from langchain_community.graphs import Neo4jGraph
-from langchain_experimental.graph_transformers import LLMGraphTransformer
 from langchain_core.prompts import ChatPromptTemplate
 import time
 import os
+import io
+from pydub import AudioSegment
+from dataclasses import dataclass
+
+# Define AppState dataclass for managing the application's state
+@dataclass
+class AppState:
+    stream: np.ndarray | None = None
+    sampling_rate: int = 0
+    pause_detected: bool = False
+    stopped: bool = False
+    conversation: list = []
 
 # Neo4j setup
 graph = Neo4jGraph(
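Note on the added AppState: dataclasses reject a mutable default such as conversation: list = [] at class-definition time (ValueError: mutable default for field conversation is not allowed, use default_factory), so app.py would fail on import — one plausible source of this Space's runtime-error status. A minimal sketch of the conventional fix, keeping the fields from the diff:

from dataclasses import dataclass, field
import numpy as np

@dataclass
class AppState:
    stream: np.ndarray | None = None
    sampling_rate: int = 0
    pause_detected: bool = False
    stopped: bool = False
    # default_factory builds a fresh list per instance; a bare [] default
    # is rejected by dataclasses because it would be shared across instances
    conversation: list = field(default_factory=list)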
@@ -53,136 +64,63 @@ pipe_asr = pipeline(
 # Function to reset the state after 10 seconds
 def auto_reset_state():
     time.sleep(2)
-    return
+    return AppState()  # Reset the state
 
 # Function to process audio input and transcribe it
-def transcribe_function(stream, new_chunk):
+def transcribe_function(state: AppState, new_chunk):
     try:
         sr, y = new_chunk[0], new_chunk[1]
     except TypeError:
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return
+        return state, ""
 
-    # Ensure y is not empty and is at least 1-dimensional
     if y is None or len(y) == 0:
-        return
+        return state, ""
 
     y = y.astype(np.float32)
     max_abs_y = np.max(np.abs(y))
     if max_abs_y > 0:
         y = y / max_abs_y
 
-    if stream is not None:
-        stream = np.concatenate([stream, y])
+    if state.stream is not None and len(state.stream) > 0:
+        state.stream = np.concatenate([state.stream, y])
     else:
-        stream = y
+        state.stream = y
 
-    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    result = pipe_asr({"array": state.stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
 
-    # Start a thread to reset the state after 10 seconds
     threading.Thread(target=auto_reset_state).start()
+    return state, full_text
 
-    #full_text_query = ""
-    #words = [el for el in input.split() if el]
-    #for word in words[:-1]:
-    #    full_text_query += f" {word}~2 AND"
-    #full_text_query += f" {words[-1]}~2"
-    #return full_text_query.strip()
-
-# Function to generate a full-text search query for Neo4j
-def generate_full_text_query(input: str) -> str:
-    # Split the input into words, ignoring any empty strings
-    words = [el for el in input.split() if el]
-
-    # Check if there are no words
-    if not words:
-        return ""  # Return an empty string or a default query if desired
-
-    # Create the full-text query with fuzziness (~2 for proximity search)
-    full_text_query = ""
-    for word in words[:-1]:
-        full_text_query += f" {word}~2 AND"
-    full_text_query += f" {words[-1]}~2"
-    return full_text_query.strip()
+# Function to generate a response using the prompt and the context
+def generate_response_with_prompt(context, question):
+    formatted_prompt = prompt.format(context=context, question=question)
+    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
+    response = llm(formatted_prompt)
+    return response.content.strip()
 
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
     XI_API_KEY = os.environ['ELEVENLABS_API']
     VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
     tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,
-            "use_speaker_boost": False
-        }
-    }
+    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
+    data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 1.0}}
     response = requests.post(tts_url, headers=headers, json=data, stream=True)
     if response.ok:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
             for chunk in response.iter_content(chunk_size=1024):
-            audio_path = f.name
-        return audio_path  # Return audio path for automatic playback
+                f.write(chunk)
+        return f.name
     else:
         print(f"Error generating audio: {response.text}")
         return None
 
-# Define the prompt template
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise, quick, short and crisp response in a conversational and straightforward way without any greeting.
-Context:
-{context}
-
-Question: {question}
-Answer concisely:"""
-
-# Create a prompt object using the template
-prompt = ChatPromptTemplate.from_template(template)
-
-# Function to generate a response using the prompt and the context
-def generate_response_with_prompt(context, question):
-    formatted_prompt = prompt.format(
-        context=context,
-        question=question
-    )
-    # Use the ChatOpenAI instance to generate a response directly from the formatted prompt
-    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-    response = llm(formatted_prompt)
-    return response.content.strip()
-
-# Function to reset the state
-def reset_state():
-    return None, ""  # Reset the state and clear input text
-
-# Define the function to generate a hybrid response using Neo4j and other retrieval methods
+# Define the function to retrieve information using Neo4j and the vector store
 def retriever(question: str):
-    structured_query = """
-    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
+    structured_query = """
+    CALL db.index.fulltext.queryNodes('entity', $query, {limit: 2})
     YIELD node, score
     RETURN node.id AS entity, node.text AS context, score
     ORDER BY score DESC
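A caveat on this hunk: it deletes generate_full_text_query (along with the template and prompt definitions), yet the new code still calls generate_full_text_query inside retriever and prompt.format inside generate_response_with_prompt. Unless those names are redefined in a part of the file this diff does not show, the first query would raise NameError at runtime. For reference, the deleted helper, reproduced from the left-hand side of the diff:

# Deleted by this commit but still referenced by retriever(); it needs to
# stay defined (or be reintroduced) for structured retrieval to work.
def generate_full_text_query(input: str) -> str:
    # Split the input into words, ignoring any empty strings
    words = [el for el in input.split() if el]
    if not words:
        return ""
    # Lucene fuzzy syntax used by Neo4j full-text indexes: each word
    # tolerates up to ~2 edits, and the terms are joined with AND
    full_text_query = ""
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()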
@@ -191,44 +129,27 @@ def retriever(question: str):
     structured_data = graph.query(structured_query, {"query": generate_full_text_query(question)})
     structured_response = "\n".join([f"{record['entity']}: {record['context']}" for record in structured_data])
 
-    # Unstructured data retrieval from vector store
     unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
     unstructured_response = "\n".join(unstructured_data)
 
-    # Combine structured and unstructured responses
     combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"
-
-    # Generate the final response using the prompt template
-    final_response = generate_response_with_prompt(combined_context, question)
-    return final_response
-
+    return generate_response_with_prompt(combined_context, question)
 
 # Function to handle the entire audio query and response process
-def process_audio_query(audio_input):
-
-    _, transcription, _ = transcribe_function(stream, audio_input)
-    print(f"Transcription: {transcription}")
-
-    # Retrieve hybrid response using Neo4j and other methods
+def process_audio_query(state: AppState, audio_input):
+    state, transcription = transcribe_function(state, audio_input)
     response_text = retriever(transcription)
-    print(f"Response: {response_text}")
-
-    # Generate audio from the response text
     audio_path = generate_audio_elevenlabs(response_text)
-    return audio_path
-
-# Function to handle submit button click
-def on_submit(audio_input):
-    return process_audio_query(audio_input)
+    return audio_path, state
 
-# Create Gradio interface for audio input
+# Create Gradio interface for audio input and output
 with gr.Blocks() as interface:
-    audio_input = gr.Audio(sources="microphone", type="numpy", streaming=True,every=0.1)
+    audio_input = gr.Audio(sources="microphone", type="numpy", streaming=True, every=0.1)
     submit_button = gr.Button("Submit")
     audio_output = gr.Audio(type="filepath", autoplay=True)
+    state = gr.State(AppState())
 
-    submit_button.click(fn=on_submit, inputs=audio_input, outputs=audio_output)
+    submit_button.click(fn=process_audio_query, inputs=[state, audio_input], outputs=[audio_output, state])
 
 # Launch the Gradio app
 interface.launch()
-
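One further observation, offered as a reading of the new code rather than a confirmed bug: threading.Thread(target=auto_reset_state).start() discards the thread's return value, so the AppState() that auto_reset_state now returns never reaches the live state. A sketch of one way to make the delayed reset take effect, assuming in-place mutation of the shared state object is acceptable (the args-passing variant of the Thread call is an assumption, not part of the commit):

import threading
import time

def auto_reset_state(state):
    time.sleep(2)
    # Mutate the shared object in place; a value returned from a thread
    # target is silently dropped, so returning AppState() resets nothing.
    state.stream = None
    state.pause_detected = False

# Inside transcribe_function, the call would then become:
# threading.Thread(target=auto_reset_state, args=(state,), daemon=True).start()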
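Finally, the app reads its secrets from the environment: OPENAI_API_KEY and ELEVENLABS_API appear in the diff, and the truncated Neo4jGraph( setup presumably takes Neo4j connection credentials as well, though their names are not shown and are not guessed here. A small illustrative preflight check (not part of the commit) that fails fast with a clearer message than a KeyError mid-request:

import os

# Variable names taken from the diff; Neo4j credentials are elided there,
# so this check deliberately covers only the two documented names.
required = ["OPENAI_API_KEY", "ELEVENLABS_API"]
missing = [name for name in required if name not in os.environ]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")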