Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,68 +1,260 @@
import gradio as gr
-import
import requests
import tempfile
-import
-import numpy as np
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain_community.vectorstores import Neo4jVector
-from langchain_community.graphs import Neo4jGraph
-from langchain_experimental.graph_transformers import LLMGraphTransformer
-from langchain_core.prompts import ChatPromptTemplate
import time
-import
-from

-    stopped: bool = False
-    conversation: list = field(default_factory=list)

-def
-#
)

)

# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -83,13 +275,12 @@ pipe_asr = pipeline(
    return_timestamps=True
)

-#
def auto_reset_state():
-    time.sleep(
    return None, ""  # Reset the state and clear input text


-# Function to process audio input and transcribe it
def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
@@ -97,6 +288,7 @@ def transcribe_function(stream, new_chunk):
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    if y is None or len(y) == 0:
        return stream, "", None

@@ -105,168 +297,94 @@ def transcribe_function(stream, new_chunk):
    if max_abs_y > 0:
        y = y / max_abs_y

    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    threading.Thread(target=auto_reset_state).start()
-    return stream, full_text, full_text
-
-# Function to generate a full-text search query for Neo4j
-#def generate_full_text_query(input: str) -> str:
-    #full_text_query = ""
-    #words = [el for el in input.split() if el]
-    #for word in words[:-1]:
-        #full_text_query += f" {word}~2 AND"
-    #full_text_query += f" {words[-1]}~2"
-    #return full_text_query.strip()
-
-# Function to generate a full-text search query for Neo4j
-def generate_full_text_query(input: str) -> str:
-    # Split the input into words, ignoring any empty strings
-    words = [el for el in input.split() if el]
-
-    # Check if there are no words
-    if not words:
-        return ""  # Return an empty string or a default query if desired
-
-    # Create the full-text query with fuzziness (~2 for proximity search)
-    full_text_query = ""
-    for word in words[:-1]:
-        full_text_query += f" {word}~2 AND"
-    full_text_query += f" {words[-1]}~2"
-    return full_text_query.strip()
-

-# Function to generate audio with Eleven Labs TTS
-def generate_audio_elevenlabs(text):
-    XI_API_KEY = os.environ['ELEVENLABS_API']
-    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
-    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-    headers = {
-        "Accept": "application/json",
-        "xi-api-key": XI_API_KEY
-    }
-    data = {
-        "text": str(text),
-        "model_id": "eleven_multilingual_v2",
-        "voice_settings": {
-            "stability": 1.0,
-            "similarity_boost": 0.0,
-            "style": 0.60,
-            "use_speaker_boost": False
-        }
-    }
-    response = requests.post(tts_url, headers=headers, json=data, stream=True)
-    if response.ok:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-            audio_path = f.name
-        return audio_path  # Return audio path for automatic playback
-    else:
-        print(f"Error generating audio: {response.text}")
-        return None

-# Define the template for generating responses based on context
-template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
-Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational and straight-foreward way without any Greet.
-Context:
-{context}

-# Create a prompt object using the template
-prompt = ChatPromptTemplate.from_template(template)

-# Function to generate a response using the prompt and the context
-def generate_response_with_prompt(context, question):
-    formatted_prompt = prompt.format(
-        context=context,
-        question=question
-    )
-    # Use the ChatOpenAI instance to generate a response directly from the formatted prompt
-    llm = ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-    response = llm(formatted_prompt)
-    return response.content.strip()
-
-# Define the function to generate a hybrid response using Neo4j and other retrieval methods
-def retriever(question: str):
-    # Structured data retrieval from Neo4j
-    structured_query = f"""
-    CALL db.index.fulltext.queryNodes('entity', $query, {{limit: 2}})
-    YIELD node, score
-    RETURN node.id AS entity, node.text AS context, score
-    ORDER BY score DESC
-    LIMIT 2
-    """
-    structured_data = graph.query(structured_query, {"query": generate_full_text_query(question)})
-    structured_response = "\n".join([f"{record['entity']}: {record['context']}" for record in structured_data])
-
-    # Unstructured data retrieval from vector store
-    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
-    unstructured_response = "\n".join(unstructured_data)
-
-    # Combine structured and unstructured responses
-    combined_context = f"Structured data:\n{structured_response}\n\nUnstructured data:\n{unstructured_response}"
-
-    # Generate the final response using the prompt template
-    final_response = generate_response_with_prompt(combined_context, question)
-    return final_response

-    return audio_path, state
-
-with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
        with gr.Column():
    )
    )
-            cancel = gr.Button("Stop Conversation", variant="stop")
-            cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None,
-                         [state, input_audio], cancels=[stream, restart])

import gradio as gr
+import os
+import logging
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_openai import ChatOpenAI
+from langchain_community.graphs import Neo4jGraph
+from typing import List, Tuple
+from pydantic import BaseModel, Field
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import (
+    RunnableBranch,
+    RunnableLambda,
+    RunnablePassthrough,
+    RunnableParallel,
+)
+from langchain_core.prompts.prompt import PromptTemplate
import requests
import tempfile
+from langchain.memory import ConversationBufferWindowMemory
import time
+import logging
+from langchain.chains import ConversationChain
+import torch
+import torchaudio
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+import numpy as np
+import threading


+#code for history
+conversational_memory = ConversationBufferWindowMemory(
+    memory_key='chat_history',
+    k=10,
+    return_messages=True
+)

+# Setup Neo4j
+graph = Neo4jGraph(
+    url="neo4j+s://c62d0d35.databases.neo4j.io",
+    username="neo4j",
+    password="_x8f-_aAQvs2NB0x6s0ZHSh3W_y-HrENDbgStvsUCM0"
+)

+# Define entity extraction and retrieval functions
+class Entities(BaseModel):
+    names: List[str] = Field(
+        ..., description="All the person, organization, or business entities that appear in the text"
+    )

+entity_prompt = ChatPromptTemplate.from_messages([
+    ("system", "You are extracting organization and person entities from the text."),
+    ("human", "Use the given format to extract information from the following input: {question}"),
+])

+chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
+entity_chain = entity_prompt | chat_model.with_structured_output(Entities)

+def remove_lucene_chars(input: str) -> str:
+    return input.translate(str.maketrans({
+        "\\": r"\\", "+": r"\+", "-": r"\-", "&": r"\&", "|": r"\|", "!": r"\!",
+        "(": r"\(", ")": r"\)", "{": r"\{", "}": r"\}", "[": r"\[", "]": r"\]",
+        "^": r"\^", "~": r"\~", "*": r"\*", "?": r"\?", ":": r"\:", '"': r'\"',
+        ";": r"\;", " ": r"\ "
+    }))

+def generate_full_text_query(input: str) -> str:
+    full_text_query = ""
+    words = [el for el in remove_lucene_chars(input).split() if el]
+    for word in words[:-1]:
+        full_text_query += f" {word}~2 AND"
+    full_text_query += f" {words[-1]}~2"
+    return full_text_query.strip()

+# Setup logging to a file to capture debug information
+logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def structured_retriever(question: str) -> str:
+    result = ""
+    entities = entity_chain.invoke({"question": question})
+    for entity in entities.names:
+        response = graph.query(
+            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
+            YIELD node,score
+            CALL {
+              WITH node
+              MATCH (node)-[r:!MENTIONS]->(neighbor)
+              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
+              UNION ALL
+              WITH node
+              MATCH (node)<-[r:!MENTIONS]-(neighbor)
+              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
+            }
+            RETURN output LIMIT 50
+            """,
+            {"query": generate_full_text_query(entity)},
+        )
+        result += "\n".join([el['output'] for el in response])
+    return result
+
+def retriever_neo4j(question: str):
+    structured_data = structured_retriever(question)
+    logging.debug(f"Structured data: {structured_data}")
+    return structured_data
+
+# Setup for condensing the follow-up questions
+_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
+in its original language.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
+    buffer = []
+    for human, ai in chat_history:
+        buffer.append(HumanMessage(content=human))
+        buffer.append(AIMessage(content=ai))
+    return buffer
+
+_search_query = RunnableBranch(
+    (
+        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
+            run_name="HasChatHistoryCheck"
+        ),
+        RunnablePassthrough.assign(
+            chat_history=lambda x: _format_chat_history(x["chat_history"])
+        )
+        | CONDENSE_QUESTION_PROMPT
+        | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
+        | StrOutputParser(),
+    ),
+    RunnableLambda(lambda x: x["question"]),
)

+
+template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
+Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
+{context}
+Question: {question}
+Answer:"""
+
+
+qa_prompt = ChatPromptTemplate.from_template(template)
+
+# Define the chain for Neo4j-based retrieval and response generation
+chain_neo4j = (
+    RunnableParallel(
+        {
+            "context": _search_query | retriever_neo4j,
+            "question": RunnablePassthrough(),
+        }
+    )
+    | qa_prompt
+    | chat_model
+    | StrOutputParser()
)

+# Define the function to get the response
+def get_response(question):
+    try:
+        return chain_neo4j.invoke({"question": question})
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Define the function to clear input and output
+def clear_fields():
+    return [], "", None
+
+# Function to generate audio with Eleven Labs TTS
+def generate_audio_elevenlabs(text):
+    XI_API_KEY = os.environ['ELEVENLABS_API']
+    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
+    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
+    headers = {
+        "Accept": "application/json",
+        "xi-api-key": XI_API_KEY
+    }
+    data = {
+        "text": str(text),
+        "model_id": "eleven_multilingual_v2",
+        "voice_settings": {
+            "stability": 1.0,
+            "similarity_boost": 0.0,
+            "style": 0.60,
+            "use_speaker_boost": False
+        }
+    }
+    response = requests.post(tts_url, headers=headers, json=data, stream=True)
+    if response.ok:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+            audio_path = f.name
+        logging.debug(f"Audio saved to {audio_path}")
+        return audio_path  # Return audio path for automatic playback
+    else:
+        logging.error(f"Error generating audio: {response.text}")
+        return None
+
+
+
+def handle_mode_selection(mode, chat_history, question):
+    if mode == "Normal Chatbot":
+        # Append the user's question to chat history first
+        chat_history.append((question, ""))  # Placeholder for the bot's response
+
+        # Stream the response and update chat history with each chunk
+        for response_chunk in chat_with_bot(chat_history):
+            chat_history[-1] = (question, response_chunk[-1][1])  # Update last entry with streamed response
+            yield chat_history, "", None  # Stream each chunk to display in the chatbot
+        yield chat_history, "", None  # Final yield to complete the response
+
+    elif mode == "Voice to Voice Conversation":
+        # Voice to Voice mode: Stream the response text and then convert it to audio
+        response_text = get_response(question)  # Retrieve response text
+        audio_path = generate_audio_elevenlabs(response_text)  # Convert response to audio
+        yield [], "", audio_path  # Only output the audio response without updating chatbot history
+
+
+# Function to add a user's message to the chat history and clear the input box
+def add_message(history, message):
+    if message.strip():
+        history.append((message, ""))  # Add the user's message to the chat history only if it's not empty
+    return history, ""  # Clear the input box
+
+# Define function to generate a streaming response
+def chat_with_bot(messages):
+    user_message = messages[-1][0]  # Get the last user message (input)
+    messages[-1] = (user_message, "")  # Prepare a placeholder for the bot's response
+
+    response = get_response(user_message)  # Assume `get_response` is a generator function
+
+    # Stream each character in the response and update the history progressively
+    for character in response:
+        messages[-1] = (user_message, messages[-1][1] + character)
+        yield messages  # Stream each updated chunk
+        time.sleep(0.05)  # Adjust delay as needed for real-time effect
+
+    yield messages  # Final yield to complete the response
+
+
+
+# Function to generate audio with Eleven Labs TTS from the last bot response
+def generate_audio_from_last_response(history):
+    # Get the most recent bot response from the chat history
+    if history and len(history) > 0:
+        recent_response = history[-1][1]  # The second item in the tuple is the bot response text
+        if recent_response:
+            return generate_audio_elevenlabs(recent_response)
+    return None
+
+
+
# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"

    return_timestamps=True
)

+# Define the function to reset the state after 10 seconds
def auto_reset_state():
+    time.sleep(5)
    return None, ""  # Reset the state and clear input text


def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]

        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

+    # Ensure y is not empty and is at least 1-dimensional
    if y is None or len(y) == 0:
        return stream, "", None

    if max_abs_y > 0:
        y = y / max_abs_y

+    # Ensure stream is also at least 1-dimensional before concatenation
    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

+    # Process the audio data for transcription
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

+    # Start a thread to reset the state after 10 seconds
    threading.Thread(target=auto_reset_state).start()

+    return stream, full_text, full_text


+# Define the function to clear the state and input text
+def clear_transcription_state():
+    return None, ""


+with gr.Blocks(theme="rawrsor1/Everforest") as demo:
+    # Hide the chatbot component by setting `visible=False`
+    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False, visible=False)

+    with gr.Row():
+        with gr.Column():
+            # Hide the "Normal Chatbot" radio button by removing it or setting `visible=False`
+            mode_selection = gr.Radio(
+                choices=["Voice to Voice Conversation"],  # Removed "Normal Chatbot" option
+                label="Mode Selection",
+                value="Voice to Voice Conversation",
+                visible=False  # Hide the mode selection entirely
+            )
+    # Remaining code unchanged
+    with gr.Row():
+        with gr.Column():
+            question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...", visible=False)
+            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
+            submit_voice_btn = gr.Button("Submit Voice")

+        with gr.Column():
+            audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)

    with gr.Row():
+
        with gr.Column():
+            clear_state_btn = gr.Button("Clear State")
+
        with gr.Column():
+            clean_btn = gr.Button("Clean")
+
+
+
+    # Adjust the interactions for the Get Response button
+    submit_voice_btn.click(
+        fn=handle_mode_selection,
+        inputs=[mode_selection, chatbot, question_input],
+        outputs=[chatbot, question_input, audio_output],
+        api_name="api_voice_to_voice_translation"
    )
+
+    # Speech-to-Text functionality
+    state = gr.State()
+    audio_input.stream(
+        transcribe_function,
+        inputs=[state, audio_input],
+        outputs=[state, question_input],
+        api_name="api_voice_to_text"
+    )
+
+
+
+    clean_btn.click(
+        fn=clear_fields,
+        inputs=[],
+        outputs=[chatbot, question_input, audio_output],
+        api_name="api_clear_textbox"
    )

+    # Clear state interaction
+    clear_state_btn.click(
+        fn=clear_transcription_state,
+        outputs=[question_input, state],
+        api_name="api_clean_state_transcription"
+    )

+# Launch the Gradio interface
+demo.launch(show_error=True, share=True)
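For reference, a minimal client-side sketch of how the endpoints registered above (api_voice_to_voice_translation and api_voice_to_text) could be exercised once the Space is running again; this is not part of the commit. The Space id "user/space-name" and the question text are placeholders, and depending on the gradio_client version, client.submit() may be needed instead of client.predict() for the generator-backed endpoint.

# Hypothetical usage sketch, not part of app.py; assumes `pip install gradio_client`
# and that the Space behind the placeholder id below is up and public.
from gradio_client import Client

client = Client("user/space-name")  # placeholder Space id

# Calls the submit_voice_btn.click() endpoint defined above. With multiple output
# components, predict() returns a tuple: (chat history, cleared question text, path to the .mp3).
history, question_text, audio_path = client.predict(
    "Voice to Voice Conversation",                             # mode_selection value
    [],                                                        # hidden chatbot history
    "What events are happening in Birmingham this weekend?",   # example question
    api_name="/api_voice_to_voice_translation",
)
print(audio_path)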