Pijush2023 committed (verified)
Commit 8527f42 · 1 Parent(s): f26ca23

Update app.py

Files changed (1)
  1. app.py +129 -113
app.py CHANGED
@@ -1,20 +1,40 @@
 import gradio as gr
 import os
 import logging
-import requests
-import tempfile
-import torch
-import numpy as np
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
-from langchain_community.graphs import Neo4jGraph
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
 from langchain_openai import ChatOpenAI
+from langchain_community.graphs import Neo4jGraph
+from typing import List, Tuple
 from pydantic import BaseModel, Field
-from typing import List
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import (
+    RunnableBranch,
+    RunnableLambda,
+    RunnablePassthrough,
+    RunnableParallel,
+)
+from langchain_core.prompts.prompt import PromptTemplate
+import requests
+import tempfile
+from langchain.memory import ConversationBufferWindowMemory
 import time
+import logging
+from langchain.chains import ConversationChain
+import torch
 import torchaudio
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
+import numpy as np
+import threading

-# Neo4j Setup
+# Setup conversational memory
+conversational_memory = ConversationBufferWindowMemory(
+    memory_key='chat_history',
+    k=10,
+    return_messages=True
+)
+
+# Setup Neo4j connection
 graph = Neo4jGraph(
     url="neo4j+s://6457770f.databases.neo4j.io",
     username="neo4j",
@@ -51,6 +71,9 @@ def generate_full_text_query(input: str) -> str:
     full_text_query += f" {words[-1]}~2"
     return full_text_query.strip()

+# Setup logging to a file to capture debug information
+logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+
 def structured_retriever(question: str) -> str:
     result = ""
     entities = entity_chain.invoke({"question": question})
@@ -74,9 +97,38 @@ def structured_retriever(question: str) -> str:
         result += "\n".join([el['output'] for el in response])
     return result

+def retriever_neo4j(question: str):
+    structured_data = structured_retriever(question)
+    logging.debug(f"Structured data: {structured_data}")
+    return structured_data
+
+# Define the chain for Neo4j-based retrieval and response generation
+chain_neo4j = (
+    RunnableParallel(
+        {
+            "context": RunnableLambda(lambda x: retriever_neo4j(x["question"])),
+            "question": RunnablePassthrough(),
+        }
+    )
+    | ChatPromptTemplate.from_template("Answer: {context} Question: {question}")
+    | chat_model
+    | StrOutputParser()
+)
+
+# Define the function to get the response
+def get_response(question):
+    try:
+        return chain_neo4j.invoke({"question": question})
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Define the function to clear input and output
+def clear_fields():
+    return [], "", None
+
 # Function to generate audio with Eleven Labs TTS
 def generate_audio_elevenlabs(text):
-    XI_API_KEY = os.environ.get('ELEVENLABS_API')
+    XI_API_KEY = os.environ['ELEVENLABS_API']
     VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
     tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
     headers = {
@@ -93,118 +145,82 @@ def generate_audio_elevenlabs(text):
             "use_speaker_boost": False
         }
     }
-
-    try:
-        logging.debug(f"Sending request to Eleven Labs with text: {text[:100]}...")
-        response = requests.post(tts_url, headers=headers, json=data, stream=True)
-
-        if response.ok:
-            logging.debug("Received successful response from Eleven Labs API.")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
-                for chunk in response.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-                audio_path = f.name
-            logging.debug(f"Audio successfully saved to {audio_path}")
-            return audio_path
-        else:
-            logging.error(f"Error generating audio: {response.status_code} - {response.text}")
-            return None
-    except Exception as e:
-        logging.error(f"Exception during audio generation: {str(e)}")
+    response = requests.post(tts_url, headers=headers, json=data, stream=True)
+    if response.ok:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+            audio_path = f.name
+        logging.debug(f"Audio saved to {audio_path}")
+        return audio_path  # Return audio path for automatic playback
+    else:
+        logging.error(f"Error generating audio: {response.text}")
     return None

-# Define the ASR model with Whisper
-model_id = 'openai/whisper-large-v3'
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
-processor = AutoProcessor.from_pretrained(model_id)
-
-pipe_asr = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    max_new_tokens=128,
-    chunk_length_s=15,
-    batch_size=16,
-    torch_dtype=torch_dtype,
-    device=device,
-    return_timestamps=True
-)
+# Function to handle voice to voice conversation
+def handle_voice_to_voice(chat_history, question):
+    response = get_response(question)
+    audio_path = generate_audio_elevenlabs(response)
+    chat_history.append(("[Voice Input]", "[Voice Response]"))
+    return chat_history, "", audio_path

-# Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
-def transcribe_and_respond(audio):
-    if audio is None:
-        logging.error("No audio provided.")
-        return None, "No audio provided."
-
-    sr, y = audio
-    y = np.array(y).astype(np.float32)
-
-    # Resample to 16kHz if needed
-    target_sr = 16000
-    if sr != target_sr:
-        logging.debug(f"Resampling audio from {sr} Hz to {target_sr} Hz.")
-        y = torchaudio.functional.resample(torch.tensor(y), orig_freq=sr, new_freq=target_sr).numpy()
-        sr = target_sr
-
-    # Transcribe the audio using Whisper with English language setting
-    result = pipe_asr({"raw": y, "sampling_rate": sr}, return_timestamps=False)
-    question = result.get("text", "")
-
-    # Log the transcribed text for debugging
-    logging.debug(f"Transcribed text: {question}")
-
-    # Retrieve information from Neo4j
-    response_text = structured_retriever(question) if question else "I didn't understand the question."
-
-    # Convert the response to audio using Eleven Labs TTS
-    audio_path = generate_audio_elevenlabs(response_text) if response_text else None
-
-    # Ensure a valid audio path is returned
-    if audio_path and os.path.exists(audio_path):
-        logging.debug(f"Generated audio file path: {audio_path}")
+# Function to transcribe audio input
+def transcribe_function(stream, new_chunk):
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
+    if y is None or len(y) == 0:
+        return stream, "", None
+
+    y = y.astype(np.float32)
+    max_abs_y = np.max(np.abs(y))
+    if max_abs_y > 0:
+        y = y / max_abs_y
+
+    if stream is not None and len(stream) > 0:
+        stream = np.concatenate([stream, y])
     else:
-        logging.error("Failed to generate audio or save audio to file.")
-        audio_path = None
-
-    return audio_path, response_text
+        stream = y
+
+    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    full_text = result.get("text", "")

-# Function to clear the transcription state
-def clear_transcription_state():
-    return None, None
+    threading.Thread(target=auto_reset_state).start()

-# Define the Gradio interface with only audio input and output
+    return stream, full_text, full_text
+
+# Define the Gradio interface
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
-    with gr.Row():
-        audio_input = gr.Audio(
-            sources=["microphone"],
-            type='numpy',
-            label="Speak to Ask"
-        )
-        audio_output = gr.Audio(
-            label="Audio Response",
-            type="filepath",
-            autoplay=True,
-            interactive=False
-        )
-
-    # Submit button to process the audio input
-    submit_btn = gr.Button("Submit")
-    submit_btn.click(
-        fn=transcribe_and_respond,
-        inputs=audio_input,
-        outputs=[audio_output, gr.Textbox(label="Transcription")]
+    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False)
+    mode_selection = gr.Radio(
+        choices=["Normal Chatbot", "Voice to Voice Conversation"],
+        label="Mode Selection",
+        value="Normal Chatbot"
+    )
+    question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
+    audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
+    submit_voice_btn = gr.Button("Submit Voice")
+    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
+
+    # Interactions for Submit Voice Button
+    submit_voice_btn.click(
+        fn=handle_voice_to_voice,
+        inputs=[chatbot, question_input],
+        outputs=[chatbot, question_input, audio_output],
+        api_name="api_voice_to_voice_translation"
     )

-    # Clear state interaction
-    gr.Button("Clear State").click(
-        fn=clear_transcription_state,
-        outputs=[audio_output, gr.Textbox(label="Transcription")],
-        api_name="api_clean_state"
+    # Speech-to-Text functionality
+    state = gr.State()
+    audio_input.stream(
+        transcribe_function,
+        inputs=[state, audio_input],
+        outputs=[state, question_input],
+        api_name="api_voice_to_text"
     )

-    # Launch the Gradio interface
-    demo.launch(show_error=True, share=True)
+demo.launch(show_error=True, share=True)
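Note: the added transcribe_function and chain_neo4j reference pipe_asr, chat_model, and auto_reset_state, none of which appear in the added or context lines above, and this commit removes the previous pipe_asr definition. If they are not defined in unchanged parts of app.py, the new code will raise a NameError at runtime. A minimal sketch of plausible definitions is shown below; the Whisper setup mirrors the code removed by this commit, while the ChatOpenAI model name, the reset flag, and the 5-second delay are assumptions rather than part of the commit.

import time
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_openai import ChatOpenAI

# Whisper ASR pipeline, mirroring the definition removed by this commit
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)

# LLM consumed by chain_neo4j; the model name here is an assumption
chat_model = ChatOpenAI(temperature=0, model="gpt-4o")

# Hypothetical helper started by transcribe_function: after a short pause,
# set a flag that other code could check to reset the accumulated audio
# stream. Both the delay and the flag are assumptions, not in this commit.
stream_needs_reset = False

def auto_reset_state():
    global stream_needs_reset
    time.sleep(5)  # assumed pause length
    stream_needs_reset = True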