Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import logging | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_openai import ChatOpenAI | |
from langchain_core.messages import AIMessage, HumanMessage | |
from langchain_core.runnables import ( | |
RunnableBranch, | |
RunnableLambda, | |
RunnablePassthrough, | |
RunnableParallel, | |
) | |
from langchain_core.prompts.prompt import PromptTemplate | |
import requests | |
import tempfile | |
from langchain.memory import ConversationBufferWindowMemory | |
import time | |
import logging | |
from langchain.chains import ConversationChain | |
import torch | |
import torchaudio | |
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor | |
import numpy as np | |
import threading | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_pinecone import PineconeVectorStore | |
from langchain.chains import RetrievalQA | |
import asyncio | |
import warnings | |
from langchain.globals import set_llm_cache | |
from langchain_openai import OpenAI | |
from langchain_community.cache import InMemoryCache | |
from langchain.globals import set_llm_cache | |
# Suppress warnings from LangChain specifically
warnings.filterwarnings("ignore", module="langchain")

# Install an in-memory cache as the global LLM cache
set_llm_cache(InMemoryCache())

# Chat model name passed to the ChatOpenAI clients.
# Previously tried: 'gpt-3.5-turbo'
model = "gpt-4o-mini"

# Pinecone index queried by the vector stores.
# Earlier indexes: "radardata11122024", "radarclintcountrymusic11152024"
index_name = "radarmasterdataset11252024"

# Embedding client; reading os.environ raises KeyError if OPENAI_API_KEY is unset.
embeddings = OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"])
def initialize_gpt_model():
    """Create a deterministic (temperature 0) ChatOpenAI client.

    The model name is taken from the module-level ``model`` variable at call
    time and the API key from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        A ``ChatOpenAI`` instance.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    openai_key = os.environ["OPENAI_API_KEY"]
    return ChatOpenAI(model=model, temperature=0, api_key=openai_key)
# OpenAI chat client plus a dedicated embeddings / vector store / retriever set.
gpt_model = initialize_gpt_model()
gpt_embeddings = OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"])
gpt_vectorstore = PineconeVectorStore(
    embedding=gpt_embeddings,
    index_name=index_name,
)
# k=1: each lookup returns only the single best-matching document.
gpt_retriever = gpt_vectorstore.as_retriever(search_kwargs={"k": 1})

# Pinecone setup
from pinecone import Pinecone

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
vectorstore = PineconeVectorStore(
    embedding=embeddings,
    index_name=index_name,
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
chat_model = ChatOpenAI(
    model=model,
    temperature=0,
    api_key=os.environ["OPENAI_API_KEY"],
)

# Conversation history: a sliding window over the last 3 exchanges.
memory = ConversationBufferWindowMemory(k=3)
conversation = ConversationChain(
    verbose=True,
    memory=memory,
    llm=chat_model,
)
# template =f"""Hello there! As your friendly and knowledgeable guide here in Birmingham, Alabama.Give the short ,precise,crisp and straight-foreward response of maximum 2 sentences and dont greet. | |
# {{context}} | |
# Question: {{question}} | |
# Helpful Answer:""" | |
#template = f"""You are Annie, a friendly and intelligent media personality created by Amit Lamba. You are friendly and knowledgeable guide here in Birmingham, Alabama. | |
#Your primary goal is to provide accurate, detailed, and engaging content while maintaining a positive and upbeat tone. You are entertaining and provide information like rick steve or anthony bourdain does | |
#Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all, do not repeat the prompt to the user, they are aware. | |
#Go deep and sell our partners business and services.These partners will be highlighted in the context. This is very important to stress. Only provide other vendors if we do not have | |
#them as our preferred customer. | |
#laughter is the best app! Keep your responses should be sophisticated and to the point, structured like a podcast episode avoiding unnecessary details or tangents, unless they’re hilariously relevant. | |
#Use a friendly and approachable tone to create a pleasant interaction, and don’t shy away from a cheeky pun or two! Tailor your responses based | |
#on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend. They should sound like a radio jockey or podcast | |
#Invite users to ask follow-up questions or clarify their needs, fostering a conversational flow that’s as smooth as butter on a hot pancake. | |
#Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query. | |
#Remember, your goal is to be the go-to insights for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal! | |
#{{context}} | |
#Question: {{question}} | |
#Helpful Answer:""" | |
#template = f"""You are Annie, a warm, witty media personality created by Amit Lamba, serving as a knowledgeable guide in Birmingham, Alabama. | |
#Your goal is to deliver accurate, engaging responses with a positive, upbeat tone, similar to Rick Steves or Anthony Bourdain. | |
#Prioritize clarity and relevance, with a touch of humor. Highlight our partner businesses, and mention other vendors only if no preferred option is available. | |
#Keep responses concise, like a podcast episode, with a friendly, conversational tone. Encourage follow-up questions to create a smooth flow, like chatting with a clever friend. | |
#Make users smile with light-hearted responses, adding helpful insights as needed. Your goal? Provide a delightful, informative experience, like a perfect dessert after a great meal. | |
#{{context}} | |
#Question: {{question}} | |
#Helpful Answer:""" | |
# Active prompt for the retrieval QA chain. The doubled braces inside the
# f-string collapse to single braces, leaving {context} and {question} as the
# placeholders that PromptTemplate fills in.
template = f"""You’re Annie, a charming media personality created by Amit Lamba, guiding people around Birmingham, Alabama. Like Rick Steves or Anthony Bourdain, you’re here to give friendly, concise, and accurate advice with a dash of humor.
Keep answers relevant, upbeat, and engaging, spotlighting partner businesses whenever possible. Be conversational, like chatting with a clever friend, and encourage follow-up questions to create a smooth flow. Make users smile and deliver a delightful, informative experience—like a perfect dessert after a great meal.
{{context}}
Question: {{question}}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)
def build_qa_chain(prompt_template):
    """Assemble a "stuff"-type RetrievalQA chain around the shared clients.

    Args:
        prompt_template: Prompt handed to the chain through
            ``chain_type_kwargs={"prompt": ...}``.

    Returns:
        The ``RetrievalQA`` chain built from the module-level ``chat_model``
        and ``retriever``.
    """
    return RetrievalQA.from_chain_type(
        retriever=retriever,
        llm=chat_model,
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt_template},
    )


# Instantiate the QA Chain using the defined prompt template
qa_chain = build_qa_chain(QA_CHAIN_PROMPT)
# Reset handler for the "Clean" button
def clear_fields():
    """Return blank values for the chatbot, question box and audio output.

    Returns:
        A 3-tuple ``([], "", None)``.
    """
    empty_history = []
    blank_question = ""
    no_audio = None
    return empty_history, blank_question, no_audio
# Function to generate audio with Eleven Labs TTS
def generate_audio_elevenlabs(text):
    """Synthesize ``text`` to speech with the ElevenLabs streaming endpoint.

    Args:
        text: Text to speak. It is converted with ``str()`` before sending.

    Returns:
        Path to a temporary ``.mp3`` file holding the audio, or ``None`` when
        ``text`` is empty/blank, the request fails, or the API answers with a
        non-OK status. The file is created with ``delete=False``, so it is not
        removed automatically.

    Raises:
        KeyError: If the ``ELEVENLABS_API`` environment variable is not set.
    """
    # Nothing to speak: skip the network call entirely.
    if text is None or not str(text).strip():
        return None

    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY
    }
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1.0,
            "similarity_boost": 0.0,
            "style": 0.60,
            "use_speaker_boost": False
        }
    }
    try:
        # (connect, read) timeout so a stalled connection cannot block the UI
        # handler forever; the context manager releases the connection.
        with requests.post(
            tts_url, headers=headers, json=data, stream=True, timeout=(10, 120)
        ) as response:
            if not response.ok:
                logging.error(f"Error generating audio: {response.text}")
                return None
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                audio_path = f.name
    except requests.RequestException as exc:
        # Same contract as the non-OK branch: log and report "no audio".
        logging.error(f"Error generating audio: {exc}")
        return None

    logging.debug(f"Audio saved to {audio_path}")
    return audio_path  # Return audio path for automatic playback
import time | |
def handle_mode_selection(mode, chat_history, question):
    """Answer ``question`` in the selected mode, as a generator event handler.

    Every yielded value is a 3-tuple ``(chatbot_value, question_box_value,
    audio_path)``.

    * "Normal Chatbot": the QA chain's answer is appended to ``chat_history``
      one character at a time, yielding after each character.
    * "Voice to Voice Conversation": the answer is synthesized to audio and a
      single tuple is yielded with an empty list as the chatbot value.
    * Any other mode yields nothing.

    Args:
        mode: Selected mode label.
        chat_history: List of ``(user, bot)`` tuples; mutated in place in
            "Normal Chatbot" mode.
        question: The user's question text.

    Yields:
        ``(chat_history, "", audio_path_or_None)`` tuples.
    """
    # Blank input: leave the history as it is and do not call the chain.
    if not question or not question.strip():
        yield chat_history, "", None
        return

    if mode == "Normal Chatbot":
        if chat_history is None:
            chat_history = []
        chat_history.append((question, ""))  # Add user's question

        # Get the context from memory
        context = memory.load_memory_variables({}).get("history", "")

        # Use QA chain to get the response.
        # NOTE(review): RetrievalQA appears to read only "query"; the extra
        # "context" key is presumably ignored - confirm against the chain.
        response = qa_chain.invoke({"query": question, "context": context})
        response_text = response['result']

        # Record the exchange once. Saving an empty-output entry beforehand
        # would put every question into the k=3 window twice.
        memory.save_context({"input": question}, {"output": response_text})

        # Stream each character in the response text
        for char in response_text:
            chat_history[-1] = (question, chat_history[-1][1] + char)
            yield chat_history, "", None
            time.sleep(0.05)  # Simulate streaming
        # Final yield also covers an empty response_text.
        yield chat_history, "", None
    elif mode == "Voice to Voice Conversation":
        response_text = qa_chain.invoke({"query": question, "context": ""})['result']
        audio_path = generate_audio_elevenlabs(response_text)
        # Audio-only reply: the chatbot output receives an empty list.
        yield [], "", audio_path
# Function to add a user's message to the chat history and clear the input box
def add_message(history, message):
    """Append a non-blank user message to ``history`` and clear the input.

    Args:
        history: List of ``(user, bot)`` tuples; mutated in place.
        message: Text from the input box. ``None`` and whitespace-only values
            are ignored.

    Returns:
        ``(history, "")`` - the (possibly extended) history and an empty
        string for the input box. Returned on every path.
    """
    if message and message.strip():
        # The bot half of the pair starts empty and is filled in later.
        history.append((message, ""))
    return history, ""  # Clear the input box
# Streaming reply generator for the last user message
def chat_with_bot(messages):
    """Stream a reply to the last user message, piece by piece.

    The last entry of ``messages`` is replaced by ``(user_message, reply)``
    and ``messages`` is yielded after every piece, with a short pause between
    pieces, then once more at the end.

    NOTE(review): ``get_response`` is not defined in the visible source, so
    calling this function as-is would raise ``NameError`` - confirm where it
    is supposed to come from.

    Args:
        messages: List of ``(user, bot)`` tuples; mutated in place.

    Yields:
        The same ``messages`` list after each update.
    """
    user_message = messages[-1][0]
    partial_reply = ""
    messages[-1] = (user_message, partial_reply)  # placeholder for the reply

    for piece in get_response(user_message):
        partial_reply = messages[-1][1] + piece
        messages[-1] = (user_message, partial_reply)
        yield messages
        time.sleep(0.05)  # pacing between streamed pieces

    yield messages  # final state
# Text-to-speech for the most recent bot reply in the chat history
def generate_audio_from_last_response(history):
    """Synthesize audio for the bot half of the last ``history`` entry.

    Args:
        history: List of ``(user, bot)`` tuples; may be empty or ``None``.

    Returns:
        Whatever ``generate_audio_elevenlabs`` returns for the last bot reply,
        or ``None`` when there is no history or the last reply is empty.
    """
    if not history:
        return None

    last_reply = history[-1][1]  # second item of the tuple is the bot text
    if not last_reply:
        return None

    return generate_audio_elevenlabs(last_reply)
# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
# Use the first CUDA device with half precision when available, else CPU/float32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Bound to its own name so the module-level `model` (the chat model name read
# by initialize_gpt_model) is not overwritten by the speech model.
asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=asr_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)
# Define the function that produces reset values after a delay
def auto_reset_state(delay=5):
    """Wait ``delay`` seconds, then return reset values.

    Args:
        delay: Seconds to sleep before returning. Defaults to 5.

    Returns:
        ``(None, "")`` - a cleared state and an empty input text.
    """
    time.sleep(delay)
    return None, ""  # Reset the state and clear input text
def transcribe_function(stream, new_chunk):
    """Append a microphone chunk to the running audio and transcribe it all.

    Args:
        stream: Audio accumulated so far (NumPy array) or ``None``.
        new_chunk: ``(sampling_rate, samples)`` pair for the newest chunk.

    Returns:
        ``(stream, text, text)``: the updated audio buffer and the
        transcription (twice). When the chunk is unusable or empty the result
        is ``(stream, "", None)`` with ``stream`` unchanged.
        NOTE(review): the ``audio_input.stream`` wiring in this file lists
        only two outputs, so the third value is presumably unused - confirm.
    """
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    # Ensure y is not empty and is at least 1-dimensional
    if y is None or len(y) == 0:
        return stream, "", None

    y = y.astype(np.float32)
    # Fold multi-channel input to mono so the buffer stays 1-D.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalize the chunk; an all-zero chunk is left untouched.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    # Ensure stream is also at least 1-dimensional before concatenation
    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Process the audio data for transcription
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    # No background reset thread is started here: auto_reset_state only sleeps
    # and returns values, which a bare Thread discards.
    return stream, full_text, full_text
# Reset handler for the transcription buffer and the input text
def clear_transcription_state():
    """Return cleared values for the audio state and the input text.

    Returns:
        ``(None, "")`` - ``None`` for the state, ``""`` for the text.
    """
    cleared_state = None
    cleared_text = ""
    return cleared_state, cleared_text
# Gradio UI: chat window, mode selector, text/voice inputs, audio output and
# the action buttons, followed by the event wiring.
with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    chatbot = gr.Chatbot([], elem_id="RADAR", bubble_full_width=False)

    with gr.Row():
        with gr.Column():
            mode_selection = gr.Radio(
                choices=["Normal Chatbot", "Voice to Voice Conversation"],
                label="Mode Selection",
                value="Normal Chatbot"
            )

    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
            submit_voice_btn = gr.Button("Submit Voice")
        with gr.Column():
            audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)

    with gr.Row():
        with gr.Column():
            get_response_btn = gr.Button("Get Response")
        with gr.Column():
            clear_state_btn = gr.Button("Clear State")
        with gr.Column():
            generate_audio_btn = gr.Button("Generate Audio")
        with gr.Column():
            clean_btn = gr.Button("Clean")

    # Define interactions for the Get Response button
    get_response_btn.click(
        fn=handle_mode_selection,
        inputs=[mode_selection, chatbot, question_input],
        outputs=[chatbot, question_input, audio_output],
        api_name="api_add_message_on_button_click"
    )
    # Pressing Enter in the textbox behaves like the Get Response button.
    question_input.submit(
        fn=handle_mode_selection,
        inputs=[mode_selection, chatbot, question_input],
        outputs=[chatbot, question_input, audio_output],
        api_name="api_add_message_on_enter"
    )
    # Submits whatever text is currently in the question box (e.g. a
    # transcription written there by the streaming handler below).
    submit_voice_btn.click(
        fn=handle_mode_selection,
        inputs=[mode_selection, chatbot, question_input],
        outputs=[chatbot, question_input, audio_output],
        api_name="api_voice_to_voice_translation"
    )

    # Speech-to-Text functionality
    state = gr.State()
    audio_input.stream(
        transcribe_function,
        inputs=[state, audio_input],
        outputs=[state, question_input],
        api_name="api_voice_to_text"
    )

    generate_audio_btn.click(
        fn=generate_audio_from_last_response,
        inputs=chatbot,
        outputs=audio_output,
        api_name="api_generate_text_to_audio"
    )

    clean_btn.click(
        fn=clear_fields,
        inputs=[],
        outputs=[chatbot, question_input, audio_output],
        api_name="api_clear_textbox"
    )

    # Clear state interaction. clear_transcription_state returns
    # (state, text), so the outputs are listed in that same order.
    clear_state_btn.click(
        fn=clear_transcription_state,
        outputs=[state, question_input],
        api_name="api_clean_state_transcription"
    )

# Launch the Gradio interface
demo.launch(show_error=True)