import gradio as gr
import torch
import whisper
# RAG-related imports (retained from the original script; not used in this minimal voice Q&A flow)
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from gtts import gTTS
import os
from groq import Groq

# Load Whisper model for transcription
model = whisper.load_model("base")

# Initialize Groq client (read the API key from the environment instead of hardcoding it)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


# Transcribe an audio file to text with Whisper
def transcribe_audio(audio):
    result = model.transcribe(audio)
    return result["text"]


# Convert text to speech with gTTS and return the path to the saved MP3
def text_to_speech(text):
    tts = gTTS(text)
    audio_path = "/tmp/response.mp3"
    tts.save(audio_path)
    return audio_path


# Query the Groq API for an LLM response to the question
def get_groq_response(question):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": question,
            }
        ],
        model="llama-3.3-70b-versatile",
    )
    return chat_completion.choices[0].message.content


# Build the Gradio interface
with gr.Blocks(
    css="#output_text { font-size: 18px; margin: 10px 0; }"
        "#output_audio { margin-top: 15px; }"
        ".gradio-container { background-color: #f8f9fa; border-radius: 15px; padding: 20px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); }"
        ".gradio-interface { font-family: 'Arial', sans-serif; }"
) as demo:
    gr.Markdown("""
    # Quranic Therapy: Gen-AI Driven Mental Health & Wellness
    ## Where Faith Meets Technology
    Interact with the model using your voice or text input and get answers from documents!
    """, elem_id="header")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Record or Upload Audio")
            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_id="audio_input")
        with gr.Column(scale=3):
            gr.Markdown("### Ask Your Question")
            text_input = gr.Textbox(label="Enter your question", placeholder="Ask a question based on the document...", elem_id="text_input")

    with gr.Row():
        with gr.Column(scale=5):
            output_text = gr.Textbox(label="Answer", elem_id="output_text", interactive=False)
            output_audio = gr.Audio(label="Voice Response", type="filepath", elem_id="output_audio")

    # Process whichever input was provided (audio takes priority) and return both text and audio
    def process_input(audio_input, text_input):
        if audio_input:
            question = transcribe_audio(audio_input)
        else:
            question = text_input

        # Get the answer from the LLM via the Groq API
        answer = get_groq_response(question)

        # Convert the answer to speech and return both text and audio
        audio_path = text_to_speech(answer)
        return answer, audio_path

    # Bind the processing function to the audio and text inputs
    audio_input.change(process_input, inputs=[audio_input, text_input], outputs=[output_text, output_audio])
    text_input.submit(process_input, inputs=[audio_input, text_input], outputs=[output_text, output_audio])

demo.launch(debug=True)
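
# Usage sketch (assumptions: the script is saved as app.py and the Groq key is
# supplied via the GROQ_API_KEY environment variable, as wired above):
#   export GROQ_API_KEY="your-key-here"
#   python app.py
# Gradio then serves the interface locally (http://127.0.0.1:7860 by default).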