Spaces:

jaafarhh
/

DarijaTherapy

Sleeping

App Files Community

jaafarhh committed on Dec 1, 2024

Commit

e49ad3d

verified ·

1 Parent(s): 4dcf57d

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -217

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import streamlit as st
-import torch
-import torchaudio
-import soundfile as sf
-from pathlib import Path
-from transformers import pipeline, AutoTokenizer
 from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
@@ -12,26 +13,20 @@ from langchain_community.vectorstores import FAISS
 from langchain.prompts import PromptTemplate
 import os
 from dotenv import load_dotenv
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
-import requests
-import time
 # Load environment variables
 load_dotenv()
-# CSS styling
-css = """
-<style>
-.chat-message { padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex; }
-.chat-message.user { background-color: #2b313e; }
-.chat-message.bot { background-color: #475063; }
-.avatar { margin-right: 1rem; }
-.message { color: white; }
-</style>
-"""
-# Updated prompt template for Mixtral
 PROMPT_TEMPLATE = """
 <s>[INST] You are a professional therapist who speaks Moroccan Arabic (Darija).
 Act as a compassionate therapist and provide empathetic responses using therapeutic techniques.
@@ -46,215 +41,157 @@ Context: {context}
 [/INST]
 """
-class DarijaTherapist:
-    def __init__(self):
-        self.setup_models()
-        self.initialize_session_state()
-        self.setup_memory()
-    def setup_models(self):
-        try:
-            # Speech recognition setup
-            tokenizer = AutoTokenizer.from_pretrained("facebook/seamless-m4t-v2-large")
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.asr_pipe = pipeline(
-                "automatic-speech-recognition",
-                model="facebook/seamless-m4t-v2-large",
-                tokenizer=tokenizer,
-                device=self.device
-            )
-            # Configure retry strategy
-            retry_strategy = Retry(
-                total=3,
-                backoff_factor=1,
-                status_forcelist=[429, 500, 502, 503, 504]
-            )
-            # Create session with retry strategy
-            session = requests.Session()
-            session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
-            # Updated LLM setup for Mixtral
-            self.llm = HuggingFaceEndpoint(
-                endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
-                task="text-generation",
-                temperature=0.7,
-                do_sample=True,
-                return_full_text=False,
-                timeout=300,
-                model_kwargs={
-                    "max_new_tokens": 2048,
-                    "top_p": 0.9,
-                    "repetition_penalty": 1.2,
-                    "return_text": True,
-                    "stop": ["</s>"]
-                },
-                huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_TOKEN"),
-                client=session
-            )
-            # Embeddings setup
-            self.embeddings = HuggingFaceBgeEmbeddings(
-                model_name="BAAI/bge-large-en"
-            )
-            self.vectorstore = FAISS.from_texts(
-                ["Initial therapeutic context"],
-                self.embeddings
-            )
-        except Exception as e:
-            st.error(f"Error setting up models: {str(e)}")
-            st.stop()
-    def setup_memory(self):
-        self.memory = ConversationBufferMemory(
-            memory_key="chat_history",
-            return_messages=True
-        )
-        qa_prompt = PromptTemplate(
-            template=PROMPT_TEMPLATE,
-            input_variables=["context", "chat_history", "question"]
-        )
-        self.conversation_chain = ConversationalRetrievalChain.from_llm(
-            llm=self.llm,
-            retriever=self.vectorstore.as_retriever(),
-            memory=self.memory,
-            combine_docs_chain_kwargs={"prompt": qa_prompt},
-            return_source_documents=True
-        )
-    def initialize_session_state(self):
-        if "messages" not in st.session_state:
-            st.session_state.messages = []
-        if "recording" not in st.session_state:
-            st.session_state.recording = False
-        if "audio_buffer" not in st.session_state:
-            st.session_state.audio_buffer = []
-    def handle_audio_input(self):
-        if not st.session_state.recording:
-            return
-        try:
-            waveform, sample_rate = torchaudio.load("temp_audio.wav")
-            st.session_state.audio_buffer.append(waveform)
-        except Exception as e:
-            st.error(f"Error recording audio: {str(e)}")
-    def process_audio(self):
-        if not st.session_state.audio_buffer:
-            return None
         try:
-            audio_data = torch.cat(st.session_state.audio_buffer, dim=1)
-            torchaudio.save("temp_audio.wav", audio_data, 16000)
-            audio, rate = sf.read("temp_audio.wav", dtype='float32')
-            result = self.asr_pipe(
-                audio,
-                generate_kwargs={"task": "transcribe", "language": "ara"}
-            )
-            return result["text"]
-        except Exception as e:
-            st.error(f"Error processing audio: {str(e)}")
-            return None
-        finally:
-            st.session_state.audio_buffer = []
-    def get_ai_response(self, user_input):
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                # Validate and clean input
-                if not user_input or len(user_input.strip()) == 0:
-                    return "عذراً، ما فهمتش السؤال ديالك. عاود من فضلك."
-                # Limit input length to prevent tensor size issues
-                if len(user_input) > 512:
-                    user_input = user_input[:512]
-                response = self.conversation_chain({
-                    "question": user_input,
-                    "chat_history": self.memory.chat_memory.messages[-5:]  # Limit context window
-                })
-                if not response or 'answer' not in response:
-                    if attempt < max_retries - 1:
-                        time.sleep(2 ** attempt)
-                        continue
-                    return "عذراً، كاين مشكل. حاول مرة أخرى."
-                return response['answer']
-            except requests.exceptions.HTTPError as e:
-                if e.response.status_code == 424:
-                    if attempt < max_retries - 1:
-                        st.warning("Model error, retrying with simplified input...")
-                        time.sleep(2 ** attempt)
-                        continue
-                return "عذراً، كاين مشكل مع النموذج. جرب سؤال أقصر."
-            except requests.exceptions.ReadTimeout:
-                if attempt < max_retries - 1:
-                    st.warning(f"Attempt {attempt + 1} timed out, retrying...")
-                    time.sleep(2 ** attempt)
-                    continue
-                return "عذراً، الخادم بطيء حالياً. حاول مرة أخرى."
-            except Exception as e:
-                st.error(f"Error: {str(e)}")
                 if attempt < max_retries - 1:
                     time.sleep(2 ** attempt)
                     continue
-                return "عذراً، كاين شي مشكل. حاول مرة أخرى."
-    def run(self):
-        st.set_page_config(page_title="Darija AI Therapist", page_icon="🧠")
-        st.markdown(css, unsafe_allow_html=True)
-        st.title("Darija AI Therapist 🧠")
-        st.subheader("تكلم معايا بالدارجة على اللي كيجول فبالك")
-        with st.sidebar:
-            st.header("Settings ⚙️")
-            if st.button("Clear Chat History"):
-                st.session_state.messages = []
-                self.memory.clear()
-            st.markdown("### About")
-            st.info("This AI therapist speaks Darija and is here to help.")
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🎤 Start Recording", disabled=st.session_state.recording):
-                st.session_state.recording = True
-                st.session_state.audio_buffer = []
-        with col2:
-            if st.button("⏹️ Stop Recording", disabled=not st.session_state.recording):
-                st.session_state.recording = False
-                transcription = self.process_audio()
-                if transcription:
-                    self.process_message(transcription)
-        user_input = st.text_input("اكتب رسالتك هنا:", key="text_input")
-        if user_input:
-            self.process_message(user_input)
-        for message in st.session_state.messages:
-            with st.chat_message(message["role"]):
-                st.write(message["content"])
-    def process_message(self, user_input):
-        st.session_state.messages.append({"role": "user", "content": user_input})
-        with st.spinner("جاري التفكير..."):
-            ai_response = self.get_ai_response(user_input)
-            if ai_response:
-                st.session_state.messages.append({"role": "assistant", "content": ai_response})
 if __name__ == "__main__":
-    app = DarijaTherapist()
-    app.run()

 import streamlit as st
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
+import whisper
+import numpy as np
+import av
+from typing import List
+import queue
 from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
 import os
 from dotenv import load_dotenv
+import requests
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
 # Load environment variables
 load_dotenv()
+# Initialize session state
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "audio_buffer" not in st.session_state:
+    st.session_state.audio_buffer = queue.Queue()
+# Prompt template
 PROMPT_TEMPLATE = """
 <s>[INST] You are a professional therapist who speaks Moroccan Arabic (Darija).
 Act as a compassionate therapist and provide empathetic responses using therapeutic techniques.
 [/INST]
 """
+# Setup retry strategy
+retry_strategy = Retry(
+    total=3,
+    backoff_factor=1,
+    status_forcelist=[429, 500, 502, 503, 504]
+)
+session = requests.Session()
+session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
+# Initialize models
+whisper_model = whisper.load_model("base")
+llm = HuggingFaceEndpoint(
+    endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
+    task="text-generation",
+    temperature=0.7,
+    do_sample=True,
+    return_full_text=False,
+    max_new_tokens=2048,
+    top_p=0.9,
+    repetition_penalty=1.2,
+    model_kwargs={
+        "return_text": True,
+        "stop": ["</s>"]
+    },
+    huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_TOKEN"),
+    client=session
+)
+# Setup memory and conversation chain
+memory = ConversationBufferMemory(
+    memory_key="chat_history",
+    return_messages=True
+)
+embeddings = HuggingFaceBgeEmbeddings(
+    model_name="BAAI/bge-large-en"
+)
+vectorstore = FAISS.from_texts(
+    ["Initial therapeutic context"],
+    embeddings
+)
+qa_prompt = PromptTemplate(
+    template=PROMPT_TEMPLATE,
+    input_variables=["context", "chat_history", "question"]
+)
+conversation_chain = ConversationalRetrievalChain.from_llm(
+    llm=llm,
+    retriever=vectorstore.as_retriever(),
+    memory=memory,
+    combine_docs_chain_kwargs={"prompt": qa_prompt},
+    return_source_documents=True
+)
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    return frame
+def audio_frame_callback(frame: av.AudioFrame) -> av.AudioFrame:
+    if st.session_state.recording:
+        sound = frame.to_ndarray()
+        st.session_state.audio_buffer.put(sound)
+    return frame
+def get_ai_response(user_input: str) -> str:
+    max_retries = 3
+    for attempt in range(max_retries):
         try:
+            if not user_input or len(user_input.strip()) == 0:
+                return "عذراً، ما فهمتش السؤال ديالك. عاود من فضلك."
+            if len(user_input) > 512:
+                user_input = user_input[:512]
+            response = conversation_chain({
+                "question": user_input,
+                "chat_history": memory.chat_memory.messages[-5:]
+            })
+            if not response or 'answer' not in response:
                 if attempt < max_retries - 1:
                     time.sleep(2 ** attempt)
                     continue
+                return "عذراً، كاين مشكل. حاول مرة أخرى."
+            return response['answer']
+        except requests.exceptions.HTTPError as e:
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            return "عذراً، كاين مشكل مع النموذج. جرب سؤال أقصر."
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)
+                continue
+            return "عذراً، كاين شي مشكل. حاول مرة أخرى."
+def process_message(user_input: str) -> None:
+    st.session_state.messages.append({"role": "user", "content": user_input})
+    with st.spinner("جاري التفكير..."):
+        ai_response = get_ai_response(user_input)
+        if ai_response:
+            st.session_state.messages.append({"role": "assistant", "content": ai_response})
+def main():
+    st.set_page_config(page_title="Darija AI Therapist", page_icon="🧠")
+    st.title("Darija AI Therapist 🧠")
+    st.subheader("تكلم معايا بالدارجة على اللي كيجول فبالك")
+    # WebRTC setup
+    webrtc_ctx = webrtc_streamer(
+        key="speech-to-text",
+        mode=WebRtcMode.SENDONLY,
+        audio_receiver_size=1024,
+        rtc_configuration=RTCConfiguration(
+            {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+        ),
+        video_frame_callback=video_frame_callback,
+        audio_frame_callback=audio_frame_callback,
+        media_stream_constraints={"video": False, "audio": True},
+    )
+    # Chat interface
+    user_input = st.text_input("اكتب رسالتك هنا:", key="text_input")
+    if user_input:
+        process_message(user_input)
+    # Process audio when recording stops
+    if webrtc_ctx.state.playing and len(st.session_state.audio_buffer) > 0:
+        audio_frames = []
+        while not st.session_state.audio_buffer.empty():
+            audio_frames.append(st.session_state.audio_buffer.get())
+        if audio_frames:
+            audio_data = np.concatenate(audio_frames, axis=0)
+            text = whisper_model.transcribe(audio_data)["text"]
+            if text:
+                process_message(text)
+                st.session_state.audio_buffer = queue.Queue()  # Clear buffer
+    # Display chat history
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
 if __name__ == "__main__":
+    main()