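"""Emotional Support Buddy: a small retrieval-augmented Streamlit app.

The user's message is embedded, the most similar counseling responses from the
Amod/mental_health_counseling_conversations dataset are retrieved by cosine
similarity, and a generation model is asked to turn that retrieved context into
a supportive reply. Embedding and generation calls go through the Groq API
endpoints configured below.
"""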
import os

import numpy as np
import requests
import streamlit as st
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity

# The Groq API key must be provided via the GROQ_API_KEY environment variable;
# never hard-code secrets in source files.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Export it before starting the app.")
    st.stop()

# Groq API endpoint
GROQ_API_URL = "https://api.groq.com/v1/inference"

# Embed a piece of text with MiniLM via the Groq API and return the embedding vector.
def retrieve_embedding(text):
    payload = {
        "model": "microsoft/MiniLM-L6-H384-uncased",
        "input_text": text,
    }
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    response = requests.post(f"{GROQ_API_URL}/embedding", json=payload, headers=headers, timeout=30)
    response.raise_for_status()  # surface HTTP errors instead of failing later on a missing field
    return response.json()["embedding"]

# Generate a supportive reply with FLAN-T5 via the Groq API, conditioned on the retrieved context.
def generate_response(context):
    payload = {
        "model": "google/flan-t5-small",
        "input_text": f"Given the following context, provide a supportive response: {context}",
    }
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}"}
    response = requests.post(f"{GROQ_API_URL}/generate", json=payload, headers=headers, timeout=60)
    response.raise_for_status()
    return response.json()["text"]

# Load the counseling conversations dataset
dataset = load_dataset("Amod/mental_health_counseling_conversations")["train"]

# Precompute embeddings for every dataset response via the Groq API.
# st.cache_data replaces the deprecated st.cache(allow_output_mutation=True);
# the leading underscore tells Streamlit not to hash the dataset argument itself.
@st.cache_data
def embed_dataset(_dataset):
    embeddings = []
    for entry in _dataset:
        embeddings.append(retrieve_embedding(entry["response"]))
    return embeddings

dataset_embeddings = embed_dataset(dataset)

# Retrieve the k dataset responses whose embeddings are most similar to the user query.
def retrieve_response(user_query, dataset, dataset_embeddings, k=5):
    query_embedding = retrieve_embedding(user_query)
    # Cosine similarity of the query against every precomputed response embedding.
    cos_scores = cosine_similarity([query_embedding], dataset_embeddings)[0]
    # Indices of the k highest scores, best match first.
    top_indices = np.argsort(cos_scores)[-k:][::-1]
    return [dataset[int(idx)]["response"] for idx in top_indices]
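# Example (hypothetical query): retrieve_response("I feel anxious about work", dataset, dataset_embeddings, k=3)
# returns the three counseling responses whose embeddings score highest against that query.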

# Streamlit app UI
st.title("Emotional Support Buddy")
st.write("Enter your thoughts or concerns, and I'll provide some comforting words.")

# User input
user_query = st.text_input("How are you feeling today?")

if user_query:
    # Retrieve similar responses from the dataset
    retrieved_responses = retrieve_response(user_query, dataset, dataset_embeddings)
    
    # Join retrieved responses to create a supportive context
    context = " ".join(retrieved_responses)
    
    # Generate a supportive response using FLAN-T5 via Groq API
    supportive_response = generate_response(context)
    
    st.write("Here's some advice or support for you:")
    st.write(supportive_response)
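
# To run locally (assuming this script is saved as app.py):
#   streamlit run app.py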