File size: 5,886 Bytes
3a8793d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Module-level Hugging Face inference client; the chat functions below call
# `client.chat_completion` on it.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Alternative model, currently disabled:
#client = InferenceClient("meta-llama/Llama-2-7b-chat-hf")

# Placeholder for the app's state
class MyApp:
    """Holds the extracted PDF pages and the FAISS index used to search them.

    State is filled in two steps: ``load_pdfs`` populates ``documents`` and
    ``build_vector_db`` derives ``embeddings`` and ``index`` from them.
    """

    # sentence-transformers model used for both indexing and querying.
    EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

    def __init__(self) -> None:
        self.documents = []     # one dict per page: {"page", "content", "file"}
        self.embeddings = None  # float32 matrix with one row per entry in `documents`
        self.index = None       # faiss.IndexFlatL2 over `embeddings`; None until built
        self._model = None      # SentenceTransformer, created on first use and reused

    def _get_model(self):
        """Return the shared SentenceTransformer, loading it on first use."""
        if getattr(self, "_model", None) is None:
            self._model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
        return self._model

    def load_pdfs(self, file_paths: List[str]) -> None:
        """Extract the text of every page of the given PDFs into ``documents``.

        Any previously built index is discarded, because its row numbers would
        no longer line up with the new ``documents`` list. If opening or reading
        a file raises, the previous state is left untouched.
        """
        documents = []
        for file_path in file_paths:
            # The context manager closes the PDF even if text extraction fails.
            with fitz.open(file_path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    documents.append({"page": page_num, "content": page.get_text(), "file": file_path})
        self.documents = documents
        self.embeddings = None
        self.index = None
        print("PDFs processed successfully!")

    def build_vector_db(self) -> None:
        """Embed every loaded page and build an L2 FAISS index over the result.

        With no documents loaded there is nothing to embed; the index is left
        unset (``None``) instead of failing on an empty embedding matrix.
        """
        if not self.documents:
            self.embeddings = None
            self.index = None
            print("No documents loaded; vector database not built.")
            return
        encoded = self._get_model().encode(
            [doc["content"] for doc in self.documents], show_progress_bar=True
        )
        # FAISS operates on float32 matrices.
        self.embeddings = np.asarray(encoded, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the text of up to ``k`` pages closest to ``query``.

        Returns ``["No relevant documents found."]`` when no index has been
        built, when ``k`` is not positive, or when the search yields nothing.
        """
        if self.index is None or not self.documents or k <= 0:
            return ["No relevant documents found."]
        query_embedding = self._get_model().encode([query], show_progress_bar=False)
        # Never ask for more neighbours than there are pages.
        k = min(k, len(self.documents))
        _, neighbours = self.index.search(np.asarray(query_embedding, dtype="float32"), k)
        # FAISS marks missing neighbours with -1, which would otherwise index
        # the last page; keep only valid row numbers.
        results = [
            self.documents[i]["content"]
            for i in neighbours[0]
            if 0 <= i < len(self.documents)
        ]
        return results if results else ["No relevant documents found."]

# Module-level instance shared by the chat and PDF-loading callbacks below.
app = MyApp()

def preprocess_response(response: str) -> str:
    """Normalise whitespace and punctuation spacing, then add a supportive opener.

    Steps:
      1. Collapse every run of whitespace (including newlines) to one space
         and trim both ends.
      2. Remove the space left before commas and full stops.
      3. Prefix "I'm here to help. " unless the text already contains
         "sorry", "apologize" or "empathy" (case-insensitive substring match).

    Args:
        response: Raw model output.

    Returns:
        The cleaned text, possibly prefixed with the supportive opener.
    """
    # Collapse whitespace first so that "word  ," and "word\n," are reduced to
    # "word ," before the punctuation fix runs; doing it the other way round
    # leaves a stray space before the punctuation mark.
    response = " ".join(response.split())
    response = response.replace(" ,", ",").replace(" .", ".")

    lowered = response.lower()
    if not any(word in lowered for word in ("sorry", "apologize", "empathy")):
        response = "I'm here to help. " + response
    return response

def shorten_response(response: str) -> str:
    """Ask the module-level chat client for a shorter, supportive rewrite.

    The text is sent as the user turn, preceded by a fixed system instruction.
    The content of the first returned choice is stripped and returned.
    """
    instruction = {
        "role": "system",
        "content": "Greet, Shorten and refine this response in a supportive and empathetic manner.",
    }
    draft = {"role": "user", "content": response}
    completion = client.chat_completion(
        [instruction, draft],
        max_tokens=512,
        temperature=0.5,
        top_p=0.9,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()

def respond(message: str, history: List[Tuple[str, str]], system_message: str):
    """Answer one chat turn, optionally grounding it in the indexed PDFs.

    Args:
        message: The user's new message.
        history: Previous (user, assistant) turns; ``None`` is treated as empty.
            The list is extended in place with the new turn.
        system_message: System prompt placed at the start of the conversation.

    Returns:
        A ``(history, "")`` tuple: the updated chat history, and an empty
        string used to clear the input textbox.
    """
    if history is None:
        history = []
    # Nothing to answer: skip the two model calls and leave the chat unchanged.
    if not message or not message.strip():
        return history, ""

    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    # RAG - retrieve relevant pages only when the query suggests exercises or
    # specific information, and only once a vector index actually exists
    # (searching before any PDFs are indexed would fail).
    lowered = message.lower()
    wants_context = any(
        keyword in lowered
        for keyword in ["exercise", "technique", "information", "guide", "help", "how to"]
    )
    if wants_context and app.index is not None:
        retrieved_docs = app.search_documents(message)
        context = "\n".join(retrieved_docs)
        if context.strip():
            messages.append({"role": "system", "content": "Relevant documents: " + context})

    response = client.chat_completion(messages, max_tokens=1024, temperature=0.7, top_p=0.9)
    response_content = "".join(
        choice.message['content'] for choice in response.choices if 'content' in choice.message
    )

    # First a rule-based clean-up, then a second model call to shorten the text.
    polished_response = preprocess_response(response_content)
    shortened_response = shorten_response(polished_response)

    history.append((message, shortened_response))
    return history, ""

# UI definition: a chat window, a PDF uploader, the message and system-prompt
# inputs, and the Submit / Refresh buttons with their event wiring.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘‍♀️ **Dialectical Behaviour Therapy**")
    gr.Markdown(
        "‼️Disclaimer: This chatbot is based on a DBT exercise book that is publicly available. "
        "We are not medical practitioners, and the use of this chatbot is at your own responsibility."
    )

    chatbot = gr.Chatbot()

    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs (minimum 5)", file_count="multiple", type="file")
        txt_input = gr.Textbox(
            show_label=False,
            placeholder="Type your message here...",
            lines=1
        )
        system_input = gr.Textbox(
            label="System Message",
            placeholder="Enter a system message here...",
            lines=2
        )
        submit_btn = gr.Button("Submit", scale=1)
        refresh_btn = gr.Button("Refresh Chat", scale=1, variant="secondary")

    example_questions = [
        ["What are some ways to cope with stress using DBT?"],
        ["Can you guide me through a grounding exercise?"],
        ["How do I use DBT skills to handle intense emotions?"],
        ["What are some self-soothing techniques I can practice?"]
    ]

    gr.Examples(examples=example_questions, inputs=[txt_input])

    def load_and_build_pdfs(pdfs):
        """Index the uploaded PDFs; do nothing if there is nothing new to index.

        The handler is wired with no outputs, so it always returns ``None``.
        """
        # No upload yet: the File component passes None, which must not raise.
        if not pdfs:
            return
        # Accept both file wrappers exposing `.name` and plain path strings.
        file_paths = [getattr(pdf, "name", pdf) for pdf in pdfs]
        if len(file_paths) < 5:
            print("Please upload at least 5 PDFs.")
            return
        # This runs on every Submit; skip the expensive re-embedding when the
        # same paths are already indexed. (If the paths differ between
        # submits, this simply rebuilds as before.)
        indexed_files = {doc["file"] for doc in app.documents}
        if app.index is not None and indexed_files == set(file_paths):
            return
        app.load_pdfs(file_paths)
        app.build_vector_db()

    # Index the PDFs first, then answer, so the chat callback never runs
    # against a half-built index.
    submit_btn.click(fn=load_and_build_pdfs, inputs=[pdf_input], outputs=[]).then(
        fn=respond, inputs=[txt_input, chatbot, system_input], outputs=[chatbot, txt_input]
    )
    refresh_btn.click(lambda: [], None, chatbot)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()