import logging
import os
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import PyPDF2

# Set up logging: write logs to a writable directory (/tmp)
log_file_path = "/tmp/support_bot_log.txt"
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')

def flush_logs():
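    """Flushes all log handlers so the downloadable log file stays up to date."""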
    for handler in logging.getLogger().handlers:
        handler.flush()

class SupportBotAgent:
    def __init__(self, document_path):
        # Load a pre-trained question-answering model
        self.qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
        # Set up an embedding model for finding relevant sections
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
        # Load the document text and split it into sections (by paragraph),
        # dropping empty chunks so they don't end up in the embedding index
        self.document_text = self.load_document(document_path)
        self.sections = [s.strip() for s in self.document_text.split('\n\n') if s.strip()]
        # Log document length for debugging
        logging.info(f"Document length: {len(self.document_text)} characters")
        flush_logs()
        # Create embeddings for all sections
        self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
        logging.info(f"Loaded document: {document_path}")
        flush_logs()

    def load_document(self, path):
        """Loads and extracts text from a TXT or PDF file."""
        if path.lower().endswith(".txt"):
            file_type = "Text File"
            with open(path, 'r', encoding='utf-8') as file:
                text = file.read()
        elif path.lower().endswith(".pdf"):
            file_type = "PDF File"
            text = ""
            with open(path, "rb") as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        else:
            file_type = "Unsupported Format"
            logging.error(f"Unsupported file format: {path}")
            flush_logs()
            raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
        logging.info(f"Loaded {file_type}: {path}")
        flush_logs()
        return text

    def find_relevant_section(self, query):
        """
        Uses semantic similarity first, falling back to keyword search if needed.
        Returns None when no section matches either way.
        """
        stopwords = {"and", "the", "is", "for", "to", "a", "an", "of", "in", "on", "at", "with", "by", "it", "as", "so", "what"}
        query_embedding = self.embedder.encode(query, convert_to_tensor=True)
        similarities = util.cos_sim(query_embedding, self.section_embeddings)[0]
        best_idx = similarities.argmax().item()
        best_section = self.sections[best_idx]
        similarity_score = similarities[best_idx].item()
        SIMILARITY_THRESHOLD = 0.4  # Adjust if needed

        if similarity_score >= SIMILARITY_THRESHOLD:
            logging.info(f"Found relevant section using embeddings for query: {query} (score: {similarity_score})")
            flush_logs()
            return best_section

        logging.info(f"Low similarity ({similarity_score}) for query: {query}. Falling back to keyword search.")
        flush_logs()
        query_words = {word for word in query.lower().split() if word not in stopwords}
        for section in self.sections:
            section_words = {word for word in section.lower().split() if word not in stopwords}
            common_words = query_words.intersection(section_words)
            if len(common_words) >= 2:
                logging.info(f"Keyword match for query: {query} with common words: {common_words}")
                flush_logs()
                return section

        logging.info("No good keyword match found. Returning default response.")
        flush_logs()
        return "I don’t have enough information to answer that."

    def answer_query(self, query):
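        """Answers a query; returns a fallback message when no relevant section is found."""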
        context = self.find_relevant_section(query)
        if not context:
            answer = "I don’t have enough information to answer that."
        else:
            result = self.qa_model(question=query, context=context, max_answer_len=50)
            answer = result["answer"]
        logging.info(f"Answer for query '{query}': {answer}")
        flush_logs()
        return answer

    def adjust_response(self, query, response, feedback):
        """Adjusts the response based on feedback."""
        if feedback == "too vague":
            context = self.find_relevant_section(query)
            adjusted_response = f"{response}\n\n(More details:\n{context[:500]}...)"
        elif feedback == "not helpful":
            adjusted_response = self.answer_query(query + " Please provide more detailed information with examples.")
        else:
            adjusted_response = response
        logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
        flush_logs()
        return adjusted_response
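
# A minimal sketch of programmatic use (outside the Gradio UI), assuming a FAQ
# document exists at the hypothetical path /tmp/faq.txt:
#
#     agent = SupportBotAgent("/tmp/faq.txt")
#     print(agent.answer_query("How do I reset my password?"))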

# --- Gradio Functions and App Workflow ---

def process_file(file, state):
    """Handles file upload and initializes the SupportBotAgent."""
    logging.info("Received file upload request")
    flush_logs()
    if file is None:
        logging.info("No file uploaded")
        flush_logs()
        return [("Bot", "Please upload a TXT or PDF file.")], state

    # Save the uploaded file to /tmp. Handle both file objects and NamedString.
    # Use the basename: os.path.join ignores "/tmp" when file.name is an absolute path.
    temp_path = os.path.join("/tmp", os.path.basename(file.name))
    with open(temp_path, "wb") as f:
        if hasattr(file, "read"):
            content = file.read()
        else:
            content = file
        if isinstance(content, str):
            content = content.encode("utf-8")
        f.write(content)
    logging.info(f"Saved uploaded file to {temp_path} (size: {os.path.getsize(temp_path)} bytes)")
    flush_logs()

    try:
        state["agent"] = SupportBotAgent(temp_path)
    except Exception as e:
        logging.error(f"Error processing file: {str(e)}")
        flush_logs()
        return [("Bot", f"Error processing file: {str(e)}")], state

    state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
    state["mode"] = "query"
    state["last_query"] = ""
    state["last_answer"] = ""
    state["feedback_count"] = 0
    return state["chat_history"], state

def process_input(user_input, state):
    """
    Processes user input as either a query or feedback, depending on the current mode.
    Typing 'exit' ends the session.
    """
    if state.get("mode", "query") == "ended":
        return state["chat_history"], state

    # Guard against queries sent before a document has been uploaded
    if "agent" not in state:
        return [("Bot", "Please upload a TXT or PDF file first.")], state

    if user_input.lower() == "exit":
        state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
        state["mode"] = "ended"
        flush_logs()
        return state["chat_history"], state

    if state["mode"] == "query":
        state["last_query"] = user_input
        answer = state["agent"].answer_query(user_input)
        state["last_answer"] = answer
        state["feedback_count"] = 0
        state["chat_history"].append(("User", user_input))
        state["chat_history"].append(("Bot", f"Answer: {answer}\nPlease provide feedback (good, too vague, not helpful):"))
        state["mode"] = "feedback"
    elif state["mode"] == "feedback":
        feedback = user_input.lower()
        state["chat_history"].append(("User", feedback))
        if feedback == "good" or state["feedback_count"] >= 1:
            state["chat_history"].append(("Bot", "Thank you for your feedback. Enter your next query (or type 'exit' to end):"))
            state["mode"] = "query"
        else:
            new_answer = state["agent"].adjust_response(state["last_query"], state["last_answer"], feedback)
            state["last_answer"] = new_answer
            state["feedback_count"] += 1
            state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
    flush_logs()
    return state["chat_history"], state

# --- Gradio UI Setup ---

with gr.Blocks() as demo:
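    # Per-session state: mode ("idle" / "query" / "feedback" / "ended"), plus agent,
    # chat_history, last_query, last_answer, and feedback_count once a file is loaded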
    state = gr.State({"mode": "idle"})
    gr.Markdown("## Customer Support Bot with Document Training")
    file_upload = gr.File(label="Upload TXT or PDF file")
    chat = gr.Chatbot()
    user_input = gr.Textbox(label="Enter your query or feedback")
    submit_btn = gr.Button("Submit")
    # Provide a file component to download the log file
    log_file = gr.File(label="Download Log File", file_count="single", interactive=False, value=log_file_path)

    file_upload.upload(process_file, inputs=[file_upload, state], outputs=[chat, state])
    submit_btn.click(process_input, inputs=[user_input, state], outputs=[chat, state]).then(lambda: "", None, user_input)

demo.launch(share=True)