Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
3 |
import os
|
|
|
|
|
|
|
4 |
|
5 |
|
6 |
# Retrieve the token from environment variables
|
@@ -25,50 +28,58 @@ bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words
|
|
25 |
# List of topics for the dropdown
|
26 |
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
|
27 |
|
|
|
|
|
|
|
28 |
def is_inappropriate_or_offtopic(message, selected_topics):
|
29 |
if any(bad_word in message.lower() for bad_word in bad_words):
|
30 |
return True
|
31 |
-
if selected_topics and not any(topic.lower() in message.lower() for topic in selected_topics if topic):
|
32 |
-
return True
|
33 |
-
return False
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
38 |
return True
|
|
|
39 |
return False
|
40 |
|
|
|
41 |
def generate_response(message, selected_topics):
|
|
|
|
|
|
|
|
|
|
|
42 |
if is_inappropriate_or_offtopic(message, selected_topics):
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
50 |
-
#response = f"Echo: {message}. Selected topics: {', '.join(selected_topics)}"
|
51 |
-
return response
|
52 |
|
53 |
|
54 |
def main():
|
55 |
with gr.Blocks() as demo:
|
56 |
gr.Markdown("### Child-Safe Chatbot BETA")
|
|
|
57 |
with gr.Row():
|
58 |
message_input = gr.Textbox(label="Your Message")
|
59 |
topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
|
60 |
submit_btn = gr.Button("Send")
|
61 |
response_output = gr.Textbox(label="Bot Response")
|
|
|
62 |
|
63 |
-
# Corrected to directly pass selected_topics without wrapping it in another list
|
64 |
submit_btn.click(
|
65 |
fn=generate_response,
|
66 |
inputs=[message_input, topics_dropdown],
|
67 |
-
outputs=response_output
|
68 |
)
|
69 |
|
70 |
-
demo.launch()
|
71 |
-
|
72 |
-
# Run the app
|
73 |
-
if __name__ == "__main__":
|
74 |
-
main()
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
3 |
import os
|
4 |
+
from bertopic import BERTopic
|
5 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
6 |
+
import numpy as np
|
7 |
|
8 |
|
9 |
# Retrieve the token from environment variables
|
|
|
# List of topics for the dropdown (multi-select in the Gradio UI; also used
# by is_inappropriate_or_offtopic to decide whether a message is on-topic).
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']

# Initialize BERTopic model.
# calculate_probabilities=True makes fit_transform also return per-topic
# probabilities; verbose=True logs fitting progress to stdout.
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
    """Return True when *message* contains a banned word or is off-topic.

    Args:
        message: Raw user message text.
        selected_topics: Topic names chosen in the UI dropdown; may be an
            empty list (or None) when the user selected nothing.

    Returns:
        True if the message should be blocked, False otherwise.
    """
    # Bad-word screen: case-insensitive substring match against the
    # module-level `bad_words` list (loaded from badwords.txt).
    lowered = message.lower()
    if any(bad_word in lowered for bad_word in bad_words):
        return True

    # With no topic selected there is nothing to be "off-topic" from, so
    # accept the message. Without this guard, `not any([])` below is True
    # and an empty selection rejected every message.
    if not selected_topics:
        return False

    # Derive topics for this message with BERTopic.
    # NOTE(review): fit_transform refits the model on a single document on
    # every call, which is slow and may fail for a one-document corpus —
    # consider fitting once on a corpus and calling transform() here.
    topics, _ = topic_model.fit_transform([message])

    # Collect the top representative word of each assigned topic. Topic -1
    # is BERTopic's outlier topic; get_topic() returns False for unknown
    # ids, so guard it before indexing [0][0].
    generated_topic_words = []
    for topic in topics:
        if topic == -1:
            continue
        topic_terms = topic_model.get_topic(topic)
        if topic_terms:
            generated_topic_words.append(topic_terms[0][0])

    # Off-topic when none of the selected topics appears among the words
    # BERTopic generated for the message (case-insensitive containment).
    haystack = ' '.join(generated_topic_words).lower()
    if not any(selected_topic.lower() in haystack for selected_topic in selected_topics):
        return True
    return False
def generate_response(message, selected_topics):
    """Produce the chatbot's reply and the topics identified in *message*.

    Args:
        message: Raw user message text.
        selected_topics: Topic names chosen in the UI dropdown (list of str).

    Returns:
        A (response, topic_names) pair: `response` is the bot's reply and
        `topic_names` is a comma-separated string of identified topic words
        (empty when no topic could be identified).
    """
    # Identify this message's topics so they can be displayed in the UI.
    # NOTE(review): this refits BERTopic on a single document, and
    # is_inappropriate_or_offtopic refits it again — fit once on a corpus
    # and reuse transform() to avoid the double fit per request.
    topics, probabilities = topic_model.fit_transform([message])

    # Top representative word per topic; skip the -1 outlier topic and
    # guard against get_topic() returning False for an unknown id, which
    # would make [0][0] raise.
    topic_names = []
    for topic in topics:
        if topic == -1:
            continue
        topic_terms = topic_model.get_topic(topic)
        if topic_terms:
            topic_names.append(topic_terms[0][0])

    if is_inappropriate_or_offtopic(message, selected_topics):
        # Blocked by the bad-word / off-topic screen.
        response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
    elif check_content(message):
        # check_content is defined elsewhere in this file — presumably a
        # secondary content filter; verify its contract against its definition.
        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
    else:
        # Ordinary reply: delegate to the causal language model.
        inputs = tokenizer.encode(message, return_tensors="pt")
        outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Two return values: wired to the response and topics Textboxes in main().
    return response, ", ".join(topic_names)
|
|
|
|
|
|
66 |
|
67 |
|
68 |
def main():
|
69 |
with gr.Blocks() as demo:
|
70 |
gr.Markdown("### Child-Safe Chatbot BETA")
|
71 |
+
gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
|
72 |
with gr.Row():
|
73 |
message_input = gr.Textbox(label="Your Message")
|
74 |
topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
|
75 |
submit_btn = gr.Button("Send")
|
76 |
response_output = gr.Textbox(label="Bot Response")
|
77 |
+
topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
|
78 |
|
|
|
79 |
submit_btn.click(
|
80 |
fn=generate_response,
|
81 |
inputs=[message_input, topics_dropdown],
|
82 |
+
outputs=[response_output, topics_output]
|
83 |
)
|
84 |
|
85 |
+
demo.launch()
|
|
|
|
|
|
|
|