Update app.py
app.py CHANGED
@@ -14,8 +14,6 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token
 model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
 
 
-# Load a content moderation pipeline
-moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
 
 # Function to load bad words from a file
 def load_bad_words(filepath):
@@ -28,43 +26,45 @@ bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words
 # List of topics for the dropdown
 topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
 
-#
-topic_model = BERTopic(
+#Load BerTopic model
+topic_model = BERTopic.load("MaartenGr/BERTopic_Wikipedia")
 
 def is_inappropriate_or_offtopic(message, selected_topics):
-
-    return True
+    # Assume bad_words loading and check_content function are defined here
 
-
-
+    topics, probabilities = topic_model.transform([message])
+    # Assuming -1 is the outlier class, filter relevant topics
+    relevant_topics = [topic for topic in topics if topic != -1]
 
-#
-
-
-
-
-
-
+    # Compare generated topics against selected topics
+    is_offtopic = True # Default to True, prove it's on topic
+    for topic_num in relevant_topics:
+        topic_info = topic_model.get_topic(topic_num)
+        if topic_info:
+            topic_keywords = [word for word, _ in topic_info]
+            if any(selected_topic.lower() in topic_keywords for selected_topic in selected_topics):
+                is_offtopic = False
+                break # If any one of the topics matches, it's not off-topic
+
+    return is_offtopic or any(bad_word in message.lower() for bad_word in bad_words)
 
 def generate_response(message, selected_topics):
-    #
-    topics, probabilities = topic_model.fit_transform([message])
-    # Get the names or representative words for the identified topics
-    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Adjust as needed
-
+    # Checks if the message is inappropriate or off-topic
     if is_inappropriate_or_offtopic(message, selected_topics):
         response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
-    elif check_content(message):
-        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
     else:
         inputs = tokenizer.encode(message, return_tensors="pt")
         outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    #
+    # Predict topics again to display them (could optimize by reusing earlier prediction)
+    topics, _ = topic_model.transform([message])
+    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Top word for each topic
+
    return response, ", ".join(topic_names)
 
 
+
 def main():
     with gr.Blocks() as demo:
         gr.Markdown("### Child-Safe Chatbot | BETA")
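For context, below is a minimal, self-contained sketch of the topic gate this commit introduces. The model ID "MaartenGr/BERTopic_Wikipedia", the outlier filtering, and the keyword-matching rule come from the diff above; the sample message, selected topics, and bad-word list are illustrative stand-ins, and loading the pretrained model requires the bertopic package plus its embedding backend.

# Sketch of the committed gate; bad_words and the example inputs are
# illustrative stand-ins, not values from the Space.
from bertopic import BERTopic

topic_model = BERTopic.load("MaartenGr/BERTopic_Wikipedia")
bad_words = {"badword"}  # stand-in for the contents of badwords.txt

def is_inappropriate_or_offtopic(message, selected_topics):
    # transform() returns (topic ids, probabilities); -1 is the outlier class
    topics, _ = topic_model.transform([message])
    is_offtopic = True  # assume off-topic until a topic keyword matches
    for topic_num in (t for t in topics if t != -1):
        topic_info = topic_model.get_topic(topic_num)  # [(word, weight), ...]
        if topic_info:
            keywords = [word for word, _ in topic_info]
            if any(sel.lower() in keywords for sel in selected_topics):
                is_offtopic = False
                break
    return is_offtopic or any(w in message.lower() for w in bad_words)

print(is_inappropriate_or_offtopic("How do rockets reach orbit?", ["Science"]))

One caveat in the committed logic: get_topic() yields single-word keywords, so multi-word dropdown entries such as 'Air Force Pilot' or 'Space Exploration' can never equal a keyword and will always be treated as off-topic; splitting the selected topics into individual words before matching would avoid that.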