Update app.py
app.py CHANGED
@@ -14,8 +14,6 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token
 model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
 
 
-# Load a content moderation pipeline
-moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
 
 # Function to load bad words from a file
 def load_bad_words(filepath):
@@ -28,43 +26,45 @@ bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words
 # List of topics for the dropdown
 topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
 
-#
-topic_model = BERTopic(
+#Load BerTopic model
+topic_model = BERTopic.load("MaartenGr/BERTopic_Wikipedia")
 
 def is_inappropriate_or_offtopic(message, selected_topics):
-
-    return True
+    # Assume bad_words loading and check_content function are defined here
 
-
-
+    topics, probabilities = topic_model.transform([message])
+    # Assuming -1 is the outlier class, filter relevant topics
+    relevant_topics = [topic for topic in topics if topic != -1]
 
-#
-
-
-
-
-
-
+    # Compare generated topics against selected topics
+    is_offtopic = True # Default to True, prove it's on topic
+    for topic_num in relevant_topics:
+        topic_info = topic_model.get_topic(topic_num)
+        if topic_info:
+            topic_keywords = [word for word, _ in topic_info]
+            if any(selected_topic.lower() in topic_keywords for selected_topic in selected_topics):
+                is_offtopic = False
+                break # If any one of the topics matches, it's not off-topic
+
+    return is_offtopic or any(bad_word in message.lower() for bad_word in bad_words)
 
 def generate_response(message, selected_topics):
-    #
-    topics, probabilities = topic_model.fit_transform([message])
-    # Get the names or representative words for the identified topics
-    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Adjust as needed
-
+    # Checks if the message is inappropriate or off-topic
     if is_inappropriate_or_offtopic(message, selected_topics):
         response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
-    elif check_content(message):
-        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
     else:
         inputs = tokenizer.encode(message, return_tensors="pt")
         outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    #
+    # Predict topics again to display them (could optimize by reusing earlier prediction)
+    topics, _ = topic_model.transform([message])
+    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Top word for each topic
+
    return response, ", ".join(topic_names)
 
 
+
 def main():
     with gr.Blocks() as demo:
         gr.Markdown("### Child-Safe Chatbot | BETA")
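For context, below is a minimal, self-contained sketch of the topic gate this commit introduces. The model ID "MaartenGr/BERTopic_Wikipedia", the outlier filtering, and the keyword-matching rule come from the diff above; the sample message, selected topics, and bad-word list are illustrative stand-ins, and loading the pretrained model requires the bertopic package plus its embedding backend.

# Sketch of the committed gate; bad_words and the example inputs are
# illustrative stand-ins, not values from the Space.
from bertopic import BERTopic

topic_model = BERTopic.load("MaartenGr/BERTopic_Wikipedia")
bad_words = {"badword"}  # stand-in for the contents of badwords.txt

def is_inappropriate_or_offtopic(message, selected_topics):
    # transform() returns (topic ids, probabilities); -1 is the outlier class
    topics, _ = topic_model.transform([message])
    is_offtopic = True  # assume off-topic until a topic keyword matches
    for topic_num in (t for t in topics if t != -1):
        topic_info = topic_model.get_topic(topic_num)  # [(word, weight), ...]
        if topic_info:
            keywords = [word for word, _ in topic_info]
            if any(sel.lower() in keywords for sel in selected_topics):
                is_offtopic = False
                break
    return is_offtopic or any(w in message.lower() for w in bad_words)

print(is_inappropriate_or_offtopic("How do rockets reach orbit?", ["Science"]))

One caveat in the committed logic: get_topic() yields single-word keywords, so multi-word dropdown entries such as 'Air Force Pilot' or 'Space Exploration' can never equal a keyword and will always be treated as off-topic; splitting the selected topics into individual words before matching would avoid that.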