# NOTE(review): the lines below were Hugging Face Space page chrome captured by
# the scrape (Space status "Sleeping", file size 3,689 bytes, git blame commit
# hashes, and the line-number gutter). They are not program source; kept here
# as a comment so the module parses.
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
# Retrieve the token from environment variables
# NOTE(review): the env var name spells "ACCES" with one S — keep it in sync
# with the deployment's secret name; renaming here would break existing setups.
huggingface_token = os.getenv('LLAMA_ACCES_TOKEN')
# Use the token with from_pretrained
# Llama-2 is a gated model, so a valid access token is required to download it.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
# Load a content moderation pipeline
# NOTE(review): this checkpoint is an NLI (mnli) model, not a dedicated
# moderation model — confirm it actually serves the intended moderation role.
moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
# Function to load bad words from a file
def load_bad_words(filepath):
    """Load a lowercased bad-word list from *filepath* (one word per line).

    Blank lines are skipped. This fixes a real bug: a blank line in the file
    produced an empty string in the list, and ``'' in message`` is always
    True, so every message was flagged as inappropriate.

    Args:
        filepath: Path to a UTF-8 text file with one bad word per line.

    Returns:
        list[str]: Stripped, lowercased, non-empty words in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return [word for word in (line.strip().lower() for line in file) if word]
# Load bad words list
bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words file
# List of topics for the dropdown
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
# Initialize BERTopic model
# calculate_probabilities=True makes transform return per-topic probabilities;
# NOTE(review): the model is never fitted on a corpus here — every caller
# re-runs fit_transform on a single message, which is expensive; confirm intent.
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
    """Return True when *message* contains a banned word or matches none of
    the user's *selected_topics*; False when it is clean and on-topic.
    """
    lowered = message.lower()
    # Banned-word screen: substring match against the module-level bad_words list.
    if any(word in lowered for word in bad_words):
        return True
    # Derive topics for this single message and collect each topic's top word.
    topic_ids, _ = topic_model.fit_transform([message])
    top_words = [topic_model.get_topic(tid)[0][0] for tid in topic_ids if tid != -1]
    # On-topic when any selected topic appears within the joined top words.
    haystack = ' '.join(top_words).lower()
    matches_selection = any(choice.lower() in haystack for choice in selected_topics)
    return not matches_selection
def check_content(message):
    """Return True when the moderation pipeline flags *message* as unsafe.

    Fixes a real defect: the original file called ``check_content`` from
    ``generate_response`` without defining it anywhere, so that branch raised
    NameError at runtime.
    """
    # NOTE(review): typeform/mobilebert-uncased-mnli is an NLI checkpoint whose
    # labels (ENTAILMENT/NEUTRAL/CONTRADICTION) are not real moderation labels.
    # Treating a confident CONTRADICTION as a flag is a best guess — confirm
    # against the intended moderation policy.
    try:
        results = moderation_pipeline(message)
    except Exception:
        # Best-effort moderation: never crash the chat on a pipeline failure.
        return False
    return any(
        str(result.get('label', '')).upper() == 'CONTRADICTION'
        and result.get('score', 0.0) > 0.9
        for result in results
    )


def generate_response(message, selected_topics):
    """Generate a chatbot reply and the topics BERTopic identified.

    Args:
        message: The user's chat message.
        selected_topics: Topic names chosen in the UI dropdown.

    Returns:
        tuple[str, str]: (response text, comma-separated identified topic words).
    """
    # Identify BERTopic's topics from the message.
    # NOTE(review): fit_transform re-fits the model on one document per call,
    # and runs AGAIN inside is_inappropriate_or_offtopic — expensive; consider
    # fitting once on a corpus and using transform() here.
    topics, probabilities = topic_model.fit_transform([message])
    # Top representative word for each identified (non-outlier, != -1) topic.
    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    if is_inappropriate_or_offtopic(message, selected_topics):
        response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
    elif check_content(message):
        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
    else:
        # Clean and on-topic: let the Llama-2 chat model produce the reply.
        inputs = tokenizer.encode(message, return_tensors="pt")
        outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return both the response and the identified topics as separate values
    return response, ", ".join(topic_names)
def main():
    """Build and launch the Gradio chat UI for the child-safe chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("### Child-Safe Chatbot | BETA")
        gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
        with gr.Row():
            message_input = gr.Textbox(label="Your Message")
            topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
        submit_btn = gr.Button("Send")
        response_output = gr.Textbox(label="Bot Response")
        topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
        # Wire the button: generate_response returns (reply, topic names),
        # which map onto the two output textboxes in order.
        submit_btn.click(
            fn=generate_response,
            inputs=[message_input, topics_dropdown],
            outputs=[response_output, topics_output]
        )
    # Fixed: the original line ended with a stray ' |' (scrape artifact) that
    # made this a SyntaxError.
    demo.launch()