# Hugging Face Space: Child-Safe Chatbot (BETA)
# (page-status header from the Spaces scrape removed to keep the file valid Python)
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

# Hugging Face access token for the gated Llama-2 checkpoint.
# NOTE(review): the env-var name 'LLAMA_ACCES_TOKEN' is misspelled ("ACCES"),
# but it is kept as-is because the deployment environment defines it under
# this exact name — renaming it here would silently break authentication.
huggingface_token = os.getenv('LLAMA_ACCES_TOKEN')

# Chat model: Llama-2 7B chat-tuned. Weights are downloaded on first run,
# which requires the token above and a working network connection.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)

# Content-moderation classifier.
# NOTE(review): this checkpoint is an MNLI (entailment) model, not a toxicity
# classifier — confirm it is the intended moderation model; its labels are
# entailment/neutral/contradiction, not safe/unsafe.
moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
# Function to load bad words from a file | |
def load_bad_words(filepath):
    """Read a newline-delimited bad-words file and return the words, lowercased.

    Blank and whitespace-only lines are skipped: an empty string would make
    the downstream substring test ``bad_word in message`` match EVERY message,
    flagging all input as inappropriate.

    Args:
        filepath: Path to a UTF-8 text file with one word (or phrase) per line.

    Returns:
        list[str]: Non-empty, stripped, lowercased entries in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return [word for word in (line.strip().lower() for line in file) if word]
# Load the bad-words list once at startup.
bad_words = load_bad_words('badwords.txt')  # Adjust the path to your bad words file

# Topics offered in the UI dropdown (users may select several).
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']

# BERTopic model used to infer topics from user messages.
# NOTE(review): it is (re)fitted per message elsewhere in this file, which is
# expensive — consider fitting once on a corpus and calling transform() instead.
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
    """Return True if *message* contains a bad word or is off the selected topics.

    Args:
        message: Raw user message text.
        selected_topics: Iterable of topic names chosen in the UI.

    Returns:
        bool: True when the message should be rejected, False otherwise.
    """
    # Empty entries are skipped defensively: "" would be a substring of any
    # message and flag everything.
    lowered = message.lower()
    if any(bad_word in lowered for bad_word in bad_words if bad_word):
        return True
    # NOTE(review): fit_transform re-fits BERTopic on a single document on
    # every call — expensive, and fitting on one doc often yields only the
    # outlier topic (-1); consider a pre-fitted model + transform().
    topics, _ = topic_model.fit_transform([message])
    # Top representative word for each non-outlier topic.
    generated_topic_words = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    # Hoist the join out of the any() loop so it runs once, not per topic.
    joined_words = ' '.join(generated_topic_words).lower()
    if not any(selected_topic.lower() in joined_words for selected_topic in selected_topics):
        return True
    return False
def _check_content(message):
    """Best-effort moderation check: True if the moderation pipeline flags *message*.

    Fixes a NameError in the original code, which called an undefined
    ``check_content`` while ``moderation_pipeline`` (loaded at module level)
    was never used — this helper wires the two together.

    NOTE(review): the loaded checkpoint is an MNLI model whose labels
    (entailment/neutral/contradiction) never match the flagged set below, so
    this effectively returns False until a real moderation model is plugged in.
    """
    try:
        result = moderation_pipeline(message)[0]
        return result.get("label", "").lower() in {"toxic", "offensive", "inappropriate"}
    except Exception:
        # Moderation is best-effort; never crash the chat on a pipeline error.
        return False


def generate_response(message, selected_topics):
    """Generate the chatbot reply and the topics identified in the message.

    Args:
        message: Raw user message text.
        selected_topics: Topic names selected in the UI dropdown.

    Returns:
        tuple[str, str]: (bot response, comma-joined identified topic words).
    """
    # Identify BERTopic's topics from the message.
    # NOTE(review): this re-fits the model, and is_inappropriate_or_offtopic
    # fits it AGAIN on the same message — duplicated heavy work.
    topics, probabilities = topic_model.fit_transform([message])
    # Top representative word for each non-outlier topic.
    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    if is_inappropriate_or_offtopic(message, selected_topics):
        response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
    elif _check_content(message):
        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
    else:
        # Plain greedy-ish sampling from the chat model; 50 tokens max.
        inputs = tokenizer.encode(message, return_tensors="pt")
        outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return both the response and the identified topics as separate values.
    return response, ", ".join(topic_names)
def main():
    """Build and launch the Gradio chat UI."""
    with gr.Blocks() as demo:
        gr.Markdown("### Child-Safe Chatbot | BETA")
        gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
        with gr.Row():
            message_input = gr.Textbox(label="Your Message")
            topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
        submit_btn = gr.Button("Send")
        response_output = gr.Textbox(label="Bot Response")
        topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
        # Wire the button to the generator; two outputs match the two returns.
        submit_btn.click(
            fn=generate_response,
            inputs=[message_input, topics_dropdown],
            outputs=[response_output, topics_output]
        )
    demo.launch()


# The original file defined main() but never called it, so the app never
# started. Guarded entry point added.
if __name__ == "__main__":
    main()