# Hugging Face Space: Child-Safe Chatbot (BETA)
# (page-status header from the Spaces scrape removed to keep the file valid Python)
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

# Hugging Face access token for the gated Llama-2 checkpoint.
# NOTE(review): the env-var name 'LLAMA_ACCES_TOKEN' is misspelled ("ACCES"),
# but it is kept as-is because the deployment environment defines it under
# this exact name — renaming it here would silently break authentication.
huggingface_token = os.getenv('LLAMA_ACCES_TOKEN')

# Chat model: Llama-2 7B chat-tuned. Weights are downloaded on first run,
# which requires the token above and a working network connection.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)

# Content-moderation classifier.
# NOTE(review): this checkpoint is an MNLI (entailment) model, not a toxicity
# classifier — confirm it is the intended moderation model; its labels are
# entailment/neutral/contradiction, not safe/unsafe.
moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
# Function to load bad words from a file | |
def load_bad_words(filepath):
    """Read a newline-delimited bad-words file and return the words, lowercased.

    Blank and whitespace-only lines are skipped: an empty string would make
    the downstream substring test ``bad_word in message`` match EVERY message,
    flagging all input as inappropriate.

    Args:
        filepath: Path to a UTF-8 text file with one word (or phrase) per line.

    Returns:
        list[str]: Non-empty, stripped, lowercased entries in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return [word for word in (line.strip().lower() for line in file) if word]
# Load the bad-words list once at startup.
bad_words = load_bad_words('badwords.txt')  # Adjust the path to your bad words file

# Topics offered in the UI dropdown (users may select several).
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']

# BERTopic model used to infer topics from user messages.
# NOTE(review): it is (re)fitted per message elsewhere in this file, which is
# expensive — consider fitting once on a corpus and calling transform() instead.
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
    """Return True if *message* contains a bad word or is off the selected topics.

    Args:
        message: Raw user message text.
        selected_topics: Iterable of topic names chosen in the UI.

    Returns:
        bool: True when the message should be rejected, False otherwise.
    """
    # Empty entries are skipped defensively: "" would be a substring of any
    # message and flag everything.
    lowered = message.lower()
    if any(bad_word in lowered for bad_word in bad_words if bad_word):
        return True
    # NOTE(review): fit_transform re-fits BERTopic on a single document on
    # every call — expensive, and fitting on one doc often yields only the
    # outlier topic (-1); consider a pre-fitted model + transform().
    topics, _ = topic_model.fit_transform([message])
    # Top representative word for each non-outlier topic.
    generated_topic_words = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    # Hoist the join out of the any() loop so it runs once, not per topic.
    joined_words = ' '.join(generated_topic_words).lower()
    if not any(selected_topic.lower() in joined_words for selected_topic in selected_topics):
        return True
    return False
def _check_content(message):
    """Best-effort moderation check: True if the moderation pipeline flags *message*.

    Fixes a NameError in the original code, which called an undefined
    ``check_content`` while ``moderation_pipeline`` (loaded at module level)
    was never used — this helper wires the two together.

    NOTE(review): the loaded checkpoint is an MNLI model whose labels
    (entailment/neutral/contradiction) never match the flagged set below, so
    this effectively returns False until a real moderation model is plugged in.
    """
    try:
        result = moderation_pipeline(message)[0]
        return result.get("label", "").lower() in {"toxic", "offensive", "inappropriate"}
    except Exception:
        # Moderation is best-effort; never crash the chat on a pipeline error.
        return False


def generate_response(message, selected_topics):
    """Generate the chatbot reply and the topics identified in the message.

    Args:
        message: Raw user message text.
        selected_topics: Topic names selected in the UI dropdown.

    Returns:
        tuple[str, str]: (bot response, comma-joined identified topic words).
    """
    # Identify BERTopic's topics from the message.
    # NOTE(review): this re-fits the model, and is_inappropriate_or_offtopic
    # fits it AGAIN on the same message — duplicated heavy work.
    topics, probabilities = topic_model.fit_transform([message])
    # Top representative word for each non-outlier topic.
    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    if is_inappropriate_or_offtopic(message, selected_topics):
        response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
    elif _check_content(message):
        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
    else:
        # Plain greedy-ish sampling from the chat model; 50 tokens max.
        inputs = tokenizer.encode(message, return_tensors="pt")
        outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return both the response and the identified topics as separate values.
    return response, ", ".join(topic_names)
def main():
    """Build and launch the Gradio chat UI."""
    with gr.Blocks() as demo:
        gr.Markdown("### Child-Safe Chatbot | BETA")
        gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
        with gr.Row():
            message_input = gr.Textbox(label="Your Message")
            topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
        submit_btn = gr.Button("Send")
        response_output = gr.Textbox(label="Bot Response")
        topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
        # Wire the button to the generator; two outputs match the two returns.
        submit_btn.click(
            fn=generate_response,
            inputs=[message_input, topics_dropdown],
            outputs=[response_output, topics_output]
        )
    demo.launch()


# The original file defined main() but never called it, so the app never
# started. Guarded entry point added.
if __name__ == "__main__":
    main()