# Source provenance: peterkros — "Update app.py", commit 75e71dc (3.69 kB)
# (Hugging Face file-viewer header converted to a comment so the file parses.)
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
# Retrieve the token from environment variables
huggingface_token = os.getenv('LLAMA_ACCES_TOKEN')
# Use the token with from_pretrained
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
# Load a content moderation pipeline
moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
# Function to load bad words from a file
def load_bad_words(filepath):
with open(filepath, 'r', encoding='utf-8') as file:
return [line.strip().lower() for line in file]
# Load bad words list
bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words file
# List of topics for the dropdown
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
# Initialize BERTopic model
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
if any(bad_word in message.lower() for bad_word in bad_words):
return True
# Generate topics from the message
topics, _ = topic_model.fit_transform([message])
# Check if any of the BERTopic-generated topics match the selected topics
generated_topic_words = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Get top word for each topic
if not any(selected_topic.lower() in ' '.join(generated_topic_words).lower() for selected_topic in selected_topics):
return True
return False
def generate_response(message, selected_topics):
# Identify BERTopic's topics from the message
topics, probabilities = topic_model.fit_transform([message])
# Get the names or representative words for the identified topics
topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1] # Adjust as needed
if is_inappropriate_or_offtopic(message, selected_topics):
response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
elif check_content(message):
response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
else:
inputs = tokenizer.encode(message, return_tensors="pt")
outputs = model.generate(inputs, max_length=50, do_sample=True)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Return both the response and the identified topics as separate values
return response, ", ".join(topic_names)
def main():
with gr.Blocks() as demo:
gr.Markdown("### Child-Safe Chatbot | BETA")
gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
with gr.Row():
message_input = gr.Textbox(label="Your Message")
topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
submit_btn = gr.Button("Send")
response_output = gr.Textbox(label="Bot Response")
topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
submit_btn.click(
fn=generate_response,
inputs=[message_input, topics_dropdown],
outputs=[response_output, topics_output]
)
demo.launch()