# NOTE(review): the lines below were Hugging Face Space page chrome captured by
# the scrape (Space status "Sleeping", file size 3,689 bytes, git blame commit
# hashes, and the line-number gutter). They are not program source; kept here
# as a comment so the module parses.
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
# Retrieve the token from environment variables
# NOTE(review): the env var name spells "ACCES" with one S — keep it in sync
# with the deployment's secret name; renaming here would break existing setups.
huggingface_token = os.getenv('LLAMA_ACCES_TOKEN')
# Use the token with from_pretrained
# Llama-2 is a gated model, so a valid access token is required to download it.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
# Load a content moderation pipeline
# NOTE(review): this checkpoint is an NLI (mnli) model, not a dedicated
# moderation model — confirm it actually serves the intended moderation role.
moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
# Function to load bad words from a file
def load_bad_words(filepath):
    """Load a lowercased bad-word list from *filepath* (one word per line).

    Blank lines are skipped. This fixes a real bug: a blank line in the file
    produced an empty string in the list, and ``'' in message`` is always
    True, so every message was flagged as inappropriate.

    Args:
        filepath: Path to a UTF-8 text file with one bad word per line.

    Returns:
        list[str]: Stripped, lowercased, non-empty words in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return [word for word in (line.strip().lower() for line in file) if word]
# Load bad words list
bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words file
# List of topics for the dropdown
topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
# Initialize BERTopic model
# calculate_probabilities=True makes transform return per-topic probabilities;
# NOTE(review): the model is never fitted on a corpus here — every caller
# re-runs fit_transform on a single message, which is expensive; confirm intent.
topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
def is_inappropriate_or_offtopic(message, selected_topics):
    """Return True when *message* contains a banned word or matches none of
    the user's *selected_topics*; False when it is clean and on-topic.
    """
    lowered = message.lower()
    # Banned-word screen: substring match against the module-level bad_words list.
    if any(word in lowered for word in bad_words):
        return True
    # Derive topics for this single message and collect each topic's top word.
    topic_ids, _ = topic_model.fit_transform([message])
    top_words = [topic_model.get_topic(tid)[0][0] for tid in topic_ids if tid != -1]
    # On-topic when any selected topic appears within the joined top words.
    haystack = ' '.join(top_words).lower()
    matches_selection = any(choice.lower() in haystack for choice in selected_topics)
    return not matches_selection
def check_content(message):
    """Return True when the moderation pipeline flags *message* as unsafe.

    Fixes a real defect: the original file called ``check_content`` from
    ``generate_response`` without defining it anywhere, so that branch raised
    NameError at runtime.
    """
    # NOTE(review): typeform/mobilebert-uncased-mnli is an NLI checkpoint whose
    # labels (ENTAILMENT/NEUTRAL/CONTRADICTION) are not real moderation labels.
    # Treating a confident CONTRADICTION as a flag is a best guess — confirm
    # against the intended moderation policy.
    try:
        results = moderation_pipeline(message)
    except Exception:
        # Best-effort moderation: never crash the chat on a pipeline failure.
        return False
    return any(
        str(result.get('label', '')).upper() == 'CONTRADICTION'
        and result.get('score', 0.0) > 0.9
        for result in results
    )


def generate_response(message, selected_topics):
    """Generate a chatbot reply and the topics BERTopic identified.

    Args:
        message: The user's chat message.
        selected_topics: Topic names chosen in the UI dropdown.

    Returns:
        tuple[str, str]: (response text, comma-separated identified topic words).
    """
    # Identify BERTopic's topics from the message.
    # NOTE(review): fit_transform re-fits the model on one document per call,
    # and runs AGAIN inside is_inappropriate_or_offtopic — expensive; consider
    # fitting once on a corpus and using transform() here.
    topics, probabilities = topic_model.fit_transform([message])
    # Top representative word for each identified (non-outlier, != -1) topic.
    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]
    if is_inappropriate_or_offtopic(message, selected_topics):
        response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
    elif check_content(message):
        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
    else:
        # Clean and on-topic: let the Llama-2 chat model produce the reply.
        inputs = tokenizer.encode(message, return_tensors="pt")
        outputs = model.generate(inputs, max_length=50, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return both the response and the identified topics as separate values
    return response, ", ".join(topic_names)
def main():
    """Build and launch the Gradio chat UI for the child-safe chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("### Child-Safe Chatbot | BETA")
        gr.Markdown("This chatbot uses BERTopic to identify topics in your messages and ensures the conversation stays relevant.")
        with gr.Row():
            message_input = gr.Textbox(label="Your Message")
            topics_dropdown = gr.Dropdown(choices=topics_list, label="Select Topics", multiselect=True)
        submit_btn = gr.Button("Send")
        response_output = gr.Textbox(label="Bot Response")
        topics_output = gr.Textbox(label="Identified Topics", placeholder="Topics will be displayed here...")
        # Wire the button: generate_response returns (reply, topic names),
        # which map onto the two output textboxes in order.
        submit_btn.click(
            fn=generate_response,
            inputs=[message_input, topics_dropdown],
            outputs=[response_output, topics_output]
        )
    # Fixed: the original line ended with a stray ' |' (scrape artifact) that
    # made this a SyntaxError.
    demo.launch()