peterkros committed (verified)
Commit b58de5b · Parent: 75e71dc

Update app.py

Files changed (1):
  1. app.py  +23 -23
app.py CHANGED
@@ -14,8 +14,6 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token
 model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=huggingface_token)
 
 
-# Load a content moderation pipeline
-moderation_pipeline = pipeline("text-classification", model="typeform/mobilebert-uncased-mnli")
 
 # Function to load bad words from a file
 def load_bad_words(filepath):
@@ -28,43 +26,45 @@ bad_words = load_bad_words('badwords.txt') # Adjust the path to your bad words
 # List of topics for the dropdown
 topics_list = ['Aviation', 'Science', 'Education', 'Air Force Pilot', 'Space Exploration', 'Technology']
 
-# Initialize BERTopic model
-topic_model = BERTopic(embedding_model="all-MiniLM-L6-v2", calculate_probabilities=True, verbose=True)
+# Load a pretrained BERTopic model
+topic_model = BERTopic.load("MaartenGr/BERTopic_Wikipedia")
 
 def is_inappropriate_or_offtopic(message, selected_topics):
-    if any(bad_word in message.lower() for bad_word in bad_words):
-        return True
+    # Assumes bad_words has been loaded above
 
-    # Generate topics from the message
-    topics, _ = topic_model.fit_transform([message])
+    topics, probabilities = topic_model.transform([message])
+    # -1 is BERTopic's outlier topic; keep only the relevant topics
+    relevant_topics = [topic for topic in topics if topic != -1]
 
-    # Check if any of the BERTopic-generated topics match the selected topics
-    generated_topic_words = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]  # Get top word for each topic
-    if not any(selected_topic.lower() in ' '.join(generated_topic_words).lower() for selected_topic in selected_topics):
-        return True
-
-    return False
-
+    # Compare the predicted topics against the selected topics
+    is_offtopic = True  # Default to off-topic until a matching topic is found
+    for topic_num in relevant_topics:
+        topic_info = topic_model.get_topic(topic_num)
+        if topic_info:
+            topic_keywords = [word for word, _ in topic_info]
+            if any(selected_topic.lower() in topic_keywords for selected_topic in selected_topics):
+                is_offtopic = False
+                break  # A single matching topic is enough to count as on-topic
+
+    return is_offtopic or any(bad_word in message.lower() for bad_word in bad_words)
 
 def generate_response(message, selected_topics):
-    # Identify BERTopic's topics from the message
-    topics, probabilities = topic_model.fit_transform([message])
-    # Get the names or representative words for the identified topics
-    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]  # Adjust as needed
-
+    # Check whether the message is inappropriate or off-topic
     if is_inappropriate_or_offtopic(message, selected_topics):
         response = "Sorry, let's try to keep our conversation focused on positive and relevant topics!"
-    elif check_content(message):
-        response = "I'm here to provide a safe and friendly conversation. Let's talk about something else."
     else:
         inputs = tokenizer.encode(message, return_tensors="pt")
         outputs = model.generate(inputs, max_length=50, do_sample=True)
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Return both the response and the identified topics as separate values
+    # Predict topics again for display (could be optimized by reusing the earlier prediction)
+    topics, _ = topic_model.transform([message])
+    topic_names = [topic_model.get_topic(topic)[0][0] for topic in topics if topic != -1]  # Top word for each topic
+
     return response, ", ".join(topic_names)
 
 
+
 def main():
     with gr.Blocks() as demo:
         gr.Markdown("### Child-Safe Chatbot | BETA")
 