aditijuluri committed on
Commit
12c74ba
·
verified ·
1 Parent(s): 0abe804

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -7,11 +7,11 @@ import random
7
  # Load model for chat
8
  client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
9
 
10
- # Read the TV show database
11
  with open("reconext_file.txt", "r", encoding="utf-8") as file:
12
  reconext_file_text = file.read()
13
 
14
- # Preprocess text chunks
15
  def preprocess_text(text):
16
  cleaned_text = text.strip()
17
  chunks = cleaned_text.split("\n")
@@ -24,7 +24,7 @@ def preprocess_text(text):
24
 
25
  cleaned_chunks = preprocess_text(reconext_file_text)
26
 
27
- # Create embeddings
28
  model = SentenceTransformer('all-MiniLM-L6-v2')
29
 
30
  def create_embeddings(text_chunks):
@@ -33,7 +33,7 @@ def create_embeddings(text_chunks):
33
 
34
  chunk_embeddings = create_embeddings(cleaned_chunks)
35
 
36
- # Semantic search function
37
  def get_top_chunks(query, chunk_embeddings, text_chunks):
38
  query_embedding = model.encode(query, convert_to_tensor=True)
39
  query_embedding_normalized = query_embedding / query_embedding.norm()
@@ -43,17 +43,18 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
43
  top_chunks = [text_chunks[i] for i in top_indices]
44
  return top_chunks
45
 
46
- # Chat response function
47
  def respond(message, history):
48
  best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
49
  str_watch_chunks = "\n".join(best_next_watch)
 
50
  messages = [
51
  {
52
  "role": "system",
53
  "content": (
54
  "You are a Gen Z and Gen Alpha-friendly chatbot that helps teenagers find their next best TV show to watch. "
55
- "Speak naturally and casually, like someone from Gen Z. Only recommend TV shows, never movies. Use only the shows in our database "
56
- "YOU CAN NEVER USE OUTSIDE DATA ONLY TAKE DATA FROM OUR DATABASE! Match show suggestions to the user's age using TV ratings: "
57
  "TV-G is for all ages, TV-PG is for ages 6 and up, TV-14 is for 14 and up, and TV-MA is for 18 and up. "
58
  "If they don’t share their age, assume they’re Gen Z or Gen Alpha and use those guidelines. "
59
  "If the user is not Gen Z or Gen Alpha, you can recommend any show from the database. "
@@ -65,18 +66,21 @@ def respond(message, history):
65
  )
66
  }
67
  ]
 
68
  if history:
69
  messages.extend(history)
 
70
  messages.append({"role": "user", "content": message})
71
 
72
  response = client.chat_completion(
73
  messages, max_tokens=700, temperature=1.3, top_p=0.6
74
  )
 
75
  return response['choices'][0]['message']['content'].strip()
76
 
77
  # Initial chatbot message
78
  initial_message = [("🤖", "Hey! I’m your Gen-Z watch buddy.\nI help you find your next favorite TV show based on what you like, your age, or your favorite genre. Just tell me what you're into!")]
79
 
80
- # Gradio chat interface
81
  chatbot = gr.ChatInterface(respond, type="messages", chatbot=initial_message)
82
  chatbot.launch()
 
7
  # Load model for chat
8
  client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
9
 
10
+ # Step 2: Load TV show database
11
  with open("reconext_file.txt", "r", encoding="utf-8") as file:
12
  reconext_file_text = file.read()
13
 
14
+ # Step 3: Preprocess the text
15
  def preprocess_text(text):
16
  cleaned_text = text.strip()
17
  chunks = cleaned_text.split("\n")
 
24
 
25
  cleaned_chunks = preprocess_text(reconext_file_text)
26
 
27
+ # Step 4: Create embeddings
28
  model = SentenceTransformer('all-MiniLM-L6-v2')
29
 
30
  def create_embeddings(text_chunks):
 
33
 
34
  chunk_embeddings = create_embeddings(cleaned_chunks)
35
 
36
+ # Step 5: Semantic search
37
  def get_top_chunks(query, chunk_embeddings, text_chunks):
38
  query_embedding = model.encode(query, convert_to_tensor=True)
39
  query_embedding_normalized = query_embedding / query_embedding.norm()
 
43
  top_chunks = [text_chunks[i] for i in top_indices]
44
  return top_chunks
45
 
46
+ # Response function
47
  def respond(message, history):
48
  best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
49
  str_watch_chunks = "\n".join(best_next_watch)
50
+
51
  messages = [
52
  {
53
  "role": "system",
54
  "content": (
55
  "You are a Gen Z and Gen Alpha-friendly chatbot that helps teenagers find their next best TV show to watch. "
56
+ "Speak naturally and casually, like someone from Gen Z. Only recommend TV shows, never movies. Use only the shows in our database. "
57
+ "YOU CAN NEVER USE OUTSIDE DATA ONLY TAKE DATA FROM OUR DATABASE! Match show suggestions to the user's age using TV ratings: "
58
  "TV-G is for all ages, TV-PG is for ages 6 and up, TV-14 is for 14 and up, and TV-MA is for 18 and up. "
59
  "If they don’t share their age, assume they’re Gen Z or Gen Alpha and use those guidelines. "
60
  "If the user is not Gen Z or Gen Alpha, you can recommend any show from the database. "
 
66
  )
67
  }
68
  ]
69
+
70
  if history:
71
  messages.extend(history)
72
+
73
  messages.append({"role": "user", "content": message})
74
 
75
  response = client.chat_completion(
76
  messages, max_tokens=700, temperature=1.3, top_p=0.6
77
  )
78
+
79
  return response['choices'][0]['message']['content'].strip()
80
 
81
  # Initial chatbot message
82
  initial_message = [("🤖", "Hey! I’m your Gen-Z watch buddy.\nI help you find your next favorite TV show based on what you like, your age, or your favorite genre. Just tell me what you're into!")]
83
 
84
+ # Launch Gradio interface with initial message
85
  chatbot = gr.ChatInterface(respond, type="messages", chatbot=initial_message)
86
  chatbot.launch()