Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,11 +7,11 @@ import random
|
|
7 |
# Load model for chat
|
8 |
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
|
9 |
|
10 |
-
#
|
11 |
with open("reconext_file.txt", "r", encoding="utf-8") as file:
|
12 |
reconext_file_text = file.read()
|
13 |
|
14 |
-
# Preprocess text
|
15 |
def preprocess_text(text):
|
16 |
cleaned_text = text.strip()
|
17 |
chunks = cleaned_text.split("\n")
|
@@ -24,7 +24,7 @@ def preprocess_text(text):
|
|
24 |
|
25 |
cleaned_chunks = preprocess_text(reconext_file_text)
|
26 |
|
27 |
-
# Create embeddings
|
28 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
29 |
|
30 |
def create_embeddings(text_chunks):
|
@@ -33,7 +33,7 @@ def create_embeddings(text_chunks):
|
|
33 |
|
34 |
chunk_embeddings = create_embeddings(cleaned_chunks)
|
35 |
|
36 |
-
# Semantic search
|
37 |
def get_top_chunks(query, chunk_embeddings, text_chunks):
|
38 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
39 |
query_embedding_normalized = query_embedding / query_embedding.norm()
|
@@ -43,17 +43,18 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
43 |
top_chunks = [text_chunks[i] for i in top_indices]
|
44 |
return top_chunks
|
45 |
|
46 |
-
#
|
47 |
def respond(message, history):
|
48 |
best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
49 |
str_watch_chunks = "\n".join(best_next_watch)
|
|
|
50 |
messages = [
|
51 |
{
|
52 |
"role": "system",
|
53 |
"content": (
|
54 |
"You are a Gen Z and Gen Alpha-friendly chatbot that helps teenagers find their next best TV show to watch. "
|
55 |
-
"Speak naturally and casually, like someone from Gen Z. Only recommend TV shows, never movies. Use only the shows in our database "
|
56 |
-
"YOU CAN NEVER USE OUTSIDE DATA ONLY TAKE DATA FROM OUR DATABASE! Match show suggestions to the user's age using TV ratings: "
|
57 |
"TV-G is for all ages, TV-PG is for ages 6 and up, TV-14 is for 14 and up, and TV-MA is for 18 and up. "
|
58 |
"If they don’t share their age, assume they’re Gen Z or Gen Alpha and use those guidelines. "
|
59 |
"If the user is not Gen Z or Gen Alpha, you can recommend any show from the database. "
|
@@ -65,18 +66,21 @@ def respond(message, history):
|
|
65 |
)
|
66 |
}
|
67 |
]
|
|
|
68 |
if history:
|
69 |
messages.extend(history)
|
|
|
70 |
messages.append({"role": "user", "content": message})
|
71 |
|
72 |
response = client.chat_completion(
|
73 |
messages, max_tokens=700, temperature=1.3, top_p=0.6
|
74 |
)
|
|
|
75 |
return response['choices'][0]['message']['content'].strip()
|
76 |
|
77 |
# Initial chatbot message
|
78 |
initial_message = [("🤖", "Hey! I’m your Gen-Z watch buddy.\nI help you find your next favorite TV show based on what you like, your age, or your favorite genre. Just tell me what you're into!")]
|
79 |
|
80 |
-
# Gradio
|
81 |
chatbot = gr.ChatInterface(respond, type="messages", chatbot=initial_message)
|
82 |
chatbot.launch()
|
|
|
7 |
# Step 1: Load model for chat
|
8 |
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
|
9 |
|
10 |
+
# Step 2: Load TV show database
|
11 |
with open("reconext_file.txt", "r", encoding="utf-8") as file:
|
12 |
reconext_file_text = file.read()
|
13 |
|
14 |
+
# Step 3: Preprocess the text
|
15 |
def preprocess_text(text):
|
16 |
cleaned_text = text.strip()
|
17 |
chunks = cleaned_text.split("\n")
|
|
|
24 |
|
25 |
cleaned_chunks = preprocess_text(reconext_file_text)
|
26 |
|
27 |
+
# Step 4: Create embeddings
|
28 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
29 |
|
30 |
def create_embeddings(text_chunks):
|
|
|
33 |
|
34 |
chunk_embeddings = create_embeddings(cleaned_chunks)
|
35 |
|
36 |
+
# Step 5: Semantic search
|
37 |
def get_top_chunks(query, chunk_embeddings, text_chunks):
|
38 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
39 |
query_embedding_normalized = query_embedding / query_embedding.norm()
|
|
|
43 |
top_chunks = [text_chunks[i] for i in top_indices]
|
44 |
return top_chunks
|
45 |
|
46 |
+
# Response function
|
47 |
def respond(message, history):
|
48 |
best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
49 |
str_watch_chunks = "\n".join(best_next_watch)
|
50 |
+
|
51 |
messages = [
|
52 |
{
|
53 |
"role": "system",
|
54 |
"content": (
|
55 |
"You are a Gen Z and Gen Alpha-friendly chatbot that helps teenagers find their next best TV show to watch. "
|
56 |
+
"Speak naturally and casually, like someone from Gen Z. Only recommend TV shows, never movies. Use only the shows in our database. "
|
57 |
+
"YOU CAN NEVER USE OUTSIDE DATA — ONLY TAKE DATA FROM OUR DATABASE! Match show suggestions to the user's age using TV ratings: "
|
58 |
"TV-G is for all ages, TV-PG is for ages 6 and up, TV-14 is for 14 and up, and TV-MA is for 18 and up. "
|
59 |
"If they don’t share their age, assume they’re Gen Z or Gen Alpha and use those guidelines. "
|
60 |
"If the user is not Gen Z or Gen Alpha, you can recommend any show from the database. "
|
|
|
66 |
)
|
67 |
}
|
68 |
]
|
69 |
+
|
70 |
if history:
|
71 |
messages.extend(history)
|
72 |
+
|
73 |
messages.append({"role": "user", "content": message})
|
74 |
|
75 |
response = client.chat_completion(
|
76 |
messages, max_tokens=700, temperature=1.3, top_p=0.6
|
77 |
)
|
78 |
+
|
79 |
return response['choices'][0]['message']['content'].strip()
|
80 |
|
81 |
# Initial chatbot message
# Seed the conversation with a greeting from the assistant. The interface
# below is created with type="messages", so history entries must be
# {"role": ..., "content": ...} dicts rather than (user, bot) tuples. The
# previous ("🤖", greeting) tuple would also have put the emoji in the
# user slot of the pair, so it is dropped here.
initial_message = [
    {
        "role": "assistant",
        "content": "Hey! I’m your Gen-Z watch buddy.\nI help you find your next favorite TV show based on what you like, your age, or your favorite genre. Just tell me what you're into!",
    }
]

# Launch Gradio interface with initial message
# ChatInterface's `chatbot` argument expects a gr.Chatbot component, not a raw
# history list, so the greeting is wrapped in a Chatbot whose message format
# matches the interface's.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    chatbot=gr.Chatbot(value=initial_message, type="messages"),
)
chatbot.launch()
|