aditijuluri committed on
Commit 3e7a243 · verified · 1 Parent(s): 86faa0f

Update app.py

Files changed (1)
  1. app.py +76 -75
app.py CHANGED
@@ -1,96 +1,97 @@
-from huggingface_hub import InferenceClient
-
-#step 1 from semantic search
 from sentence_transformers import SentenceTransformer
 import torch
 import gradio as gr
-import random
 
 client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
-#step 2 from semantic search read file
-# Open the water_cycle.txt file in read mode with UTF-8 encoding
 with open("reconext_file.txt", "r", encoding="utf-8") as file:
-    # Read the entire contents of the file and store it in a variable
-    reconext_file_text = file.read()
-# Print the text below
-print(reconext_file_text)
-#step 3 from semantix search
 def preprocess_text(text):
-    # Strip extra whitespace from the beginning and the end of the text
-    cleaned_text = text.strip()
-    # Split the cleaned_text by every newline character (\n)
-    chunks = cleaned_text.split("\n")
-    # Create an empty list to store cleaned chunks
-    cleaned_chunks = []
-    # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
-    for chunk in chunks:
-        clean_chunk = chunk.strip()
-        if(len(clean_chunk) >= 0):
-            cleaned_chunks.append(clean_chunk)
-    # Print cleaned_chunks
-    print(cleaned_chunks)
-    # Print the length of cleaned_chunks
-    print(len(cleaned_chunks))
-    # Return the cleaned_chunks
-    return cleaned_chunks
-# Call the preprocess_text function and store the result in a cleaned_chunks variable
-cleaned_chunks = preprocess_text(reconext_file_text) # Complete this line
-#step 4 from semantic search
-# Load the pre-trained embedding model that converts text to vectors
 model = SentenceTransformer('all-MiniLM-L6-v2')
 def create_embeddings(text_chunks):
-    # Convert each text chunk into a vector embedding and store as a tensor
-    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True) # Replace ... with the text_chunks list
-    # Print the chunk embeddings
-    print(chunk_embeddings)
-    # Print the shape of chunk_embeddings
-    print(chunk_embeddings.shape)
-    # Return the chunk_embeddings
-    return chunk_embeddings
-# Call the create_embeddings function and store the result in a new chunk_embeddings variable
-chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line
-#step 5 from semantic search
-# Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
 def get_top_chunks(query, chunk_embeddings, text_chunks):
-    # Convert the query text into a vector embedding
-    query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
-    # Normalize the query embedding to unit length for accurate similarity comparison
-    query_embedding_normalized = query_embedding / query_embedding.norm()
-    # Normalize all chunk embeddings to unit length for consistent comparison
-    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
-    # Calculate cosine similarity between query and all chunks using matrix multiplication
-    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized) # Complete this line
-    # Print the similarities
-    print(similarities)
-    # Find the indices of the 3 chunks with highest similarity scores
-    top_indices = torch.topk(similarities, k=3).indices
-    # Print the top indices
-    print(top_indices)
-    # Create an empty list to store the most relevant chunks
-    top_chunks = []
-    # Loop through the top indices and retrieve the corresponding text chunks
-    for i in top_indices:
-        top_chunks.append(text_chunks[i])
-    # Return the list of most relevant chunks
-    return top_chunks
 def respond(message, history):
     best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
-    print(best_next_watch)
     str_watch_chunks = "\n".join(best_next_watch)
     messages = [
-        {"role":"system",
-         "content": "You are a gen-z helpful chatbot that helps teenagers find their next best watch, speak in gen-z terms and be natural. You should answer the users question based on " + str_watch_chunks + " ."
         }
     ]
     if history:
         messages.extend(history)
-    messages.append(
-        {'role':'user',
-         'content':message}
-    )
     response = client.chat_completion(
-        messages, max_tokens = 300, temperature=1.3, top_p=0.6
     )
     return response['choices'][0]['message']['content'].strip()
-chatbot = gr.ChatInterface(respond, type="messages")
-chatbot.launch()

+from huggingface_hub import InferenceClient
 from sentence_transformers import SentenceTransformer
 import torch
 import gradio as gr
 
+# Initialize the Hugging Face Inference Client
 client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
+
+# Step 1: Load and preprocess the context file
 with open("reconext_file.txt", "r", encoding="utf-8") as file:
+    reconext_file_text = file.read()
+
 def preprocess_text(text):
+    cleaned_text = text.strip()
+    chunks = cleaned_text.split("\n")
+    cleaned_chunks = [chunk.strip() for chunk in chunks if len(chunk.strip()) > 0]
+    return cleaned_chunks
+
+cleaned_chunks = preprocess_text(reconext_file_text)
+
+# Step 2: Create embeddings for the text chunks
 model = SentenceTransformer('all-MiniLM-L6-v2')
+
 def create_embeddings(text_chunks):
+    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True)
+    return chunk_embeddings
+
+chunk_embeddings = create_embeddings(cleaned_chunks)
+
+# Step 3: Semantic search to get relevant chunks for a user query
 def get_top_chunks(query, chunk_embeddings, text_chunks):
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    query_embedding_normalized = query_embedding / query_embedding.norm()
+    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
+    top_indices = torch.topk(similarities, k=3).indices
+    top_chunks = [text_chunks[i] for i in top_indices]
+    return top_chunks
+
+# Step 4: Generate a response using the Hugging Face model
 def respond(message, history):
     best_next_watch = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
     str_watch_chunks = "\n".join(best_next_watch)
+
+    # Build messages for prompt
     messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are a gen-z helpful chatbot that helps teenagers find their next best watch. "
+                "Speak in gen-z terms and be natural. Answer the user's question based on:\n" + str_watch_chunks
+            )
         }
     ]
     if history:
         messages.extend(history)
+
+    messages.append({"role": "user", "content": message})
+
     response = client.chat_completion(
+        messages, max_tokens=300, temperature=1.3, top_p=0.6
     )
     return response['choices'][0]['message']['content'].strip()
+
+# Step 5: Create the Gradio interface
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(placeholder="Ask me what to watch...", label="Your Message")
+    state = gr.State([]) # Track conversation history
+
+    # Initial assistant message
+    def startup():
+        greeting = (
+            "Yo! I'm your binge buddy. 🎬🔥 Just tell me what vibe you're feelin' "
+            "and I’ll hook you up with the next thing to watch. Let's get it!"
+        )
+        return [("", greeting)], [{"role": "assistant", "content": greeting}]
+
+    # Chat handler
+    def user_message(message, history):
+        bot_response = respond(message, history)
+        history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": bot_response})
+
+        # Format history for display in Chatbot
+        display = []
+        for i in range(1, len(history), 2):
+            display.append((history[i-1]["content"], history[i]["content"]))
+        return display, history
+
+    # Load initial greeting
+    demo.load(startup, outputs=[chatbot, state])
+
+    # Respond to user input
+    msg.submit(fn=user_message, inputs=[msg, state], outputs=[chatbot, state])
+
+demo.launch()