hatim00101 committed on
Commit
6c9917e
·
verified ·
1 Parent(s): bad0e21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -7
app.py CHANGED
@@ -1,3 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  # Function to summarize text using BART
3
  def summarize_text(text):
@@ -6,14 +27,15 @@ def summarize_text(text):
6
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
7
  return summary
8
 
 
9
  def search_hotels(query_text, k=1):
10
  try:
11
- # Encode the query text
12
  query_embedding = model.encode(query_text, convert_to_tensor=True)
13
  query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
14
  query_embedding = normalize(query_embedding, norm='l2')
15
 
16
- # Compute cosine similarity between query and stored embeddings
17
  similarities = cosine_similarity(query_embedding, normalized_embeddings)
18
 
19
  # Get indices of the top k similar hotels
@@ -25,8 +47,13 @@ def search_hotels(query_text, k=1):
25
  # Prepare results
26
  results = []
27
  for _, row in top_hotels.iterrows():
28
- # Create a summary for the description, review title, review text, and review count
29
- summary_text = f"Description: {row['hotel_description']}\nReview Title: {row['review_title']}\nReview Text: {row['review_text']}\nReview Count: {row['review_count']}"
 
 
 
 
 
30
  summary = summarize_text(summary_text)
31
 
32
  result = (
@@ -34,7 +61,7 @@ def search_hotels(query_text, k=1):
34
  f"Locality: {row['locality']}\n"
35
  f"Price Range: {row['price_range']}\n"
36
  f"Rate: {row['rate']}\n"
37
- f"\n {summary}\n"
38
  )
39
  results.append(result)
40
 
@@ -53,5 +80,4 @@ iface = gr.Interface(
53
  )
54
 
55
  # Launch Gradio Interface
56
- if __name__ == "__main__":
57
- iface.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import pickle
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.preprocessing import normalize
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+
9
+ # Load the pre-trained embedding model (SentenceTransformer)
10
+ model = SentenceTransformer('nomic-ai/nomic-embed-text-v1')
11
+
12
+ # Load BART summarization model and tokenizer
13
+ model_bart = torch.hub.load('pytorch/fairseq', 'bart.large.cnn')
14
+ tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'facebook/bart-large-cnn')
15
+
16
+ # Load normalized embeddings from the pkl file
17
+ with open('normalized_embeddings.pkl', 'rb') as f:
18
+ normalized_embeddings = pickle.load(f)
19
+
20
+ # Load the hotel dataset (processed)
21
+ df_copy_first_1000 = pd.read_csv('hotel_dataset_processed.csv')
22
 
23
  # Function to summarize text using BART
24
  def summarize_text(text):
 
27
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
28
  return summary
29
 
30
+ # Function to search hotels
31
  def search_hotels(query_text, k=1):
32
  try:
33
+ # Encode the query text using SentenceTransformer
34
  query_embedding = model.encode(query_text, convert_to_tensor=True)
35
  query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
36
  query_embedding = normalize(query_embedding, norm='l2')
37
 
38
+ # Compute cosine similarity between query embedding and stored embeddings
39
  similarities = cosine_similarity(query_embedding, normalized_embeddings)
40
 
41
  # Get indices of the top k similar hotels
 
47
  # Prepare results
48
  results = []
49
  for _, row in top_hotels.iterrows():
50
+ # Create a summary for the hotel details
51
+ summary_text = (
52
+ f"Description: {row['hotel_description']}\n"
53
+ f"Review Title: {row['review_title']}\n"
54
+ f"Review Text: {row['review_text']}\n"
55
+ f"Review Count: {row['review_count']}"
56
+ )
57
  summary = summarize_text(summary_text)
58
 
59
  result = (
 
61
  f"Locality: {row['locality']}\n"
62
  f"Price Range: {row['price_range']}\n"
63
  f"Rate: {row['rate']}\n"
64
+ f"\nSummary:\n{summary}\n"
65
  )
66
  results.append(result)
67
 
 
80
  )
81
 
82
  # Launch Gradio Interface
83
+ iface.launch()