shroogawh2 committed on
Commit
bfbc980
·
verified ·
1 Parent(s): 65413b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -176
app.py CHANGED
@@ -1,183 +1,10 @@
1
-
2
-
3
-
4
- !pip install datasets
5
- !pip install gradio
6
- !pip install -U sentence-transformers rank_bm25
7
-
8
- import json
9
- import pandas as pd
10
- import time
11
- import spacy
12
- from spacy.lang.en.stop_words import STOP_WORDS
13
- from string import punctuation
14
- from collections import Counter
15
- from heapq import nlargest
16
- import nltk
17
- import numpy as np
18
- from sentence_transformers import SentenceTransformer, util
19
- from openai.embeddings_utils import get_embedding, cosine_similarity
20
-
21
-
22
- from datasets import load_dataset
23
-
24
- ds = load_dataset("traversaal-ai-hackathon/hotel_datasets")
25
- data=ds['train']
26
-
27
- data=pd.DataFrame(data)
28
-
29
- data.head()
30
-
31
- !apt-get install -y fonts-freefont-ttf
32
-
33
- ! pip install --upgrade Pillow
34
-
35
- !pip install ydata_profiling
36
-
37
- # Create a comprehensive report
38
- from ydata_profiling import ProfileReport
39
-
40
- EDA_df = ProfileReport(data,minimal=True)
41
- EDA_df
42
-
43
-
44
- data.shape
45
- data['country'] = data['country'].replace('Türkiye', 'Turkiye')
46
-
47
- data=data.drop_duplicates()
48
-
49
- data["combined_review"] = data.apply(
50
- lambda row: ("title: " + row.review_title.strip() + "; " if pd.notna(row.review_title) and row.review_title.strip() else "") +
51
- ("review: " + row.review_text.strip() if pd.notna(row.review_text) and row.review_text.strip() else ""),axis=1
52
- )
53
- data.head()
54
-
55
-
56
- import re
57
-
58
- df_combined = data.copy()
59
-
60
- df_combined['combined_review'] = df_combined['combined_review'].apply(lambda x: re.sub('[^a-zA-z0-9\s]','',str(x)))
61
-
62
- # Translate all the "combined" column to lower case.
63
- def lower_case(input_str):
64
- input_str = input_str.lower()
65
- return input_str
66
-
67
- df_combined['combined_review']= df_combined['combined_review'].apply(lambda x: lower_case(x))
68
-
69
- from sentence_transformers import SentenceTransformer #import model
70
-
71
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5",trust_remote_code=True)
72
-
73
-
74
- import json
75
- from sentence_transformers import SentenceTransformer, CrossEncoder, util
76
- import gzip
77
- import os
78
- import torch
79
-
80
- embedder =model
81
-
82
- # Use the GPU if available
83
- if not torch.cuda.is_available():
84
- print("Warning: No GPU found. Please add GPU to your notebook")
85
- else:
86
- print("GPU Found!")
87
- embedder = embedder.to('cuda')
88
-
89
- startTime = time.time()
90
-
91
- data["embedding_reviews"] = data.combined_review.apply(lambda x: embedder.encode(x))
92
-
93
- executionTime = (time.time() - startTime)
94
- print('Execution time in seconds: ' + str(executionTime))
95
-
96
-
97
- df_with_embedding["combined_summary"] = df_with_embedding.apply(
98
- lambda row: ("hotel_name: " + row.hotel_name.strip() + "; " if pd.notna(row.hotel_name) and row.hotel_name.strip() else "") +
99
- ("hotel_description: " + row.hotel_description.strip() + "; " if pd.notna(row.hotel_description) and row.hotel_description.strip() else "") +
100
- ("rating_value: " + str(row.rating_value) if pd.notna(str(row.rating_value)) and row.rating_value else "")+
101
- ("review_count: " + str(row.review_count) if pd.notna(str(row.review_count)) and row.review_count else "")+
102
- ("street_address: " + row.street_address.strip() if pd.notna(row.street_address) and row.street_address.strip() else "")+
103
- ("City: " + row.locality.strip() if pd.notna(row.locality) and row.locality.strip() else "")+
104
- ("country: " + row.country.strip() if pd.notna(row.country) and row.country.strip() else ""), axis=1
105
- )
106
-
107
- df_with_embedding.head()
108
-
109
- import re
110
-
111
- df_with_embedding2 = df_with_embedding.copy()
112
-
113
- df_with_embedding2['combined_summary'] = df_with_embedding['combined_summary'].apply(lambda x: re.sub('[^a-zA-z0-9\s]','',str(x)))
114
-
115
- # Translate all the "combined" column to lower case.
116
- def lower_case(input_str):
117
- input_str = input_str.lower()
118
- return input_str
119
-
120
- df_with_embedding2['combined_summary']= df_with_embedding2['combined_summary'].apply(lambda x: lower_case(x))
121
-
122
-
123
- startTime = time.time()
124
-
125
- df_with_embedding2["embedding_summary"] = df_with_embedding2.combined_summary.apply(lambda x: embedder.encode(x))
126
-
127
- executionTime = (time.time() - startTime)
128
- print('Execution time in seconds: ' + str(executionTime))
129
-
130
- query="I'm looking for a hotel in the center of London with healthy breakfast"
131
-
132
- def search(query):
133
- # return the first 15 results ranked by similarity.
134
- n = 15
135
-
136
- # Embedding the query.
137
- query_embedding = embedder.encode(query)
138
-
139
- # Generate the similarity column.
140
- df_with_embedding2["similarity"] = (df_with_embedding2.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768,-1)))+df_with_embedding2.embedding_reviews.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768,-1))))/2
141
-
142
-
143
- results = (
144
- df_with_embedding2.sort_values("similarity", ascending=False)
145
- .head(n))
146
-
147
- resultlist = []
148
-
149
-
150
- hlist = []
151
- for r in results.index:
152
- if results.hotel_name[r] not in hlist:
153
- smalldf = results.loc[results.hotel_name == results.hotel_name[r]]
154
- if smalldf.shape[1] > 3:
155
- smalldf = smalldf[:3]
156
-
157
- resultlist.append(
158
- {
159
- "name":results.hotel_name[r],
160
- "score": smalldf.similarity[r][0],
161
- "rating": smalldf.rating_value[r],
162
- "review_count": smalldf.review_count[r],
163
- "street_address": smalldf.street_address[r],
164
- "city": smalldf.locality[r],
165
- "country": smalldf.country[r],
166
- "hotel_image":smalldf.hotel_image[r]
167
- })
168
- hlist.append(results.hotel_name[r])
169
- return resultlist
170
-
171
-
172
-
173
-
174
  import gradio as gr
175
  import json
176
 
177
  def display_hotel_info(query_json_str):
178
  """This app helps you find hotels based on your search query. Enter a city, location, hotel name or just type what you looking for ."""
179
  try:
180
- query_json = search(query_json_str) # Assume this function returns a list of hotel data dictionaries
181
  hotel_infos = []
182
  image_outputs = []
183
 
@@ -185,7 +12,7 @@ def display_hotel_info(query_json_str):
185
  if not isinstance(hotel, dict):
186
  raise ValueError("Expected hotel data to be a dictionary.")
187
 
188
- # Extract information for each hotel
189
  name = hotel.get("name", "N/A")
190
  score = hotel.get("score", 0.0)
191
  rating = hotel.get("rating", "N/A")
@@ -195,7 +22,7 @@ def display_hotel_info(query_json_str):
195
  country = hotel.get("country", "N/A")
196
  hotel_image = hotel.get("hotel_image", None)
197
 
198
- # Format HTML output for each hotel
199
  hotel_info = f"""
200
  <div style="display: flex; align-items: center; margin-bottom: 20px;">
201
  <div style="flex: 1;">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import json
3
 
4
  def display_hotel_info(query_json_str):
5
  """This app helps you find hotels based on your search query. Enter a city, location, hotel name or just type what you looking for ."""
6
  try:
7
+ query_json = search(query_json_str)
8
  hotel_infos = []
9
  image_outputs = []
10
 
 
12
  if not isinstance(hotel, dict):
13
  raise ValueError("Expected hotel data to be a dictionary.")
14
 
15
+
16
  name = hotel.get("name", "N/A")
17
  score = hotel.get("score", 0.0)
18
  rating = hotel.get("rating", "N/A")
 
22
  country = hotel.get("country", "N/A")
23
  hotel_image = hotel.get("hotel_image", None)
24
 
25
+
26
  hotel_info = f"""
27
  <div style="display: flex; align-items: center; margin-bottom: 20px;">
28
  <div style="flex: 1;">