Update app.py
app.py CHANGED
@@ -15,49 +15,42 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
 nltk.download('punkt')
 nltk.download('punkt_tab')
 
-
 df = pd.read_pickle("hotels_data.pkl")
-model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5",trust_remote_code=True)
+model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
 
 def bm25_rank(query, df, n=15):
-    ...
+    tokenized_corpus = [word_tokenize(doc.lower()) for doc in df['combined']]
+    bm25 = BM25Okapi(tokenized_corpus)
+    tokenized_query = word_tokenize(query.lower())
+    scores = bm25.get_scores(tokenized_query)
+    df['bm25_scores'] = scores
+    top_results = df.nlargest(n, 'bm25_scores')
+    return top_results
 
 def search(query, df):
-    ...
-                "score": smalldf.rate[r],
-                "description": smalldf.hotel_description[r],
-                "relevant_reviews": [ smalldf.review_text[s] for s in smalldf.index]
-            })
-            hlist.append(results.hotel_name[r])
-    return resultlist
+    n = 5
+    query_embedding = model.encode(query)
+    df = bm25_rank(query, df)
+    df["similarity"] = df.embeddings.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768, -1)))
+    results = df.sort_values("similarity", ascending=False).head(n)
+
+    resultlist = []
+    hlist = []
+    for r in results.index:
+        if results.hotel_name[r] not in hlist:
+            smalldf = results.loc[results.hotel_name == results.hotel_name[r]]
+            if smalldf.shape[1] > 3:
+                smalldf = smalldf[:3]
+
+            resultlist.append({
+                "hotel_name": results.hotel_name[r],
+                "image_url": smalldf.hotel_image[r],
+                "score": smalldf.rate[r],
+                "description": smalldf.hotel_description[r],
+                "relevant_reviews": [smalldf.review_text[s] for s in smalldf.index]
+            })
+            hlist.append(results.hotel_name[r])
+    return resultlist
 
 def get_hotel_info(query):
     try:
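The rewritten bm25_rank/search pair implements two-stage retrieval: a BM25 keyword ranking over the hotels' combined text, followed by an embedding re-ranking of those candidates against the query vector. Below is a minimal, self-contained sketch of the same pattern on a toy corpus; the toy data, the all-MiniLM-L6-v2 stand-in model and scikit-learn's cosine_similarity are assumptions for illustration, since the app's imports and its precomputed embeddings column are not visible in this diff.

# Two-stage retrieval sketch: BM25 pre-ranking, then embedding re-ranking.
# The toy corpus, the all-MiniLM-L6-v2 model and sklearn's cosine_similarity are
# stand-ins; the app's real imports and 'embeddings' column are not in the diff.
import nltk
import pandas as pd
from nltk.tokenize import word_tokenize
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')
nltk.download('punkt_tab')

df = pd.DataFrame({
    "hotel_name": ["Hotel A", "Hotel B", "Hotel C"],
    "combined": [
        "quiet beachfront hotel with spa and sea-view rooms",
        "budget city-centre hotel near the train station",
        "family resort with pools, a kids club and beach access",
    ],
})

model = SentenceTransformer("all-MiniLM-L6-v2")
df["embeddings"] = list(model.encode(df["combined"].tolist()))

def bm25_rank(query, df, n=2):
    # Keyword stage: score every row's combined text against the query.
    tokenized_corpus = [word_tokenize(doc.lower()) for doc in df["combined"]]
    bm25 = BM25Okapi(tokenized_corpus)
    df = df.copy()
    df["bm25_scores"] = bm25.get_scores(word_tokenize(query.lower()))
    return df.nlargest(n, "bm25_scores")

def search(query, df, n=2):
    # Semantic stage: re-rank the BM25 candidates by cosine similarity.
    candidates = bm25_rank(query, df)
    query_embedding = model.encode(query).reshape(1, -1)
    candidates["similarity"] = candidates["embeddings"].apply(
        lambda e: float(cosine_similarity(e.reshape(1, -1), query_embedding))
    )
    return candidates.sort_values("similarity", ascending=False).head(n)

print(search("hotel near the beach", df)[["hotel_name", "similarity"]])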
@@ -72,8 +65,8 @@ def get_hotel_info(query):
         if df_filtred.shape[0] == 0:
             df_filtred = df
         else:
-            ...
-            ...
+            city = None
+            df_filtred = df
         results = search(query, df_filtred)
         response = []
         for result in results:
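In this hunk, the else branch of the city filter in get_hotel_info is replaced so that city is reset to None and df_filtred falls back to the full frame before search runs. The surrounding filtering code sits outside the hunk; the sketch below shows the filter-with-fallback pattern it implies, where extract_city and the city column are hypothetical placeholders (the df_filtred spelling from the app is kept as-is).

# Filter-with-fallback sketch for get_hotel_info. extract_city() and the 'city'
# column are hypothetical placeholders; the app's actual filtering code is not
# part of this diff. The df_filtred spelling mirrors the app.
def filter_hotels(df, query, extract_city):
    city = extract_city(query)            # e.g. None when no city is mentioned
    if city:
        df_filtred = df[df["city"].str.contains(city, case=False, na=False)]
    else:
        df_filtred = df
    if df_filtred.shape[0] == 0:          # nothing matched: search everything
        df_filtred = df
    return df_filtred

# Usage: filter_hotels(df, "spa hotel by the beach", extract_city=lambda q: None)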
@@ -82,33 +75,22 @@ def get_hotel_info(query):
                 'hotel_name': result['hotel_name'],
                 'score': result['score'],
                 'description': result['description'],
-                '
+                'relevant_reviews': result['relevant_reviews']
             })
         return response
 
-# def format_response(hotel_info):
-# response = f"**Hotel Name**: {hotel_info['hotel_name']}\n"
-# response += f"**Score**: {hotel_info['score']}\n"
-# response += f"**Description**: {hotel_info['description']}\n"
-# response += f"\n"
-# return response
-
 def generate_answer(query, context):
     prompt = f"""
     Based on the following query from a user, please generate a detailed answer based on the context
     focusing on which is the top hotel based on the query. You should respond as if you are a travel agent and are conversing with the
     user in a nice cordial way. Remove any special characters and (\\n), make the output clean and concise.
-
     ###########
     query:
     "{query}"
-
     ########
-
-    context:"
+    context:
     "{context}"
     #####
-
     Return in Markdown format with each hotel highlighted.
     """
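The last hunk adds relevant_reviews to each result dict and tidies the generate_answer prompt: the stray quote after context: goes away along with the blank separator lines and the commented-out format_response helper. The OpenAI call that consumes this prompt is outside the hunks; given the openai.api_key = os.getenv("OPENAI_API_KEY") context line, the app appears to target the pre-1.0 openai SDK, so a plausible sketch of that missing step is below. The gpt-3.5-turbo model name and the temperature are assumptions, not something the commit shows.

# Hypothetical completion step for generate_answer; the actual call is not in
# this diff. Assumes the pre-1.0 openai SDK implied by openai.api_key = ...;
# gpt-3.5-turbo and temperature=0.2 are illustrative choices only.
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def complete(prompt):
    resp = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
    )
    return resp["choices"][0]["message"]["content"]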
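Taken together, the flow after this commit appears to be: get_hotel_info(query) returns the ranked hotels, now including relevant_reviews, and that list becomes the context string for generate_answer. A hedged sketch of that wiring follows; the UI handler and the way the context is serialized are not shown in the commit, so json.dumps and the sample query are assumptions.

# Hypothetical end-to-end wiring; not shown in the commit. Assumes
# get_hotel_info() and generate_answer() as defined in app.py, and that
# generate_answer() returns the model's reply for the assembled prompt.
import json

def answer_query(query):
    hotels = get_hotel_info(query)              # ranked hotels with reviews
    context = json.dumps(hotels, default=str)   # one way to serialize for the prompt
    return generate_answer(query, context)

if __name__ == "__main__":
    print(answer_query("a quiet beachfront hotel with a spa"))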