Spaces:

tuwaiq-allam
/

TripleSmart

Sleeping

App Files Files Community

shroogawh2 committed on Aug 12, 2024

Commit

bfbc980

verified ·

1 Parent(s): 65413b8

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -176

app.py CHANGED Viewed

@@ -1,183 +1,10 @@
-!pip install datasets
-!pip install gradio
-!pip install -U sentence-transformers rank_bm25
-import json
-import pandas as pd
-import time
-import spacy
-from spacy.lang.en.stop_words import STOP_WORDS
-from string import punctuation
-from collections import Counter
-from heapq import nlargest
-import nltk
-import numpy as np
-from sentence_transformers import SentenceTransformer, util
-from openai.embeddings_utils import get_embedding, cosine_similarity
-from datasets import load_dataset
-ds = load_dataset("traversaal-ai-hackathon/hotel_datasets")
-data=ds['train']
-data=pd.DataFrame(data)
-data.head()
-!apt-get install -y fonts-freefont-ttf
-! pip install --upgrade Pillow
-!pip install ydata_profiling
-# Create a comprehensive report
-from ydata_profiling import ProfileReport
-EDA_df = ProfileReport(data,minimal=True)
-EDA_df
-data.shape
-data['country'] = data['country'].replace('Türkiye', 'Turkiye')
-data=data.drop_duplicates()
-data["combined_review"] = data.apply(
-    lambda row: ("title: " + row.review_title.strip() + "; " if pd.notna(row.review_title) and row.review_title.strip() else "") +
-                ("review: " + row.review_text.strip() if pd.notna(row.review_text) and row.review_text.strip() else ""),axis=1
-)
-data.head()
-import re
-df_combined = data.copy()
-df_combined['combined_review'] = df_combined['combined_review'].apply(lambda x: re.sub('[^a-zA-z0-9\s]','',str(x)))
-# Convert the whole "combined_review" column to lower case.
-def lower_case(input_str):
-    input_str = input_str.lower()
-    return input_str
-df_combined['combined_review']= df_combined['combined_review'].apply(lambda x: lower_case(x))
-from sentence_transformers import SentenceTransformer #import model
-model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5",trust_remote_code=True)
-import json
-from sentence_transformers import SentenceTransformer, CrossEncoder, util
-import gzip
-import os
-import torch
-embedder =model
-# Use the GPU if available
-if not torch.cuda.is_available():
-    print("Warning: No GPU found. Please add GPU to your notebook")
-else:
-  print("GPU Found!")
-  embedder =  embedder.to('cuda')
-startTime = time.time()
-data["embedding_reviews"] = data.combined_review.apply(lambda x: embedder.encode(x))
-executionTime = (time.time() - startTime)
-print('Execution time in seconds: ' + str(executionTime))
-df_with_embedding["combined_summary"] = df_with_embedding.apply(
-    lambda row: ("hotel_name: " + row.hotel_name.strip() + "; " if pd.notna(row.hotel_name) and row.hotel_name.strip() else "") +
-    ("hotel_description: " + row.hotel_description.strip() + "; " if pd.notna(row.hotel_description) and row.hotel_description.strip() else "") +
-                 ("rating_value: " + str(row.rating_value) if pd.notna(str(row.rating_value)) and row.rating_value else "")+
-                  ("review_count: " + str(row.review_count) if pd.notna(str(row.review_count)) and row.review_count else "")+
-                   ("street_address: " + row.street_address.strip() if pd.notna(row.street_address) and row.street_address.strip() else "")+
-                    ("City: " + row.locality.strip() if pd.notna(row.locality) and row.locality.strip() else "")+
-                     ("country: " + row.country.strip() if pd.notna(row.country) and row.country.strip() else ""), axis=1
-)
-df_with_embedding.head()
-import re
-df_with_embedding2 = df_with_embedding.copy()
-df_with_embedding2['combined_summary'] = df_with_embedding['combined_summary'].apply(lambda x: re.sub('[^a-zA-z0-9\s]','',str(x)))
-# Convert the whole "combined_summary" column to lower case.
-def lower_case(input_str):
-    input_str = input_str.lower()
-    return input_str
-df_with_embedding2['combined_summary']= df_with_embedding2['combined_summary'].apply(lambda x: lower_case(x))
-startTime = time.time()
-df_with_embedding2["embedding_summary"] = df_with_embedding2.combined_summary.apply(lambda x: embedder.encode(x))
-executionTime = (time.time() - startTime)
-print('Execution time in seconds: ' + str(executionTime))
-query="I'm looking for a hotel in the center of London with healthy breakfast"
-def search(query):
-  # return the first 15 results ranked by similarity.
-  n = 15
-  # Embedding the query.
-  query_embedding = embedder.encode(query)
-  # Generate the similarity column.
-  df_with_embedding2["similarity"] = (df_with_embedding2.embedding_summary.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768,-1)))+df_with_embedding2.embedding_reviews.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768,-1))))/2
-  results = (
-      df_with_embedding2.sort_values("similarity", ascending=False)
-      .head(n))
-  resultlist = []
-  hlist = []
-  for r in results.index:
-      if results.hotel_name[r] not in hlist:
-          smalldf = results.loc[results.hotel_name == results.hotel_name[r]]
-          if smalldf.shape[1] > 3:
-            smalldf = smalldf[:3]
-          resultlist.append(
-          {
-            "name":results.hotel_name[r],
-            "score": smalldf.similarity[r][0],
-            "rating": smalldf.rating_value[r],
-            "review_count": smalldf.review_count[r],
-            "street_address": smalldf.street_address[r],
-            "city": smalldf.locality[r],
-            "country": smalldf.country[r],
-            "hotel_image":smalldf.hotel_image[r]
-          })
-          hlist.append(results.hotel_name[r])
-  return resultlist
 import gradio as gr
 import json
 def display_hotel_info(query_json_str):
     """This app helps you find hotels based on your search query. Enter a city, location, hotel name or just type what you looking for ."""
     try:
-        query_json = search(query_json_str)  # Assume this function returns a list of hotel data dictionaries
         hotel_infos = []
         image_outputs = []
@@ -185,7 +12,7 @@ def display_hotel_info(query_json_str):
             if not isinstance(hotel, dict):
                 raise ValueError("Expected hotel data to be a dictionary.")
-            # Extract information for each hotel
             name = hotel.get("name", "N/A")
             score = hotel.get("score", 0.0)
             rating = hotel.get("rating", "N/A")
@@ -195,7 +22,7 @@ def display_hotel_info(query_json_str):
             country = hotel.get("country", "N/A")
             hotel_image = hotel.get("hotel_image", None)
-            # Format HTML output for each hotel
             hotel_info = f"""
             <div style="display: flex; align-items: center; margin-bottom: 20px;">
                 <div style="flex: 1;">

 import gradio as gr
 import json
 def display_hotel_info(query_json_str):
     """This app helps you find hotels based on your search query. Enter a city, location, hotel name or just type what you looking for ."""
     try:
+        query_json = search(query_json_str)
         hotel_infos = []
         image_outputs = []
             if not isinstance(hotel, dict):
                 raise ValueError("Expected hotel data to be a dictionary.")
             name = hotel.get("name", "N/A")
             score = hotel.get("score", 0.0)
             rating = hotel.get("rating", "N/A")
             country = hotel.get("country", "N/A")
             hotel_image = hotel.get("hotel_image", None)
             hotel_info = f"""
             <div style="display: flex; align-items: center; margin-bottom: 20px;">
                 <div style="flex: 1;">