# Spaces status: Runtime error
import logging
import pickle

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
# Load the pre-trained embedding model (SentenceTransformer).
# nomic-embed-text-v1 ships custom modelling code on the Hub, so it only
# loads when remote code is explicitly trusted.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)

# Load the BART summarization model and a BART tokenizer.
# NOTE(review): model_bart is a fairseq hub interface while tokenizer comes
# from Hugging Face transformers; the two libraries use different generation
# APIs, so confirm callers never feed this tokenizer's ids to model_bart.
model_bart = torch.hub.load('pytorch/fairseq', 'bart.large.cnn')
model_bart.eval()  # inference only: make sure dropout is disabled
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'facebook/bart-large-cnn')

# Load the precomputed hotel embeddings from the pkl file.
# NOTE: pickle can execute arbitrary code while loading; this must only ever
# point at a trusted, locally produced file.
with open('normalized_embeddings.pkl', 'rb') as f:
    normalized_embeddings = pickle.load(f)

# Load the hotel dataset (processed).
# Presumably row i here matches row i of normalized_embeddings; verify
# against the script that produced both files.
df_copy_first_1000 = pd.read_csv('hotel_dataset_processed.csv')
# Function to summarize text using BART
def summarize_text(text):
    """Summarize ``text`` with the module-level fairseq BART model.

    ``model_bart`` is loaded through ``torch.hub`` from ``pytorch/fairseq``,
    so it is driven through the fairseq hub interface (``sample``), which
    tokenizes, generates and detokenizes in one call. Feeding it ids from a
    Hugging Face tokenizer together with Hugging Face ``generate`` keyword
    arguments does not match that interface.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary string, or ``""`` when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to summarize; skip the (expensive) model call entirely.
    if not text or not text.strip():
        return ""

    # fairseq equivalents of the intended settings: 4 beams, length penalty
    # 2.0, at most 150 and at least 30 generated tokens. The n-gram blocking
    # value follows the fairseq BART summarization example.
    summaries = model_bart.sample(
        [text],
        beam=4,
        lenpen=2.0,
        max_len_b=150,
        min_len=30,
        no_repeat_ngram_size=3,
    )
    return summaries[0]
# Function to search hotels
def search_hotels(query_text, k=1):
    """Return a text report for the ``k`` hotels most similar to a query.

    The query is embedded with the module-level ``model``, L2-normalized and
    compared with ``normalized_embeddings`` by cosine similarity. The
    best-scoring rows of ``df_copy_first_1000`` are then formatted, each with
    a summary produced by ``summarize_text``.

    Args:
        query_text: Free-text search query.
        k: Number of hotels to return; must be at least 1.

    Returns:
        One formatted entry per hotel, separated by blank lines. A blank
        query yields a short prompt instead, and any failure yields a message
        starting with "An error occurred during the search:".
    """
    # A blank query carries no signal; ask for input instead of running the model.
    if query_text is None or (isinstance(query_text, str) and not query_text.strip()):
        return "Please enter a search query."

    try:
        # A slice of [-0:] selects *every* row and a negative k selects all
        # but the first |k| rows, so reject non-positive values up front.
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")

        # Encode the query text using SentenceTransformer.
        # NOTE(review): nomic-embed models document task prefixes such as
        # "search_query: "; confirm how the stored embeddings were built
        # before adding one here.
        query_embedding = model.encode(query_text, convert_to_tensor=True)
        query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
        query_embedding = normalize(query_embedding, norm='l2')

        # Compute cosine similarity between query embedding and stored embeddings
        similarities = cosine_similarity(query_embedding, normalized_embeddings)

        # Indices of the k highest scores, best match first
        top_indices = similarities[0].argsort()[-k:][::-1]

        # Assumes row i of normalized_embeddings describes row i of the
        # dataset - TODO confirm against the embedding-generation script.
        top_hotels = df_copy_first_1000.iloc[top_indices]

        # Prepare results
        results = []
        for _, row in top_hotels.iterrows():
            # Text handed to the summarizer: description plus review details
            summary_text = (
                f"Description: {row['hotel_description']}\n"
                f"Review Title: {row['review_title']}\n"
                f"Review Text: {row['review_text']}\n"
                f"Review Count: {row['review_count']}"
            )
            summary = summarize_text(summary_text)
            result = (
                f"Hotel Name: {row['hotel_name']}\n"
                f"Locality: {row['locality']}\n"
                f"Price Range: {row['price_range']}\n"
                f"Rate: {row['rate']}\n"
                f"\nSummary:\n{summary}\n"
            )
            results.append(result)
        return "\n\n".join(results)
    except Exception as e:
        # UI boundary: keep the app responsive by returning a message, but
        # record the traceback so the failure is visible in the logs.
        logging.getLogger(__name__).exception("Hotel search failed")
        return f"An error occurred during the search: {e}"
# Gradio UI: one free-text query box in, plain text out.
_query_input = gr.Textbox(label="Enter your search query")

iface = gr.Interface(
    title="Hotel Search Engine",
    description="Enter a query to search for hotels and get details about the top results.",
    fn=search_hotels,
    inputs=_query_input,
    outputs="text",
)

# Start serving the interface
iface.launch()