# hatim00101's picture
# Update app.py
# 6c9917e verified
import gradio as gr
import torch
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
# Sentence-embedding model used to embed incoming search queries.
# NOTE(review): the published usage for this checkpoint passes
# trust_remote_code=True -- confirm it loads as written where this app runs.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1')

# Summarization model (fairseq hub entry point) and BART tokenizer
# (Hugging Face hub entry point).
# NOTE(review): the model and the tokenizer come from two different hub
# repositories; verify that the encode / generate / decode calls made in
# summarize_text are valid for this pairing.
model_bart = torch.hub.load('pytorch/fairseq', 'bart.large.cnn')
tokenizer = torch.hub.load(
    'huggingface/pytorch-transformers',
    'tokenizer',
    'facebook/bart-large-cnn',
)

# Precomputed hotel embeddings, unpickled from disk.
# NOTE: pickle.load can execute arbitrary code from the file it reads, so
# this artifact must only ever come from a trusted source.
with open('normalized_embeddings.pkl', 'rb') as embeddings_file:
    normalized_embeddings = pickle.load(embeddings_file)

# Processed hotel table that search results are looked up in.
df_copy_first_1000 = pd.read_csv('hotel_dataset_processed.csv')
# Function to summarize text using BART
def summarize_text(text, max_length=150, min_length=30):
    """Summarize ``text`` with the module-level BART model and tokenizer.

    Args:
        text: The text to condense.
        max_length: Forwarded to ``model_bart.generate`` as ``max_length``.
        min_length: Forwarded to ``model_bart.generate`` as ``min_length``.

    Returns:
        The decoded summary with special tokens skipped, or ``""`` when
        ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip tokenization and beam search entirely
    # rather than asking the model to generate from an empty input.
    if not text or not text.strip():
        return ""

    # The encoder input is truncated to at most 1024 tokens.
    input_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model_bart.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Only the first generated sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Function to search hotels
def search_hotels(query_text, k=1):
    """Return a text report for the ``k`` hotels most similar to a query.

    The query is embedded with the module-level ``model``, L2-normalized,
    and compared by cosine similarity against ``normalized_embeddings``.
    The best-scoring rows are read from ``df_copy_first_1000`` and each one
    is rendered with a generated summary of its description and review.

    Args:
        query_text: Free-text search query.
        k: Number of hotels to return. Coerced with ``int()``; a value of
            zero or less yields an empty result.

    Returns:
        One formatted entry per hotel, joined by blank lines, ordered from
        most to least similar. Returns ``""`` when ``k`` is not positive.
        If anything raises, the exception is not propagated; a string
        starting with ``"An error occurred during the search:"`` is
        returned instead.
    """
    try:
        # UI widgets may hand over floats; slicing needs an int.
        k = int(k)
        # Slicing with [-k:] would return *every* row for k == 0 (since
        # -0 == 0), so non-positive requests are answered up front.
        if k <= 0:
            return ""

        # Encode the query and L2-normalize it as a single-row matrix.
        query_embedding = model.encode(query_text, convert_to_tensor=True)
        query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
        query_embedding = normalize(query_embedding, norm='l2')

        # Similarity of the query against every stored embedding.
        scores = cosine_similarity(query_embedding, normalized_embeddings)[0]

        # Descending order first, then keep the k best.
        top_indices = scores.argsort()[::-1][:k]

        # assumes row i of normalized_embeddings corresponds to row i of
        # df_copy_first_1000 -- TODO confirm against how the pickle was built
        top_hotels = df_copy_first_1000.iloc[top_indices]

        results = []
        for _, row in top_hotels.iterrows():
            # Text handed to the summarizer for this hotel.
            summary_text = (
                f"Description: {row['hotel_description']}\n"
                f"Review Title: {row['review_title']}\n"
                f"Review Text: {row['review_text']}\n"
                f"Review Count: {row['review_count']}"
            )
            summary = summarize_text(summary_text)
            results.append(
                f"Hotel Name: {row['hotel_name']}\n"
                f"Locality: {row['locality']}\n"
                f"Price Range: {row['price_range']}\n"
                f"Rate: {row['rate']}\n"
                f"\nSummary:\n{summary}\n"
            )
        return "\n\n".join(results)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"An error occurred during the search: {e}"
# Gradio Interface
# One text box in, plain text out. Only the query is wired up as an input,
# so search_hotels is called with its default k.
query_box = gr.Textbox(label="Enter your search query")
iface = gr.Interface(
    title="Hotel Search Engine",
    description="Enter a query to search for hotels and get details about the top results.",
    fn=search_hotels,
    inputs=query_box,
    outputs="text",
)

# Launch Gradio Interface
iface.launch()