Spaces:

hatim00101
/

hotel_search_engine

Runtime error

App Files Files Community

hotel_search_engine / app.py

hatim00101

Update app.py

6c9917e verified 10 months ago

raw

history blame contribute delete

3.12 kB

	import gradio as gr
	import torch
	import pickle
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.preprocessing import normalize
	from sklearn.metrics.pairwise import cosine_similarity

	# Load the pre-trained embedding model (SentenceTransformer).
	# The nomic-ai/nomic-embed-text-v1 repository ships its own model code, so
	# loading it requires opting in with trust_remote_code=True (this executes
	# code downloaded from that repository).
	# NOTE(review): the model card also asks for a task prefix such as
	# "search_query: " on inputs -- confirm how the stored embeddings were built
	# before adding one to queries.
	model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)

	# Load BART summarization model and tokenizer.
	# NOTE(review): the model comes from the fairseq hub entry point while the
	# tokenizer comes from the Hugging Face one -- confirm the two are compatible
	# with the way they are combined in summarize_text.
	model_bart = torch.hub.load('pytorch/fairseq', 'bart.large.cnn')
	tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'facebook/bart-large-cnn')

	# Load normalized embeddings from the pkl file.
	# NOTE: pickle.load can execute arbitrary code embedded in the file; only
	# ship a pickle that was produced by a trusted pipeline.
	with open('normalized_embeddings.pkl', 'rb') as f:
	    normalized_embeddings = pickle.load(f)

	# Load the hotel dataset (processed).
	# search_hotels looks rows up positionally using indices computed over
	# normalized_embeddings, so the row order here must match the embedding order.
	df_copy_first_1000 = pd.read_csv('hotel_dataset_processed.csv')

	# Function to summarize text using BART
	def summarize_text(text):
	    """Return a summary of ``text`` produced by the module-level BART model.

	    The text is tokenized with the module-level ``tokenizer`` (truncated to
	    1024 tokens), passed to ``model_bart.generate`` with beam search
	    (4 beams, 30-150 tokens), and the first generated sequence is decoded
	    back to a string with special tokens stripped.

	    NOTE(review): the keyword arguments below follow the Hugging Face
	    ``generate``/``decode`` conventions -- confirm that the objects bound to
	    ``model_bart`` and ``tokenizer`` actually accept them.
	    """
	    inputs = tokenizer.encode(
	        text,
	        return_tensors="pt",
	        max_length=1024,
	        truncation=True,
	    )
	    summary_ids = model_bart.generate(
	        inputs,
	        max_length=150,
	        min_length=30,
	        length_penalty=2.0,
	        num_beams=4,
	        early_stopping=True,
	    )
	    # Only the first (and only) sequence in the batch is of interest.
	    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

	# Function to search hotels
	def search_hotels(query_text, k=1):
	    """Return a text report for the ``k`` hotels most similar to ``query_text``.

	    The query is embedded with the module-level ``model``, L2-normalized and
	    compared by cosine similarity against ``normalized_embeddings``. The
	    best-scoring rows of ``df_copy_first_1000`` are formatted, most similar
	    first, each with a generated summary of its description and review
	    fields.

	    Args:
	        query_text: Free-text search query.
	        k: Number of hotels to return. Values below 1 yield an empty result.

	    Returns:
	        The formatted results joined by blank lines, an empty string when
	        ``k`` is below 1, or an error message if anything fails -- this
	        function is the UI callback, so it reports errors instead of raising.
	    """
	    try:
	        # Guard first: a slice of [-0:] would select *every* hotel and a
	        # negative k would select all but the first |k|.
	        if k < 1:
	            return ""

	        # Encode the query text using SentenceTransformer
	        query_embedding = model.encode(query_text, convert_to_tensor=True)
	        query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
	        query_embedding = normalize(query_embedding, norm='l2')

	        # Compute cosine similarity between query embedding and stored embeddings
	        similarities = cosine_similarity(query_embedding, normalized_embeddings)

	        # argsort is ascending, so take the last k entries and reverse them
	        # to get the best match first.
	        top_indices = similarities[0].argsort()[-k:][::-1]

	        # Retrieve the top k similar hotels (positional lookup)
	        top_hotels = df_copy_first_1000.iloc[top_indices]

	        # Prepare results
	        results = []
	        for _, row in top_hotels.iterrows():
	            # Text handed to the summarizer: description plus review fields
	            summary_text = (
	                f"Description: {row['hotel_description']}\n"
	                f"Review Title: {row['review_title']}\n"
	                f"Review Text: {row['review_text']}\n"
	                f"Review Count: {row['review_count']}"
	            )
	            summary = summarize_text(summary_text)

	            result = (
	                f"Hotel Name: {row['hotel_name']}\n"
	                f"Locality: {row['locality']}\n"
	                f"Price Range: {row['price_range']}\n"
	                f"Rate: {row['rate']}\n"
	                f"\nSummary:\n{summary}\n"
	            )
	            results.append(result)

	        return "\n\n".join(results)

	    except Exception as e:
	        # Top-level UI boundary: surface the failure to the user as text.
	        return f"An error occurred during the search: {e}"

	# Web UI: a single text box feeds search_hotels and the returned report is
	# shown as plain text.
	iface = gr.Interface(
	    title="Hotel Search Engine",
	    description="Enter a query to search for hotels and get details about the top results.",
	    fn=search_hotels,
	    inputs=gr.Textbox(label="Enter your search query"),
	    outputs="text",
	)

	# Start serving the interface.
	iface.launch()