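"""Gradio hotel-search chatbot.

Retrieves candidate hotels with BM25, re-ranks them by embedding similarity
(nomic-embed-text-v1.5), optionally narrows the pool to a city detected by
spaCy NER, and asks gpt-4o-mini to phrase the answer like a travel agent.
"""
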
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import pandas as pd
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import nltk
from openai.embeddings_utils import cosine_similarity  # requires the legacy openai<1.0 SDK
import spacy
from spacy.cli import download
import os
import openai

# Load the small English spaCy pipeline, downloading it on first use.
try:
    nlp = spacy.load('en_core_web_sm')
except IOError:
    download('en_core_web_sm')
    nlp = spacy.load('en_core_web_sm')

openai.api_key = os.getenv("OPENAI_API_KEY")

# Tokenizer data needed by nltk.word_tokenize.
nltk.download('punkt')
nltk.download('punkt_tab')

# Pre-embedded hotel data and the model used to embed incoming queries.
df = pd.read_pickle("hotels_data.pkl")
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
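
# Columns the code below expects in hotels_data.pkl (inferred from usage, not
# documented alongside the data): hotel_name, hotel_image, rate, hotel_description,
# review_text, locality, combined (text indexed by BM25), and embeddings
# (precomputed vectors from the same model as above).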

def bm25_rank(query, df, n=15):
    """Return the n rows of df whose 'combined' text scores highest under BM25."""
    tokenized_corpus = [word_tokenize(doc.lower()) for doc in df['combined']]
    bm25 = BM25Okapi(tokenized_corpus)
    tokenized_query = word_tokenize(query.lower())
    scores = bm25.get_scores(tokenized_query)
    df = df.copy()  # work on a copy so the caller's DataFrame is not mutated
    df['bm25_scores'] = scores
    top_results = df.nlargest(n, 'bm25_scores')
    return top_results
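
# Example (hypothetical query string):
#   candidates = bm25_rank("quiet hotel with a rooftop pool", df)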

def search(query, df):
    """BM25-prefilter df, then re-rank the candidates by embedding similarity to the query."""
    n = 5
    query_embedding = model.encode(query)
    df = bm25_rank(query, df)
    # Cosine similarity between each precomputed hotel embedding and the query embedding.
    df["similarity"] = df.embeddings.apply(lambda x: cosine_similarity(x, query_embedding))
    results = df.sort_values("similarity", ascending=False).head(n)
    resultlist = []
    hlist = []
    for r in results.index:
        if results.hotel_name[r] not in hlist:
            # Collect the rows for this hotel and keep at most three of its reviews.
            smalldf = results.loc[results.hotel_name == results.hotel_name[r]]
            if smalldf.shape[0] > 3:
                smalldf = smalldf[:3]
            resultlist.append({
                "hotel_name": results.hotel_name[r],
                "image_url": smalldf.hotel_image[r],
                "score": smalldf.rate[r],
                "description": smalldf.hotel_description[r],
                "relevant_reviews": [smalldf.review_text[s] for s in smalldf.index]
            })
            hlist.append(results.hotel_name[r])
    return resultlist
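
# Design note: BM25 first narrows the corpus to 15 lexically similar rows, so the
# embedding re-rank only has to score a handful of candidates per query.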

def get_hotel_info(query):
    """Restrict the search to the city mentioned in the query (if spaCy finds one), then run search()."""
    doc = nlp(query)
    if doc.ents:
        # Treat the first named entity as a candidate city; .title() keeps
        # multi-word names such as "New York" matchable against 'locality'.
        city = doc.ents[0].text.title()
        df_filtered = df[df['locality'] == city]
        if df_filtered.shape[0] == 0:
            df_filtered = df
    else:
        city = None
        df_filtered = df
    results = search(query, df_filtered)
    response = []
    for result in results:
        response.append({
            'image_url': result['image_url'],
            'hotel_name': result['hotel_name'],
            'score': result['score'],
            'description': result['description'],
            'relevant_reviews': result['relevant_reviews']
        })
    return response

def generate_answer(query, context):
    """Ask gpt-4o-mini (legacy openai<1.0 ChatCompletion API) to write the travel-agent style answer."""
    prompt = f"""
    Based on the following user query, generate a detailed answer from the context,
    focusing on which hotel best fits the query. Respond as a travel agent conversing
    with the user in a friendly, cordial way. Remove any special characters and (\\n);
    keep the output clean and concise.
    ###########
    query:
    "{query}"
    ########
    context:
    "{context}"
    #####
    Return in Markdown format with each hotel highlighted.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,  # low temperature: favour grounded answers over creative ones
        messages=messages
    )
    # Extract the generated text from the API response.
    generated_text = response.choices[0].message['content'].strip()
    return generated_text

def chatbot_response(message, history):
    """Gradio callback: retrieve matching hotels and append the generated answer to the chat history."""
    hotel_infos = get_hotel_info(message)
    if hotel_infos:
        # Flatten the retrieved hotels into a plain-text context for the LLM.
        context = "\n".join([
            f"Hotel Name: {info['hotel_name']}, Score: {info['score']}, Description: {info['description']}, Reviews: {info['relevant_reviews']}"
            for info in hotel_infos
        ])
        response = generate_answer(message, context)
    else:
        response = "No results found."
    history.append((message, response))
    return history, history

with gr.Blocks() as interface:
    chatbot = gr.Chatbot(label="Hotel Search Chatbot")
    query_input = gr.Textbox(label="Ask me about hotels!")
    submit_button = gr.Button("Send")
    submit_button.click(
        fn=chatbot_response,
        inputs=[query_input, chatbot],
        outputs=[chatbot, chatbot]
    )

interface.launch()
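
# ---------------------------------------------------------------------------
# Note: hotels_data.pkl must already carry the precomputed 'embeddings' column.
# A minimal, hypothetical offline sketch for building it (the source file name
# and raw columns are assumptions based on the usage above):
#
#     import pandas as pd
#     from sentence_transformers import SentenceTransformer
#
#     model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5",
#                                 trust_remote_code=True)
#     hotels = pd.read_csv("hotels_raw.csv")  # hypothetical source file
#     hotels["combined"] = (hotels["hotel_description"].fillna("") + " "
#                           + hotels["review_text"].fillna(""))
#     hotels["embeddings"] = list(model.encode(hotels["combined"].tolist()))
#     hotels.to_pickle("hotels_data.pkl")
# ---------------------------------------------------------------------------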