# Suku0's picture
# Update app.py
# 03fa2bd verified
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import pandas as pd
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import nltk
from openai.embeddings_utils import cosine_similarity
import spacy
from spacy.cli import download
import os
import openai
# Load the small English spaCy pipeline; if it is not installed yet,
# download it once and load it again.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:  # IOError is an alias of OSError on Python 3
    download('en_core_web_sm')
    nlp = spacy.load('en_core_web_sm')

# The OpenAI key is read from the environment (None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Punkt tokenizer data, fetched for the word_tokenize calls in bm25_rank.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

# Hotel/review table. The functions below read its 'combined', 'embeddings',
# 'locality', 'hotel_name', 'hotel_image', 'rate', 'hotel_description' and
# 'review_text' columns.
# NOTE(review): read_pickle executes pickle data; only load a trusted file.
df = pd.read_pickle("hotels_data.pkl")

# Sentence-embedding model used to encode user queries.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
def bm25_rank(query, df, n=15):
    """Return the ``n`` rows of ``df`` that best match ``query`` under BM25.

    The text in the ``combined`` column of every row is lower-cased and
    tokenized, a BM25 index is built over those documents, and each row is
    scored against the lower-cased, tokenized query.

    Args:
        query: Free-text search string.
        df: DataFrame with a ``combined`` text column.
        n: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the ``n`` best-scoring rows, with an added
        ``bm25_scores`` column. The input ``df`` is left untouched.
    """
    # Nothing to rank; building a BM25 index over an empty corpus fails,
    # so return an empty frame that still carries the score column.
    if len(df) == 0:
        return df.assign(bm25_scores=pd.Series(dtype="float64"))

    tokenized_corpus = [word_tokenize(doc.lower()) for doc in df['combined']]
    bm25 = BM25Okapi(tokenized_corpus)
    scores = bm25.get_scores(word_tokenize(query.lower()))

    # assign() works on a copy, so the caller's frame (often the shared
    # module-level table or a slice of it) is not modified.
    return df.assign(bm25_scores=scores).nlargest(n, 'bm25_scores')
def search(query, df, n=5, max_reviews=3):
    """Find hotels relevant to ``query`` with a BM25 + embedding pipeline.

    The rows of ``df`` are first narrowed with ``bm25_rank``, then re-ranked
    by cosine similarity between each row's ``embeddings`` value and the
    query embedding. The ``n`` most similar rows are grouped by hotel.

    Args:
        query: Free-text search string.
        df: DataFrame with ``combined``, ``embeddings``, ``hotel_name``,
            ``hotel_image``, ``rate``, ``hotel_description`` and
            ``review_text`` columns.
        n: Number of top-ranked rows kept before grouping by hotel, so at
            most ``n`` hotels are returned.
        max_reviews: Maximum number of reviews reported per hotel.

    Returns:
        A list of dicts, best match first, with the keys ``hotel_name``,
        ``image_url``, ``score``, ``description`` and ``relevant_reviews``.
        Image, score and description come from the hotel's best-ranked row.
    """
    # Flatten to 1-D so the similarity is a plain scalar and no embedding
    # width has to be hard-coded.
    query_embedding = model.encode(query).reshape(-1)

    candidates = bm25_rank(query, df)
    candidates = candidates.assign(
        similarity=candidates.embeddings.apply(
            lambda emb: cosine_similarity(emb, query_embedding)
        )
    )
    results = candidates.sort_values("similarity", ascending=False).head(n)

    resultlist = []
    # sort=False keeps hotels in order of their best-ranked row; rows inside
    # each group keep the similarity order. Rows without a hotel name are
    # left out by groupby's default handling of missing keys.
    for hotel_name, hotel_rows in results.groupby("hotel_name", sort=False):
        hotel_rows = hotel_rows.head(max_reviews)
        resultlist.append({
            "hotel_name": hotel_name,
            "image_url": hotel_rows.hotel_image.iloc[0],
            "score": hotel_rows.rate.iloc[0],
            "description": hotel_rows.hotel_description.iloc[0],
            "relevant_reviews": hotel_rows.review_text.tolist(),
        })
    return resultlist
def get_hotel_info(query):
    """Search the hotel table for ``query``, restricted to a city if one is named.

    Named entities are extracted from ``query`` with spaCy. The first entity
    whose text equals a value of the ``locality`` column (ignoring case)
    restricts the search to that locality; when no entity matches, the whole
    table is searched.

    Args:
        query: Free-text user question.

    Returns:
        A list of dicts with the keys ``image_url``, ``hotel_name``,
        ``score``, ``description`` and ``relevant_reviews``, as produced by
        ``search``.
    """
    candidates = df
    entities = nlp(query).ents
    if entities:
        # Compare case-insensitively so multi-word names such as "New York"
        # still match. Assumes 'locality' holds strings - TODO confirm.
        localities = df['locality'].str.casefold()
        for entity in entities:
            matched = df[localities == entity.text.casefold()]
            if not matched.empty:
                candidates = matched
                break

    response_keys = (
        'image_url',
        'hotel_name',
        'score',
        'description',
        'relevant_reviews',
    )
    return [
        {key: result[key] for key in response_keys}
        for result in search(query, candidates)
    ]
def generate_answer(query, context):
    """Ask the chat model for a travel-agent style answer to ``query``.

    Args:
        query: The user's question, embedded verbatim in the prompt.
        context: Text describing the retrieved hotels, embedded verbatim in
            the prompt.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped.
    """
    user_prompt = f"""
Based on the following query from a user, please generate a detailed answer based on the context
focusing on which is the top hotel based on the query. You should respond as if you are a travel agent and are conversing with the
user in a nice cordial way. Remove any special characters and (\\n), make the output clean and concise.
###########
query:
"{query}"
########
context:
"{context}"
#####
Return in Markdown format with each hotel highlighted.
"""
    completion = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
        ],
        # Higher temperature means more creative output but also more
        # hallucination.
        temperature=0.2,
        max_tokens=1500,
        n=1,
        stop=None,
    )
    # Only one choice is requested (n=1), so the answer is the first choice.
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()
def chatbot_response(message, history):
    """Answer one chat message and return the updated conversation.

    Args:
        message: Text the user typed.
        history: Previous ``(user_message, bot_response)`` pairs; may be
            ``None`` or empty for a fresh conversation.

    Returns:
        The updated history twice, because the click handler in this file
        lists the chatbot component twice as output.
    """
    # Work on a copy so the caller's list is never modified in place, and
    # tolerate a missing history.
    history = list(history) if history else []

    # A blank message has nothing to search for; skip retrieval and the
    # model call and leave the conversation as it is.
    if not message or not message.strip():
        return history, history

    hotel_infos = get_hotel_info(message)
    if hotel_infos:
        context = "\n".join(
            f"Hotel Name: {info['hotel_name']}, Score: {info['score']}, Description: {info['description']}, Reviews: {info['relevant_reviews']}"
            for info in hotel_infos
        )
        response = generate_answer(message, context)
    else:
        response = "No results found."

    history.append((message, response))
    return history, history
# Chat UI: a conversation view, a text box for the question and a send button.
with gr.Blocks() as interface:
    chatbot = gr.Chatbot(label="Hotel Search Chatbot")
    query_input = gr.Textbox(label="Ask me about hotels!")
    submit_button = gr.Button("Send")

    # chatbot_response returns the history twice, hence the chatbot
    # component appears twice in the outputs.
    submit_button.click(chatbot_response, inputs=[query_input, chatbot], outputs=[chatbot, chatbot])

# Start the web server as soon as the module is executed.
interface.launch()