Spaces:
Sleeping
Sleeping
import os | |
from groq import Groq | |
import gradio as gr | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import sqlite3 | |
import pandas as pd | |
from tqdm import tqdm | |
# Sentence-embedding model shared by every encode call in this module.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Groq client. The API key is read from the GROQ_API_KEY environment variable
# so that no credential is stored in the source file.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
# Encode a user query with the module-level SentenceTransformer model
def get_user_embedding(query):
    """Return the embedding of ``query`` produced by the shared ``model``.

    If encoding raises, the error is printed and a zero vector with 384
    elements is returned instead.
    """
    try:
        vector = model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # Fallback value: an all-zero vector of length 384
        return np.zeros(384)
    return vector
# Shared in-memory SQLite database. The name is a SQLite URI, so uri=True is
# required; without it the string is used as a literal on-disk filename.
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
# Rows can be read by column name (e.g. row['Embedding']) as well as by index
con.row_factory = sqlite3.Row
cur = con.cursor()

# Create the places table if it does not exist yet
cur.execute("""
CREATE TABLE IF NOT EXISTS places (
    Place_Id INTEGER PRIMARY KEY,
    Place_Name TEXT NOT NULL,
    Description TEXT,
    Category TEXT,
    City TEXT,
    Price REAL,
    Rating REAL,
    Embedding TEXT
);
""")

data = pd.read_csv('dataset/tourism_place.csv')

# Seed the table from the CSV only when it holds no rows yet
cur.execute("SELECT 1 FROM places LIMIT 1")
if cur.fetchone() is None:
    # Price and Rating are converted to plain Python floats before binding
    place_rows = (
        (name, description, category, city, float(price), float(rating))
        for name, description, category, city, price, rating in zip(
            data['Place_Name'],
            data['Description'],
            data['Category'],
            data['City'],
            data['Price'],
            data['Rating'],
        )
    )
    cur.executemany("""
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
    """, tqdm(place_rows, total=len(data)))
con.commit()
# Compute and store embeddings for places using the same model
def compute_and_store_embeddings():
    """Encode every row of ``places`` and save the vector in its ``Embedding`` column.

    The text encoded for a place is its name, category, description and city
    joined by single spaces. Each vector is stored as a comma-separated string
    of its components. All texts are encoded in a single ``model.encode`` call
    and written back with one ``executemany``, followed by one commit.
    Does nothing when the table has no rows.
    """
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    if not places:
        return

    texts = [f"{place[1]} {place[2]} {place[3]} {place[4]}" for place in places]
    # Passing the whole list lets the model batch the work internally
    embeddings = model.encode(texts)

    updates = [
        (','.join(str(x) for x in embedding), place[0])
        for place, embedding in zip(places, embeddings)
    ]
    cur.executemany("UPDATE places SET Embedding = ? WHERE Place_Id = ?", updates)
    con.commit()


# Populate the Embedding column at import time
compute_and_store_embeddings()
# Normalize user query using the Groq chat completion API
def normalize_query(user_query):
    """Expand a free-text query with fields extracted by the LLM.

    The model is asked to extract place name, category, description and city
    from ``user_query``. The last line of its reply is joined to the original
    query with a single space, so the two texts do not run together.

    Args:
        user_query: The text typed by the user.

    Returns:
        The extracted fields followed by the original query. If the query is
        blank, an empty string is returned without calling the API. If the API
        call fails, the error is printed and the original query is returned
        unchanged so the caller can still search with it.
    """
    raw_query = str(user_query)
    if not raw_query.strip():
        return ""
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{
                "role": "user",
                "content": f"""
Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City.
Return the response as: "Place name, Category, Description, City".
"""
            }]
        )
        # Only the last line of the reply is used
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return raw_query
    return f"{normalized_user_query} {raw_query}".strip()
# Generate user embedding
def get_user_embedding(query):
    """Return the embedding of ``query`` from the shared ``model``.

    Args:
        query: Text to encode.

    Returns:
        The vector returned by ``model.encode``. If encoding raises, the error
        is printed and a zero vector is returned instead.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # 384 matches the size used by the other fallback in this file; a
        # different length would fail the similarity comparison on shape.
        return np.zeros(384)
# Find similar places
def get_similar_places(user_embedding):
    """Rank every stored place by cosine similarity to ``user_embedding``.

    Args:
        user_embedding: 1-D vector with the same length as the stored
            place embeddings.

    Returns:
        A list of ``(row, similarity)`` tuples sorted by similarity and then
        by the row's ``Rating``, both descending. Rows without a stored
        embedding are skipped. Returns an empty list when nothing can be scored.
    """
    res = cur.execute("SELECT * FROM places").fetchall()
    # A NULL or empty Embedding cannot be parsed, so those rows are left out
    candidates = [place for place in res if place['Embedding']]
    if not candidates:
        return []

    # Parse the comma-separated strings into one (n_places, dim) matrix so the
    # similarity is computed in a single call instead of once per row
    place_matrix = np.array(
        [place['Embedding'].split(',') for place in candidates],
        dtype=float,
    )
    scores = cosine_similarity([user_embedding], place_matrix)[0]

    return sorted(
        zip(candidates, scores),
        key=lambda pair: (pair[1], pair[0]['Rating']),
        reverse=True,
    )
# Get top 10 destinations
def get_top_10_destinations(user_query):
    """Return up to ten best-matching places for ``user_query``.

    The query is normalized, embedded and compared against the stored places.
    When the ranking is empty, the message "Tidak ada tempat yang ditemukan."
    is returned instead of a list.
    """
    query_vector = get_user_embedding(normalize_query(user_query))
    ranked = get_similar_places(query_vector)
    return ranked[:10] if ranked else "Tidak ada tempat yang ditemukan."
# Generate response using the Groq chat completion API
def generate_response(user_query, top_places, temperature):
    """Ask the LLM to present the recommended places in Bahasa Indonesia.

    Args:
        user_query: The original text typed by the user; quoted in the
            system prompt so the model can restrict results to that city.
        top_places: Sequence of ``(row, similarity)`` pairs where ``row``
            supports lookup by the keys ``Place_Name``, ``City``,
            ``Category``, ``Rating`` and ``Description``.
        temperature: Sampling temperature passed to the completion call.

    Returns:
        The text of the model's reply, or a fixed apology message if anything
        in the process raises.
    """
    import json

    try:
        # json.dumps escapes quotes and newlines inside the text fields, which
        # a hand-built string would pass through as malformed JSON
        destinations_data = json.dumps(
            [
                {
                    "name": place[0]["Place_Name"],
                    "city": place[0]["City"],
                    "category": place[0]["Category"],
                    "rating": place[0]["Rating"],
                    "description": place[0]["Description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )
        system_prompt = f"""
You are a tour guide assistant. Present the tourism recommendations to the user in Bahasa Indonesia.
Only return maximum 5 places that suitable what user wants and provided the data in a clear and concise format. Only return the city that mentioned in \"{user_query}\".
"""
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."
# Main chatbot function
def chatbot(user_query, temperature):
    """Answer ``user_query`` with LLM-written recommendations.

    Looks up the best-matching places; if the lookup yields a message string
    instead of a list, that string is returned as-is. Otherwise the first five
    places are passed to ``generate_response`` along with ``temperature``.
    """
    lookup_result = get_top_10_destinations(user_query)
    if not isinstance(lookup_result, str):
        return generate_response(user_query, lookup_result[:5], temperature)
    return lookup_result
# Define Gradio Interface: a text box for the query and a temperature slider
_temperature_slider = gr.Slider(
    minimum=0,
    maximum=1,
    step=0.1,
    value=0.8,
    label="Temperature",
)

iface = gr.Interface(
    fn=chatbot,
    inputs=["text", _temperature_slider],
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)

# Launch the Gradio App only when run as a script
if __name__ == "__main__":
    iface.launch(share=True)