Spaces:
Sleeping
Sleeping
File size: 6,274 Bytes
40fd038 34d69b0 65d40e5 40fd038 c2bc8b9 27e23d9 c2bc8b9 27e23d9 40fd038 c2bc8b9 b8baef5 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 1271c0b c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 65d40e5 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 8a369cb c2bc8b9 8a369cb 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 1271c0b c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 c2bc8b9 40fd038 65d40e5 40fd038 65d40e5 8a369cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# Standard library
import json
import os
import sqlite3

# Third-party
import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
# Define the SentenceTransformer model globally so it is loaded once and
# reused by every request (encoding is the hot path).
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Get the Groq API key from environment variables.
# SECURITY: never commit an API key in source — the previously hard-coded
# key was exposed in version control and must be rotated. Configure
# GROQ_API_KEY in the deployment environment (e.g. Space secrets).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
)
# Generate user embedding using the globally defined model
def get_user_embedding(query):
    """Encode *query* with the shared SentenceTransformer model.

    Falls back to a 384-dim zero vector (the model's output size) when
    encoding fails, so callers always receive a usable array.
    """
    try:
        return model.encode(query)
    except Exception as exc:
        print(f"Error generating embedding: {exc}")
        return np.zeros(384)  # Return a zero-vector of the correct size if there is an error
# Shared in-memory SQLite database.  uri=True is required for the "file:"
# URI syntax to be honoured — without it, sqlite3 treats the whole string
# as a filename and creates an on-disk database literally named
# "file::memory:?cache=shared" instead of a shared in-memory one.
# check_same_thread=False lets Gradio worker threads reuse the connection.
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row  # rows support column-name access like a dict
cur = con.cursor()

# create table if not exists
cur.execute("""
CREATE TABLE IF NOT EXISTS places (
    Place_Id INTEGER PRIMARY KEY,
    Place_Name TEXT NOT NULL,
    Description TEXT,
    Category TEXT,
    City TEXT,
    Price REAL,
    Rating REAL,
    Embedding TEXT
);
""")
# Load the tourism dataset; embeddings are computed separately below.
data = pd.read_csv('dataset/tourism_place.csv')

# Populate the table only on first run: one cheap existence probe instead
# of fetching every column of every row.
cur.execute("SELECT 1 FROM places LIMIT 1")
if cur.fetchone() is None:
    # Build all parameter tuples up front and insert in a single
    # executemany call — one C-level loop instead of a Python-level
    # execute per row with repeated chained pandas lookups.
    records = [
        (r.Place_Name, r.Description, r.Category, r.City,
         float(r.Price), float(r.Rating))
        for r in data.itertuples(index=False)
    ]
    cur.executemany("""
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
    """, records)
    con.commit()
# Compute and store embeddings for places using the same model
def compute_and_store_embeddings():
    """Encode each place's text fields and persist the vector as CSV text."""
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    for row in cur.fetchall():
        place_id, name, category, description, city = row
        vector = model.encode(f"{name} {category} {description} {city}")
        # Serialize the vector as comma-separated floats for the TEXT column.
        serialized = ','.join(str(component) for component in vector)
        cur.execute(
            "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
            (serialized, place_id),
        )
    # Single commit after all updates.
    con.commit()

compute_and_store_embeddings()
# Normalize user query using Groq VM
def normalize_query(user_query):
    """Ask the LLM to extract structured fields from the raw query.

    Returns the model's final reply line (expected to be
    "Place name, Category, Description, City") concatenated with the
    original query. On any API failure, falls back to the original query
    instead of "" so the embedding step downstream still has meaningful
    input rather than encoding an empty string.
    """
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{
                "role": "user",
                "content": f"""
                Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City.
                Return the response as: "Place name, Category, Description, City".
                """
            }]
        )
        # Keep only the last line of the reply — the model is instructed to
        # end with the comma-separated summary.
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        return normalized_user_query + str(user_query)
    except Exception as e:
        print(f"Error normalizing query: {e}")
        # Graceful degradation: retrieval still works on the raw query.
        return str(user_query)
# Generate user embedding
# NOTE: this re-defines (shadows) the identical helper defined earlier in
# the file; the duplicate is kept so this edit stands alone, but one copy
# should eventually be removed.
def get_user_embedding(query):
    """Encode *query*; on failure return a zero vector of the model's size.

    paraphrase-MiniLM-L6-v2 produces 384-dimensional embeddings. The
    previous 512-dim fallback would crash cosine_similarity against the
    384-dim vectors stored in the database.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(384)
# Find similar places
def get_similar_places(user_embedding):
    """Rank every stored place by cosine similarity to *user_embedding*.

    Returns a list of (row, similarity) pairs sorted by similarity (ties
    broken by Rating), highest first. Empty list when the table is empty.
    """
    rows = cur.execute("SELECT * FROM places").fetchall()
    if not rows:
        # Guard: cosine_similarity rejects an empty matrix.
        return []
    # Decode all stored vectors and score them in ONE vectorized call
    # instead of one sklearn call per row (was O(n) Python/sklearn
    # round-trips; now a single matrix multiply).
    matrix = np.array([[float(x) for x in row['Embedding'].split(',')] for row in rows])
    scores = cosine_similarity([user_embedding], matrix)[0]
    ranked_results = sorted(
        zip(rows, scores),
        key=lambda pair: (pair[1], pair[0]['Rating']),
        reverse=True,
    )
    return ranked_results
# Get top 10 destinations
def get_top_10_destinations(user_query):
    """Retrieve the 10 best-matching places for *user_query*.

    Returns a list of (row, similarity) pairs, or an Indonesian
    "not found" message string when nothing matches.
    """
    embedding = get_user_embedding(normalize_query(user_query))
    ranked = get_similar_places(embedding)
    if not ranked:
        return "Tidak ada tempat yang ditemukan."
    return ranked[:10]
# Generate response using Groq VM
def generate_response(user_query, top_places, temperature):
    """Present *top_places* to the user in Bahasa Indonesia via the LLM.

    top_places is a list of (row, similarity) pairs from
    get_similar_places. Returns the LLM reply, or an Indonesian error
    message on any failure.
    """
    try:
        # json.dumps escapes quotes/newlines inside the fields; the previous
        # hand-built f-string "JSON" produced malformed payloads whenever a
        # description contained a double quote.
        destinations_data = ", ".join(
            json.dumps({
                "name": place[0]["Place_Name"],
                "city": place[0]["City"],
                "category": place[0]["Category"],
                "rating": place[0]["Rating"],
                "description": place[0]["Description"],
            }, ensure_ascii=False)
            for place in top_places
        )
        system_prompt = f"""
        You are a tour guide assistant. Present the tourism recommendations to the user in Bahasa Indonesia.
        Only return maximum 5 places that suitable what user wants and provided the data in a clear and concise format. Only return the city that mentioned in \"{user_query}\".
        """
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."
# Main chatbot function
def chatbot(user_query, temperature):
    """End-to-end pipeline: retrieve candidates, then narrate the top 5."""
    candidates = get_top_10_destinations(user_query)
    # A plain string signals "nothing found" — pass the message straight through.
    if isinstance(candidates, str):
        return candidates
    return generate_response(user_query, candidates[:5], temperature)
# Define Gradio Interface: a text box for the query plus a temperature
# slider that is forwarded to the Groq completion call.
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        "text",
        gr.Slider(
            minimum=0,
            maximum=1,
            step=0.1,
            value=0.8,  # default sampling temperature for the LLM
            label="Temperature"
        )
    ],
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!"
)
# Launch the Gradio App
# NOTE: share=True exposes a public tunnel URL when run locally.
if __name__ == "__main__":
    iface.launch(share=True)