import os
from groq import Groq
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import sqlite3
import pandas as pd
from tqdm import tqdm
# Define the SentenceTransformer model globally
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Get the Groq API key from environment variables
client = Groq(
    api_key=os.getenv("GROQ_API_KEY")
)
# Generate user embedding using the globally defined model
def get_user_embedding(query):
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(384)  # Return a zero-vector of the correct size (384) if there is an error
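# Optional sanity check (a sketch; uncomment to verify): paraphrase-MiniLM-L6-v2
# produces 384-dimensional sentence embeddings, matching the zero-vector
# fallback size above.
# assert model.get_sentence_embedding_dimension() == 384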
# Open a shared in-memory SQLite database (uri=True is required so the URI filename is not treated as a plain file path)
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row
cur = con.cursor()
# create table if not exists
cur.execute("""
CREATE TABLE IF NOT EXISTS places (
Place_Id INTEGER PRIMARY KEY,
Place_Name TEXT NOT NULL,
Description TEXT,
Category TEXT,
City TEXT,
Price REAL,
Rating REAL,
Embedding TEXT
);
""")
data = pd.read_csv('tourism_place.csv')
# check if the table is empty
cur.execute("SELECT * FROM places")
if cur.fetchone() is None:
# Store the places in the database
for i in tqdm(range(len(data))):
cur.execute("""
INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
VALUES (?, ?, ?, ?, ?, ?)
""", (data['Place_Name'][i], data['Description'][i], data['Category'][i], data['City'][i], float(data['Price'][i]), float(data['Rating'][i]))
)
con.commit()
# Compute and store embeddings for places using the same model
def compute_and_store_embeddings():
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    for place in places:
        # Concatenate name, category, description, and city into one text to embed
        text = f"{place[1]} {place[2]} {place[3]} {place[4]}"
        embedding = model.encode(text)
        # Store the embedding as a comma-separated string in the Embedding column
        embedding_str = ','.join([str(x) for x in embedding])
        cur.execute("UPDATE places SET Embedding = ? WHERE Place_Id = ?", (embedding_str, place[0]))
    con.commit()
compute_and_store_embeddings()
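# Note: the loop above encodes one row at a time. A faster variant (a sketch,
# not used below) would batch-encode all rows in a single call, since
# SentenceTransformer.encode also accepts a list of strings:
#
# def compute_and_store_embeddings_batched():
#     rows = cur.execute(
#         "SELECT Place_Id, Place_Name, Category, Description, City FROM places"
#     ).fetchall()
#     texts = [f"{r[1]} {r[2]} {r[3]} {r[4]}" for r in rows]
#     for row, emb in zip(rows, model.encode(texts)):
#         cur.execute("UPDATE places SET Embedding = ? WHERE Place_Id = ?",
#                     (','.join(str(x) for x in emb), row[0]))
#     con.commit()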
# Normalize user query using llama3.1
def normalize_query(user_query):
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant with experience as a tour guide."
                },
                {
                    "role": "user",
                    "content": f"""
                    Please analyze the query: \"{user_query}\", and extract the place name, category, description, and city.
                    Return the response as: "Place name, Category, Description, City".
                    """
                }
            ]
        )
        # Keep only the last line of the model output, then append the original query
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        return normalized_user_query + " " + str(user_query)
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return ""
# Find similar places
def get_similar_places(user_embedding):
    similarities = []
    res = cur.execute("SELECT * FROM places").fetchall()
    for place in res:
        # Skip rows whose embedding has not been computed yet
        if not place['Embedding']:
            continue
        try:
            # Parse the comma-separated embedding string back into a vector
            embedding = np.array([float(x) for x in place['Embedding'].split(',')])
            similarity = cosine_similarity([user_embedding], [embedding])[0][0]
            similarities.append((place, similarity))
        except Exception as e:
            print(f"Error comparing embedding: {e}")
            continue
    # Rank by similarity, then by rating as a tie-breaker
    ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)
    return ranked_results
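# Optional optimization (a sketch, not wired in): stacking the stored embeddings
# into one matrix lets cosine_similarity score every place in a single call
# instead of one call per row:
#
# def get_similar_places_vectorized(user_embedding):
#     rows = [r for r in cur.execute("SELECT * FROM places") if r['Embedding']]
#     matrix = np.array([[float(x) for x in r['Embedding'].split(',')] for r in rows])
#     scores = cosine_similarity([user_embedding], matrix)[0]
#     return sorted(zip(rows, scores), key=lambda p: (p[1], p[0]['Rating']), reverse=True)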
# Get top 10 destinations
def get_top_10_destinations(user_query):
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)
    if not similar_places:
        return "Tidak ada tempat yang ditemukan."  # "No places were found."
    return similar_places[:10]
# Generate response using llama3.1
def generate_response(user_query, top_places, temperature):
    try:
        # Serialize the top places into a JSON-like string for the prompt
        destinations_data = ", ".join([
            f'{{"name": "{place[0]["Place_Name"]}", "city": "{place[0]["City"]}", "category": "{place[0]["Category"]}", "rating": {place[0]["Rating"]}, "description": "{place[0]["Description"]}"}}'
            for place in top_places
        ])
        system_prompt = f"""
        You are a tour guide assistant. Present the tourism recommendations to the user in Bahasa Indonesia.
        Return at most 5 places that suit what the user wants, and present the data in a clear and concise format.
        Only return places in the city mentioned in \"{user_query}\".
        """
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                # "Here are the recommendations based on the data we have: ..."
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data yang kita miliki: {destinations_data}"}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."  # "Sorry, an error occurred while generating recommendations."
# Main chatbot function
def chatbot(user_query, temperature):
    top_places = get_top_10_destinations(user_query)
    if isinstance(top_places, str):  # a string here is the "no places found" message
        return top_places
    return generate_response(user_query, top_places[:5], temperature)
# Gradio Interface
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        gr.Textbox(lines=2, label="Pertanyaan Wisata"),  # "Tourism question"
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.6, label="Temperature")
    ],
    outputs="text",
    title="Tourism Recommendation System",
    description="Masukkan kota mana yang ingin kamu kunjungi dan dapatkan rekomendasi tempat terbaik!"  # "Enter the city you want to visit and get recommendations for the best places!"
)
if __name__ == "__main__":
    iface.launch(share=True)
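# To run locally (assuming this file is saved as app.py, GROQ_API_KEY is set,
# and tourism_place.csv is in the working directory):
#   export GROQ_API_KEY=<your key>
#   python app.py
# On Hugging Face Spaces the key can be stored as a Space secret; share=True
# only matters when running outside Spaces.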