File size: 6,274 Bytes
40fd038
34d69b0
65d40e5
40fd038
 
 
 
 
 
 
c2bc8b9
 
 
 
27e23d9
c2bc8b9
27e23d9
40fd038
 
c2bc8b9
 
 
 
 
 
 
 
 
b8baef5
40fd038
 
 
 
 
 
c2bc8b9
 
40fd038
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
 
 
 
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
c2bc8b9
40fd038
 
 
 
c2bc8b9
40fd038
 
c2bc8b9
40fd038
 
 
 
1271c0b
c2bc8b9
40fd038
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
65d40e5
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
c2bc8b9
40fd038
 
8a369cb
 
c2bc8b9
 
8a369cb
 
 
 
40fd038
c2bc8b9
 
40fd038
 
 
 
 
c2bc8b9
40fd038
c2bc8b9
 
40fd038
 
c2bc8b9
40fd038
 
c2bc8b9
 
 
 
1271c0b
c2bc8b9
40fd038
c2bc8b9
40fd038
c2bc8b9
 
40fd038
 
 
 
 
 
c2bc8b9
 
 
 
40fd038
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
 
 
 
 
 
 
 
 
 
 
40fd038
 
 
65d40e5
40fd038
65d40e5
8a369cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Sentence-embedding model shared by place indexing and query encoding.
# paraphrase-MiniLM-L6-v2 outputs 384-dimensional vectors.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Groq API client.
# SECURITY FIX: the key was hard-coded in source (and should be revoked);
# read it from the environment as the original comment already claimed.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
)


# Generate user embedding using the globally defined model
# NOTE(review): this definition is shadowed by a later `get_user_embedding`
# further down the file; only the later one is live at runtime.
def get_user_embedding(query):
    """Encode *query* into a sentence embedding.

    Falls back to a 384-dim zero vector (the model's output size) when
    encoding fails, so callers always receive an array.
    """
    try:
        embedding = model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # Zero vector of the correct dimensionality keeps downstream math alive.
        embedding = np.zeros(384)
    return embedding


# Shared database connection.
# BUG FIX: without uri=True, sqlite3 treats "file::memory:?cache=shared" as a
# literal on-disk filename instead of a shared in-memory database — the URI
# must be parsed for cache=shared to take effect. check_same_thread=False
# allows access from Gradio worker threads.
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row  # rows become name-addressable (place['City'])
cur = con.cursor()

# Create the places table on first run; embeddings are persisted as a
# comma-separated string of floats in the Embedding TEXT column.
cur.execute("""
            CREATE TABLE IF NOT EXISTS places (
                Place_Id INTEGER PRIMARY KEY,
                Place_Name TEXT NOT NULL,
                Description TEXT,
                Category TEXT,
                City TEXT,
                Price REAL,
                Rating REAL,
                Embedding TEXT
            );
            """)

# Load the tourism dataset; columns: Place_Name, Description, Category, City,
# Price, Rating (plus others we ignore).
data = pd.read_csv('dataset/tourism_place.csv')

# Seed the table only when it is empty so a rerun does not duplicate rows.
# SELECT 1 ... LIMIT 1 is a cheap existence probe (the original fetched a
# whole row via SELECT *).
if cur.execute("SELECT 1 FROM places LIMIT 1").fetchone() is None:
    # Convert pandas/numpy scalars to plain Python types up front; sqlite3
    # cannot bind numpy numeric types directly.
    rows = list(zip(
        data['Place_Name'],
        data['Description'],
        data['Category'],
        data['City'],
        (float(p) for p in data['Price']),
        (float(r) for r in data['Rating']),
    ))
    # One executemany call runs the prepared statement in C instead of a
    # Python-level execute per row.
    cur.executemany("""
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """, rows)
    con.commit()

# Compute and store embeddings for places using the same model
def compute_and_store_embeddings():
    """Embed each place's name/category/description/city and persist it.

    The vector is serialized as a comma-separated string so it fits in the
    Embedding TEXT column.
    """
    rows = cur.execute(
        "SELECT Place_Id, Place_Name, Category, Description, City FROM places"
    ).fetchall()

    for row in rows:
        place_id, name, category, description, city = row[0], row[1], row[2], row[3], row[4]
        combined_text = f"{name} {category} {description} {city}"
        vector = model.encode(combined_text)
        serialized = ','.join(str(component) for component in vector)
        cur.execute("UPDATE places SET Embedding = ? WHERE Place_Id = ?", (serialized, place_id))
    con.commit()

compute_and_store_embeddings()

# Normalize user query using Groq VM
def normalize_query(user_query):
    """Ask the LLM to extract structured fields from *user_query*.

    Returns the extracted "Place name, Category, Description, City" line
    concatenated with the original query, so the user's own wording still
    contributes to the embedding.

    ROBUSTNESS FIX: on any API failure this now falls back to the raw query
    instead of "" — embedding an empty string made the similarity search
    meaningless whenever the API hiccuped.
    """
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{
                "role": "user",
                "content": f"""
                    Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City.
                    Return the response as: "Place name, Category, Description, City".
                """
            }]
        )
        # Keep only the last line of the completion (the summary line).
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        return normalized_user_query + str(user_query)
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return str(user_query)

# Generate user embedding
def get_user_embedding(query):
    """Encode the (normalized) user query with the shared sentence model.

    Returns a zero vector on failure so the caller can proceed.

    BUG FIX: the fallback was np.zeros(512), but paraphrase-MiniLM-L6-v2
    produces 384-dim vectors — a 512-dim fallback crashed cosine_similarity
    against the stored 384-dim place embeddings.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(384)

# Find similar places
def get_similar_places(user_embedding):
    """Rank all stored places by cosine similarity to *user_embedding*.

    Returns a list of (row, similarity) tuples sorted by similarity, then
    Rating, descending.

    ROBUSTNESS FIX: rows whose Embedding column is still NULL/empty (e.g.
    embedding computation failed or has not run) are skipped instead of
    crashing on .split().
    """
    similarities = []
    for place in cur.execute("SELECT * FROM places").fetchall():
        embedding_str = place['Embedding']
        if not embedding_str:
            continue  # no stored embedding for this row
        embedding = np.array([float(x) for x in embedding_str.split(',')])
        similarity = cosine_similarity([user_embedding], [embedding])[0][0]
        similarities.append((place, similarity))
    return sorted(similarities, key=lambda item: (item[1], item[0]['Rating']), reverse=True)

# Get top 10 destinations
def get_top_10_destinations(user_query):
    """Run the full retrieval pipeline and return the 10 best matches.

    Pipeline: LLM query normalization -> sentence embedding -> cosine-ranked
    places. Returns a user-facing message string when nothing matches.
    """
    normalized = normalize_query(user_query)
    query_embedding = get_user_embedding(normalized)
    ranked = get_similar_places(query_embedding)
    if not ranked:
        return "Tidak ada tempat yang ditemukan."
    return ranked[:10]

# Generate response using Groq VM
def generate_response(user_query, top_places, temperature):
    """Turn ranked (row, similarity) tuples into a Bahasa Indonesia answer.

    Serializes the top places as JSON for the LLM prompt and asks it to
    present at most 5 suitable places. Returns an apology string on failure.

    BUG FIX: the destination payload was hand-built with f-strings, which
    produced invalid JSON whenever a description contained a double quote or
    newline; json.dumps escapes fields correctly.
    """
    try:
        destinations_data = json.dumps(
            [
                {
                    "name": place[0]["Place_Name"],
                    "city": place[0]["City"],
                    "category": place[0]["Category"],
                    "rating": place[0]["Rating"],
                    "description": place[0]["Description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )
        system_prompt = f"""
            You are a tour guide assistant. Present the tourism recommendations to the user in Bahasa Indonesia.
            Only return maximum 5 places that suitable what user wants and provided the data in a clear and concise format. Only return the city that mentioned in \"{user_query}\".
        """
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."

# Main chatbot function
def chatbot(user_query, temperature):
    """Gradio entry point: return a recommendation text for *user_query*."""
    ranked = get_top_10_destinations(user_query)
    if isinstance(ranked, str):
        # The pipeline already produced a user-facing message (no results).
        return ranked
    # Only the 5 best candidates go to the LLM for presentation.
    return generate_response(user_query, ranked[:5], temperature)

# Gradio UI: a free-text query box plus a sampling-temperature slider,
# both feeding chatbot(); output is plain text.
temperature_slider = gr.Slider(
    minimum=0,
    maximum=1,
    step=0.1,
    value=0.8,
    label="Temperature",
)

iface = gr.Interface(
    fn=chatbot,
    inputs=["text", temperature_slider],
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)

# Launch the Gradio app (share=True also opens a public tunnel URL).
if __name__ == "__main__":
    iface.launch(share=True)