File size: 6,274 Bytes
40fd038
34d69b0
65d40e5
40fd038
 
 
 
 
 
 
c2bc8b9
 
 
 
27e23d9
c2bc8b9
27e23d9
40fd038
 
c2bc8b9
 
 
 
 
 
 
 
 
b8baef5
40fd038
 
 
 
 
 
c2bc8b9
 
40fd038
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
 
 
 
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
c2bc8b9
40fd038
 
 
 
c2bc8b9
40fd038
 
c2bc8b9
40fd038
 
 
 
1271c0b
c2bc8b9
40fd038
 
 
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
 
 
65d40e5
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
40fd038
c2bc8b9
40fd038
 
8a369cb
 
c2bc8b9
 
8a369cb
 
 
 
40fd038
c2bc8b9
 
40fd038
 
 
 
 
c2bc8b9
40fd038
c2bc8b9
 
40fd038
 
c2bc8b9
40fd038
 
c2bc8b9
 
 
 
1271c0b
c2bc8b9
40fd038
c2bc8b9
40fd038
c2bc8b9
 
40fd038
 
 
 
 
 
c2bc8b9
 
 
 
40fd038
c2bc8b9
40fd038
 
 
 
 
c2bc8b9
 
 
 
 
 
 
 
 
 
 
40fd038
 
 
65d40e5
40fd038
65d40e5
8a369cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Sentence-embedding model shared by place indexing and query encoding.
# paraphrase-MiniLM-L6-v2 outputs 384-dimensional vectors.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Groq API client.
# SECURITY FIX: the key was hard-coded in source (and should be revoked);
# read it from the environment as the original comment already claimed.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
)


# Generate user embedding using the globally defined model
# NOTE(review): this definition is shadowed by a later `get_user_embedding`
# further down the file; only the later one is live at runtime.
def get_user_embedding(query):
    """Encode *query* into a sentence embedding.

    Falls back to a 384-dim zero vector (the model's output size) when
    encoding fails, so callers always receive an array.
    """
    try:
        embedding = model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # Zero vector of the correct dimensionality keeps downstream math alive.
        embedding = np.zeros(384)
    return embedding


# Shared database connection.
# BUG FIX: without uri=True, sqlite3 treats "file::memory:?cache=shared" as a
# literal on-disk filename instead of a shared in-memory database — the URI
# must be parsed for cache=shared to take effect. check_same_thread=False
# allows access from Gradio worker threads.
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row  # rows become name-addressable (place['City'])
cur = con.cursor()

# Create the places table on first run; embeddings are persisted as a
# comma-separated string of floats in the Embedding TEXT column.
cur.execute("""
            CREATE TABLE IF NOT EXISTS places (
                Place_Id INTEGER PRIMARY KEY,
                Place_Name TEXT NOT NULL,
                Description TEXT,
                Category TEXT,
                City TEXT,
                Price REAL,
                Rating REAL,
                Embedding TEXT
            );
            """)

# Load the tourism dataset; columns: Place_Name, Description, Category, City,
# Price, Rating (plus others we ignore).
data = pd.read_csv('dataset/tourism_place.csv')

# Seed the table only when it is empty so a rerun does not duplicate rows.
# SELECT 1 ... LIMIT 1 is a cheap existence probe (the original fetched a
# whole row via SELECT *).
if cur.execute("SELECT 1 FROM places LIMIT 1").fetchone() is None:
    # Convert pandas/numpy scalars to plain Python types up front; sqlite3
    # cannot bind numpy numeric types directly.
    rows = list(zip(
        data['Place_Name'],
        data['Description'],
        data['Category'],
        data['City'],
        (float(p) for p in data['Price']),
        (float(r) for r in data['Rating']),
    ))
    # One executemany call runs the prepared statement in C instead of a
    # Python-level execute per row.
    cur.executemany("""
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """, rows)
    con.commit()

# Compute and store embeddings for places using the same model
def compute_and_store_embeddings():
    """Embed each place's name/category/description/city and persist it.

    The vector is serialized as a comma-separated string so it fits in the
    Embedding TEXT column.
    """
    rows = cur.execute(
        "SELECT Place_Id, Place_Name, Category, Description, City FROM places"
    ).fetchall()

    for row in rows:
        place_id, name, category, description, city = row[0], row[1], row[2], row[3], row[4]
        combined_text = f"{name} {category} {description} {city}"
        vector = model.encode(combined_text)
        serialized = ','.join(str(component) for component in vector)
        cur.execute("UPDATE places SET Embedding = ? WHERE Place_Id = ?", (serialized, place_id))
    con.commit()

compute_and_store_embeddings()

# Normalize user query using Groq VM
def normalize_query(user_query):
    """Ask the LLM to extract structured fields from *user_query*.

    Returns the extracted "Place name, Category, Description, City" line
    concatenated with the original query, so the user's own wording still
    contributes to the embedding.

    ROBUSTNESS FIX: on any API failure this now falls back to the raw query
    instead of "" — embedding an empty string made the similarity search
    meaningless whenever the API hiccuped.
    """
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{
                "role": "user",
                "content": f"""
                    Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City.
                    Return the response as: "Place name, Category, Description, City".
                """
            }]
        )
        # Keep only the last line of the completion (the summary line).
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        return normalized_user_query + str(user_query)
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return str(user_query)

# Generate user embedding
def get_user_embedding(query):
    """Encode the (normalized) user query with the shared sentence model.

    Returns a zero vector on failure so the caller can proceed.

    BUG FIX: the fallback was np.zeros(512), but paraphrase-MiniLM-L6-v2
    produces 384-dim vectors — a 512-dim fallback crashed cosine_similarity
    against the stored 384-dim place embeddings.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(384)

# Find similar places
def get_similar_places(user_embedding):
    """Rank all stored places by cosine similarity to *user_embedding*.

    Returns a list of (row, similarity) tuples sorted by similarity, then
    Rating, descending.

    ROBUSTNESS FIX: rows whose Embedding column is still NULL/empty (e.g.
    embedding computation failed or has not run) are skipped instead of
    crashing on .split().
    """
    similarities = []
    for place in cur.execute("SELECT * FROM places").fetchall():
        embedding_str = place['Embedding']
        if not embedding_str:
            continue  # no stored embedding for this row
        embedding = np.array([float(x) for x in embedding_str.split(',')])
        similarity = cosine_similarity([user_embedding], [embedding])[0][0]
        similarities.append((place, similarity))
    return sorted(similarities, key=lambda item: (item[1], item[0]['Rating']), reverse=True)

# Get top 10 destinations
def get_top_10_destinations(user_query):
    """Run the full retrieval pipeline and return the 10 best matches.

    Pipeline: LLM query normalization -> sentence embedding -> cosine-ranked
    places. Returns a user-facing message string when nothing matches.
    """
    normalized = normalize_query(user_query)
    query_embedding = get_user_embedding(normalized)
    ranked = get_similar_places(query_embedding)
    if not ranked:
        return "Tidak ada tempat yang ditemukan."
    return ranked[:10]

# Generate response using Groq VM
def generate_response(user_query, top_places, temperature):
    """Turn ranked (row, similarity) tuples into a Bahasa Indonesia answer.

    Serializes the top places as JSON for the LLM prompt and asks it to
    present at most 5 suitable places. Returns an apology string on failure.

    BUG FIX: the destination payload was hand-built with f-strings, which
    produced invalid JSON whenever a description contained a double quote or
    newline; json.dumps escapes fields correctly.
    """
    try:
        destinations_data = json.dumps(
            [
                {
                    "name": place[0]["Place_Name"],
                    "city": place[0]["City"],
                    "category": place[0]["Category"],
                    "rating": place[0]["Rating"],
                    "description": place[0]["Description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )
        system_prompt = f"""
            You are a tour guide assistant. Present the tourism recommendations to the user in Bahasa Indonesia.
            Only return maximum 5 places that suitable what user wants and provided the data in a clear and concise format. Only return the city that mentioned in \"{user_query}\".
        """
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ],
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."

# Main chatbot function
def chatbot(user_query, temperature):
    """Gradio entry point: return a recommendation text for *user_query*."""
    ranked = get_top_10_destinations(user_query)
    if isinstance(ranked, str):
        # The pipeline already produced a user-facing message (no results).
        return ranked
    # Only the 5 best candidates go to the LLM for presentation.
    return generate_response(user_query, ranked[:5], temperature)

# Gradio UI: a free-text query box plus a sampling-temperature slider,
# both feeding chatbot(); output is plain text.
temperature_slider = gr.Slider(
    minimum=0,
    maximum=1,
    step=0.1,
    value=0.8,
    label="Temperature",
)

iface = gr.Interface(
    fn=chatbot,
    inputs=["text", temperature_slider],
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)

# Launch the Gradio app (share=True also opens a public tunnel URL).
if __name__ == "__main__":
    iface.launch(share=True)