File size: 1,941 Bytes
d47e16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import streamlit as st
import faiss
import pickle
from transformers import AutoTokenizer, HuggingFaceEndpoint
import os

st.title("Chatbot Interface")

# User role input. The role is typed in by the user and later compared with
# the selected dataset; it is self-declared, not an authenticated identity.
user_role = st.text_input("Enter your role (Sales, Marketing, HR)")

query = st.text_input("Enter your query")

dataset = st.selectbox("Select Dataset", ["Sales", "Marketing", "HR"])

# Load Meta-Llama model using Hugging Face Endpoint
# NOTE(review): HuggingFaceEndpoint is imported from `transformers` at the top
# of this file, but the keyword arguments used here (repo_id,
# huggingfacehub_api_token, ...) look like LangChain's HuggingFaceEndpoint --
# confirm which package is intended and fix the import accordingly.
temperature = 0.7
max_new_tokens = 150

# Read the token from the environment. When the variable is unset this yields
# None rather than the literal string "None", so a bogus token is never sent
# to the endpoint.
hf_token = os.getenv("P_HF_TOKEN")

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    huggingfacehub_api_token=hf_token,
)

# Retrieve relevant data from vector database
if query and dataset:
    # Simple access control: the self-declared role must name the selected
    # dataset. Surrounding whitespace and letter case are ignored so that
    # input such as "sales " or "hr" matches "Sales" / "HR".
    if user_role.strip().casefold() == dataset.casefold():
        index_file = f'vector_db_{dataset}.index'
        metadata_file = f'metadata_{dataset}.pkl'

        if os.path.exists(index_file) and os.path.exists(metadata_file):
            index = faiss.read_index(index_file)
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. Only load metadata files produced by a trusted pipeline.
            with open(metadata_file, 'rb') as f:
                metadata = pickle.load(f)

            # Load pre-trained model for embeddings.
            # Imported here because the file's top-level imports do not bring
            # SentenceTransformer into scope.
            # NOTE: Streamlit re-executes the script on each interaction, so
            # the model is re-instantiated on every query.
            from sentence_transformers import SentenceTransformer

            model = SentenceTransformer('all-MiniLM-L6-v2')
            query_embedding = model.encode([query])

            # Search in the vector database
            _distances, indices = index.search(query_embedding, k=5)

            # FAISS fills missing neighbours with -1 when the index holds
            # fewer than k vectors; skip those so metadata[-1] is not
            # returned as if it were a real hit.
            results = [metadata[i] for i in indices[0] if i >= 0]
            st.write("Results:", results)

            # Use the retrieved data as context for LLM. Entries are passed
            # through str() so non-string metadata records do not break the
            # join (string entries are unchanged).
            context = " ".join(str(item) for item in results)
            response = llm(f"{context}\n\n{query}")
            st.write("Chatbot:", response)
        else:
            st.write("No data found for the selected dataset.")
    else:
        st.write("Access denied: You do not have permission to access this dataset.")