gmustafa413 committed on
Commit
b139cd9
·
verified ·
1 Parent(s): e81b7d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import numpy as np
3
+ import faiss
4
+ from sentence_transformers import SentenceTransformer
5
+ from datasets import load_dataset
6
+ import gradio as gr
7
+ import torch
8
+ from tqdm import tqdm
9
+ from groq import Groq
10
+
11
# Load the dataset and echo its first training row so the record layout
# shows up in the startup log.
dataset = load_dataset("midrees2806/7K_Dataset")
print("Dataset sample:", dataset['train'][0])

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sentence-embedding model, moved onto the selected device.
model = SentenceTransformer('all-MiniLM-L6-v2').to(device)
19
+
20
def generate_embeddings_batch(texts):
    """Encode ``texts`` with the module-level model and return a NumPy array.

    The model encodes on ``device`` in internal batches of 1024; the
    resulting tensor is moved to the CPU before being converted to NumPy.
    """
    encoded = model.encode(
        texts,
        batch_size=1024,
        convert_to_tensor=True,
        device=device,
    )
    return encoded.cpu().numpy()
23
+
24
# Collect the raw text of every training row.
train_dataset = dataset['train']
texts = [row['text'] for row in train_dataset]

# Embed the texts one slice of ``batch_size`` at a time (with a progress bar)
# and stack the per-slice arrays into a single 2-D matrix.
batch_size = 1024
chunked_embeddings = np.vstack([
    generate_embeddings_batch(texts[start:start + batch_size])
    for start in tqdm(range(0, len(texts), batch_size), desc="Generating embeddings")
])

# Build a flat L2 FAISS index whose width matches the embedding dimension
# and load every embedding into it.
dimension = chunked_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(chunked_embeddings)
41
+
42
# Initialize Groq client.
# SECURITY: never embed the API key in source. With no ``api_key`` argument
# the Groq SDK reads the key from the ``GROQ_API_KEY`` environment variable
# (on Hugging Face Spaces, define it as a Space secret) and raises at startup
# if it is missing.
# NOTE(review): the key that was previously committed on this line is public
# in the repository history and must be revoked and replaced.
client = Groq()
44
+
45
def get_groq_response(query):
    """Answer ``query`` with the Groq chat model, grounded on retrieved text.

    The passages returned by ``search_in_faiss`` are joined with newlines and
    embedded, together with the question, in a single user message. Any
    exception raised along the way is printed and replaced by a generic
    apology string, so this function always returns text.
    """
    try:
        # Retrieve supporting passages; only the text part of each hit is used.
        hits = search_in_faiss(query)
        context = "\n".join(hit[0] for hit in hits)

        # Prompt that instructs the model to rely on the retrieved context.
        prompt = f"""
You are an expert assistant for University of Education Lahore and its sub-campuses ONLY.
You must ONLY use the following context to answer questions. If the answer isn't in the context,
say "I don't have specific information about that in the University of Education Lahore's data provided to me.",
and answer the question generally like qrok ai and should feel like real human talking to a real human.

Context:
{context}

Question: {query}

Answer:"""

        conversation = [{"role": "user", "content": prompt}]
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-70b-8192",
            temperature=0.3,
            max_tokens=1024,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print(f"Error in Groq response: {err}")
        return "I encountered an error while processing your request."
76
+
77
def search_in_faiss(query, k=3):
    """Return the dataset passages nearest to ``query`` in embedding space.

    Args:
        query: Free-text question to embed and look up.
        k: Maximum number of passages to return. Defaults to 3, the value
            that used to be hard-coded here.

    Returns:
        A list of ``(text, distance)`` tuples in the order FAISS reports
        them. ``distance`` is the raw value from ``index.search`` (the index
        is an ``IndexFlatL2``), so smaller means closer. The list is shorter
        than ``k`` when the index holds fewer than ``k`` vectors.
    """
    query_embedding = model.encode([query], convert_to_tensor=True, device=device).cpu().numpy()
    distances, indices = index.search(query_embedding, k)
    train_rows = dataset['train']
    return [
        (train_rows[int(idx)]['text'], float(dist))
        for idx, dist in zip(indices[0], distances[0])
        # FAISS fills missing neighbours with index -1; without this guard
        # that value would be used as a negative index and silently pick
        # the last dataset row.
        if idx >= 0
    ]
81
+
82
def respond(message, chat_history, show_sources=False):
    """Gradio callback: answer ``message`` and append the exchange to the chat.

    Args:
        message: Text the user submitted.
        chat_history: Existing list of ``(user, bot)`` pairs from the Chatbot
            component; ``None`` is treated as an empty history.
        show_sources: When true, the retrieved dataset passages are listed
            ahead of the model answer. Defaults to False, which keeps the
            reply the UI has always displayed (model answer only) and avoids
            a second embedding + FAISS search per message.

    Returns:
        ``("", updated_history)`` - the empty string clears the textbox.
    """
    history = list(chat_history or [])

    # Nothing to answer: skip retrieval and the LLM call entirely.
    if not message or not message.strip():
        return "", history

    try:
        sections = []
        if show_sources:
            sections.append("**Relevant Information from Dataset:**\n\n")
            # The second tuple element is a distance (smaller = closer),
            # so it is labelled as such rather than as a similarity.
            sections.extend(
                f"- {text} (Distance: {distance:.4f})\n\n"
                for text, distance in search_in_faiss(message)
            )
        sections.append("\n**Model Response:**\n\n" + get_groq_response(message))
        return "", history + [(message, "".join(sections))]
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        print(f"Error: {e}")
        return "", history + [(message, f"Error processing request: {e}")]
96
+
97
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header: title plus two centred description lines.
    for banner in (
        "# <center>UoE Chatbot</center>",
        "<center>University of Education Lahore Information Bot</center>",
        "<center>This bot only answers questions about University of Education Lahore and its sub-campuses</center>",
    ):
        gr.Markdown(banner)

    chatbot = gr.Chatbot(height=500, bubble_full_width=False)
    # NOTE(review): the original indentation was lost, so it is assumed that
    # all three controls share this row - confirm the placement of clear_btn.
    with gr.Row():
        msg = gr.Textbox(
            label="Type your message here...",
            placeholder="Ask about University of Education Lahore...",
            scale=7,
        )
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear Chat")

    # Pressing Enter in the textbox and clicking Submit run the same handler
    # with the same inputs and outputs.
    for bind in (msg.submit, submit_btn.click):
        bind(respond, [msg, chatbot], [msg, chatbot])

    # Clearing writes None into the chatbot component.
    clear_btn.click(lambda: None, None, chatbot, queue=False)

demo.launch()