gmustafa413 committed on
Commit
95d666a
·
verified ·
1 Parent(s): 09cf050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -60
app.py CHANGED
@@ -6,7 +6,8 @@ import faiss
6
  from sentence_transformers import SentenceTransformer
7
  from datasets import load_dataset
8
  from dotenv import load_dotenv
9
- import threading
 
10
 
11
  # Load environment variables
12
  load_dotenv()
@@ -24,7 +25,7 @@ class GeminiRAGSystem:
24
  self.chunks = []
25
  self.dataset_loaded = False
26
  self.loading_error = None
27
- self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")
28
 
29
  # Initialize embedding model
30
  try:
@@ -36,44 +37,40 @@ class GeminiRAGSystem:
36
  if self.gemini_api_key:
37
  genai.configure(api_key=self.gemini_api_key)
38
 
39
- # Start dataset loading in background
40
- self.load_dataset_in_background()
41
 
42
- def load_dataset_in_background(self):
43
- """Load dataset in a background thread"""
44
- def load_task():
45
- try:
46
- # Load dataset directly
47
- dataset = load_dataset(
48
- DATASET_NAME,
49
- split='train',
50
- download_mode="force_redownload" # Fixes extraction error
51
- )
52
-
53
- # Process dataset
54
- if 'text' in dataset.features:
55
- self.chunks = dataset['text'][:1000] # Limit to first 1000 entries
56
- elif 'context' in dataset.features:
57
- self.chunks = dataset['context'][:1000]
58
- else:
59
- raise ValueError("Dataset must have 'text' or 'context' field")
60
-
61
- # Create embeddings
62
- embeddings = self.embedding_model.encode(
63
- self.chunks,
64
- show_progress_bar=False,
65
- convert_to_numpy=True
66
- )
67
- self.index = faiss.IndexFlatL2(embeddings.shape[1])
68
- self.index.add(embeddings.astype('float32'))
69
-
70
- self.dataset_loaded = True
71
- except Exception as e:
72
- self.loading_error = str(e)
73
- print(f"Dataset loading failed: {str(e)}")
74
-
75
- # Start the loading thread
76
- threading.Thread(target=load_task, daemon=True).start()
77
 
78
  def get_relevant_context(self, query: str) -> str:
79
  """Retrieve most relevant chunks"""
@@ -96,10 +93,10 @@ class GeminiRAGSystem:
96
  """Generate response with robust error handling"""
97
  if not self.dataset_loaded:
98
  if self.loading_error:
99
- return f"Dataset loading failed: {self.loading_error}"
100
- return "Dataset is still loading, please wait..."
101
  if not self.gemini_api_key:
102
- return "Please set your Gemini API key in environment variables"
103
 
104
  context = self.get_relevant_context(query)
105
  if not context:
@@ -116,7 +113,7 @@ class GeminiRAGSystem:
116
  response = model.generate_content(prompt)
117
  return response.text
118
  except Exception as e:
119
- return f"API Error: {str(e)}"
120
 
121
  # Initialize system
122
  try:
@@ -129,31 +126,29 @@ with gr.Blocks(title="UE Chatbot") as app:
129
  gr.Markdown("# UE 24 Hour Service")
130
 
131
  with gr.Row():
132
- chatbot = gr.Chatbot(height=500, label="Chat History",
133
- avatar_images=(None, (None, "https://www.google.com/imgres?q=ue%20lahore%20uoe%20image%20logo&imgurl=https%3A%2F%2Fblogger.googleusercontent.com%2Fimg%2Fb%2FR29vZ2xl%2FAVvXsEglo3Aj2gTXX2j9LIG89CiFB3uj8hS4pkueQtWZrBpLkHk00zbeXuMU1soAxFWz0tenyRIwL7-YxTS9g14DSQnvgGFRGqoWMP2otVtbKbJvQ28FVpUiAzkScgA57_EQiVZ_FOSuS9cGvWg%2Fw1200-h630-p-k-no-nu%2FUE.Logo.jpg&imgrefurl=http%3A%2F%2Fuejauharabad.blogspot.com%2F2012%2F10%2Fue-logo.html&docid=ZJhky4S29RFw9M&tbnid=sopb9CZYLJPitM&vet=12ahUKEwjv7r322aqMAxXM_7sIHSTUAnYQM3oECE8QAA..i&w=769&h=403&hcb=2&ved=2ahUKEwjv7r322aqMAxXM_7sIHSTUAnYQM3oECE8QAA")),
134
- bubble_full_width=False)
 
 
135
 
136
  with gr.Row():
137
- query = gr.Textbox(label="Your question",
138
- placeholder="Ask your question...",
139
- scale=4)
 
 
140
  submit_btn = gr.Button("Submit", variant="primary", scale=1)
141
 
142
  with gr.Row():
143
  clear_btn = gr.Button("Clear Chat", variant="secondary")
144
 
145
  # Status indicator
146
- status = gr.Textbox(label="System Status",
147
- value="Initializing...",
148
- interactive=False)
149
-
150
- # Update status periodically
151
- def update_status():
152
- if rag_system.loading_error:
153
- return f"Error: {rag_system.loading_error}"
154
- return "Ready" if rag_system.dataset_loaded else "Loading dataset..."
155
-
156
- app.load(update_status, None, status, every=1)
157
 
158
  # Event handlers
159
  def respond(message, chat_history):
 
6
  from sentence_transformers import SentenceTransformer
7
  from datasets import load_dataset
8
  from dotenv import load_dotenv
9
+ import asyncio
10
+ import time
11
 
12
  # Load environment variables
13
  load_dotenv()
 
25
  self.chunks = []
26
  self.dataset_loaded = False
27
  self.loading_error = None
28
+ self.gemini_api_key = os.getenv("GEMINI_API_KEY")
29
 
30
  # Initialize embedding model
31
  try:
 
37
  if self.gemini_api_key:
38
  genai.configure(api_key=self.gemini_api_key)
39
 
40
+ # Start dataset loading
41
+ self.load_dataset()
42
 
43
+ def load_dataset(self):
44
+ """Load dataset synchronously"""
45
+ try:
46
+ # Load dataset directly
47
+ dataset = load_dataset(
48
+ DATASET_NAME,
49
+ split='train',
50
+ download_mode="force_redownload"
51
+ )
52
+
53
+ # Process dataset
54
+ if 'text' in dataset.features:
55
+ self.chunks = dataset['text'][:1000] # Limit to first 1000 entries
56
+ elif 'context' in dataset.features:
57
+ self.chunks = dataset['context'][:1000]
58
+ else:
59
+ raise ValueError("Dataset must have 'text' or 'context' field")
60
+
61
+ # Create embeddings
62
+ embeddings = self.embedding_model.encode(
63
+ self.chunks,
64
+ show_progress_bar=False,
65
+ convert_to_numpy=True
66
+ )
67
+ self.index = faiss.IndexFlatL2(embeddings.shape[1])
68
+ self.index.add(embeddings.astype('float32'))
69
+
70
+ self.dataset_loaded = True
71
+ except Exception as e:
72
+ self.loading_error = str(e)
73
+ print(f"Dataset loading failed: {str(e)}")
 
 
 
 
74
 
75
  def get_relevant_context(self, query: str) -> str:
76
  """Retrieve most relevant chunks"""
 
93
  """Generate response with robust error handling"""
94
  if not self.dataset_loaded:
95
  if self.loading_error:
96
+ return f"⚠️ Dataset loading failed: {self.loading_error}"
97
+ return "⚠️ Dataset is still loading, please wait..."
98
  if not self.gemini_api_key:
99
+ return "🔑 Please set your Gemini API key in environment variables"
100
 
101
  context = self.get_relevant_context(query)
102
  if not context:
 
113
  response = model.generate_content(prompt)
114
  return response.text
115
  except Exception as e:
116
+ return f"⚠️ API Error: {str(e)}"
117
 
118
  # Initialize system
119
  try:
 
126
  gr.Markdown("# UE 24 Hour Service")
127
 
128
  with gr.Row():
129
+ chatbot = gr.Chatbot(
130
+ height=500,
131
+ avatar_images=(None, (None, "https://huggingface.co/spaces/groq/Groq-LLM/resolve/main/groq_logo.png")),
132
+ bubble_full_width=False
133
+ )
134
 
135
  with gr.Row():
136
+ query = gr.Textbox(
137
+ label="Your question",
138
+ placeholder="Ask your question...",
139
+ scale=4
140
+ )
141
  submit_btn = gr.Button("Submit", variant="primary", scale=1)
142
 
143
  with gr.Row():
144
  clear_btn = gr.Button("Clear Chat", variant="secondary")
145
 
146
  # Status indicator
147
+ status = gr.Textbox(
148
+ label="System Status",
149
+ value="Loading dataset..." if not rag_system.dataset_loaded else "Ready",
150
+ interactive=False
151
+ )
 
 
 
 
 
 
152
 
153
  # Event handlers
154
  def respond(message, chat_history):