gmustafa413 committed
Commit 0be7c95 · verified · 1 Parent(s): 28050ca

Update app.py

Files changed (1)
  1. app.py +48 -45
app.py CHANGED
@@ -6,6 +6,7 @@ import faiss
 from sentence_transformers import SentenceTransformer
 from datasets import load_dataset
 from dotenv import load_dotenv
+import threading
 
 # Load environment variables
 load_dotenv()
@@ -13,7 +14,7 @@ load_dotenv()
 # Configuration
 MODEL_NAME = "all-MiniLM-L6-v2"
 GENAI_MODEL = "gemini-pro"
-DATASET_NAME = "midrees2806/7K_Dataset"  # Direct dataset name
+DATASET_NAME = "midrees2806/7K_Dataset"
 CHUNK_SIZE = 500
 TOP_K = 3
 
@@ -22,13 +23,11 @@ class GeminiRAGSystem:
         self.index = None
         self.chunks = []
         self.dataset_loaded = False
-        self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")
+        self.loading_error = None
+        self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")  # Changed from hardcoded key
 
-        # Initialize embedding model with explicit version compatibility
+        # Initialize embedding model
         try:
-            # Workaround for huggingface_hub compatibility
-            import huggingface_hub
-            huggingface_hub.__version__ = "0.13.4"  # Force compatible version
             self.embedding_model = SentenceTransformer(MODEL_NAME)
         except Exception as e:
             raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
@@ -36,21 +35,22 @@ class GeminiRAGSystem:
         # Configure Gemini
         if self.gemini_api_key:
             genai.configure(api_key=self.gemini_api_key)
+
+        # Start dataset loading in background
+        self.load_dataset_in_background()
 
-    def load_dataset(self):
-        """Load dataset from Hugging Face with compatibility fallbacks"""
-        try:
-            with gr.Progress() as progress:
-                progress(0.1, desc="📦 Downloading dataset...")
-
-                # Workaround for dataset loading
+    def load_dataset_in_background(self):
+        """Load dataset in a background thread"""
+        def load_task():
+            try:
+                # Load dataset directly without progress bar
                 dataset = load_dataset(
                     DATASET_NAME,
                     split='train',
                     download_config={"use_auth_token": False}
                 )
 
-                progress(0.5, desc="🔨 Processing dataset...")
+                # Process dataset
                 if 'text' in dataset.features:
                     self.chunks = dataset['text'][:1000]  # Limit to first 1000 entries
                 elif 'context' in dataset.features:
@@ -58,7 +58,7 @@ class GeminiRAGSystem:
                 else:
                     raise ValueError("Dataset must have 'text' or 'context' field")
 
-                progress(0.7, desc="🧠 Creating embeddings...")
+                # Create embeddings
                 embeddings = self.embedding_model.encode(
                     self.chunks,
                     show_progress_bar=False,
@@ -68,14 +68,15 @@ class GeminiRAGSystem:
                 self.index.add(embeddings.astype('float32'))
 
                 self.dataset_loaded = True
-                progress(1.0, desc="✅ Dataset loaded successfully!")
-                return True
-        except Exception as e:
-            gr.Warning(f"Dataset loading error: {str(e)}")
-            return False
+            except Exception as e:
+                self.loading_error = str(e)
+                print(f"Dataset loading failed: {str(e)}")
+
+        # Start the loading thread
+        threading.Thread(target=load_task, daemon=True).start()
 
     def get_relevant_context(self, query: str) -> str:
-        """Retrieve most relevant chunks with version-safe operations"""
+        """Retrieve most relevant chunks"""
         if not self.index:
             return ""
 
@@ -94,9 +95,11 @@ class GeminiRAGSystem:
     def generate_response(self, query: str) -> str:
         """Generate response with robust error handling"""
         if not self.dataset_loaded:
-            return "⚠️ Please load the dataset first"
+            if self.loading_error:
+                return f" Dataset loading failed: {self.loading_error}"
+            return " Dataset is still loading, please wait..."
         if not self.gemini_api_key:
-            return "🔑 Please set your Gemini API key"
+            return " Please set your Gemini API key in environment variables"
 
         context = self.get_relevant_context(query)
         if not context:
@@ -113,9 +116,9 @@ class GeminiRAGSystem:
             response = model.generate_content(prompt)
             return response.text
         except Exception as e:
-            return f"⚠️ API Error: {str(e)}"
+            return f" API Error: {str(e)}"
 
-# Initialize system with compatibility checks
+# Initialize system
 try:
     rag_system = GeminiRAGSystem()
 except Exception as e:
@@ -123,29 +126,22 @@ except Exception as e:
 
 # Create interface
 with gr.Blocks(title="UE Chatbot") as app:
-    gr.Markdown("UE 24 Hour Service")
+    gr.Markdown("# UE 24/7 Service")
 
     with gr.Row():
-        with gr.Column():
-            load_btn = gr.Button("Load Dataset", variant="primary")
-            status = gr.Markdown("System ready - Load dataset to begin")
-
-        with gr.Column():
-            chatbot = gr.Chatbot(height=500)
-            query = gr.Textbox(label="Your question", placeholder="Ask about the dataset...")
-            with gr.Row():
-                submit_btn = gr.Button("Submit", variant="primary")
-                clear_btn = gr.Button("Clear", variant="secondary")
+        chatbot = gr.Chatbot(height=500)
 
-    # Event handlers
-    def load_dataset():
-        try:
-            if rag_system.load_dataset():
-                return "Dataset ready! Ask questions now."
-            return "Failed to load dataset"
-        except Exception as e:
-            return f" Error: {str(e)}"
+    with gr.Row():
+        query = gr.Textbox(label="Your question", placeholder="Ask your question...", scale=4)
+        submit_btn = gr.Button("Submit", variant="primary", scale=1)
+
+    with gr.Row():
+        clear_btn = gr.Button("Clear Chat", variant="secondary")
+
+    # Status indicator
+    status = gr.Textbox(label="System Status", visible=False)
 
+    # Event handlers
     def respond(message, chat_history):
        try:
             response = rag_system.generate_response(message)
@@ -158,10 +154,17 @@ with gr.Blocks(title="UE Chatbot") as app:
     def clear_chat():
         return []
 
-    load_btn.click(load_dataset, outputs=status)
+    def get_status():
+        if rag_system.loading_error:
+            return f"Error: {rag_system.loading_error}"
+        return "Ready" if rag_system.dataset_loaded else "Loading dataset..."
+
     submit_btn.click(respond, [query, chatbot], [query, chatbot])
     query.submit(respond, [query, chatbot], [query, chatbot])
     clear_btn.click(clear_chat, outputs=chatbot)
+
+    # Periodically check status (hidden from user)
+    app.load(get_status, None, status, every=1)
 
 if __name__ == "__main__":
     app.launch(share=True)
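
The core of this commit is that GeminiRAGSystem now starts dataset loading from its constructor: load_dataset_in_background() downloads, chunks, and indexes the data in a daemon thread and records the outcome in the dataset_loaded and loading_error flags that the UI polls. A minimal, self-contained sketch of that pattern follows; BackgroundLoader, _load_task, and the time.sleep stand-in are illustrative names, not code from the commit.

import threading
import time

class BackgroundLoader:
    """Sketch of the daemon-thread loading pattern used in the new __init__."""
    def __init__(self):
        self.dataset_loaded = False
        self.loading_error = None
        # Start the slow work without blocking the constructor
        threading.Thread(target=self._load_task, daemon=True).start()

    def _load_task(self):
        try:
            time.sleep(2)  # stand-in for load_dataset() + encoding + FAISS indexing
            self.dataset_loaded = True
        except Exception as e:
            # Surface the failure instead of letting the thread die silently
            self.loading_error = str(e)

loader = BackgroundLoader()
# Mirrors the logic of the new get_status() handler
print(loader.loading_error or ("Ready" if loader.dataset_loaded else "Loading dataset..."))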
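
The retrieval path appears only as context lines in this diff (self.embedding_model.encode(...), self.index.add(embeddings.astype('float32')), TOP_K = 3), so the construction of the FAISS index is not shown. The sketch below fills in a typical sentence-transformers + FAISS flow under assumptions: the IndexFlatL2 index type and the sample chunk strings are illustrative, not taken from app.py.

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

TOP_K = 3  # matches the configuration constant in app.py

model = SentenceTransformer("all-MiniLM-L6-v2")
chunks = [
    "Example chunk about admissions.",
    "Example chunk about library hours.",
    "Example chunk about course registration.",
]

# Embed the chunks and index them (the index type here is an assumption)
embeddings = model.encode(chunks, show_progress_bar=False)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.asarray(embeddings, dtype="float32"))

# Retrieve the TOP_K nearest chunks for a query, roughly what get_relevant_context() does
query_vec = model.encode(["When is the library open?"])
_, ids = index.search(np.asarray(query_vec, dtype="float32"), TOP_K)
context = "\n".join(chunks[i] for i in ids[0])
print(context)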
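
One detail worth noting: the new constructor reads the key with os.getenv("AIzaSy..."), where the argument looks like the key value rather than an environment-variable name, so getenv returns None unless a variable with that literal name happens to be set, and generate_response then keeps reporting that the key is missing. The conventional python-dotenv pattern looks the key up under a named variable; in the sketch below the name GEMINI_API_KEY is an assumption, not something this commit defines.

import os
from dotenv import load_dotenv
import google.generativeai as genai

load_dotenv()  # reads a local .env file, e.g. a line such as GEMINI_API_KEY=<your key>

# GEMINI_API_KEY is an assumed variable name; app.py would need to use the same name
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
else:
    print("Please set your Gemini API key in environment variables")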