JohnsonMLEngineer committed on
Commit
1e01d91
·
verified ·
1 Parent(s): 78ae67a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -57
app.py CHANGED
@@ -10,8 +10,7 @@ import pandas as pd
10
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
11
  import json
12
  from datetime import datetime
13
-
14
- load_dotenv()
15
 
16
  # Constants
17
  PAGE_TITLE = "PTE Assistant - Multi-Model Comparison"
@@ -23,6 +22,61 @@ MODELS = {
23
  }
24
  TRAINING_DATA_FILE = "training_data.json"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # Initialize session state
27
  if "chat_history" not in st.session_state:
28
  st.session_state.chat_history = []
@@ -31,34 +85,21 @@ if "selected_responses" not in st.session_state:
31
  if "feedback_messages" not in st.session_state:
32
  st.session_state.feedback_messages = {}
33
 
34
- def load_training_data():
35
- """Load existing training data or return empty list if file doesn't exist"""
36
- try:
37
- if os.path.exists(TRAINING_DATA_FILE) and os.path.getsize(TRAINING_DATA_FILE) > 0:
38
- with open(TRAINING_DATA_FILE, 'r') as f:
39
- return json.load(f)
40
- return []
41
- except json.JSONDecodeError:
42
- st.warning("Found invalid training data file. Creating new one.")
43
- return []
44
- except Exception as e:
45
- st.error(f"Error loading training data: {str(e)}")
46
- return []
47
 
48
- def save_training_data(data):
49
- """Save data for future training"""
50
  try:
51
  # Load existing data
52
- existing_data = load_training_data()
53
 
54
  # Append new data
55
  existing_data.append(data)
56
 
57
  # Save updated data
58
- with open(TRAINING_DATA_FILE, 'w') as f:
59
- json.dump(existing_data, f, indent=2)
60
-
61
- return True
62
  except Exception as e:
63
  st.error(f"Error saving training data: {str(e)}")
64
  return False
@@ -90,9 +131,9 @@ def initialize_rag_components():
90
 
91
  # Initialize vector store
92
  docsearch = PineconeVectorStore.from_existing_index(
93
- index_name = "dataset2",
94
- embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
95
- )
96
 
97
  # Create retriever
98
  retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 10})
@@ -106,31 +147,30 @@ def initialize_rag_components():
106
  ) for model_name, model_id in MODELS.items()
107
  }
108
 
109
- # System prompt remains the same...
110
  system_prompt = """
111
  You are an advanced AI assistant specialized in PTE (Pearson Test of English) exam preparation. Your role is to provide expert guidance, explanations, and strategies to help students excel in all aspects of the PTE exam.
112
- Core Responsibilities:
113
-
114
- Provide accurate, detailed information about PTE exam structure, scoring, and recent updates.
115
- Offer tailored advice and strategies for each PTE section: Speaking, Writing, Reading, and Listening.
116
- Suggest effective study plans and time management techniques.
117
- Provide constructive feedback on practice responses (when given).
118
-
119
- Guidelines for Responses:
120
-
121
- Use the following retrieved context to inform your answers: {context}
122
- If the context doesn't provide sufficient information or
123
- If you don't know the answer or are unsure, clearly state this and suggest reliable resources for further information.
124
- Tailor your language complexity to the user's apparent level of understanding.
125
- Be concise yet thorough. Aim for clear, actionable advice.
126
- Use bullet points or numbered lists for step-by-step instructions or multiple tips.
127
-
128
- Ethical Considerations:
129
- Topic Limitation: If a question is outside the scope of the PTE exam, kindly inform the user that you are only equipped to address PTE-related topics.
130
- Never provide or encourage cheating methods.
131
- Emphasize the importance of genuine language skill development over exam tricks.
132
- Respect copyright; produce exact questions from official PTE materials.
133
- """
134
 
135
  prompt = ChatPromptTemplate.from_messages([
136
  ("system", system_prompt),
@@ -171,7 +211,7 @@ def display_chat_history():
171
  "selected_model": model_name,
172
  "selected_response": response,
173
  "custom_response": None
174
- }):
175
  show_feedback_message(
176
  idx,
177
  f"✅ Response from {model_name} has been selected and saved successfully! This response will be used to improve future answers."
@@ -206,7 +246,7 @@ def display_chat_history():
206
  "selected_model": "custom",
207
  "selected_response": custom_response,
208
  "custom_response": custom_response
209
- }):
210
  show_feedback_message(
211
  idx,
212
  "✅ Your custom response has been submitted and saved successfully! This will help improve future responses."
@@ -217,10 +257,6 @@ def display_chat_history():
217
  "❌ Failed to save your custom response. Please try again.",
218
  is_error=True
219
  )
220
- #else:
221
- # Display the selected response
222
- #selected = st.session_state.selected_responses[idx]
223
- # st.success(f"✅ Selected response from: {selected['selected_model']}")
224
 
225
  # Display any feedback messages for this interaction
226
  display_feedback(idx)
@@ -228,11 +264,18 @@ def display_chat_history():
228
  st.divider()
229
 
230
  def main():
 
231
  set_page_config()
232
 
233
  st.header("PTE Assistant - Multi-Model Comparison 🎓")
234
  st.subheader("Compare responses and select the best answer")
235
 
 
 
 
 
 
 
236
  # Initialize RAG chains for all models
237
  rag_chains = initialize_rag_components()
238
 
@@ -268,8 +311,5 @@ def main():
268
  # Display chat history with response selection
269
  display_chat_history()
270
 
271
-
272
-
273
-
274
  if __name__ == "__main__":
275
  main()
 
10
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
11
  import json
12
  from datetime import datetime
13
+ from huggingface_hub import HfApi, Repository
 
14
 
15
  # Constants
16
  PAGE_TITLE = "PTE Assistant - Multi-Model Comparison"
 
22
  }
23
  TRAINING_DATA_FILE = "training_data.json"
24
 
25
class HuggingFaceStorage:
    """Persist JSON training data in a Hugging Face Hub git repository.

    On construction the target repo is cloned into a local ``repo_cache``
    directory; every save writes the file there, then commits and pushes.
    """

    def __init__(self, repo_id, token_path=".env"):
        """Clone *repo_id* locally, authenticating with HF_TOKEN from *token_path*.

        Raises:
            ValueError: if HF_TOKEN is not present in the .env file.
        """
        # Read token from .env file
        load_dotenv(token_path)
        self.token = os.getenv("HF_TOKEN")
        if not self.token:
            raise ValueError("HF_TOKEN not found in .env file")

        self.repo_id = repo_id
        self.api = HfApi()

        # Initialize/clone the repository into the fixed local cache dir.
        self.repo = Repository(
            local_dir="repo_cache",
            clone_from=repo_id,
            token=self.token,
            git_user="JohnsonMLEngineer",
            # NOTE(review): the address below was redacted in the captured page —
            # restore the real committer email before deploying.
            git_email="[email protected]"
        )

    def load_json_file(self, filename="training_data.json"):
        """Load *filename* from the local repo clone; return [] if absent/empty.

        Any read/parse failure is reported via st.error and [] is returned.
        """
        # fix: path was garbled to a literal "(unknown)" placeholder in the
        # capture — it must interpolate the requested filename.
        json_path = f"repo_cache/{filename}"
        try:
            if os.path.exists(json_path) and os.path.getsize(json_path) > 0:
                with open(json_path, 'r') as f:
                    return json.load(f)
            return []
        except Exception as e:
            st.error(f"Error loading data from HuggingFace: {str(e)}")
            return []

    def save_json_file(self, data, filename="training_data.json"):
        """Serialize *data* to *filename* in the repo clone, commit and push.

        Returns:
            bool: True on success, False on any failure (reported via st.error).
        """
        try:
            # fix: same "(unknown)" garbling as in load_json_file.
            json_path = f"repo_cache/{filename}"

            # Create directory if it doesn't exist
            os.makedirs(os.path.dirname(json_path), exist_ok=True)

            # Save file
            with open(json_path, 'w') as f:
                json.dump(data, f, indent=2)

            # Commit and push changes; git_add takes the path relative to the
            # repo root, hence the bare filename.
            self.repo.git_add(filename)
            self.repo.git_commit(f"Update {filename}")
            self.repo.git_push()

            return True
        except Exception as e:
            st.error(f"Error saving data to HuggingFace: {str(e)}")
            return False
79
+
80
  # Initialize session state
81
  if "chat_history" not in st.session_state:
82
  st.session_state.chat_history = []
 
85
  if "feedback_messages" not in st.session_state:
86
  st.session_state.feedback_messages = {}
87
 
88
def load_training_data(hf_storage):
    """Return the persisted training examples.

    Thin convenience wrapper: delegates to *hf_storage*'s JSON loader using
    the module-level TRAINING_DATA_FILE name.
    """
    return hf_storage.load_json_file(TRAINING_DATA_FILE)
 
 
 
 
 
 
 
 
 
 
91
 
92
def save_training_data(data, hf_storage):
    """Append *data* to the stored training set and persist it.

    Loads the current records, appends the new entry, and writes the whole
    list back through *hf_storage*. Returns True on success, False otherwise.
    """
    try:
        # Fetch whatever is already stored, then extend it.
        records = load_training_data(hf_storage)
        records.append(data)

        # Persist the updated list back to the repository.
        return hf_storage.save_json_file(records, TRAINING_DATA_FILE)
    except Exception as e:
        st.error(f"Error saving training data: {str(e)}")
        return False
 
131
 
132
  # Initialize vector store
133
  docsearch = PineconeVectorStore.from_existing_index(
134
+ index_name="dataset2",
135
+ embedding=GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
136
+ )
137
 
138
  # Create retriever
139
  retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 10})
 
147
  ) for model_name, model_id in MODELS.items()
148
  }
149
 
 
150
  system_prompt = """
151
  You are an advanced AI assistant specialized in PTE (Pearson Test of English) exam preparation. Your role is to provide expert guidance, explanations, and strategies to help students excel in all aspects of the PTE exam.
152
+ Core Responsibilities:
153
+
154
+ Provide accurate, detailed information about PTE exam structure, scoring, and recent updates.
155
+ Offer tailored advice and strategies for each PTE section: Speaking, Writing, Reading, and Listening.
156
+ Suggest effective study plans and time management techniques.
157
+ Provide constructive feedback on practice responses (when given).
158
+
159
+ Guidelines for Responses:
160
+
161
+ Use the following retrieved context to inform your answers: {context}
162
+ If the context doesn't provide sufficient information or
163
+ If you don't know the answer or are unsure, clearly state this and suggest reliable resources for further information.
164
+ Tailor your language complexity to the user's apparent level of understanding.
165
+ Be concise yet thorough. Aim for clear, actionable advice.
166
+ Use bullet points or numbered lists for step-by-step instructions or multiple tips.
167
+
168
+ Ethical Considerations:
169
+ Topic Limitation: If a question is outside the scope of the PTE exam, kindly inform the user that you are only equipped to address PTE-related topics.
170
+ Never provide or encourage cheating methods.
171
+ Emphasize the importance of genuine language skill development over exam tricks.
172
+ Respect copyright; produce exact questions from official PTE materials.
173
+ """
174
 
175
  prompt = ChatPromptTemplate.from_messages([
176
  ("system", system_prompt),
 
211
  "selected_model": model_name,
212
  "selected_response": response,
213
  "custom_response": None
214
+ }, st.session_state.hf_storage):
215
  show_feedback_message(
216
  idx,
217
  f"✅ Response from {model_name} has been selected and saved successfully! This response will be used to improve future answers."
 
246
  "selected_model": "custom",
247
  "selected_response": custom_response,
248
  "custom_response": custom_response
249
+ }, st.session_state.hf_storage):
250
  show_feedback_message(
251
  idx,
252
  "✅ Your custom response has been submitted and saved successfully! This will help improve future responses."
 
257
  "❌ Failed to save your custom response. Please try again.",
258
  is_error=True
259
  )
 
 
 
 
260
 
261
  # Display any feedback messages for this interaction
262
  display_feedback(idx)
 
264
  st.divider()
265
 
266
  def main():
267
+ load_dotenv()
268
  set_page_config()
269
 
270
  st.header("PTE Assistant - Multi-Model Comparison 🎓")
271
  st.subheader("Compare responses and select the best answer")
272
 
273
+ # Initialize HuggingFace storage
274
+ if 'hf_storage' not in st.session_state:
275
+ st.session_state.hf_storage = HuggingFaceStorage(
276
+ repo_id="JohnsonMLEngineer/Multi-Model_Comparison" # your actual repo ID
277
+ )
278
+
279
  # Initialize RAG chains for all models
280
  rag_chains = initialize_rag_components()
281
 
 
311
  # Display chat history with response selection
312
  display_chat_history()
313
 
 
 
 
314
  if __name__ == "__main__":
315
  main()