JohnsonMLEngineer committed on
Commit cc1da9b · verified · 1 Parent(s): 5fa3c01

Update app.py

Files changed (1)
  1. app.py +57 -95
app.py CHANGED
@@ -10,7 +10,8 @@ import pandas as pd
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
  import json
  from datetime import datetime
- from huggingface_hub import HfApi, Repository
+
+ load_dotenv()
 
  # Constants
  PAGE_TITLE = "PTE Assistant - Multi-Model Comparison"
@@ -22,59 +23,6 @@ MODELS = {
  }
  TRAINING_DATA_FILE = "training_data.json"
 
- class HuggingFaceStorage:
-     def __init__(self, repo_id):
-
-         # Read token from .env file
-
-         self.token = os.environ.get("LLM fine tune")
-
-         self.repo_id = repo_id
-         self.api = HfApi()
-
-         # Initialize/clone the repository
-         self.repo = Repository(
-             local_dir="repo_cache",
-             clone_from=repo_id,
-             token=self.token,
-             git_user="JohnsonMLEngineer",
-             git_email="[email protected]"
-         )
-
-     def load_json_file(self, filename="training_data.json"):
-         """Load JSON file from repository"""
-         json_path = f"repo_cache/{filename}"
-         try:
-             if os.path.exists(json_path) and os.path.getsize(json_path) > 0:
-                 with open(json_path, 'r') as f:
-                     return json.load(f)
-             return []
-         except Exception as e:
-             st.error(f"Error loading data from HuggingFace: {str(e)}")
-             return []
-
-     def save_json_file(self, data, filename="training_data.json"):
-         """Save JSON file to repository"""
-         try:
-             json_path = f"repo_cache/{filename}"
-
-             # Create directory if it doesn't exist
-             os.makedirs(os.path.dirname(json_path), exist_ok=True)
-
-             # Save file
-             with open(json_path, 'w') as f:
-                 json.dump(data, f, indent=2)
-
-             # Commit and push changes
-             self.repo.git_add(filename)
-             self.repo.git_commit(f"Update {filename}")
-             self.repo.git_push()
-
-             return True
-         except Exception as e:
-             st.error(f"Error saving data to HuggingFace: {str(e)}")
-             return False
-
  # Initialize session state
  if "chat_history" not in st.session_state:
      st.session_state.chat_history = []
@@ -83,21 +31,34 @@ if "selected_responses" not in st.session_state:
  if "feedback_messages" not in st.session_state:
      st.session_state.feedback_messages = {}
 
- def load_training_data(hf_storage):
-     """Load existing training data using HuggingFace storage"""
-     return hf_storage.load_json_file(TRAINING_DATA_FILE)
+ def load_training_data():
+     """Load existing training data or return empty list if file doesn't exist"""
+     try:
+         if os.path.exists(TRAINING_DATA_FILE) and os.path.getsize(TRAINING_DATA_FILE) > 0:
+             with open(TRAINING_DATA_FILE, 'r') as f:
+                 return json.load(f)
+         return []
+     except json.JSONDecodeError:
+         st.warning("Found invalid training data file. Creating new one.")
+         return []
+     except Exception as e:
+         st.error(f"Error loading training data: {str(e)}")
+         return []
 
- def save_training_data(data, hf_storage):
-     """Save training data using HuggingFace storage"""
+ def save_training_data(data):
+     """Save data for future training"""
      try:
          # Load existing data
-         existing_data = load_training_data(hf_storage)
+         existing_data = load_training_data()
 
          # Append new data
          existing_data.append(data)
 
          # Save updated data
-         return hf_storage.save_json_file(existing_data, TRAINING_DATA_FILE)
+         with open(TRAINING_DATA_FILE, 'w') as f:
+             json.dump(existing_data, f, indent=2)
+
+         return True
      except Exception as e:
          st.error(f"Error saving training data: {str(e)}")
          return False
@@ -129,9 +90,9 @@ def initialize_rag_components():
 
      # Initialize vector store
      docsearch = PineconeVectorStore.from_existing_index(
-         index_name="dataset2",
-         embedding=GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
-     )
+         index_name = "dataset2",
+         embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
+     )
 
      # Create retriever
      retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 10})
@@ -145,30 +106,31 @@ def initialize_rag_components():
          ) for model_name, model_id in MODELS.items()
      }
 
+     # System prompt remains the same...
      system_prompt = """
  You are an advanced AI assistant specialized in PTE (Pearson Test of English) exam preparation. Your role is to provide expert guidance, explanations, and strategies to help students excel in all aspects of the PTE exam.
- Core Responsibilities:
-
- Provide accurate, detailed information about PTE exam structure, scoring, and recent updates.
- Offer tailored advice and strategies for each PTE section: Speaking, Writing, Reading, and Listening.
- Suggest effective study plans and time management techniques.
- Provide constructive feedback on practice responses (when given).
-
- Guidelines for Responses:
-
- Use the following retrieved context to inform your answers: {context}
- If the context doesn't provide sufficient information or
- If you don't know the answer or are unsure, clearly state this and suggest reliable resources for further information.
- Tailor your language complexity to the user's apparent level of understanding.
- Be concise yet thorough. Aim for clear, actionable advice.
- Use bullet points or numbered lists for step-by-step instructions or multiple tips.
-
- Ethical Considerations:
- Topic Limitation: If a question is outside the scope of the PTE exam, kindly inform the user that you are only equipped to address PTE-related topics.
- Never provide or encourage cheating methods.
- Emphasize the importance of genuine language skill development over exam tricks.
- Respect copyright; produce exact questions from official PTE materials.
- """
+ Core Responsibilities:
+
+ Provide accurate, detailed information about PTE exam structure, scoring, and recent updates.
+ Offer tailored advice and strategies for each PTE section: Speaking, Writing, Reading, and Listening.
+ Suggest effective study plans and time management techniques.
+ Provide constructive feedback on practice responses (when given).
+
+ Guidelines for Responses:
+
+ Use the following retrieved context to inform your answers: {context}
+ If the context doesn't provide sufficient information or
+ If you don't know the answer or are unsure, clearly state this and suggest reliable resources for further information.
+ Tailor your language complexity to the user's apparent level of understanding.
+ Be concise yet thorough. Aim for clear, actionable advice.
+ Use bullet points or numbered lists for step-by-step instructions or multiple tips.
+
+ Ethical Considerations:
+ Topic Limitation: If a question is outside the scope of the PTE exam, kindly inform the user that you are only equipped to address PTE-related topics.
+ Never provide or encourage cheating methods.
+ Emphasize the importance of genuine language skill development over exam tricks.
+ Respect copyright; produce exact questions from official PTE materials.
+ """
 
      prompt = ChatPromptTemplate.from_messages([
          ("system", system_prompt),
@@ -209,7 +171,7 @@ def display_chat_history():
                  "selected_model": model_name,
                  "selected_response": response,
                  "custom_response": None
-             }, st.session_state.hf_storage):
+             }):
                  show_feedback_message(
                      idx,
                      f"✅ Response from {model_name} has been selected and saved successfully! This response will be used to improve future answers."
@@ -244,7 +206,7 @@ def display_chat_history():
                  "selected_model": "custom",
                  "selected_response": custom_response,
                  "custom_response": custom_response
-             }, st.session_state.hf_storage):
+             }):
                  show_feedback_message(
                      idx,
                      "✅ Your custom response has been submitted and saved successfully! This will help improve future responses."
@@ -255,6 +217,10 @@ def display_chat_history():
                      "❌ Failed to save your custom response. Please try again.",
                      is_error=True
                  )
+             #else:
+                 # Display the selected response
+                 #selected = st.session_state.selected_responses[idx]
+                 # st.success(f"✅ Selected response from: {selected['selected_model']}")
 
          # Display any feedback messages for this interaction
          display_feedback(idx)
@@ -262,18 +228,11 @@ def display_chat_history():
      st.divider()
 
  def main():
-     load_dotenv()
      set_page_config()
 
      st.header("PTE Assistant - Multi-Model Comparison 🎓")
      st.subheader("Compare responses and select the best answer")
 
-     # Initialize HuggingFace storage
-     if 'hf_storage' not in st.session_state:
-         st.session_state.hf_storage = HuggingFaceStorage(
-             repo_id="JohnsonMLEngineer/Multi-Model_Comparison" # your actual repo ID
-         )
-
      # Initialize RAG chains for all models
      rag_chains = initialize_rag_components()
@@ -309,5 +268,8 @@ def main():
      # Display chat history with response selection
      display_chat_history()
 
+
+
+
  if __name__ == "__main__":
      main()
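
For reference, the new local-JSON persistence introduced in this commit can be exercised on its own, outside Streamlit. The sketch below is illustrative only: print() stands in for st.error/st.warning, and the sample record is hypothetical, carrying just the three fields visible in the diff (the app may attach more, such as the original question or a timestamp).

import json
import os

TRAINING_DATA_FILE = "training_data.json"

def load_training_data():
    """Return the stored list of feedback records, or [] if the file is missing or empty."""
    try:
        if os.path.exists(TRAINING_DATA_FILE) and os.path.getsize(TRAINING_DATA_FILE) > 0:
            with open(TRAINING_DATA_FILE, "r") as f:
                return json.load(f)
        return []
    except json.JSONDecodeError:
        print("Found invalid training data file. Starting a new one.")  # st.warning in the app
        return []

def save_training_data(record):
    """Append one feedback record and rewrite the whole JSON file."""
    existing = load_training_data()
    existing.append(record)
    with open(TRAINING_DATA_FILE, "w") as f:
        json.dump(existing, f, indent=2)
    return True

if __name__ == "__main__":
    # Hypothetical record, shaped like the dict passed from display_chat_history()
    save_training_data({
        "selected_model": "custom",
        "selected_response": "Practice Read Aloud with a timer every day.",
        "custom_response": "Practice Read Aloud with a timer every day.",
    })
    print(load_training_data())

Note the load-then-rewrite pattern: every save reloads the full list and dumps it again, so training_data.json stays a single valid JSON array rather than a log of concatenated objects.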