forbiddensoul90 committed on
Commit
5a01603
·
verified ·
1 Parent(s): c1062db

Update back.py

Browse files
Files changed (1) hide show
  1. back.py +299 -297
back.py CHANGED
@@ -1,298 +1,300 @@
1
- import os
2
- import logging
3
- from typing import List, Dict, Any, Optional, Union
4
- from dataclasses import dataclass
5
- import torch
6
- from sentence_transformers import SentenceTransformer
7
- from langchain.vectorstores import FAISS
8
- from langchain_core.embeddings import Embeddings
9
- import google.generativeai as genai
10
- from datetime import datetime
11
- import json
12
-
13
- @dataclass
14
- class UserInfo:
15
- """User information for context"""
16
- name: str
17
- college: str
18
- degree: str
19
- year: int
20
- career_goals: str
21
- has_internship: bool
22
- has_placement: bool
23
-
24
- @dataclass
25
- class ChatConfig:
26
- """Configuration for the chatbot"""
27
- embedding_model_name: str = 'all-MiniLM-L6-v2'
28
- device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
29
- max_history: int = 3
30
- gemini_api_key: str = "AIzaSyCAlM-YPVKl1qTnUwInWz9X5sNCmISPOr4" # Replace with your API key
31
- log_file: str = "chat_history.txt"
32
- user_data_file: str = "user_data.json"
33
-
34
- # In the UserManager class, modify these methods:
35
- class UserManager:
36
- """Manages user information storage and retrieval"""
37
- def __init__(self, user_data_file: str):
38
- self.user_data_file = user_data_file
39
- self.ensure_file_exists()
40
-
41
- def ensure_file_exists(self):
42
- """Create user data file if it doesn't exist"""
43
- if not os.path.exists(self.user_data_file):
44
- os.makedirs(os.path.dirname(self.user_data_file), exist_ok=True)
45
- with open(self.user_data_file, 'w', encoding='utf-8') as f:
46
- json.dump({}, f)
47
-
48
- def save_user_info(self, user_info: UserInfo):
49
- """Save user information to JSON file"""
50
- try:
51
- # First ensure the file exists with valid JSON
52
- self.ensure_file_exists()
53
-
54
- # Read existing data
55
- try:
56
- with open(self.user_data_file, 'r', encoding='utf-8') as f:
57
- data = json.load(f)
58
- except json.JSONDecodeError:
59
- data = {}
60
-
61
- # Update data
62
- data[user_info.name] = {
63
- "college": user_info.college,
64
- "degree": user_info.degree,
65
- "year": user_info.year,
66
- "career_goals": user_info.career_goals,
67
- "has_internship": user_info.has_internship,
68
- "has_placement": user_info.has_placement,
69
- "last_updated": datetime.now().isoformat()
70
- }
71
-
72
- # Write back to file
73
- with open(self.user_data_file, 'w', encoding='utf-8') as f:
74
- json.dump(data, f, indent=4)
75
- return True
76
- except Exception as e:
77
- logging.error(f"Error saving user info: {str(e)}")
78
- return False
79
-
80
-
81
- class ChatLogger:
82
- """Logger for chat interactions"""
83
- def __init__(self, log_file: str):
84
- self.log_file = log_file
85
-
86
- def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
87
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
88
- with open(self.log_file, 'a', encoding='utf-8') as f:
89
- user_context = ""
90
- if user_info:
91
- user_context = f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
92
- f.write(f"\n[{timestamp}]{user_context}\nQ: {question}\nA: {answer}\n{'-'*50}")
93
-
94
- class ChatMemory:
95
- """Manages chat history"""
96
- def __init__(self, max_history: int = 3):
97
- self.max_history = max_history
98
- self.history = []
99
-
100
- def add_interaction(self, question: str, answer: str):
101
- self.history.append({"question": question, "answer": answer})
102
- if len(self.history) > self.max_history:
103
- self.history.pop(0)
104
-
105
- def get_history(self) -> List[Dict[str, str]]:
106
- return self.history
107
-
108
- def clear_history(self):
109
- self.history = []
110
-
111
- class QuestionGenerator:
112
- def __init__(self, api_key: str):
113
- genai.configure(api_key=api_key)
114
- self.generation_config = {
115
- "temperature": 0.1,
116
- "top_p": 0.95,
117
- "max_output_tokens": 8192,
118
- }
119
- self.model = genai.GenerativeModel(
120
- model_name="gemini-1.5-flash",
121
- generation_config=self.generation_config,
122
- safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
123
- )
124
-
125
- self.default_questions = [
126
- "What are some other skills I should focus on to improve my chances?",
127
- "What resources or platforms can help me in my career journey?",
128
- "Are there any specific companies or organizations I should target for internships/placements?",
129
- "What are some common interview questions asked for this career path?"
130
- ]
131
-
132
- async def generate_questions(
133
- self,
134
- question: str,
135
- answer: str,
136
- user_info: Optional[UserInfo] = None
137
- ) -> List[str]:
138
- """Generate follow-up questions based on the conversation"""
139
- try:
140
- chat = self.model.start_chat(history=[])
141
- prompt = f"""Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:
142
-
143
- Question: {question}
144
- Answer: {answer}
145
-
146
- Focus the questions on:
147
- 1. Skills development (What skills are needed, how to improve)
148
- 2. Resources and platforms (Where to find internships, jobs, etc.)
149
- 3. Specific target companies/organizations
150
- 4. Common interview questions
151
-
152
- Keep the language simple and student-friendly. Format each question on a new line."""
153
-
154
- response = chat.send_message(prompt).text
155
-
156
- # Extract questions
157
- questions = [q.strip() for q in response.split('\n') if q.strip()]
158
-
159
- # Return default questions if we don't get exactly 4 valid questions
160
- if len(questions) != 4:
161
- return self.default_questions
162
-
163
- return questions
164
-
165
- except Exception as e:
166
- logging.error(f"Error generating questions: {str(e)}")
167
- return self.default_questions
168
-
169
- class GeminiRAG:
170
- def __init__(self, api_key: str):
171
- genai.configure(api_key=api_key)
172
- self.generation_config = {
173
- "temperature": 0.1,
174
- "top_p": 0.95,
175
- "top_k": 64,
176
- "max_output_tokens": 8192,
177
- }
178
- self.model = genai.GenerativeModel(
179
- model_name="gemini-1.5-flash",
180
- generation_config=self.generation_config,
181
- safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
182
- )
183
-
184
- def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
185
- """Creates a context string from relevant documents"""
186
- context_parts = []
187
- for doc in relevant_docs:
188
- context_parts.append(f"Section: {doc['metadata']['section']}\n{doc['content']}")
189
- return "\n\n".join(context_parts)
190
-
191
- async def get_answer(
192
- self,
193
- question: str,
194
- context: str,
195
- user_info: Optional[UserInfo] = None
196
- ) -> str:
197
- try:
198
- chat = self.model.start_chat(history=[])
199
-
200
- # Simplified prompt to reduce chances of recitation
201
- prompt = f"""As a career counselor, provide a helpful response based on:
202
-
203
- Context: {context}
204
-
205
- {f'''User Background:
206
- - Student at {user_info.college}
207
- - Studying {user_info.degree} (Year {user_info.year})
208
- - Goals: {user_info.career_goals}
209
- - {'Has internship experience' if user_info.has_internship else 'No internship yet'}
210
- - {'Has placement' if user_info.has_placement else 'Seeking placement'}''' if user_info else ''}
211
-
212
- Question: {question}
213
-
214
- Provide practical advice with specific examples and actionable steps."""
215
-
216
- try:
217
- response = chat.send_message(prompt)
218
- if response.text:
219
- return response.text
220
- else:
221
- return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
222
- except Exception as chat_error:
223
- logging.error(f"Chat error: {str(chat_error)}")
224
- return "I encountered an error while processing your question. Please try again with a simpler question."
225
-
226
- except Exception as e:
227
- logging.error(f"Error generating answer: {str(e)}")
228
- return "An error occurred. Please try again later."
229
-
230
- class CustomEmbeddings(Embeddings):
231
- """Custom embeddings using SentenceTransformer"""
232
- def __init__(self, model_name: str, device: str):
233
- self.model = SentenceTransformer(model_name)
234
- self.model.to(device)
235
-
236
- def embed_documents(self, texts: List[str]) -> List[List[float]]:
237
- with torch.no_grad():
238
- embeddings = self.model.encode(texts, convert_to_tensor=True)
239
- return embeddings.cpu().numpy().tolist()
240
-
241
- def embed_query(self, text: str) -> List[float]:
242
- with torch.no_grad():
243
- embedding = self.model.encode([text], convert_to_tensor=True)
244
- return embedding.cpu().numpy().tolist()[0]
245
-
246
- class ProductDatabase:
247
- """Handles document storage and retrieval"""
248
- def __init__(self, config: ChatConfig):
249
- self.embeddings = CustomEmbeddings(
250
- model_name=config.embedding_model_name,
251
- device=config.device
252
- )
253
- self.vectorstore = None
254
-
255
- def process_markdown(self, markdown_content: str):
256
- """Process markdown content and create vector store"""
257
- try:
258
- sections = markdown_content.split('\n## ')
259
- documents = []
260
-
261
- if sections[0].startswith('# '):
262
- intro = sections[0].split('\n', 1)[1]
263
- documents.append({
264
- "content": intro,
265
- "section": "Introduction"
266
- })
267
-
268
- for section in sections[1:]:
269
- if section.strip():
270
- title, content = section.split('\n', 1)
271
- documents.append({
272
- "content": content.strip(),
273
- "section": title.strip()
274
- })
275
-
276
- texts = [doc["content"] for doc in documents]
277
- metadatas = [{"section": doc["section"]} for doc in documents]
278
-
279
- self.vectorstore = FAISS.from_texts(
280
- texts=texts,
281
- embedding=self.embeddings,
282
- metadatas=metadatas
283
- )
284
-
285
- except Exception as e:
286
- raise Exception(f"Error processing markdown content: {str(e)}")
287
-
288
- def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
289
- """Search for relevant documents"""
290
- if not self.vectorstore:
291
- raise ValueError("Database not initialized. Please process documents first.")
292
-
293
- try:
294
- docs = self.vectorstore.similarity_search(query, k=k)
295
- return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
296
- except Exception as e:
297
- logging.error(f"Error during search: {str(e)}")
 
 
298
  return []
 
1
+ import os
2
+ import logging
3
+ from typing import List, Dict, Any, Optional, Union
4
+ from dataclasses import dataclass
5
+ import torch
6
+ from sentence_transformers import SentenceTransformer
7
+ from langchain.vectorstores import FAISS
8
+ from langchain_core.embeddings import Embeddings
9
+ import google.generativeai as genai
10
+ from datetime import datetime
11
+ import json
12
+
13
@dataclass
class UserInfo:
    """Profile of the student the chatbot is talking to.

    The fields are interpolated into the answer prompt and into chat-log
    entries, and are persisted to the user-data JSON file keyed by ``name``.
    """

    name: str  # used as the key when the profile is saved
    college: str
    degree: str
    year: int  # rendered as "Year <n>" next to the degree in the prompt
    career_goals: str
    has_internship: bool  # True -> "Has internship experience" in the prompt
    has_placement: bool  # True -> "Has placement", else "Seeking placement"
23
+
24
@dataclass
class ChatConfig:
    """Configuration for the chatbot.

    The Gemini API key is intentionally NOT stored in source control. Pass it
    explicitly via ``gemini_api_key=...`` or export it as ``GEMINI_API_KEY``
    (``GOOGLE_API_KEY`` is accepted as a fallback). If neither is set the
    field stays an empty string.

    NOTE(review): the key that used to be hard-coded here has been published
    and should be revoked.
    """

    embedding_model_name: str = 'all-MiniLM-L6-v2'
    # Evaluated once, when the class is defined (i.e. at import time).
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    max_history: int = 3
    gemini_api_key: str = ""
    log_file: str = "chat_history.txt"
    user_data_file: str = "user_data.json"

    def __post_init__(self):
        # Resolve the key at instantiation time so the environment can be
        # configured after this module has been imported.
        if not self.gemini_api_key:
            self.gemini_api_key = (
                os.environ.get("GEMINI_API_KEY")
                or os.environ.get("GOOGLE_API_KEY")
                or ""
            )
33
+
34
class UserManager:
    """Stores user profiles in a single JSON file keyed by user name."""

    def __init__(self, user_data_file: str):
        """Remember the storage path and create the file if it is missing."""
        self.user_data_file = user_data_file
        self.ensure_file_exists()

    def ensure_file_exists(self):
        """Create the user data file (holding ``{}``) if it doesn't exist."""
        if os.path.exists(self.user_data_file):
            return

        # A bare filename such as "user_data.json" has an empty dirname, and
        # os.makedirs("") raises FileNotFoundError, so only create the parent
        # directory when the path actually has one.
        parent_dir = os.path.dirname(self.user_data_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(self.user_data_file, 'w', encoding='utf-8') as f:
            json.dump({}, f)

    def save_user_info(self, user_info: "UserInfo") -> bool:
        """Insert or overwrite the entry for ``user_info.name``.

        Returns:
            True when the file was written, False if anything went wrong
            (the error is logged, never raised).
        """
        try:
            # The file may have been removed since construction.
            self.ensure_file_exists()

            # Unparseable JSON is treated as "no users stored yet".
            try:
                with open(self.user_data_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except json.JSONDecodeError:
                data = {}

            data[user_info.name] = {
                "college": user_info.college,
                "degree": user_info.degree,
                "year": user_info.year,
                "career_goals": user_info.career_goals,
                "has_internship": user_info.has_internship,
                "has_placement": user_info.has_placement,
                "last_updated": datetime.now().isoformat()
            }

            with open(self.user_data_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)
            return True
        except Exception as e:
            # Best-effort persistence: callers only get a success flag.
            logging.error(f"Error saving user info: {str(e)}")
            return False
79
+
80
+
81
class ChatLogger:
    """Appends question/answer exchanges to a plain-text log file."""

    def __init__(self, log_file: str):
        """Store the path of the file that entries are appended to."""
        self.log_file = log_file

    def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
        """Append one timestamped Q/A entry, followed by a dashed separator.

        When ``user_info`` is given, a one-line summary of the user is
        written between the timestamp and the question.
        """
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open(self.log_file, 'a', encoding='utf-8') as log:
            if user_info:
                who = f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
            else:
                who = ""
            log.write(f"\n[{stamp}]{who}\nQ: {question}\nA: {answer}\n{'-'*50}")
93
+
94
class ChatMemory:
    """Rolling window over the most recent question/answer pairs."""

    def __init__(self, max_history: int = 3):
        """Start with an empty history capped at ``max_history`` entries."""
        self.history = []
        self.max_history = max_history

    def add_interaction(self, question: str, answer: str):
        """Record one exchange, evicting the oldest entry when over the cap."""
        entry = {"question": question, "answer": answer}
        self.history.append(entry)
        # Exactly one entry is added per call, so at most one has to go.
        if len(self.history) > self.max_history:
            del self.history[0]

    def get_history(self) -> List[Dict[str, str]]:
        """Return the live history list (oldest first), not a copy."""
        return self.history

    def clear_history(self):
        """Forget every stored exchange by binding a fresh empty list."""
        self.history = list()
110
+
111
class QuestionGenerator:
    """Suggests follow-up questions for a finished question/answer exchange."""

    def __init__(self, api_key: str):
        """Configure the Gemini client and the fallback question list."""
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
        )

        # Returned whenever the model fails or its reply is not 4 lines.
        self.default_questions = [
            "What are some other skills I should focus on to improve my chances?",
            "What resources or platforms can help me in my career journey?",
            "Are there any specific companies or organizations I should target for internships/placements?",
            "What are some common interview questions asked for this career path?"
        ]

    async def generate_questions(
        self,
        question: str,
        answer: str,
        user_info: Optional["UserInfo"] = None
    ) -> List[str]:
        """Generate follow-up questions based on the conversation.

        Args:
            question: The user's last question.
            answer: The answer that was given to it.
            user_info: Accepted for interface compatibility; not used in the
                prompt at the moment.

        Returns:
            The four non-empty lines of the model's reply, or a copy of
            ``default_questions`` if the reply does not contain exactly four
            lines or the request fails.
        """
        try:
            chat = self.model.start_chat(history=[])
            prompt = (
                "Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:\n"
                "\n"
                f"Question: {question}\n"
                f"Answer: {answer}\n"
                "\n"
                "Focus the questions on:\n"
                "1. Skills development (What skills are needed, how to improve)\n"
                "2. Resources and platforms (Where to find internships, jobs, etc.)\n"
                "3. Specific target companies/organizations\n"
                "4. Common interview questions\n"
                "\n"
                "Keep the language simple and student-friendly. Format each question on a new line.\n"
                "\n"
                "NOTE: YOU MUST STRICTLY REPLY IN HINGLISH"
            )

            # Await the async client call: the synchronous send_message()
            # would block the event loop for the whole network round trip.
            reply = await chat.send_message_async(prompt)
            response = reply.text

            # One question per non-blank line.
            questions = [q.strip() for q in response.split('\n') if q.strip()]

            # Anything other than exactly 4 lines is treated as a bad reply.
            if len(questions) != 4:
                return list(self.default_questions)

            return questions

        except Exception as e:
            logging.error(f"Error generating questions: {str(e)}")
            # Hand out a copy so callers cannot mutate the shared defaults.
            return list(self.default_questions)
170
+
171
class GeminiRAG:
    """Answers career questions with Gemini, grounded in retrieved sections."""

    def __init__(self, api_key: str):
        """Configure the Gemini client and create the generative model."""
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
        )

    def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
        """Creates a context string from relevant documents.

        Each document must provide ``doc['metadata']['section']`` and
        ``doc['content']``; entries are separated by a blank line.
        """
        return "\n\n".join(
            f"Section: {doc['metadata']['section']}\n{doc['content']}"
            for doc in relevant_docs
        )

    @staticmethod
    def _build_prompt(question: str, context: str, user_info: Optional["UserInfo"]) -> str:
        """Assemble the counselor prompt; the background block is optional."""
        background = ""
        if user_info:
            internship = 'Has internship experience' if user_info.has_internship else 'No internship yet'
            placement = 'Has placement' if user_info.has_placement else 'Seeking placement'
            background = (
                "User Background:\n"
                f"- Student at {user_info.college}\n"
                f"- Studying {user_info.degree} (Year {user_info.year})\n"
                f"- Goals: {user_info.career_goals}\n"
                f"- {internship}\n"
                f"- {placement}"
            )

        # Simplified prompt to reduce chances of recitation
        return (
            "As a career counselor, provide a helpful response based on:\n"
            "\n"
            f"Context: {context}\n"
            "\n"
            f"{background}\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Provide practical advice with specific examples and actionable steps."
        )

    async def get_answer(
        self,
        question: str,
        context: str,
        user_info: Optional["UserInfo"] = None
    ) -> str:
        """Ask Gemini for an answer to ``question`` given ``context``.

        Never raises: every failure is logged and turned into a
        user-facing apology string.
        """
        try:
            chat = self.model.start_chat(history=[])
            prompt = self._build_prompt(question, context, user_info)
        except Exception as e:
            logging.error(f"Error generating answer: {str(e)}")
            return "An error occurred. Please try again later."

        try:
            # Await the async client call: the synchronous send_message()
            # would block the event loop for the whole network round trip.
            response = await chat.send_message_async(prompt)
            if response.text:
                return response.text
            return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
        except Exception as chat_error:
            logging.error(f"Chat error: {str(chat_error)}")
            return "I encountered an error while processing your question. Please try again with a simpler question."
231
+
232
class CustomEmbeddings(Embeddings):
    """Embeddings implementation backed by a SentenceTransformer model."""

    def __init__(self, model_name: str, device: str):
        """Load ``model_name`` and move the model to ``device``."""
        encoder = SentenceTransformer(model_name)
        encoder.to(device)
        self.model = encoder

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Encode ``texts`` and return one list of floats per input text."""
        with torch.no_grad():
            vectors = self.model.encode(texts, convert_to_tensor=True)
            as_lists = vectors.cpu().numpy().tolist()
            return as_lists

    def embed_query(self, text: str) -> List[float]:
        """Encode a single query string and return its vector."""
        with torch.no_grad():
            # Encoded as a one-element batch; the only row is returned.
            batch = self.model.encode([text], convert_to_tensor=True)
            rows = batch.cpu().numpy().tolist()
            return rows[0]
247
+
248
class ProductDatabase:
    """Handles document storage and retrieval"""

    def __init__(self, config: "ChatConfig"):
        """Create the embedding model; the vector store is built later."""
        self.embeddings = CustomEmbeddings(
            model_name=config.embedding_model_name,
            device=config.device
        )
        self.vectorstore = None

    @staticmethod
    def _split_markdown_sections(markdown_content: str) -> List[Dict[str, str]]:
        """Split markdown into ``{"content", "section"}`` dicts by ``## `` heading.

        * Text under a leading ``# `` title becomes the "Introduction" entry.
        * A document that opens directly with ``## `` keeps its first section.
        * Headings without any body text are skipped (nothing to embed).
        * Text before the first ``## `` that has no ``# `` title is ignored.
        """
        head, *sections = markdown_content.split('\n## ')
        documents = []

        if head.startswith('## '):
            # No "\n" precedes a heading on the very first line, so the split
            # leaves its marker attached; strip it and treat it as a section.
            sections.insert(0, head[len('## '):])
        elif head.startswith('# '):
            # partition() never fails, unlike split(...)[1] on a bare title.
            _title, _, intro = head.partition('\n')
            intro = intro.strip()
            if intro:
                documents.append({
                    "content": intro,
                    "section": "Introduction"
                })

        for section in sections:
            title, _, content = section.partition('\n')
            content = content.strip()
            if not content:
                continue
            documents.append({
                "content": content,
                "section": title.strip()
            })

        return documents

    def process_markdown(self, markdown_content: str):
        """Process markdown content and create vector store.

        Raises:
            Exception: if the content has no usable sections or the vector
                store cannot be built; the original error is chained.
        """
        try:
            documents = self._split_markdown_sections(markdown_content)
            if not documents:
                # Fail with a clear message before calling the vector store.
                raise ValueError("no sections with content were found")

            texts = [doc["content"] for doc in documents]
            metadatas = [{"section": doc["section"]} for doc in documents]

            self.vectorstore = FAISS.from_texts(
                texts=texts,
                embedding=self.embeddings,
                metadatas=metadatas
            )

        except Exception as e:
            raise Exception(f"Error processing markdown content: {str(e)}") from e

    def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """Search for relevant documents.

        Returns:
            Up to ``k`` dicts with ``content`` and ``metadata`` keys, or an
            empty list if the similarity search fails (the error is logged).

        Raises:
            ValueError: if ``process_markdown`` has not built the store yet.
        """
        if self.vectorstore is None:
            raise ValueError("Database not initialized. Please process documents first.")

        try:
            docs = self.vectorstore.similarity_search(query, k=k)
            return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
        except Exception as e:
            logging.error(f"Error during search: {str(e)}")
            return []