Spaces:

forbiddensoul90
/

PhalAI

Sleeping

App Files Files Community

forbiddensoul90 committed on Nov 7, 2024

Commit

5a01603

verified ·

1 Parent(s): c1062db

Update back.py

Browse files

Files changed (1) hide show

back.py +299 -297

back.py CHANGED Viewed

@@ -1,298 +1,300 @@
-import os
-import logging
-from typing import List, Dict, Any, Optional, Union
-from dataclasses import dataclass
-import torch
-from sentence_transformers import SentenceTransformer
-from langchain.vectorstores import FAISS
-from langchain_core.embeddings import Embeddings
-import google.generativeai as genai
-from datetime import datetime
-import json
-@dataclass
-class UserInfo:
-    """User information for context"""
-    name: str
-    college: str
-    degree: str
-    year: int
-    career_goals: str
-    has_internship: bool
-    has_placement: bool
-@dataclass
-class ChatConfig:
-    """Configuration for the chatbot"""
-    embedding_model_name: str = 'all-MiniLM-L6-v2'
-    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
-    max_history: int = 3
-    gemini_api_key: str = "AIzaSyCAlM-YPVKl1qTnUwInWz9X5sNCmISPOr4"  # Replace with your API key
-    log_file: str = "chat_history.txt"
-    user_data_file: str = "user_data.json"
-# In the UserManager class, modify these methods:
-class UserManager:
-    """Manages user information storage and retrieval"""
-    def __init__(self, user_data_file: str):
-        self.user_data_file = user_data_file
-        self.ensure_file_exists()
-    def ensure_file_exists(self):
-        """Create user data file if it doesn't exist"""
-        if not os.path.exists(self.user_data_file):
-            os.makedirs(os.path.dirname(self.user_data_file), exist_ok=True)
-            with open(self.user_data_file, 'w', encoding='utf-8') as f:
-                json.dump({}, f)
-    def save_user_info(self, user_info: UserInfo):
-        """Save user information to JSON file"""
-        try:
-            # First ensure the file exists with valid JSON
-            self.ensure_file_exists()
-            # Read existing data
-            try:
-                with open(self.user_data_file, 'r', encoding='utf-8') as f:
-                    data = json.load(f)
-            except json.JSONDecodeError:
-                data = {}
-            # Update data
-            data[user_info.name] = {
-                "college": user_info.college,
-                "degree": user_info.degree,
-                "year": user_info.year,
-                "career_goals": user_info.career_goals,
-                "has_internship": user_info.has_internship,
-                "has_placement": user_info.has_placement,
-                "last_updated": datetime.now().isoformat()
-            }
-            # Write back to file
-            with open(self.user_data_file, 'w', encoding='utf-8') as f:
-                json.dump(data, f, indent=4)
-            return True
-        except Exception as e:
-            logging.error(f"Error saving user info: {str(e)}")
-            return False
-class ChatLogger:
-    """Logger for chat interactions"""
-    def __init__(self, log_file: str):
-        self.log_file = log_file
-    def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
-        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        with open(self.log_file, 'a', encoding='utf-8') as f:
-            user_context = ""
-            if user_info:
-                user_context = f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
-            f.write(f"\n[{timestamp}]{user_context}\nQ: {question}\nA: {answer}\n{'-'*50}")
-class ChatMemory:
-    """Manages chat history"""
-    def __init__(self, max_history: int = 3):
-        self.max_history = max_history
-        self.history = []
-    def add_interaction(self, question: str, answer: str):
-        self.history.append({"question": question, "answer": answer})
-        if len(self.history) > self.max_history:
-            self.history.pop(0)
-    def get_history(self) -> List[Dict[str, str]]:
-        return self.history
-    def clear_history(self):
-        self.history = []
-class QuestionGenerator:
-    def __init__(self, api_key: str):
-        genai.configure(api_key=api_key)
-        self.generation_config = {
-            "temperature": 0.1,
-            "top_p": 0.95,
-            "max_output_tokens": 8192,
-        }
-        self.model = genai.GenerativeModel(
-            model_name="gemini-1.5-flash",
-            generation_config=self.generation_config,
-            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
-        )
-        self.default_questions = [
-            "What are some other skills I should focus on to improve my chances?",
-            "What resources or platforms can help me in my career journey?",
-            "Are there any specific companies or organizations I should target for internships/placements?",
-            "What are some common interview questions asked for this career path?"
-        ]
-    async def generate_questions(
-        self,
-        question: str,
-        answer: str,
-        user_info: Optional[UserInfo] = None
-    ) -> List[str]:
-        """Generate follow-up questions based on the conversation"""
-        try:
-            chat = self.model.start_chat(history=[])
-            prompt = f"""Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:
-Question: {question}
-Answer: {answer}
-Focus the questions on:
-1. Skills development (What skills are needed, how to improve)
-2. Resources and platforms (Where to find internships, jobs, etc.)
-3. Specific target companies/organizations
-4. Common interview questions
-Keep the language simple and student-friendly. Format each question on a new line."""
-            response = chat.send_message(prompt).text
-            # Extract questions
-            questions = [q.strip() for q in response.split('\n') if q.strip()]
-            # Return default questions if we don't get exactly 4 valid questions
-            if len(questions) != 4:
-                return self.default_questions
-            return questions
-        except Exception as e:
-            logging.error(f"Error generating questions: {str(e)}")
-            return self.default_questions
-class GeminiRAG:
-    def __init__(self, api_key: str):
-        genai.configure(api_key=api_key)
-        self.generation_config = {
-            "temperature": 0.1,
-            "top_p": 0.95,
-            "top_k": 64,
-            "max_output_tokens": 8192,
-        }
-        self.model = genai.GenerativeModel(
-            model_name="gemini-1.5-flash",
-            generation_config=self.generation_config,
-            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
-        )
-    def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
-        """Creates a context string from relevant documents"""
-        context_parts = []
-        for doc in relevant_docs:
-            context_parts.append(f"Section: {doc['metadata']['section']}\n{doc['content']}")
-        return "\n\n".join(context_parts)
-    async def get_answer(
-        self,
-        question: str,
-        context: str,
-        user_info: Optional[UserInfo] = None
-    ) -> str:
-        try:
-            chat = self.model.start_chat(history=[])
-            # Simplified prompt to reduce chances of recitation
-            prompt = f"""As a career counselor, provide a helpful response based on:
-Context: {context}
-{f'''User Background:
-- Student at {user_info.college}
-- Studying {user_info.degree} (Year {user_info.year})
-- Goals: {user_info.career_goals}
-- {'Has internship experience' if user_info.has_internship else 'No internship yet'}
-- {'Has placement' if user_info.has_placement else 'Seeking placement'}''' if user_info else ''}
-Question: {question}
-Provide practical advice with specific examples and actionable steps."""
-            try:
-                response = chat.send_message(prompt)
-                if response.text:
-                    return response.text
-                else:
-                    return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
-            except Exception as chat_error:
-                logging.error(f"Chat error: {str(chat_error)}")
-                return "I encountered an error while processing your question. Please try again with a simpler question."
-        except Exception as e:
-            logging.error(f"Error generating answer: {str(e)}")
-            return "An error occurred. Please try again later."
-class CustomEmbeddings(Embeddings):
-    """Custom embeddings using SentenceTransformer"""
-    def __init__(self, model_name: str, device: str):
-        self.model = SentenceTransformer(model_name)
-        self.model.to(device)
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        with torch.no_grad():
-            embeddings = self.model.encode(texts, convert_to_tensor=True)
-            return embeddings.cpu().numpy().tolist()
-    def embed_query(self, text: str) -> List[float]:
-        with torch.no_grad():
-            embedding = self.model.encode([text], convert_to_tensor=True)
-            return embedding.cpu().numpy().tolist()[0]
-class ProductDatabase:
-    """Handles document storage and retrieval"""
-    def __init__(self, config: ChatConfig):
-        self.embeddings = CustomEmbeddings(
-            model_name=config.embedding_model_name,
-            device=config.device
-        )
-        self.vectorstore = None
-    def process_markdown(self, markdown_content: str):
-        """Process markdown content and create vector store"""
-        try:
-            sections = markdown_content.split('\n## ')
-            documents = []
-            if sections[0].startswith('# '):
-                intro = sections[0].split('\n', 1)[1]
-                documents.append({
-                    "content": intro,
-                    "section": "Introduction"
-                })
-            for section in sections[1:]:
-                if section.strip():
-                    title, content = section.split('\n', 1)
-                    documents.append({
-                        "content": content.strip(),
-                        "section": title.strip()
-                    })
-            texts = [doc["content"] for doc in documents]
-            metadatas = [{"section": doc["section"]} for doc in documents]
-            self.vectorstore = FAISS.from_texts(
-                texts=texts,
-                embedding=self.embeddings,
-                metadatas=metadatas
-            )
-        except Exception as e:
-            raise Exception(f"Error processing markdown content: {str(e)}")
-    def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
-        """Search for relevant documents"""
-        if not self.vectorstore:
-            raise ValueError("Database not initialized. Please process documents first.")
-        try:
-            docs = self.vectorstore.similarity_search(query, k=k)
-            return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
-        except Exception as e:
-            logging.error(f"Error during search: {str(e)}")
             return []

+import os
+import logging
+from typing import List, Dict, Any, Optional, Union
+from dataclasses import dataclass
+import torch
+from sentence_transformers import SentenceTransformer
+from langchain.vectorstores import FAISS
+from langchain_core.embeddings import Embeddings
+import google.generativeai as genai
+from datetime import datetime
+import json
+@dataclass
+class UserInfo:
+    """User information for context.
+
+    Profile of the student using the chatbot. UserManager persists these
+    fields to JSON, ChatLogger writes some of them to the chat log, and
+    GeminiRAG interpolates them into the answer prompt.
+    """
+    # Used as the top-level key of this user's record in the user data file.
+    name: str
+    college: str
+    degree: str
+    # Rendered as "Year {year}" in the answer prompt.
+    year: int
+    career_goals: str
+    # Selects the internship line of the prompt's "User Background" section.
+    has_internship: bool
+    # Selects the placement line of the prompt's "User Background" section.
+    has_placement: bool
+@dataclass
+class ChatConfig:
+    """Configuration for the chatbot"""
+    embedding_model_name: str = 'all-MiniLM-L6-v2'
+    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
+    max_history: int = 3
+    gemini_api_key: str = "AIzaSyCAlM-YPVKl1qTnUwInWz9X5sNCmISPOr4"  # Replace with your API key
+    log_file: str = "chat_history.txt"
+    user_data_file: str = "user_data.json"
+# In the UserManager class, modify these methods:
+class UserManager:
+    """Manages user information storage and retrieval"""
+    def __init__(self, user_data_file: str):
+        self.user_data_file = user_data_file
+        self.ensure_file_exists()
+    def ensure_file_exists(self):
+        """Create user data file if it doesn't exist"""
+        if not os.path.exists(self.user_data_file):
+            os.makedirs(os.path.dirname(self.user_data_file), exist_ok=True)
+            with open(self.user_data_file, 'w', encoding='utf-8') as f:
+                json.dump({}, f)
+    def save_user_info(self, user_info: UserInfo):
+        """Save user information to JSON file"""
+        try:
+            # First ensure the file exists with valid JSON
+            self.ensure_file_exists()
+            # Read existing data
+            try:
+                with open(self.user_data_file, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+            except json.JSONDecodeError:
+                data = {}
+            # Update data
+            data[user_info.name] = {
+                "college": user_info.college,
+                "degree": user_info.degree,
+                "year": user_info.year,
+                "career_goals": user_info.career_goals,
+                "has_internship": user_info.has_internship,
+                "has_placement": user_info.has_placement,
+                "last_updated": datetime.now().isoformat()
+            }
+            # Write back to file
+            with open(self.user_data_file, 'w', encoding='utf-8') as f:
+                json.dump(data, f, indent=4)
+            return True
+        except Exception as e:
+            logging.error(f"Error saving user info: {str(e)}")
+            return False
+class ChatLogger:
+    """Logger for chat interactions"""
+    def __init__(self, log_file: str):
+        self.log_file = log_file
+    def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        with open(self.log_file, 'a', encoding='utf-8') as f:
+            user_context = ""
+            if user_info:
+                user_context = f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
+            f.write(f"\n[{timestamp}]{user_context}\nQ: {question}\nA: {answer}\n{'-'*50}")
+class ChatMemory:
+    """Manages chat history"""
+    def __init__(self, max_history: int = 3):
+        self.max_history = max_history
+        self.history = []
+    def add_interaction(self, question: str, answer: str):
+        self.history.append({"question": question, "answer": answer})
+        if len(self.history) > self.max_history:
+            self.history.pop(0)
+    def get_history(self) -> List[Dict[str, str]]:
+        return self.history
+    def clear_history(self):
+        self.history = []
+class QuestionGenerator:
+    """Suggests follow-up questions for a career-advice exchange using a Gemini model."""
+    def __init__(self, api_key: str):
+        """Configure the genai client with *api_key*, build the model, and set the fallback questions."""
+        genai.configure(api_key=api_key)
+        self.generation_config = {
+            "temperature": 0.1,
+            "top_p": 0.95,
+            "max_output_tokens": 8192,
+        }
+        self.model = genai.GenerativeModel(
+            model_name="gemini-1.5-flash",
+            generation_config=self.generation_config,
+            # Every listed safety category is set to BLOCK_NONE.
+            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
+        )
+        # Returned by generate_questions when the reply is unusable or the call fails.
+        self.default_questions = [
+            "What are some other skills I should focus on to improve my chances?",
+            "What resources or platforms can help me in my career journey?",
+            "Are there any specific companies or organizations I should target for internships/placements?",
+            "What are some common interview questions asked for this career path?"
+        ]
+    async def generate_questions(
+        self,
+        question: str,
+        answer: str,
+        user_info: Optional[UserInfo] = None
+    ) -> List[str]:
+        """Generate follow-up questions based on the conversation.
+
+        Args:
+            question: The user's question, interpolated into the prompt.
+            answer: The answer that was given, interpolated into the prompt.
+            user_info: Accepted but not referenced in this method's body.
+
+        Returns:
+            The non-empty lines of the model's reply when there are exactly
+            four of them; otherwise ``self.default_questions``. The defaults
+            are also returned when any exception is raised (it is logged).
+        """
+        try:
+            # A fresh chat with empty history is started for every call.
+            chat = self.model.start_chat(history=[])
+            prompt = f"""Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:
+Question: {question}
+Answer: {answer}
+Focus the questions on:
+1. Skills development (What skills are needed, how to improve)
+2. Resources and platforms (Where to find internships, jobs, etc.)
+3. Specific target companies/organizations
+4. Common interview questions
+Keep the language simple and student-friendly. Format each question on a new line.
+NOTE: YOU MUST STRICTLY REPLY IN HINGLISH"""
+            # NOTE(review): send_message is not awaited inside this async method —
+            # presumably a blocking call; confirm against the genai client.
+            response = chat.send_message(prompt).text
+            # Extract questions
+            # Each non-empty line of the reply is treated as one question.
+            questions = [q.strip() for q in response.split('\n') if q.strip()]
+            # Return default questions if we don't get exactly 4 valid questions
+            if len(questions) != 4:
+                return self.default_questions
+            return questions
+        except Exception as e:
+            logging.error(f"Error generating questions: {str(e)}")
+            return self.default_questions
+class GeminiRAG:
+    """Answers career questions with a Gemini model, grounded in retrieved document sections."""
+    def __init__(self, api_key: str):
+        """Configure the genai client with *api_key* and build the generative model."""
+        genai.configure(api_key=api_key)
+        self.generation_config = {
+            "temperature": 0.1,
+            "top_p": 0.95,
+            "top_k": 64,
+            "max_output_tokens": 8192,
+        }
+        self.model = genai.GenerativeModel(
+            model_name="gemini-1.5-flash",
+            generation_config=self.generation_config,
+            # Every listed safety category is set to BLOCK_NONE.
+            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
+        )
+    def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
+        """Creates a context string from relevant documents.
+
+        Each document must provide ``doc['metadata']['section']`` and
+        ``doc['content']``. Documents are rendered as a "Section: ..." header
+        followed by the content, and joined with blank lines.
+        """
+        context_parts = []
+        for doc in relevant_docs:
+            context_parts.append(f"Section: {doc['metadata']['section']}\n{doc['content']}")
+        return "\n\n".join(context_parts)
+    async def get_answer(
+        self,
+        question: str,
+        context: str,
+        user_info: Optional[UserInfo] = None
+    ) -> str:
+        """Ask the model for advice on *question* given *context*.
+
+        Args:
+            question: The user's question, interpolated into the prompt.
+            context: Context text, interpolated into the prompt.
+            user_info: When given, a "User Background" section built from its
+                fields is added to the prompt; otherwise that section is empty.
+
+        Returns:
+            The reply text, or a fixed fallback message when the reply text is
+            empty or an exception is raised (exceptions are logged).
+        """
+        try:
+            # A fresh chat with empty history is started for every call.
+            chat = self.model.start_chat(history=[])
+            # Simplified prompt to reduce chances of recitation
+            prompt = f"""As a career counselor, provide a helpful response based on:
+Context: {context}
+{f'''User Background:
+- Student at {user_info.college}
+- Studying {user_info.degree} (Year {user_info.year})
+- Goals: {user_info.career_goals}
+- {'Has internship experience' if user_info.has_internship else 'No internship yet'}
+- {'Has placement' if user_info.has_placement else 'Seeking placement'}''' if user_info else ''}
+Question: {question}
+Provide practical advice with specific examples and actionable steps."""
+            try:
+                # NOTE(review): send_message is not awaited inside this async method —
+                # presumably a blocking call; confirm against the genai client.
+                response = chat.send_message(prompt)
+                if response.text:
+                    return response.text
+                else:
+                    return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
+            except Exception as chat_error:
+                # Failures while sending or reading the reply get their own message.
+                logging.error(f"Chat error: {str(chat_error)}")
+                return "I encountered an error while processing your question. Please try again with a simpler question."
+        except Exception as e:
+            # Reached for failures outside the inner try, e.g. while building the prompt.
+            logging.error(f"Error generating answer: {str(e)}")
+            return "An error occurred. Please try again later."
+class CustomEmbeddings(Embeddings):
+    """Custom embeddings using SentenceTransformer"""
+    def __init__(self, model_name: str, device: str):
+        self.model = SentenceTransformer(model_name)
+        self.model.to(device)
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        with torch.no_grad():
+            embeddings = self.model.encode(texts, convert_to_tensor=True)
+            return embeddings.cpu().numpy().tolist()
+    def embed_query(self, text: str) -> List[float]:
+        with torch.no_grad():
+            embedding = self.model.encode([text], convert_to_tensor=True)
+            return embedding.cpu().numpy().tolist()[0]
+class ProductDatabase:
+    """Handles document storage and retrieval"""
+    def __init__(self, config: ChatConfig):
+        self.embeddings = CustomEmbeddings(
+            model_name=config.embedding_model_name,
+            device=config.device
+        )
+        self.vectorstore = None
+    def process_markdown(self, markdown_content: str):
+        """Process markdown content and create vector store"""
+        try:
+            sections = markdown_content.split('\n## ')
+            documents = []
+            if sections[0].startswith('# '):
+                intro = sections[0].split('\n', 1)[1]
+                documents.append({
+                    "content": intro,
+                    "section": "Introduction"
+                })
+            for section in sections[1:]:
+                if section.strip():
+                    title, content = section.split('\n', 1)
+                    documents.append({
+                        "content": content.strip(),
+                        "section": title.strip()
+                    })
+            texts = [doc["content"] for doc in documents]
+            metadatas = [{"section": doc["section"]} for doc in documents]
+            self.vectorstore = FAISS.from_texts(
+                texts=texts,
+                embedding=self.embeddings,
+                metadatas=metadatas
+            )
+        except Exception as e:
+            raise Exception(f"Error processing markdown content: {str(e)}")
+    def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
+        """Search for relevant documents"""
+        if not self.vectorstore:
+            raise ValueError("Database not initialized. Please process documents first.")
+        try:
+            docs = self.vectorstore.similarity_search(query, k=k)
+            return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
+        except Exception as e:
+            logging.error(f"Error during search: {str(e)}")
             return []