CosmickVisions committed
Commit 4c6c992 · verified · 1 Parent(s): 84a6029

Update app.py

Files changed (1)
  1. app.py +69 -17
app.py CHANGED
@@ -25,23 +25,24 @@ client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
 
 # Initialize embeddings with error handling
 try:
+    # Force CPU usage for embeddings
     embeddings = HuggingFaceInstructEmbeddings(
         model_name="hkunlp/instructor-base",
-        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
+        model_kwargs={"device": "cpu"}  # Force CPU usage
     )
 except Exception as e:
     print(f"Warning: Failed to load primary embeddings model: {e}")
     try:
         embeddings = HuggingFaceInstructEmbeddings(
             model_name="all-MiniLM-L6-v2",
-            model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
+            model_kwargs={"device": "cpu"}  # Force CPU usage
         )
     except Exception as e:
         print(f"Warning: Failed to load fallback embeddings model: {e}")
         embeddings = None
 
-# Directory to store FAISS indexes
-FAISS_INDEX_DIR = "faiss_indexes_tech"
+# Directory to store FAISS indexes with better naming
+FAISS_INDEX_DIR = "faiss_indexes_tech_cpu"
 if not os.path.exists(FAISS_INDEX_DIR):
     os.makedirs(FAISS_INDEX_DIR)
 
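Note: to sanity-check the CPU-forcing change above in isolation, the embeddings can be loaded and queried directly. This is a minimal sketch, not part of the commit; the langchain_community import path is an assumption (app.py may import the class from a different module):

# Sketch: confirm the CPU-only embeddings load and return a vector
from langchain_community.embeddings import HuggingFaceInstructEmbeddings  # import path assumed

embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-base",
    model_kwargs={"device": "cpu"},  # same CPU forcing as the diff above
)
vector = embeddings.embed_query("def hello(): pass")
print(len(vector))  # embedding dimensionality, typically 768 for instructor-base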
@@ -204,7 +205,7 @@ def process_pdf(pdf_file):
 
 # Function to generate chatbot responses with Tech theme
 def generate_response(message, session_id, model_name, history):
-    """Generate chatbot responses"""
+    """Generate chatbot responses with FAISS context enhancement"""
     if not message:
         return history
 
@@ -212,10 +213,22 @@ def generate_response(message, session_id, model_name, history):
         context = ""
         if embeddings and session_id and session_id in user_vectorstores:
             try:
+                print(f"Performing similarity search with session: {session_id}")
                 vectorstore = user_vectorstores[session_id]
-                docs = vectorstore.similarity_search(message, k=3)
+
+                # Use a higher k value to get more relevant context
+                docs = vectorstore.similarity_search(message, k=5)
+
                 if docs:
-                    context = "\n\nRelevant code context:\n" + "\n".join(f"```\n{doc.page_content}\n```" for doc in docs)
+                    # Format the context more clearly with source information
+                    context = "\n\nRelevant code context from your files:\n\n"
+                    for i, doc in enumerate(docs, 1):
+                        source = doc.metadata.get("source", "Unknown")
+                        language = doc.metadata.get("language", "Unknown")
+                        context += f"--- Segment {i} from {source} ({language}) ---\n"
+                        context += f"```\n{doc.page_content}\n```\n\n"
+
+                    print(f"Found {len(docs)} relevant code segments for context.")
             except Exception as e:
                 print(f"Warning: Failed to perform similarity search: {e}")
 
@@ -224,7 +237,10 @@ def generate_response(message, session_id, model_name, history):
         Format code snippets with proper markdown code blocks and specify the language."""
 
         if context:
-            system_prompt += f"\nUse this context from the uploaded code when relevant:{context}"
+            system_prompt += f"\n\nUse this context from the uploaded code files to inform your answers:{context}"
+
+            # Add instruction to reference specific file parts
+            system_prompt += "\nWhen discussing code from the uploaded files, specifically reference the file name and segment number."
 
         completion = client.chat.completions.create(
             model=model_name,
@@ -237,12 +253,31 @@ def generate_response(message, session_id, model_name, history):
         )
 
         response = completion.choices[0].message.content
-        history.append({"role": "assistant", "content": response})
+
+        # For proper chat history handling
+        if isinstance(history, list) and history and isinstance(history[0], dict):
+            # History is in message format
+            history.append({"role": "user", "content": message})
+            history.append({"role": "assistant", "content": response})
+        else:
+            # Fallback for other formats
+            history.append({"role": "user", "content": message})
+            history.append({"role": "assistant", "content": response})
+
         return history
 
     except Exception as e:
         error_msg = f"Error generating response: {str(e)}"
-        history.append({"role": "assistant", "content": error_msg})
+
+        # Handle different history formats
+        if isinstance(history, list):
+            if history and isinstance(history[0], dict):
+                history.append({"role": "user", "content": message})
+                history.append({"role": "assistant", "content": error_msg})
+            else:
+                history.append({"role": "user", "content": message})
+                history.append({"role": "assistant", "content": error_msg})
+
         return history
 
 # Functions to update PDF viewer
@@ -492,20 +527,20 @@ def perform_stack_search(query, tag, sort_by):
     except Exception as e:
         return f"Error searching Stack Overflow: {str(e)}"
 
-# Modify the file input and processing section
+# Modify the process_code_file function
 def process_code_file(file_obj):
-    """Process uploaded code files"""
+    """Process uploaded code files and store in FAISS index"""
     if file_obj is None:
         return None, "No file uploaded", {}
 
     try:
         # Handle both file objects and bytes objects
         if isinstance(file_obj, bytes):
-            content = file_obj.decode('utf-8')
+            content = file_obj.decode('utf-8', errors='replace')  # Added error handling
             file_name = "uploaded_file"
             file_extension = ".txt"  # Default extension
         else:
-            content = file_obj.read().decode('utf-8')
+            content = file_obj.read().decode('utf-8', errors='replace')  # Added error handling
             file_name = getattr(file_obj, 'name', 'uploaded_file')
             file_extension = Path(file_name).suffix.lower()
 
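Note: the errors='replace' additions above keep uploads that are not valid UTF-8 from raising during decode. A small illustration with made-up bytes (not from the repo):

# A Latin-1 byte sequence that is invalid UTF-8
data = b"print('caf\xe9')"
# data.decode('utf-8')  # would raise UnicodeDecodeError
print(data.decode('utf-8', errors='replace'))  # lossy: the bad byte becomes U+FFFD, but nothing raises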
@@ -518,17 +553,34 @@ def process_code_file(file_obj):
         session_id = None
         if embeddings:
             try:
-                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-                chunks = text_splitter.create_documents([content])
+                print(f"Creating FAISS index for {file_name}...")
+                # Improved chunking for code files
+                text_splitter = RecursiveCharacterTextSplitter(
+                    chunk_size=500,  # Smaller chunks for code
+                    chunk_overlap=50,
+                    separators=["\n\n", "\n", " ", ""]
+                )
+                chunks = text_splitter.create_documents([content], metadatas=[{"filename": file_name, "language": language}])
+
+                # Add source metadata to help with retrieval
+                for i, chunk in enumerate(chunks):
+                    chunk.metadata["chunk_id"] = i
+                    chunk.metadata["source"] = file_name
+
+                # Create and store vectorstore
                 vectorstore = FAISS.from_documents(chunks, embeddings)
                 session_id = str(uuid.uuid4())
                 index_path = os.path.join(FAISS_INDEX_DIR, session_id)
                 vectorstore.save_local(index_path)
                 user_vectorstores[session_id] = vectorstore
+
+                # Add number of chunks to metrics for display
+                metrics["chunks"] = len(chunks)
+                print(f"Successfully created FAISS index with {len(chunks)} chunks.")
             except Exception as e:
                 print(f"Warning: Failed to create vectorstore: {e}")
 
-        return session_id, f"✅ Successfully analyzed {file_name}", metrics
+        return session_id, f"✅ Successfully analyzed {file_name} and stored in FAISS index", metrics
     except Exception as e:
         return None, f"Error processing file: {str(e)}", {}
 
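Note: each upload now persists its index under FAISS_INDEX_DIR via save_local, so a later process could reload it instead of re-embedding. A hypothetical sketch, not part of this commit; the langchain_community import paths and the allow_dangerous_deserialization flag are assumptions about the installed LangChain version:

# Sketch: reload a per-session index saved by process_code_file and repeat the
# kind of similarity search generate_response performs.
import os
from langchain_community.embeddings import HuggingFaceInstructEmbeddings  # import path assumed
from langchain_community.vectorstores import FAISS  # import path assumed

embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-base",
    model_kwargs={"device": "cpu"},
)
session_id = "<a session id returned by process_code_file>"
index_path = os.path.join("faiss_indexes_tech_cpu", session_id)
vectorstore = FAISS.load_local(
    index_path,
    embeddings,
    allow_dangerous_deserialization=True,  # required by recent LangChain releases; assumption
)
docs = vectorstore.similarity_search("Where is the Groq API key read?", k=5)
for doc in docs:
    print(doc.metadata.get("source", "Unknown"), doc.page_content[:80])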