CosmickVisions committed on
Commit
645418b
·
verified ·
1 Parent(s): 8b2cff8

Update app.py

Browse files
Files changed (1)
  1. app.py +42 -8
app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
5
  import uuid
6
  from dotenv import load_dotenv
7
  from langchain_community.vectorstores import FAISS
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  import fitz # PyMuPDF
11
  import base64
@@ -16,11 +16,26 @@ import json
16
  import re
17
  from datetime import datetime, timedelta
18
  from pathlib import Path
 
19
 
20
  # Load environment variables
21
  load_dotenv()
22
  client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
23
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Directory to store FAISS indexes
26
  FAISS_INDEX_DIR = "faiss_indexes_tech"
@@ -116,11 +131,14 @@ def generate_response(message, session_id, model_name, history):
116
  return history
117
  try:
118
  context = ""
119
- if session_id and session_id in user_vectorstores:
120
- vectorstore = user_vectorstores[session_id]
121
- docs = vectorstore.similarity_search(message, k=3)
122
- if docs:
123
- context = "\n\nRelevant information from uploaded PDF:\n" + "\n".join(f"- {doc.page_content}" for doc in docs)
 
 
 
124
 
125
  # Check if it's a GitHub repo search
126
  if re.match(r'^/github\s+.+', message, re.IGNORECASE):
@@ -447,7 +465,23 @@ def process_code_file(file_obj):
447
  # Calculate metrics
448
  metrics = calculate_complexity_metrics(content, language)
449
 
450
- return str(uuid.uuid4()), f"✅ Successfully analyzed {file_obj.name}", metrics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  except Exception as e:
452
  return None, f"Error processing file: {str(e)}", {}
453
 
 
5
  import uuid
6
  from dotenv import load_dotenv
7
  from langchain_community.vectorstores import FAISS
8
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  import fitz # PyMuPDF
11
  import base64
 
16
  import re
17
  from datetime import datetime, timedelta
18
  from pathlib import Path
19
+ import torch
20
 
21
  # Load environment variables
22
  load_dotenv()
23
  client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
24
+
25
+ # Replace the embeddings initialization
26
+ try:
27
+ # Initialize embeddings with a simpler, more reliable model
28
+ embeddings = HuggingFaceInstructEmbeddings(
29
+ model_name="hkunlp/instructor-base",
30
+ model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
31
+ )
32
+ except Exception as e:
33
+ print(f"Warning: Failed to load primary embeddings model: {e}")
34
+ # Fallback to a basic model
35
+ embeddings = HuggingFaceInstructEmbeddings(
36
+ model_name="all-MiniLM-L6-v2",
37
+ model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
38
+ )
39
 
40
  # Directory to store FAISS indexes
41
  FAISS_INDEX_DIR = "faiss_indexes_tech"
 
131
  return history
132
  try:
133
  context = ""
134
+ if embeddings and session_id and session_id in user_vectorstores: # Check if embeddings exist
135
+ try:
136
+ vectorstore = user_vectorstores[session_id]
137
+ docs = vectorstore.similarity_search(message, k=3)
138
+ if docs:
139
+ context = "\n\nRelevant information from uploaded code:\n" + "\n".join(f"- {doc.page_content}" for doc in docs)
140
+ except Exception as e:
141
+ print(f"Warning: Failed to perform similarity search: {e}")
142
 
143
  # Check if it's a GitHub repo search
144
  if re.match(r'^/github\s+.+', message, re.IGNORECASE):
 
465
  # Calculate metrics
466
  metrics = calculate_complexity_metrics(content, language)
467
 
468
+ # Only create vectorstore if embeddings are available
469
+ if embeddings:
470
+ try:
471
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
472
+ chunks = text_splitter.create_documents([content])
473
+ vectorstore = FAISS.from_documents(chunks, embeddings)
474
+ session_id = str(uuid.uuid4())
475
+ index_path = os.path.join(FAISS_INDEX_DIR, session_id)
476
+ vectorstore.save_local(index_path)
477
+ user_vectorstores[session_id] = vectorstore
478
+ except Exception as e:
479
+ print(f"Warning: Failed to create vectorstore: {e}")
480
+ session_id = None
481
+ else:
482
+ session_id = None
483
+
484
+ return session_id, f"✅ Successfully analyzed {file_obj.name}", metrics
485
  except Exception as e:
486
  return None, f"Error processing file: {str(e)}", {}
487