push updated backend changes and auto start building
Browse files
agents.py
CHANGED
@@ -7,6 +7,13 @@ from rag import RAGAgent
|
|
7 |
import os
|
8 |
import sys
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
11 |
if BASE_DIR not in sys.path:
|
12 |
sys.path.insert(0, BASE_DIR)
|
@@ -87,6 +94,6 @@ class TutorAgent(AssistantAgent):
|
|
87 |
# User Agent
|
88 |
class BioUser(UserProxyAgent):
|
89 |
def __init__(self, name="BioUser", **kwargs):
|
90 |
-
|
91 |
kwargs.setdefault("code_execution_config", {"use_docker": False})
|
92 |
super().__init__(name=name, **kwargs)
|
|
|
import os
import sys

# Ensure the Hugging Face cache lives in a writable directory (important on
# HF Spaces, where only the user's home directory is guaranteed writable).
# NOTE: use the actual home directory instead of the hardcoded
# "/home/user/.cache/huggingface" — identical on HF Spaces, but correct on
# any other machine too.
if "HF_HOME" not in os.environ:
    hf_cache = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
    os.environ["HF_HOME"] = hf_cache
    # TRANSFORMERS_CACHE is deprecated in favour of HF_HOME but is still read
    # by older transformers releases, so set both for compatibility.
    os.environ["TRANSFORMERS_CACHE"] = os.path.join(hf_cache, "transformers")
    os.environ["HF_HUB_CACHE"] = os.path.join(hf_cache, "hub")

# Make sibling modules importable regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
if BASE_DIR not in sys.path:
    sys.path.insert(0, BASE_DIR)
|
|
|
# User Agent
class BioUser(UserProxyAgent):
    """Proxy agent standing in for the human user of the tutor."""

    def __init__(self, name="BioUser", **kwargs):
        # Docker is not available on HF Spaces, so fall back to local code
        # execution unless the caller explicitly configured otherwise.
        if "code_execution_config" not in kwargs:
            kwargs["code_execution_config"] = {"use_docker": False}
        super().__init__(name=name, **kwargs)
|
config.py
CHANGED
@@ -1,30 +1,42 @@
|
|
1 |
# config.py
|
2 |
import os
|
3 |
import sys
|
|
|
4 |
from pathlib import Path
|
5 |
|
6 |
-
|
7 |
BASE_DIR = Path(__file__).resolve().parent
|
8 |
|
9 |
if str(BASE_DIR) not in sys.path:
|
10 |
sys.path.insert(0, str(BASE_DIR))
|
11 |
|
|
|
12 |
try:
|
13 |
-
import google.colab
|
14 |
IN_COLAB = True
|
15 |
except ImportError:
|
16 |
IN_COLAB = False
|
17 |
|
|
|
18 |
if IN_COLAB:
|
19 |
VECTORSTORE_DIR = Path("/content/drive/MyDrive/bioinformatics_tutor_ai/vectorstore")
|
20 |
else:
|
21 |
VECTORSTORE_DIR = BASE_DIR / "vectorstore"
|
22 |
|
|
|
23 |
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
24 |
LLM_MODEL = "BioMistral/BioMistral-7B-DARE"
|
25 |
|
26 |
-
|
|
|
27 |
|
28 |
-
|
|
|
29 |
if not OPENAI_API_KEY:
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# config.py
#
# Central configuration for the bioinformatics tutor: paths, model names,
# thresholds, and environment setup (HF cache, OpenAI key).
import os
import sys
import logging
from pathlib import Path

# Module-level logger instead of the root logger so warnings are attributable.
log = logging.getLogger(__name__)

BASE_DIR = Path(__file__).resolve().parent

# Make sibling modules importable regardless of the working directory.
if str(BASE_DIR) not in sys.path:
    sys.path.insert(0, str(BASE_DIR))

# Configure the Hugging Face cache early, before anything that imports
# transformers/hub can read it (important on HF Spaces, where only $HOME is
# guaranteed writable). Path.home() equals /home/user on Spaces but stays
# correct on any other machine.
if "HF_HOME" not in os.environ:
    hf_cache = str(Path.home() / ".cache" / "huggingface")
    os.environ["HF_HOME"] = hf_cache
    # TRANSFORMERS_CACHE is deprecated in favour of HF_HOME but still read by
    # older transformers releases; set both for compatibility.
    os.environ["TRANSFORMERS_CACHE"] = os.path.join(hf_cache, "transformers")
    os.environ["HF_HUB_CACHE"] = os.path.join(hf_cache, "hub")

# Detect Google Colab so the vectorstore can live on Drive there.
try:
    import google.colab  # noqa: F401
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    VECTORSTORE_DIR = Path("/content/drive/MyDrive/bioinformatics_tutor_ai/vectorstore")
else:
    VECTORSTORE_DIR = BASE_DIR / "vectorstore"

# Models
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "BioMistral/BioMistral-7B-DARE"

# Confidence threshold for TutorAgent
CONFIDENCE_THRESHOLD = 0.65

# OpenAI key for the AutoGen tutor agent; warn loudly (but don't crash) when
# it is missing so the rest of the app can still start.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    log.warning("OPENAI_API_KEY not set! AutoGen Tutor Agent will not work without it.")
|
main.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# main.py
#
# FastAPI entry point exposing the bioinformatics tutor over HTTP.
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from agents import TutorAgent, BioUser

# Initialize FastAPI
app = FastAPI(title="Bioinformatics Tutor API")

# Initialize agents once at import time so they are reused across requests.
user_agent = BioUser()
tutor_agent = TutorAgent()

# Request model
class QueryRequest(BaseModel):
    question: str

# Response model
class QueryResponse(BaseModel):
    answer: str

@app.post("/ask", response_model=QueryResponse)
def ask_tutor(request: QueryRequest):
    """
    Ask the Bioinformatics Tutor a question.
    """
    answer = tutor_agent.process_query(request.question)
    return QueryResponse(answer=answer)

@app.get("/")
def root():
    """Health-check endpoint confirming the API is up."""
    return {"message": "Bioinformatics Tutor API is running."}

if __name__ == "__main__":
    # The original imported uvicorn but never used it, leaving no way to
    # start the server with `python main.py`. Port 7860 is the port HF
    # Spaces expects an app to listen on — TODO confirm against the Space
    # configuration.
    uvicorn.run(app, host="0.0.0.0", port=7860)
rag.py
CHANGED
@@ -12,7 +12,6 @@ from sentence_transformers import SentenceTransformer
|
|
12 |
from config import VECTORSTORE_DIR, EMBEDDING_MODEL
|
13 |
|
14 |
log = logging.getLogger(__name__)
|
15 |
-
logging.basicConfig(level=logging.INFO)
|
16 |
|
17 |
|
18 |
class RAGAgent:
|
@@ -29,26 +28,30 @@ class RAGAgent:
|
|
29 |
self._embedder: Optional[SentenceTransformer] = None
|
30 |
self._loaded = False
|
31 |
|
32 |
-
def _find_index_file(self) -> str:
|
33 |
if not os.path.isdir(self.vectorstore_dir):
|
34 |
-
|
|
|
35 |
|
36 |
for fname in os.listdir(self.vectorstore_dir):
|
37 |
-
if fname.endswith(".faiss"
|
38 |
return os.path.join(self.vectorstore_dir, fname)
|
|
|
39 |
|
40 |
-
|
|
|
|
|
41 |
|
42 |
-
def _find_meta_file(self) -> str:
|
43 |
for candidate in ("index.pkl", "metadata.pkl", "index_meta.pkl", "metadata.json", "index.json"):
|
44 |
p = os.path.join(self.vectorstore_dir, candidate)
|
45 |
if os.path.exists(p):
|
46 |
return p
|
47 |
-
|
48 |
for fname in os.listdir(self.vectorstore_dir):
|
49 |
-
if fname.endswith(".pkl"):
|
50 |
return os.path.join(self.vectorstore_dir, fname)
|
51 |
-
|
|
|
52 |
|
53 |
@property
|
54 |
def embedder(self) -> SentenceTransformer:
|
@@ -61,32 +64,39 @@ class RAGAgent:
|
|
61 |
"""Load index and metadata into memory (idempotent)."""
|
62 |
if self._loaded:
|
63 |
return
|
|
|
64 |
idx_path = self._find_index_file()
|
65 |
meta_path = self._find_meta_file()
|
66 |
|
|
|
|
|
|
|
|
|
67 |
log.info("Loading FAISS index from: %s", idx_path)
|
68 |
try:
|
69 |
self.index = faiss.read_index(idx_path)
|
70 |
except Exception as e:
|
71 |
-
|
|
|
72 |
|
73 |
log.info("Loading metadata from: %s", meta_path)
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
80 |
|
|
|
81 |
if not isinstance(self.metadata, list):
|
82 |
-
|
83 |
if isinstance(self.metadata, dict):
|
84 |
-
|
85 |
-
keys = sorted(self.metadata.keys())
|
86 |
try:
|
87 |
-
self.metadata = [self.metadata[k] for k in keys]
|
88 |
except Exception:
|
89 |
-
|
90 |
self.metadata = list(self.metadata.values())
|
91 |
else:
|
92 |
self.metadata = list(self.metadata)
|
@@ -106,56 +116,46 @@ class RAGAgent:
|
|
106 |
if self.index is None or self.metadata is None:
|
107 |
return [], []
|
108 |
|
109 |
-
|
110 |
-
|
111 |
# try normalize if index uses normalized vectors
|
112 |
try:
|
113 |
faiss.normalize_L2(q_emb)
|
114 |
except Exception:
|
115 |
pass
|
116 |
-
q_emb = q_emb.astype("float32")
|
117 |
|
118 |
-
# safe search call
|
119 |
try:
|
120 |
D, I = self.index.search(q_emb, k)
|
121 |
except Exception as e:
|
122 |
log.warning("FAISS search error: %s", e)
|
123 |
return [], []
|
124 |
|
125 |
-
# ensure shapes
|
126 |
if I is None or D is None:
|
127 |
return [], []
|
128 |
|
129 |
-
|
130 |
indices = np.array(I).reshape(-1)[:k].tolist()
|
131 |
scores = np.array(D).reshape(-1)[:k].tolist()
|
132 |
|
133 |
-
contexts = []
|
134 |
-
sources = []
|
135 |
for idx, score in zip(indices, scores):
|
136 |
-
if int(idx) < 0:
|
137 |
continue
|
138 |
-
# guard against idx out of metadata bounds
|
139 |
-
if idx >= len(self.metadata):
|
140 |
-
log.debug("Index %s >= metadata length %d — skipping", idx, len(self.metadata))
|
141 |
-
continue
|
142 |
-
meta = self.metadata[int(idx)]
|
143 |
|
144 |
-
|
145 |
text = None
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
if isinstance(meta,
|
153 |
-
text = meta
|
154 |
-
elif isinstance(meta, dict) and "metadata" in meta and isinstance(meta["metadata"], dict):
|
155 |
-
# sometimes nested
|
156 |
text = meta["metadata"].get("text") or meta["metadata"].get("page_content")
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
159 |
|
160 |
contexts.append(text)
|
161 |
sources.append({"meta": meta, "score": float(score)})
|
|
|
12 |
from config import VECTORSTORE_DIR, EMBEDDING_MODEL
|
13 |
|
14 |
log = logging.getLogger(__name__)
|
|
|
15 |
|
16 |
|
17 |
class RAGAgent:
|
|
|
28 |
self._embedder: Optional[SentenceTransformer] = None
|
29 |
self._loaded = False
|
30 |
|
31 |
+
def _find_index_file(self) -> Optional[str]:
    """Locate the FAISS index file inside the vectorstore directory.

    Returns the full path of the first plausible index file, or None when
    the directory is missing or contains no index.
    """
    if not os.path.isdir(self.vectorstore_dir):
        log.warning("Vectorstore dir not found: %s", self.vectorstore_dir)
        return None

    for fname in os.listdir(self.vectorstore_dir):
        # Never treat metadata files as the index: a bare startswith("index")
        # check matches "index.pkl"/"index.json" — exactly the files
        # _find_meta_file looks for — and os.listdir order is arbitrary, so
        # faiss.read_index could be handed a pickle.
        if fname.endswith((".pkl", ".json")):
            continue
        if fname.endswith((".faiss", ".index", ".bin")) or fname.startswith("index"):
            return os.path.join(self.vectorstore_dir, fname)
    return None
|
40 |
|
41 |
+
def _find_meta_file(self) -> Optional[str]:
    """Return the path of the vectorstore metadata file, or None if absent."""
    if not os.path.isdir(self.vectorstore_dir):
        return None

    directory = self.vectorstore_dir

    # Well-known metadata filenames take priority.
    preferred = ("index.pkl", "metadata.pkl", "index_meta.pkl", "metadata.json", "index.json")
    for candidate in preferred:
        path = os.path.join(directory, candidate)
        if os.path.exists(path):
            return path

    # Fall back to the first pickle/JSON file found in the directory.
    for entry in os.listdir(directory):
        if entry.endswith((".pkl", ".json")):
            return os.path.join(directory, entry)

    return None
|
55 |
|
56 |
@property
|
57 |
def embedder(self) -> SentenceTransformer:
|
|
|
64 |
"""Load index and metadata into memory (idempotent)."""
|
65 |
if self._loaded:
|
66 |
return
|
67 |
+
|
68 |
idx_path = self._find_index_file()
|
69 |
meta_path = self._find_meta_file()
|
70 |
|
71 |
+
if not idx_path or not meta_path:
|
72 |
+
log.warning("No index/metadata found in %s — retrieval disabled.", self.vectorstore_dir)
|
73 |
+
return
|
74 |
+
|
75 |
log.info("Loading FAISS index from: %s", idx_path)
|
76 |
try:
|
77 |
self.index = faiss.read_index(idx_path)
|
78 |
except Exception as e:
|
79 |
+
log.error("Failed to read FAISS index: %s", e)
|
80 |
+
return
|
81 |
|
82 |
log.info("Loading metadata from: %s", meta_path)
|
83 |
+
try:
|
84 |
+
if meta_path.endswith(".json"):
|
85 |
+
with open(meta_path, "r", encoding="utf-8") as f:
|
86 |
+
self.metadata = json.load(f)
|
87 |
+
else:
|
88 |
+
with open(meta_path, "rb") as f:
|
89 |
+
self.metadata = pickle.load(f)
|
90 |
+
except Exception as e:
|
91 |
+
log.error("Failed to read metadata: %s", e)
|
92 |
+
return
|
93 |
|
94 |
+
# Normalize metadata type
|
95 |
if not isinstance(self.metadata, list):
|
|
|
96 |
if isinstance(self.metadata, dict):
|
|
|
|
|
97 |
try:
|
98 |
+
self.metadata = [self.metadata[k] for k in sorted(self.metadata.keys())]
|
99 |
except Exception:
|
|
|
100 |
self.metadata = list(self.metadata.values())
|
101 |
else:
|
102 |
self.metadata = list(self.metadata)
|
|
|
116 |
if self.index is None or self.metadata is None:
|
117 |
return [], []
|
118 |
|
119 |
+
q_emb = self.embedder.encode([query], convert_to_numpy=True).astype("float32")
|
120 |
+
|
121 |
# try normalize if index uses normalized vectors
|
122 |
try:
|
123 |
faiss.normalize_L2(q_emb)
|
124 |
except Exception:
|
125 |
pass
|
|
|
126 |
|
|
|
127 |
try:
|
128 |
D, I = self.index.search(q_emb, k)
|
129 |
except Exception as e:
|
130 |
log.warning("FAISS search error: %s", e)
|
131 |
return [], []
|
132 |
|
|
|
133 |
if I is None or D is None:
|
134 |
return [], []
|
135 |
|
|
|
136 |
indices = np.array(I).reshape(-1)[:k].tolist()
|
137 |
scores = np.array(D).reshape(-1)[:k].tolist()
|
138 |
|
139 |
+
contexts, sources = [], []
|
|
|
140 |
for idx, score in zip(indices, scores):
|
141 |
+
if int(idx) < 0 or idx >= len(self.metadata):
|
142 |
continue
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
+
meta = self.metadata[int(idx)]
|
145 |
text = None
|
146 |
+
|
147 |
+
if isinstance(meta, dict):
|
148 |
+
for key in ("text", "page_content", "content", "chunk_text", "source_text"):
|
149 |
+
if key in meta and meta[key]:
|
150 |
+
text = meta[key]
|
151 |
+
break
|
152 |
+
if text is None and "metadata" in meta and isinstance(meta["metadata"], dict):
|
|
|
|
|
|
|
153 |
text = meta["metadata"].get("text") or meta["metadata"].get("page_content")
|
154 |
+
elif isinstance(meta, str):
|
155 |
+
text = meta
|
156 |
+
|
157 |
+
if text is None:
|
158 |
+
text = str(meta)
|
159 |
|
160 |
contexts.append(text)
|
161 |
sources.append({"meta": meta, "score": float(score)})
|