Spaces:

Rsr2425
/

SimpliFi

Sleeping

App Files Community

Rsr2425 committed on Feb 21

Commit

1ef298a

1 Parent(s): eae1098

Did some refactoring in BE API

Browse files

Files changed (5) hide show

backend/app/main.py +3 -10
backend/app/problem_generator.py +16 -0
backend/app/vectorstore.py +50 -32
backend/tests/{test_quiz.py → test_api.py} +2 -1
backend/tests/test_vectorstore.py +16 -7

backend/app/main.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-import random
 app = FastAPI()
@@ -27,12 +27,5 @@ async def crawl_documentation(input_data: UrlInput):
 @app.post("/problems/")
 async def generate_problems(query: UserQuery):
-    # For MVP, returning random sample questions
-    sample_questions = [
-        "What is the main purpose of this framework?",
-        "How do you install this tool?",
-        "What are the key components?",
-        "Explain the basic workflow",
-        "What are the best practices?"
-    ]
-    return {"Problems": sample_questions}

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from backend.app.problem_generator import ProblemGenerator
 app = FastAPI()
 @app.post("/problems/")
 async def generate_problems(query: UserQuery):
+    problems = ProblemGenerator().generate_problems(query.user_query)
+    return {"Problems": problems}

backend/app/problem_generator.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from typing import List
+class ProblemGenerator:
+    def generate_problems(self, query: str) -> List[str]:
+        """
+        Generate problems based on the user's query.
+        """
+        # For MVP, returning random sample questions
+        sample_questions = [
+            "What is the main purpose of this framework?",
+            "How do you install this tool?",
+            "What are the key components?",
+            "Explain the basic workflow",
+            "What are the best practices?"
+        ]
+        return sample_questions

backend/app/vectorstore.py CHANGED Viewed

@@ -1,44 +1,62 @@
 """
 Super early version of a vector store. Just want to make something available for the rest of the app to use.
 """
 import os
 import requests
 import nltk
 from langchain_community.vectorstores import Qdrant
 from langchain_openai.embeddings import OpenAIEmbeddings
 nltk.download('punkt_tab')
 nltk.download('averaged_perceptron_tagger_eng')
-embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
-# Create static/data directory if it doesn't exist
-os.makedirs("static/data", exist_ok=True)
-# Download and save the webpage
-url = "https://python.langchain.com/docs/tutorials/rag/"
-response = requests.get(url)
-with open("static/data/langchain_rag_tutorial.html", "w", encoding="utf-8") as f:
-    f.write(response.text)
-from langchain_community.document_loaders import DirectoryLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-# Load HTML files from static/data directory
-loader = DirectoryLoader("static/data", glob="*.html")
-documents = loader.load()
-# Split documents into chunks
-text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=1000,
-    chunk_overlap=200
-)
-split_chunks = text_splitter.split_documents(documents)
-vector_db = Qdrant.from_documents(
-    split_chunks,
-    embedding_model,
-    location=":memory:",
-    collection_name="extending_context_window_llama_3",
-)

 """
 Super early version of a vector store. Just want to make something available for the rest of the app to use.
+Vector store implementation with singleton pattern to ensure only one instance exists.
 """
 import os
 import requests
 import nltk
+from typing import Optional
 from langchain_community.vectorstores import Qdrant
 from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_community.document_loaders import DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 nltk.download('punkt_tab')
 nltk.download('averaged_perceptron_tagger_eng')
+# Global variable to store the singleton instance
+_vector_db_instance: Optional[Qdrant] = None
+def get_vector_db() -> Qdrant:
+    """
+    Factory function that returns a singleton instance of the vector database.
+    Creates the instance if it doesn't exist.
+    """
+    global _vector_db_instance
+    if _vector_db_instance is None:
+        # Create static/data directory if it doesn't exist
+        os.makedirs("static/data", exist_ok=True)
+        # Download and save the webpage if it doesn't exist
+        html_path = "static/data/langchain_rag_tutorial.html"
+        if not os.path.exists(html_path):
+            url = "https://python.langchain.com/docs/tutorials/rag/"
+            response = requests.get(url)
+            with open(html_path, "w", encoding="utf-8") as f:
+                f.write(response.text)
+        # Initialize embedding model
+        embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+        # Load HTML files from static/data directory
+        loader = DirectoryLoader("static/data", glob="*.html")
+        documents = loader.load()
+        # Split documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200
+        )
+        split_chunks = text_splitter.split_documents(documents)
+        # Create vector store instance
+        _vector_db_instance = Qdrant.from_documents(
+            split_chunks,
+            embedding_model,
+            location=":memory:",
+            collection_name="extending_context_window_llama_3",
+        )
+    return _vector_db_instance

backend/tests/{test_quiz.py → test_api.py} RENAMED Viewed

@@ -18,4 +18,5 @@ def test_problems_endpoint():
     )
     assert response.status_code == 200
     assert "Problems" in response.json()
-    assert len(response.json()["Problems"]) == 5

     )
     assert response.status_code == 200
     assert "Problems" in response.json()
+    assert len(response.json()["Problems"]) == 5

backend/tests/test_vectorstore.py CHANGED Viewed

@@ -1,15 +1,14 @@
-import pytest
 import os
 from langchain.schema import Document
-from backend.app import vectorstore
 def test_directory_creation():
-    """Test that the static/data directory is created"""
     assert os.path.exists("static/data")
     assert os.path.exists("static/data/langchain_rag_tutorial.html")
 def test_html_content():
-    """Test that the HTML content was downloaded and contains expected content"""
     with open("static/data/langchain_rag_tutorial.html", "r", encoding="utf-8") as f:
         content = f.read()
@@ -22,8 +21,9 @@ def test_vector_store_similarity_search():
     # Test query
     query = "What is RAG?"
-    # Perform similarity search
-    results = vectorstore.vector_db.similarity_search(query, k=2)
     # Verify we get results
     assert len(results) == 2
@@ -32,4 +32,13 @@ def test_vector_store_similarity_search():
     # Verify the results contain relevant content
     combined_content = " ".join([doc.page_content for doc in results]).lower()
     assert "rag" in combined_content
-    assert "retrieval" in combined_content

 import os
 from langchain.schema import Document
+from backend.app.vectorstore import get_vector_db
 def test_directory_creation():
+    get_vector_db()
     assert os.path.exists("static/data")
     assert os.path.exists("static/data/langchain_rag_tutorial.html")
+# TODO remove this test when data ingestion layer is implemented
 def test_html_content():
     with open("static/data/langchain_rag_tutorial.html", "r", encoding="utf-8") as f:
         content = f.read()
     # Test query
     query = "What is RAG?"
+    # Get vector db instance and perform similarity search
+    vector_db = get_vector_db()
+    results = vector_db.similarity_search(query, k=2)
     # Verify we get results
     assert len(results) == 2
     # Verify the results contain relevant content
     combined_content = " ".join([doc.page_content for doc in results]).lower()
     assert "rag" in combined_content
+    assert "retrieval" in combined_content
+def test_vector_db_singleton():
+    """Test that get_vector_db returns the same instance each time"""
+    # Get two instances
+    instance1 = get_vector_db()
+    instance2 = get_vector_db()
+    # Verify they are the same object
+    assert instance1 is instance2