Rsr2425 committed
Commit ed91833 · 1 Parent(s): 50f8987

Added vectorstore

.env.template ADDED
@@ -0,0 +1,2 @@
+ # OpenAI API Key - Required for embeddings and LLM calls
+ OPENAI_API_KEY=your_key_here
backend/app/__init__.py CHANGED
@@ -1 +1,9 @@
- # Empty file to make the directory a Python package
+ import os
+ from pathlib import Path
+ from dotenv import load_dotenv, find_dotenv
+
+ # Load environment variables from .env file in project root
+ load_dotenv(find_dotenv())
+
+ if os.getenv("OPENAI_API_KEY") is None:
+     raise ValueError("OPENAI_API_KEY is not set")
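
The package now fails fast: importing backend.app without a key raises immediately. A minimal sketch of how that surfaces to a caller, assuming the project root is on the Python path and neither the environment nor a .env file provides OPENAI_API_KEY (the sketch is illustrative, not part of the commit):

import importlib

try:
    importlib.import_module("backend.app")  # runs the load_dotenv + key check above
except ValueError as exc:
    print(f"Startup check failed: {exc}")   # -> "OPENAI_API_KEY is not set"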
backend/app/vectorstore.py ADDED
@@ -0,0 +1,44 @@
+ """
+ Super early version of a vector store. Just want to make something available for the rest of the app to use.
+ """
+ import os
+ import requests
+ import nltk
+
+ from langchain_community.vectorstores import Qdrant
+ from langchain_openai.embeddings import OpenAIEmbeddings
+
+ nltk.download('punkt_tab')
+ nltk.download('averaged_perceptron_tagger_eng')
+
+ embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+
+ # Create static/data directory if it doesn't exist
+ os.makedirs("static/data", exist_ok=True)
+
+ # Download and save the webpage
+ url = "https://python.langchain.com/docs/tutorials/rag/"
+ response = requests.get(url)
+ with open("static/data/langchain_rag_tutorial.html", "w", encoding="utf-8") as f:
+     f.write(response.text)
+
+ from langchain_community.document_loaders import DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ # Load HTML files from static/data directory
+ loader = DirectoryLoader("static/data", glob="*.html")
+ documents = loader.load()
+
+ # Split documents into chunks
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=1000,
+     chunk_overlap=200
+ )
+ split_chunks = text_splitter.split_documents(documents)
+
+ vector_db = Qdrant.from_documents(
+     split_chunks,
+     embedding_model,
+     location=":memory:",
+     collection_name="extending_context_window_llama_3",
+ )
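
With the module-level vector_db in place, the rest of the app can import and query it directly. A rough usage sketch, assuming the import-time download and indexing above succeed; as_retriever is the stock LangChain helper and is not part of this commit:

from backend.app.vectorstore import vector_db

# Plain similarity search, the same call the new tests exercise.
docs = vector_db.similarity_search("What is RAG?", k=2)
for doc in docs:
    print(doc.page_content[:200])

# Or wrap the store as a retriever for downstream chains.
retriever = vector_db.as_retriever(search_kwargs={"k": 2})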
backend/tests/test_vectorstore.py ADDED
@@ -0,0 +1,35 @@
+ import pytest
+ import os
+ from langchain.schema import Document
+ from backend.app import vectorstore
+
+ def test_directory_creation():
+     """Test that the static/data directory is created"""
+     assert os.path.exists("static/data")
+     assert os.path.exists("static/data/langchain_rag_tutorial.html")
+
+ def test_html_content():
+     """Test that the HTML content was downloaded and contains expected content"""
+     with open("static/data/langchain_rag_tutorial.html", "r", encoding="utf-8") as f:
+         content = f.read()
+
+     # Check for some expected content from the LangChain RAG tutorial
+     assert "RAG" in content
+     assert "LangChain" in content
+
+ def test_vector_store_similarity_search():
+     """Test that the vector store can perform similarity search"""
+     # Test query
+     query = "What is RAG?"
+
+     # Perform similarity search
+     results = vectorstore.vector_db.similarity_search(query, k=2)
+
+     # Verify we get results
+     assert len(results) == 2
+     assert isinstance(results[0], Document)
+
+     # Verify the results contain relevant content
+     combined_content = " ".join([doc.page_content for doc in results]).lower()
+     assert "rag" in combined_content
+     assert "retrieval" in combined_content
pyproject.toml CHANGED
@@ -1,9 +1,7 @@
  [project]
- name = "simplify"
+ name = "backend"
  version = "0.1.0"
- description = "LLM System to generate quizzes that simplify the learning process of tools and frameworks"
- readme = "README.md"
- requires-python = ">=3.12"
+ description = "Backend for the application"
  dependencies = [
      "chainlit>=2.0.4",
      "numpy>=2.2.2",
@@ -14,9 +12,24 @@ dependencies = [
      "fastapi>=0.110.0",
      "uvicorn>=0.27.1",
      "pytest>=8.0.0",
-     "httpx>=0.26.0"
+     "httpx>=0.26.0",
+     "langchain>=0.3.15",
+     "langchain-community>=0.3.15",
+     "langchain-openai>=0.3.2",
+     "requests>=2.31.0",
+     "python-dotenv>=1.0.0",
+     "openai>=1.12.0",
+     "pytest-dotenv>=0.5.2",
+     "unstructured",
+     "qdrant-client>=1.6.0",
  ]

- [tool.pytest.ini_options]
- testpaths = ["backend/tests"]
- python_files = ["test_*.py"]
+ [project.optional-dependencies]
+ test = [
+     "pytest>=7.4.0",
+     "pytest-asyncio>=0.21.1",
+ ]
+
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
pytest.ini CHANGED
@@ -1,2 +1,3 @@
  [pytest]
- pythonpath = .
+ pythonpath = .
+ env_files = .env
static/data/langchain_rag_tutorial.html ADDED
The diff for this file is too large to render. See raw diff
 
test_local.sh CHANGED
@@ -30,6 +30,6 @@ echo -e "${BLUE}${DIVIDER}${NC}"

  # Run backend tests
  echo -e "${YELLOW}Running backend tests...${NC}"
- docker run simplify-test pytest backend/tests
+ docker run --env-file .env simplify-test pytest backend/tests

  echo -e "\n${GREEN}✨ Testing complete!${NC}\n"