Spaces:

Cheselle
/

Midterm-FinetunedRAG

Runtime error

App Files Community

Cheselle committed on Sep 24, 2024

Commit

ffd0213

verified ·

1 Parent(s): b83d3fb

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -5

app.py CHANGED Viewed

@@ -15,7 +15,6 @@ from sentence_transformers import SentenceTransformer
 load_dotenv()
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
-# Custom embedding class for SentenceTransformer
 class SentenceTransformerEmbedding:
     def __init__(self, model_name):
         self.model = SentenceTransformer(model_name)
@@ -26,14 +25,16 @@ class SentenceTransformerEmbedding:
     def __call__(self, texts):
         return self.embed_documents(texts)  # Make it callable
-@cl.on_chat_start  # Marks the function to be executed at the start of a user session
 async def on_chat_start():
     model = ChatOpenAI(streaming=True)
     # Load documents
     ai_framework_document = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()
     ai_blueprint_document = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
     RAG_PROMPT = """\
     Given a provided context and question, you must answer the question based only on context.
@@ -60,17 +61,25 @@ async def on_chat_start():
     sentence_combined_documents = sentence_framework + sentence_blueprint
     # Initialize the embedding model instance
     embedding_model = SentenceTransformerEmbedding('Cheselle/finetuned-arctic-sentence')
     # Create the Qdrant vector store using the embedding instance
     sentence_vectorstore = Qdrant.from_documents(
         documents=sentence_combined_documents,
-        embedding=embedding_model,  # Pass the embedding instance correctly
         location=":memory:",
         collection_name="AI Policy"
     )
     # Create retriever from the vector store
     sentence_retriever = sentence_vectorstore.as_retriever()
@@ -83,7 +92,7 @@ async def on_chat_start():
     cl.user_session.set("retriever", sentence_retriever)
     cl.user_session.set("prompt_template", rag_prompt)
-@cl.on_message  # Marks a function to run each time a message is received
 async def on_message(message: cl.Message):
     # Get the stored model, retriever, and prompt
     model = cl.user_session.get("runnable")

 load_dotenv()
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 class SentenceTransformerEmbedding:
     def __init__(self, model_name):
         self.model = SentenceTransformer(model_name)
     def __call__(self, texts):
         return self.embed_documents(texts)  # Make it callable
+@cl.on_chat_start
 async def on_chat_start():
     model = ChatOpenAI(streaming=True)
     # Load documents
     ai_framework_document = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()
     ai_blueprint_document = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
+    print("Documents loaded.")
     RAG_PROMPT = """\
     Given a provided context and question, you must answer the question based only on context.
     sentence_combined_documents = sentence_framework + sentence_blueprint
+    print(f"Total documents to embed: {len(sentence_combined_documents)}")
+    # Limit the number of documents processed for debugging
+    max_documents = 10
+    sentence_combined_documents = sentence_combined_documents[:max_documents]
     # Initialize the embedding model instance
     embedding_model = SentenceTransformerEmbedding('Cheselle/finetuned-arctic-sentence')
     # Create the Qdrant vector store using the embedding instance
     sentence_vectorstore = Qdrant.from_documents(
         documents=sentence_combined_documents,
+        embedding=embedding_model,
         location=":memory:",
         collection_name="AI Policy"
     )
+    print("Vector store created.")
     # Create retriever from the vector store
     sentence_retriever = sentence_vectorstore.as_retriever()
     cl.user_session.set("retriever", sentence_retriever)
     cl.user_session.set("prompt_template", rag_prompt)
+@cl.on_message
 async def on_message(message: cl.Message):
     # Get the stored model, retriever, and prompt
     model = cl.user_session.get("runnable")