Cheselle committed on
Commit
ffd0213
·
verified ·
1 Parent(s): b83d3fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -15,7 +15,6 @@ from sentence_transformers import SentenceTransformer
15
  load_dotenv()
16
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
17
 
18
- # Custom embedding class for SentenceTransformer
19
  class SentenceTransformerEmbedding:
20
  def __init__(self, model_name):
21
  self.model = SentenceTransformer(model_name)
@@ -26,14 +25,16 @@ class SentenceTransformerEmbedding:
26
  def __call__(self, texts):
27
  return self.embed_documents(texts) # Make it callable
28
 
29
- @cl.on_chat_start # Marks the function to be executed at the start of a user session
30
  async def on_chat_start():
31
  model = ChatOpenAI(streaming=True)
32
 
33
  # Load documents
34
  ai_framework_document = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()
35
  ai_blueprint_document = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
36
-
 
 
37
  RAG_PROMPT = """\
38
  Given a provided context and question, you must answer the question based only on context.
39
 
@@ -60,17 +61,25 @@ async def on_chat_start():
60
 
61
  sentence_combined_documents = sentence_framework + sentence_blueprint
62
 
 
 
 
 
 
 
63
  # Initialize the embedding model instance
64
  embedding_model = SentenceTransformerEmbedding('Cheselle/finetuned-arctic-sentence')
65
 
66
  # Create the Qdrant vector store using the embedding instance
67
  sentence_vectorstore = Qdrant.from_documents(
68
  documents=sentence_combined_documents,
69
- embedding=embedding_model, # Pass the embedding instance correctly
70
  location=":memory:",
71
  collection_name="AI Policy"
72
  )
73
 
 
 
74
  # Create retriever from the vector store
75
  sentence_retriever = sentence_vectorstore.as_retriever()
76
 
@@ -83,7 +92,7 @@ async def on_chat_start():
83
  cl.user_session.set("retriever", sentence_retriever)
84
  cl.user_session.set("prompt_template", rag_prompt)
85
 
86
- @cl.on_message # Marks a function to run each time a message is received
87
  async def on_message(message: cl.Message):
88
  # Get the stored model, retriever, and prompt
89
  model = cl.user_session.get("runnable")
 
15
  load_dotenv()
16
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
17
 
 
18
  class SentenceTransformerEmbedding:
19
  def __init__(self, model_name):
20
  self.model = SentenceTransformer(model_name)
 
25
  def __call__(self, texts):
26
  return self.embed_documents(texts) # Make it callable
27
 
28
+ @cl.on_chat_start
29
  async def on_chat_start():
30
  model = ChatOpenAI(streaming=True)
31
 
32
  # Load documents
33
  ai_framework_document = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()
34
  ai_blueprint_document = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
35
+
36
+ print("Documents loaded.")
37
+
38
  RAG_PROMPT = """\
39
  Given a provided context and question, you must answer the question based only on context.
40
 
 
61
 
62
  sentence_combined_documents = sentence_framework + sentence_blueprint
63
 
64
+ print(f"Total documents to embed: {len(sentence_combined_documents)}")
65
+
66
+ # Limit the number of documents processed for debugging
67
+ max_documents = 10
68
+ sentence_combined_documents = sentence_combined_documents[:max_documents]
69
+
70
  # Initialize the embedding model instance
71
  embedding_model = SentenceTransformerEmbedding('Cheselle/finetuned-arctic-sentence')
72
 
73
  # Create the Qdrant vector store using the embedding instance
74
  sentence_vectorstore = Qdrant.from_documents(
75
  documents=sentence_combined_documents,
76
+ embedding=embedding_model,
77
  location=":memory:",
78
  collection_name="AI Policy"
79
  )
80
 
81
+ print("Vector store created.")
82
+
83
  # Create retriever from the vector store
84
  sentence_retriever = sentence_vectorstore.as_retriever()
85
 
 
92
  cl.user_session.set("retriever", sentence_retriever)
93
  cl.user_session.set("prompt_template", rag_prompt)
94
 
95
+ @cl.on_message
96
  async def on_message(message: cl.Message):
97
  # Get the stored model, retriever, and prompt
98
  model = cl.user_session.get("runnable")