bupa1018 committed on
Commit
0fdd155
·
1 Parent(s): d7cd739

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -280,8 +280,8 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
280
  print(f"Total number of chunks: {len(chunks)}")
281
  return chunks
282
 
283
- # Setup Chroma
284
- def setup_chroma(chunks, model_name, persist_directory):
285
  embedding_model = HuggingFaceEmbeddings(model_name=model_name)
286
  vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
287
  return vectorstore
@@ -291,7 +291,7 @@ def setup_llm(model_name, temperature, api_key):
291
  llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
292
  return llm
293
 
294
- def query_chroma(vectorstore, query, k):
295
  results = vectorstore.similarity_search(query, k=k)
296
  chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
297
  # Print the chosen chunks and their sources to the console
@@ -302,14 +302,14 @@ def query_chroma(vectorstore, query, k):
302
  return chunks_with_references
303
 
304
  def rag_workflow(query):
305
- retrieved_doc_chunks = query_chroma(vectorstore, query, k=5)
306
- #retrieved_code_chunks = query_chroma(codestore, query, k=5)
307
 
308
  doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
309
- #code_context = "\n\n".join([code_chunk for code_chunk, _ in retrieved_code_chunks])
310
 
311
- references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(docs)])
312
-
313
 
314
  print(f"Context for the query:\n{doc_context}\n")
315
 
@@ -332,7 +332,7 @@ def rag_workflow(query):
332
 
333
 
334
  def initialize():
335
- global docstore, vectorstore, chunks, llm
336
 
337
  code_partial_paths = ['kadi_apy/lib/']
338
  code_file_path = []
@@ -350,8 +350,9 @@ def initialize():
350
  print(f"Total number of code_chunks: {len(code_chunks)}")
351
  print(f"Total number of doc_chunks: {len(doc_chunks)}")
352
 
353
- vectorstore = setup_chroma(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
354
- # codestore = setup_chroma(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
 
355
  llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
356
 
357
 
 
280
  print(f"Total number of chunks: {len(chunks)}")
281
  return chunks
282
 
283
+ # Setup Vectorstore
284
+ def setup_vectorstore(chunks, model_name, persist_directory):
285
  embedding_model = HuggingFaceEmbeddings(model_name=model_name)
286
  vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
287
  return vectorstore
 
291
  llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
292
  return llm
293
 
294
+ def retrieve_from_vectorstore(vectorstore, query, k):
295
  results = vectorstore.similarity_search(query, k=k)
296
  chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
297
  # Print the chosen chunks and their sources to the console
 
302
  return chunks_with_references
303
 
304
  def rag_workflow(query):
305
+ retrieved_doc_chunks = retrieve_from_vectorstore(docstore, query, k=5)
306
+ retrieved_code_chunks = retrieve_from_vectorstore(codestore, query, k=5)
307
 
308
  doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
309
+ code_context = "\n\n".join([code_chunk for code_chunk, _ in retrieved_code_chunks])
310
 
311
+ doc_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_doc_chunks)])
312
+ code_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_code_chunks)])
313
 
314
  print(f"Context for the query:\n{doc_context}\n")
315
 
 
332
 
333
 
334
  def initialize():
335
+ global docstore, codestore, chunks, llm
336
 
337
  code_partial_paths = ['kadi_apy/lib/']
338
  code_file_path = []
 
350
  print(f"Total number of code_chunks: {len(code_chunks)}")
351
  print(f"Total number of doc_chunks: {len(doc_chunks)}")
352
 
353
+ docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
354
+ codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
355
+
356
  llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
357
 
358