Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -280,8 +280,8 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
|
|
280 |
print(f"Total number of chunks: {len(chunks)}")
|
281 |
return chunks
|
282 |
|
283 |
-
# Setup
|
284 |
-
def
|
285 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
|
286 |
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
|
287 |
return vectorstore
|
@@ -291,7 +291,7 @@ def setup_llm(model_name, temperature, api_key):
|
|
291 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
292 |
return llm
|
293 |
|
294 |
-
def
|
295 |
results = vectorstore.similarity_search(query, k=k)
|
296 |
chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
|
297 |
# Print the chosen chunks and their sources to the console
|
@@ -302,14 +302,14 @@ def query_chroma(vectorstore, query, k):
|
|
302 |
return chunks_with_references
|
303 |
|
304 |
def rag_workflow(query):
|
305 |
-
retrieved_doc_chunks =
|
306 |
-
|
307 |
|
308 |
doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
|
309 |
-
|
310 |
|
311 |
-
|
312 |
-
|
313 |
|
314 |
print(f"Context for the query:\n{doc_context}\n")
|
315 |
|
@@ -332,7 +332,7 @@ def rag_workflow(query):
|
|
332 |
|
333 |
|
334 |
def initialize():
|
335 |
-
global docstore,
|
336 |
|
337 |
code_partial_paths = ['kadi_apy/lib/']
|
338 |
code_file_path = []
|
@@ -350,8 +350,9 @@ def initialize():
|
|
350 |
print(f"Total number of code_chunks: {len(code_chunks)}")
|
351 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
352 |
|
353 |
-
|
354 |
-
|
|
|
355 |
llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|
356 |
|
357 |
|
|
|
280 |
print(f"Total number of chunks: {len(chunks)}")
|
281 |
return chunks
|
282 |
|
283 |
# Setup Vectorstore
def setup_vectorstore(chunks, model_name, persist_directory):
    """Embed *chunks* and store them in a persistent Chroma vector store.

    Args:
        chunks: Document chunks to index.
        model_name: HuggingFace embedding model identifier.
        persist_directory: On-disk directory where Chroma persists the index.

    Returns:
        The populated Chroma vector store.
    """
    embedder = HuggingFaceEmbeddings(model_name=model_name)
    store = Chroma.from_documents(
        chunks,
        embedding=embedder,
        persist_directory=persist_directory,
    )
    return store
|
|
|
291 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
292 |
return llm
|
293 |
|
294 |
+
def retrieve_from_vectorstore(vectorstore, query, k):
|
295 |
results = vectorstore.similarity_search(query, k=k)
|
296 |
chunks_with_references = [(result.page_content, result.metadata["source"]) for result in results]
|
297 |
# Print the chosen chunks and their sources to the console
|
|
|
302 |
return chunks_with_references
|
303 |
|
304 |
def rag_workflow(query):
|
305 |
+
retrieved_doc_chunks = retrieve_from_vectorstore(docstore, query, k=5)
|
306 |
+
retrieved_code_chunks = retrieve_from_vectorstore(codestore, query, k=5)
|
307 |
|
308 |
doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
|
309 |
+
code_context = "\n\n".join([code_chunk for code_chunk, _ in retrieved_code_chunks])
|
310 |
|
311 |
+
doc_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_doc_chunks)])
|
312 |
+
code_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_code_chunks)])
|
313 |
|
314 |
print(f"Context for the query:\n{doc_context}\n")
|
315 |
|
|
|
332 |
|
333 |
|
334 |
def initialize():
|
335 |
+
global docstore, codestore, chunks, llm
|
336 |
|
337 |
code_partial_paths = ['kadi_apy/lib/']
|
338 |
code_file_path = []
|
|
|
350 |
print(f"Total number of code_chunks: {len(code_chunks)}")
|
351 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
352 |
|
353 |
+
docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
|
354 |
+
codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY)
|
355 |
+
|
356 |
llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|
357 |
|
358 |
|