bupa1018 committed on
Commit
db1cea6
·
1 Parent(s): 50049b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -22
app.py CHANGED
@@ -255,45 +255,96 @@ def setup_llm(model_name, temperature, api_key):
255
  return llm
256
 
257
def retrieve_from_vectorstore(vectorstore, query, k):
    """Return the top-``k`` matches for ``query`` as ``(text, source)`` pairs.

    Each hit's ``page_content`` is paired with ``metadata["source"]``. The
    pairs are also echoed to stdout so the selection can be inspected.
    """
    pairs = []
    for hit in vectorstore.similarity_search(query, k=k):
        pairs.append((hit.page_content, hit.metadata["source"]))

    # Echo the selected chunks together with their sources.
    # NOTE(review): indentation was lost in this view; the separator line is
    # assumed to be printed once per chunk -- confirm against the file.
    print("\nChosen chunks and their sources for the query:")
    for text, origin in pairs:
        print(f"Source: {origin}\nChunk: {text}\n")
        print("-" * 50)
    return pairs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
 
268
def retrieve_docs_from_vectorstore(vectorstore, query, k):
    """Return whatever ``vectorstore.similarity_search`` yields for ``query``.

    ``k`` is forwarded as a keyword argument; no post-processing is applied.
    """
    matches = vectorstore.similarity_search(query, k=k)
    return matches
270
 
271
def format_doc_context(docs):
    """Join the ``page_content`` of every item in ``docs`` with blank lines.

    The joined text is echoed to stdout for verification and then returned.
    """
    contents = [doc.page_content for doc in docs]
    doc_context = "\n\n".join(contents)

    # Optional console echo so the context sent onward can be checked by eye.
    print("\nDocument Context for LLM:\n")
    print(doc_context)

    return doc_context
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  def rag_workflow(query):
280
 
281
- retrieved_doc_chunks = retrieve_from_vectorstore (docstore, query, k=5)
282
- retrieved_code_chunks = retrieve_from_vectorstore(codestore, query, k=5)
283
 
284
  # docs = retrieve_docs_from_vectorstore(docstore, query, k=5)
 
 
285
 
286
- # doc_context = format_doc_context(docs)
287
-
288
- doc_context = "\n\n".join([doc_chunk for doc_chunk, _ in retrieved_doc_chunks])
289
- code_context = "\n\n".join([code_chunk for code_chunk, _ in retrieved_code_chunks])
 
290
 
291
- doc_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_doc_chunks)])
292
- code_references = "\n".join([f"[{i+1}] {ref}" for i, (_, ref) in enumerate(retrieved_code_chunks)])
293
  print(doc_context)
294
  print(code_context)
295
- print(doc_references)
296
- print(code_references)
297
 
298
  # print("Document Chunks:\n")
299
  # print("\n\n".join(["="*80 + "\n" + doc_chunk for doc_chunk, _ in retrieved_doc_chunks]))
 
255
  return llm
256
 
257
def retrieve_from_vectorstore(vectorstore, query, k):
    """Run an unfiltered similarity search and return its result unchanged.

    ``query`` is passed positionally and ``k`` as a keyword argument to
    ``vectorstore.similarity_search``.
    """
    return vectorstore.similarity_search(query, k=k)
260
+
261
+
262
+
263
def retrieve_within_kadiApy_docs(vectorstore, query, k):
    """Similarity-search ``vectorstore`` with the filter ``{"usage": "docs"}``.

    The search result is returned as-is.
    """
    return vectorstore.similarity_search(
        query=query,
        k=k,
        filter={"usage": "docs"},
    )
267
+
268
def retrieve_within_kadiApy_library(vectorstore, query, k):
    """Similarity-search ``vectorstore`` for public library entries.

    The filter passed to the store is
    ``{"usage": "library", "visibility": "public"}``; the result is returned
    unchanged.
    """
    return vectorstore.similarity_search(
        query=query,
        k=k,
        filter={"usage": "library", "visibility": "public"},
    )
272
+
273
def retrieve_within_kadiApy_cli_library(vectorstore, query, k):
    """Similarity-search ``vectorstore`` for public CLI-library entries.

    The filter passed to the store is
    ``{"usage": "cli_library", "visibility": "public"}``; the result is
    returned unchanged.
    """
    return vectorstore.similarity_search(
        query=query,
        k=k,
        filter={"usage": "cli_library", "visibility": "public"},
    )
277
+
278
def retrieve_within_kadiApy_cli_library_excluding_cli_commands(vectorstore, query, k):
    """Search public CLI-library entries and drop those that carry a ``command`` key.

    Args:
        vectorstore: Object exposing ``similarity_search(query=, k=, filter=)``.
        query: Text to search for.
        k: Number of results requested from the store.

    Returns:
        The retrieved documents whose ``metadata`` has no ``"command"`` key.
        Because the exclusion happens after the search, fewer than ``k``
        documents may be returned.
    """
    filter_criteria = {"usage": "cli_library", "visibility": "public"}
    retrieved_docs = vectorstore.similarity_search(query=query, k=k, filter=filter_criteria)
    # Fix: the comprehension previously iterated the undefined name
    # `documents`, which raised NameError on every call.
    # NOTE(review): this tests for a "command" *key*, while
    # retrieve_kadiApy_cli_commands filters on {"type": "command"} -- confirm
    # which metadata layout the store actually uses.
    return [doc for doc in retrieved_docs if "command" not in doc.metadata]
283
+
284
+
285
def retrieve_kadiApy_cli_commands(vectorstore, query, k):
    """Similarity-search ``vectorstore`` for CLI command entries.

    The filter passed to the store is
    ``{"usage": "cli_library", "type": "command"}``; the result is returned
    unchanged.
    """
    return vectorstore.similarity_search(
        query=query,
        k=k,
        filter={"usage": "cli_library", "type": "command"},
    )
289
+
290
 
291
 
292
def retrieve_docs_from_vectorstore(vectorstore, query, k):
    """Return the unfiltered similarity-search result for ``query``.

    ``k`` is forwarded to ``vectorstore.similarity_search`` as a keyword.
    """
    found = vectorstore.similarity_search(query, k=k)
    return found
294
 
295
+
296
+
297
+
298
def format_kadi_apy_library_context(docs):
    """Render library documents as one text block with a metadata header each.

    Every document becomes::

        # source: <source>
        # class: <class>
        # type: <type>
        <page_content>

    followed by three newlines. Missing metadata keys fall back to an
    ``"Unknown ..."`` placeholder.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content``.

    Returns:
        The concatenated entries, or ``""`` when ``docs`` is empty.
    """
    formatted_docs = []
    for doc in docs:
        metadata = doc.metadata
        # Fix: the fallback for a missing source was the copy-pasted label
        # "Unknown Type", which mislabelled the header line.
        source_info = metadata.get("source", "Unknown Source")
        class_info = metadata.get("class", "Unknown Class")
        type_info = metadata.get("type", "Unknown Type")
        formatted_docs.append(
            f"# source: {source_info}\n# class: {class_info}\n# type: {type_info}\n{doc.page_content}\n\n\n"
        )

    return "".join(formatted_docs)
314
 
315
+
316
def format_kadi_api_doc_context(docs):
    """Render documentation chunks as one text block with a source header each.

    Every document becomes ``# source: <source>`` on one line, then its
    ``page_content``, followed by three newlines.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content``.

    Returns:
        The concatenated entries, or ``""`` when ``docs`` is empty.
    """
    doc_context_list = []

    # Fix: the loop header was missing its trailing colon (SyntaxError).
    for doc in docs:
        # Fix: the fallback label for a missing source said "Unknown Type".
        source_info = doc.metadata.get("source", "Unknown Source")
        doc_context_list.append(f"# source: {source_info}\n{doc.page_content}\n\n\n")

    # Fix: `doc_context` was returned without ever being assigned; build it
    # from the collected entries.
    doc_context = "".join(doc_context_list)
    return doc_context
325
+
326
+
327
+
328
  def rag_workflow(query):
329
 
330
+ # retrieved_doc_chunks = retrieve_from_vectorstore (docstore, query, k=5)
331
+ # retrieved_code_chunks = retrieve_from_vectorstore(codestore, query, k=5)
332
 
333
  # docs = retrieve_docs_from_vectorstore(docstore, query, k=5)
334
+
335
+
336
 
337
+ kadi_apy_docs = retrieve_within_kadiApy_docs (docstore, query, k = 5)
338
+ kadi_apy_library_docs = retrieve_within_kadiApy_library (docstore, query, k = 10)
339
+
340
+ doc_context = format_kadi_api_doc_context(kadi_apy_docs)
341
+ code_context = format_kadi_apy_library_context(kadi_apy_library_docs)
342
 
343
+
 
344
  print(doc_context)
345
  print(code_context)
346
+
347
+
348
 
349
  # print("Document Chunks:\n")
350
  # print("\n\n".join(["="*80 + "\n" + doc_chunk for doc_chunk, _ in retrieved_doc_chunks]))