Spaces:

Didier
/

Docs_QA_ColBERT_DSPy

Running

Didier Guillevic committed on Dec 22, 2024

Commit

bebe878

1 Parent(s): 1c18375

Add missing function

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,6 +23,41 @@ import warnings
 warnings.filterwarnings('ignore')
 def generate_response(question: str) -> list[str, str, str]:
     """Generate a response to a given question using the RAG model.

 warnings.filterwarnings('ignore')
+# Module-level handle to the DSPy RAG model; None until build_rag_model() assigns it.
+dspy_rag_model = None
+def build_rag_model(files: list[str]) -> str:
+    """Build a retrieval augmented model using given files to index.
+    """
+    global dspy_rag_model
+    # Get the text from the pdf files
+    documents = []
+    metadatas = []
+    for pdf_file in files:
+        logger.info(f"Processing {pdf_file}")
+        metadata = pdf_utils.get_metadata_info(pdf_file)
+        text = pdf_utils.get_text_from_pdf(pdf_file)
+        if text:
+            documents.append(text)
+            metadatas.append(metadata)
+    # Build the ColBERT retrieval model
+    colbert_base_model = 'antoinelouis/colbert-xm' # multilingual model
+    colbert_index_name = 'OECD_HNW' # for web app, generate unique name with uuid.uuid4()
+    retrieval_model = colbert_utils.build_colbert_model(
+        documents,
+        metadatas,
+        pretrained_model=colbert_base_model,
+        index_name=colbert_index_name
+    )
+    # Instanatiate the DSPy based RAG model
+    dspy_rag_model = dspy_utils.DSPyRagModel(retrieval_model)
+    return "Done building RAG model."
 def generate_response(question: str) -> list[str, str, str]:
     """Generate a response to a given question using the RAG model.