Spaces:
Sleeping
Sleeping
Didier Guillevic
committed on
Commit
·
bebe878
1
Parent(s):
1c18375
Add missing function
Browse files
app.py
CHANGED
@@ -23,6 +23,41 @@ import warnings
|
|
23 |
warnings.filterwarnings('ignore')
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def generate_response(question: str) -> list[str, str, str]:
|
27 |
"""Generate a response to a given question using the RAG model.
|
28 |
|
|
|
23 |
warnings.filterwarnings('ignore')
|
24 |
|
25 |
|
26 |
+
# Module-level handle to the DSPy based RAG model. It starts out as None and
# is assigned by build_rag_model() once the retrieval index has been built.
dspy_rag_model = None
|
27 |
+
|
28 |
+
def build_rag_model(files: list[str]) -> str:
    """Build a retrieval augmented model using given files to index.

    For every given PDF file, the metadata and the text are extracted. The
    files that yield some text are indexed with a ColBERT retrieval model,
    which is then wrapped into a DSPy based RAG model stored in the
    module-level ``dspy_rag_model`` variable.

    Args:
        files: The PDF files to index (presumably file paths; they are passed
            as-is to the ``pdf_utils`` helpers).

    Returns:
        A short status message: "Done building RAG model." on success, or a
        message explaining why no model was built (no files given, or no
        text could be extracted from any of them).
    """
    global dspy_rag_model

    # Guard against a missing selection: iterating over None would raise a
    # TypeError, and an empty list leaves nothing to index.
    if not files:
        return "No files provided: RAG model not built."

    # Get the text from the pdf files
    documents = []
    metadatas = []
    for pdf_file in files:
        logger.info(f"Processing {pdf_file}")
        metadata = pdf_utils.get_metadata_info(pdf_file)
        text = pdf_utils.get_text_from_pdf(pdf_file)
        # Only keep files from which some text was extracted, so that
        # documents and metadatas stay aligned index by index.
        if text:
            documents.append(text)
            metadatas.append(metadata)

    # Do not attempt to build an index when there is nothing to index; the
    # previously built model (if any) is left untouched.
    if not documents:
        return "No text extracted from the given files: RAG model not built."

    # Build the ColBERT retrieval model
    colbert_base_model = 'antoinelouis/colbert-xm'  # multilingual model
    colbert_index_name = 'OECD_HNW'  # for web app, generate unique name with uuid.uuid4()
    retrieval_model = colbert_utils.build_colbert_model(
        documents,
        metadatas,
        pretrained_model=colbert_base_model,
        index_name=colbert_index_name
    )

    # Instantiate the DSPy based RAG model
    dspy_rag_model = dspy_utils.DSPyRagModel(retrieval_model)

    return "Done building RAG model."
|
59 |
+
|
60 |
+
|
61 |
def generate_response(question: str) -> list[str, str, str]:
|
62 |
"""Generate a response to a given question using the RAG model.
|
63 |
|