Spaces:
Sleeping
Sleeping
Commit
·
e0aff18
1
Parent(s):
78dd8e8
chore: format & linting + __init__ + fix: imports
Browse files- medrag_multi_modal/__init__.py +15 -0
- medrag_multi_modal/document_loader/__init__.py +10 -2
- medrag_multi_modal/document_loader/load_image.py +3 -3
- medrag_multi_modal/document_loader/load_text_image.py +2 -2
- medrag_multi_modal/document_loader/text_loader/__init__.py +5 -1
- medrag_multi_modal/document_loader/text_loader/pymupdf4llm_text_loader.py +1 -1
- medrag_multi_modal/retrieval/multi_modal_retrieval.py +6 -6
medrag_multi_modal/__init__.py
CHANGED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .document_loader import (
|
2 |
+
ImageLoader,
|
3 |
+
MarkerTextLoader,
|
4 |
+
PyMuPDF4LLMTextLoader,
|
5 |
+
TextImageLoader,
|
6 |
+
)
|
7 |
+
from .retrieval import MultiModalRetriever
|
8 |
+
|
9 |
+
__all__ = [
|
10 |
+
"PyMuPDF4LLMTextLoader",
|
11 |
+
"MarkerTextLoader",
|
12 |
+
"ImageLoader",
|
13 |
+
"TextImageLoader",
|
14 |
+
"MultiModalRetriever",
|
15 |
+
]
|
medrag_multi_modal/document_loader/__init__.py
CHANGED
@@ -1,5 +1,13 @@
|
|
1 |
from .load_image import ImageLoader
|
2 |
-
from .load_text import TextLoader
|
3 |
from .load_text_image import TextImageLoader
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
__all__ = ["ImageLoader", "TextLoader", "TextImageLoader"]
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from .load_image import ImageLoader
|
|
|
2 |
from .load_text_image import TextImageLoader
|
3 |
+
from .text_loader import (
|
4 |
+
MarkerTextLoader,
|
5 |
+
PyMuPDF4LLMTextLoader,
|
6 |
+
)
|
7 |
|
8 |
+
__all__ = [
|
9 |
+
"PyMuPDF4LLMTextLoader",
|
10 |
+
"MarkerTextLoader",
|
11 |
+
"ImageLoader",
|
12 |
+
"TextImageLoader",
|
13 |
+
]
|
medrag_multi_modal/document_loader/load_image.py
CHANGED
@@ -3,15 +3,15 @@ import os
|
|
3 |
from typing import Optional
|
4 |
|
5 |
import rich
|
|
|
6 |
import weave
|
7 |
from pdf2image.pdf2image import convert_from_path
|
8 |
from PIL import Image
|
9 |
|
10 |
-
import wandb
|
11 |
-
from medrag_multi_modal.document_loader.load_text import TextLoader
|
12 |
|
13 |
|
14 |
-
class ImageLoader(TextLoader):
|
15 |
"""
|
16 |
`ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
|
17 |
loading of pages from a PDF file as images.
|
|
|
3 |
from typing import Optional
|
4 |
|
5 |
import rich
|
6 |
+
import wandb
|
7 |
import weave
|
8 |
from pdf2image.pdf2image import convert_from_path
|
9 |
from PIL import Image
|
10 |
|
11 |
+
from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
|
|
|
12 |
|
13 |
|
14 |
+
class ImageLoader(PyMuPDF4LLMTextLoader):
|
15 |
"""
|
16 |
`ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
|
17 |
loading of pages from a PDF file as images.
|
medrag_multi_modal/document_loader/load_text_image.py
CHANGED
@@ -8,10 +8,10 @@ import rich
|
|
8 |
import weave
|
9 |
from PIL import Image
|
10 |
|
11 |
-
from medrag_multi_modal.document_loader.load_text import TextLoader
|
12 |
|
13 |
|
14 |
-
class TextImageLoader(TextLoader):
|
15 |
"""
|
16 |
A class for loading and processing text and images from a document.
|
17 |
|
|
|
8 |
import weave
|
9 |
from PIL import Image
|
10 |
|
11 |
+
from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
|
12 |
|
13 |
|
14 |
+
class TextImageLoader(PyMuPDF4LLMTextLoader):
|
15 |
"""
|
16 |
A class for loading and processing text and images from a document.
|
17 |
|
medrag_multi_modal/document_loader/text_loader/__init__.py
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
1 |
from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
|
2 |
|
3 |
-
__all__ = ["PyMuPDF4LLMTextLoader"]
|
|
|
|
|
|
|
|
1 |
+
from .marker_text_loader import MarkerTextLoader
|
2 |
from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
|
3 |
|
4 |
+
__all__ = [
|
5 |
+
"PyMuPDF4LLMTextLoader",
|
6 |
+
"MarkerTextLoader",
|
7 |
+
]
|
medrag_multi_modal/document_loader/text_loader/pymupdf4llm_text_loader.py
CHANGED
@@ -11,7 +11,7 @@ class PyMuPDF4LLMTextLoader(BaseTextLoader):
|
|
11 |
processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
|
12 |
|
13 |
This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
|
14 |
-
|
15 |
This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
|
16 |
It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
|
17 |
of Page objects, which can be optionally published to a Weave dataset.
|
|
|
11 |
processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
|
12 |
|
13 |
This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
|
14 |
+
|
15 |
This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
|
16 |
It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
|
17 |
of Page objects, which can be optionally published to a Weave dataset.
|
medrag_multi_modal/retrieval/multi_modal_retrieval.py
CHANGED
@@ -1,23 +1,22 @@
|
|
1 |
import os
|
2 |
from typing import Any, Optional
|
3 |
|
|
|
4 |
import weave
|
5 |
from byaldi import RAGMultiModalModel
|
6 |
from PIL import Image
|
7 |
|
8 |
-
import wandb
|
9 |
-
|
10 |
from ..utils import get_wandb_artifact
|
11 |
|
12 |
|
13 |
class MultiModalRetriever(weave.Model):
|
14 |
"""
|
15 |
MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
|
16 |
-
|
17 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
18 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
19 |
also provides methods to index new data and to predict/retrieve documents based on a query.
|
20 |
-
|
21 |
!!! example "Indexing Data"
|
22 |
```python
|
23 |
import wandb
|
@@ -31,14 +30,14 @@ class MultiModalRetriever(weave.Model):
|
|
31 |
index_name="grays-anatomy",
|
32 |
)
|
33 |
```
|
34 |
-
|
35 |
!!! example "Retrieving Documents"
|
36 |
```python
|
37 |
import weave
|
38 |
|
39 |
import wandb
|
40 |
from medrag_multi_modal.retrieval import MultiModalRetriever
|
41 |
-
|
42 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
43 |
retriever = MultiModalRetriever.from_artifact(
|
44 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
@@ -54,6 +53,7 @@ class MultiModalRetriever(weave.Model):
|
|
54 |
Attributes:
|
55 |
model_name (str): The name of the model to be used for retrieval.
|
56 |
"""
|
|
|
57 |
model_name: str
|
58 |
_docs_retrieval_model: Optional[RAGMultiModalModel] = None
|
59 |
_metadata: Optional[dict] = None
|
|
|
1 |
import os
|
2 |
from typing import Any, Optional
|
3 |
|
4 |
+
import wandb
|
5 |
import weave
|
6 |
from byaldi import RAGMultiModalModel
|
7 |
from PIL import Image
|
8 |
|
|
|
|
|
9 |
from ..utils import get_wandb_artifact
|
10 |
|
11 |
|
12 |
class MultiModalRetriever(weave.Model):
|
13 |
"""
|
14 |
MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
|
15 |
+
|
16 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
17 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
18 |
also provides methods to index new data and to predict/retrieve documents based on a query.
|
19 |
+
|
20 |
!!! example "Indexing Data"
|
21 |
```python
|
22 |
import wandb
|
|
|
30 |
index_name="grays-anatomy",
|
31 |
)
|
32 |
```
|
33 |
+
|
34 |
!!! example "Retrieving Documents"
|
35 |
```python
|
36 |
import weave
|
37 |
|
38 |
import wandb
|
39 |
from medrag_multi_modal.retrieval import MultiModalRetriever
|
40 |
+
|
41 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
42 |
retriever = MultiModalRetriever.from_artifact(
|
43 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
|
|
53 |
Attributes:
|
54 |
model_name (str): The name of the model to be used for retrieval.
|
55 |
"""
|
56 |
+
|
57 |
model_name: str
|
58 |
_docs_retrieval_model: Optional[RAGMultiModalModel] = None
|
59 |
_metadata: Optional[dict] = None
|