mratanusarkar committed on
Commit
e0aff18
·
1 Parent(s): 78dd8e8

chore: format & linting + __init__ + fix: imports

Browse files
medrag_multi_modal/__init__.py CHANGED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .document_loader import (
2
+ ImageLoader,
3
+ MarkerTextLoader,
4
+ PyMuPDF4LLMTextLoader,
5
+ TextImageLoader,
6
+ )
7
+ from .retrieval import MultiModalRetriever
8
+
9
+ __all__ = [
10
+ "PyMuPDF4LLMTextLoader",
11
+ "MarkerTextLoader",
12
+ "ImageLoader",
13
+ "TextImageLoader",
14
+ "MultiModalRetriever",
15
+ ]
medrag_multi_modal/document_loader/__init__.py CHANGED
@@ -1,5 +1,13 @@
1
  from .load_image import ImageLoader
2
- from .load_text import TextLoader
3
  from .load_text_image import TextImageLoader
 
 
 
 
4
 
5
- __all__ = ["TextLoader", "TextImageLoader", "ImageLoader"]
 
 
 
 
 
 
1
  from .load_image import ImageLoader
 
2
  from .load_text_image import TextImageLoader
3
+ from .text_loader import (
4
+ MarkerTextLoader,
5
+ PyMuPDF4LLMTextLoader,
6
+ )
7
 
8
+ __all__ = [
9
+ "PyMuPDF4LLMTextLoader",
10
+ "MarkerTextLoader",
11
+ "ImageLoader",
12
+ "TextImageLoader",
13
+ ]
medrag_multi_modal/document_loader/load_image.py CHANGED
@@ -3,15 +3,15 @@ import os
3
  from typing import Optional
4
 
5
  import rich
 
6
  import weave
7
  from pdf2image.pdf2image import convert_from_path
8
  from PIL import Image
9
 
10
- import wandb
11
- from medrag_multi_modal.document_loader.load_text import TextLoader
12
 
13
 
14
- class ImageLoader(TextLoader):
15
  """
16
  `ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
17
  loading of pages from a PDF file as images.
 
3
  from typing import Optional
4
 
5
  import rich
6
+ import wandb
7
  import weave
8
  from pdf2image.pdf2image import convert_from_path
9
  from PIL import Image
10
 
11
+ from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
 
12
 
13
 
14
+ class ImageLoader(PyMuPDF4LLMTextLoader):
15
  """
16
  `ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
17
  loading of pages from a PDF file as images.
medrag_multi_modal/document_loader/load_text_image.py CHANGED
@@ -8,10 +8,10 @@ import rich
8
  import weave
9
  from PIL import Image
10
 
11
- from medrag_multi_modal.document_loader.load_text import TextLoader
12
 
13
 
14
- class TextImageLoader(TextLoader):
15
  """
16
  A class for loading and processing text and images from a document.
17
 
 
8
  import weave
9
  from PIL import Image
10
 
11
+ from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
12
 
13
 
14
+ class TextImageLoader(PyMuPDF4LLMTextLoader):
15
  """
16
  A class for loading and processing text and images from a document.
17
 
medrag_multi_modal/document_loader/text_loader/__init__.py CHANGED
@@ -1,3 +1,7 @@
 
1
  from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
2
 
3
- __all__ = ["PyMuPDF4LLMTextLoader"]
 
 
 
 
1
+ from .marker_text_loader import MarkerTextLoader
2
  from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
3
 
4
+ __all__ = [
5
+ "PyMuPDF4LLMTextLoader",
6
+ "MarkerTextLoader",
7
+ ]
medrag_multi_modal/document_loader/text_loader/pymupdf4llm_text_loader.py CHANGED
@@ -11,7 +11,7 @@ class PyMuPDF4LLMTextLoader(BaseTextLoader):
11
  processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
12
 
13
  This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
14
-
15
  This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
16
  It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
17
  of Page objects, which can be optionally published to a Weave dataset.
 
11
  processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
12
 
13
  This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
14
+
15
  This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
16
  It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
17
  of Page objects, which can be optionally published to a Weave dataset.
medrag_multi_modal/retrieval/multi_modal_retrieval.py CHANGED
@@ -1,23 +1,22 @@
1
  import os
2
  from typing import Any, Optional
3
 
 
4
  import weave
5
  from byaldi import RAGMultiModalModel
6
  from PIL import Image
7
 
8
- import wandb
9
-
10
  from ..utils import get_wandb_artifact
11
 
12
 
13
  class MultiModalRetriever(weave.Model):
14
  """
15
  MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
16
-
17
  This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
18
  It can be initialized with a pre-trained model or from a specified W&B artifact. The class
19
  also provides methods to index new data and to predict/retrieve documents based on a query.
20
-
21
  !!! example "Indexing Data"
22
  ```python
23
  import wandb
@@ -31,14 +30,14 @@ class MultiModalRetriever(weave.Model):
31
  index_name="grays-anatomy",
32
  )
33
  ```
34
-
35
  !!! example "Retrieving Documents"
36
  ```python
37
  import weave
38
 
39
  import wandb
40
  from medrag_multi_modal.retrieval import MultiModalRetriever
41
-
42
  weave.init(project_name="ml-colabs/medrag-multi-modal")
43
  retriever = MultiModalRetriever.from_artifact(
44
  index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
@@ -54,6 +53,7 @@ class MultiModalRetriever(weave.Model):
54
  Attributes:
55
  model_name (str): The name of the model to be used for retrieval.
56
  """
 
57
  model_name: str
58
  _docs_retrieval_model: Optional[RAGMultiModalModel] = None
59
  _metadata: Optional[dict] = None
 
1
  import os
2
  from typing import Any, Optional
3
 
4
+ import wandb
5
  import weave
6
  from byaldi import RAGMultiModalModel
7
  from PIL import Image
8
 
 
 
9
  from ..utils import get_wandb_artifact
10
 
11
 
12
  class MultiModalRetriever(weave.Model):
13
  """
14
  MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
15
+
16
  This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
17
  It can be initialized with a pre-trained model or from a specified W&B artifact. The class
18
  also provides methods to index new data and to predict/retrieve documents based on a query.
19
+
20
  !!! example "Indexing Data"
21
  ```python
22
  import wandb
 
30
  index_name="grays-anatomy",
31
  )
32
  ```
33
+
34
  !!! example "Retrieving Documents"
35
  ```python
36
  import weave
37
 
38
  import wandb
39
  from medrag_multi_modal.retrieval import MultiModalRetriever
40
+
41
  weave.init(project_name="ml-colabs/medrag-multi-modal")
42
  retriever = MultiModalRetriever.from_artifact(
43
  index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
 
53
  Attributes:
54
  model_name (str): The name of the model to be used for retrieval.
55
  """
56
+
57
  model_name: str
58
  _docs_retrieval_model: Optional[RAGMultiModalModel] = None
59
  _metadata: Optional[dict] = None