Spaces:

geekyrakshit
/

medrag

Runtime error

mratanusarkar committed on Oct 17, 2024

Commit

e0aff18

1 Parent(s): 78dd8e8

chore: format & linting + init + fix: imports

Files changed (7) hide show

medrag_multi_modal/__init__.py CHANGED Viewed

+from .document_loader import (
+    ImageLoader,
+    MarkerTextLoader,
+    PyMuPDF4LLMTextLoader,
+    TextImageLoader,
+)
+from .retrieval import MultiModalRetriever
+__all__ = [
+    "PyMuPDF4LLMTextLoader",
+    "MarkerTextLoader",
+    "ImageLoader",
+    "TextImageLoader",
+    "MultiModalRetriever",
+]

medrag_multi_modal/document_loader/__init__.py CHANGED Viewed

@@ -1,5 +1,13 @@
 from .load_image import ImageLoader
-from .load_text import TextLoader
 from .load_text_image import TextImageLoader
-__all__ = ["TextLoader", "TextImageLoader", "ImageLoader"]

 from .load_image import ImageLoader
 from .load_text_image import TextImageLoader
+from .text_loader import (
+    MarkerTextLoader,
+    PyMuPDF4LLMTextLoader,
+)
+__all__ = [
+    "PyMuPDF4LLMTextLoader",
+    "MarkerTextLoader",
+    "ImageLoader",
+    "TextImageLoader",
+]

medrag_multi_modal/document_loader/load_image.py CHANGED Viewed

@@ -3,15 +3,15 @@ import os
 from typing import Optional
 import rich
 import weave
 from pdf2image.pdf2image import convert_from_path
 from PIL import Image
-import wandb
-from medrag_multi_modal.document_loader.load_text import TextLoader
-class ImageLoader(TextLoader):
     """
     `ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
     loading of pages from a PDF file as images.

 from typing import Optional
 import rich
+import wandb
 import weave
 from pdf2image.pdf2image import convert_from_path
 from PIL import Image
+from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
+class ImageLoader(PyMuPDF4LLMTextLoader):
     """
     `ImageLoader` is a class that extends the `TextLoader` class to handle the extraction and
     loading of pages from a PDF file as images.

medrag_multi_modal/document_loader/load_text_image.py CHANGED Viewed

@@ -8,10 +8,10 @@ import rich
 import weave
 from PIL import Image
-from medrag_multi_modal.document_loader.load_text import TextLoader
-class TextImageLoader(TextLoader):
     """
     A class for loading and processing text and images from a document.

 import weave
 from PIL import Image
+from medrag_multi_modal.document_loader.text_loader import PyMuPDF4LLMTextLoader
+class TextImageLoader(PyMuPDF4LLMTextLoader):
     """
     A class for loading and processing text and images from a document.

medrag_multi_modal/document_loader/text_loader/__init__.py CHANGED Viewed

@@ -1,3 +1,7 @@
 from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
-__all__ = ["PyMuPDF4LLMTextLoader"]

+from .marker_text_loader import MarkerTextLoader
 from .pymupdf4llm_text_loader import PyMuPDF4LLMTextLoader
+__all__ = [
+    "PyMuPDF4LLMTextLoader",
+    "MarkerTextLoader",
+]

medrag_multi_modal/document_loader/text_loader/pymupdf4llm_text_loader.py CHANGED Viewed

@@ -11,7 +11,7 @@ class PyMuPDF4LLMTextLoader(BaseTextLoader):
     processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
     This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
     This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
     It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
     of Page objects, which can be optionally published to a Weave dataset.

     processing it into markdown using `pymupdf4llm`, and optionally publishing it to a Weave dataset.
     This class extends the BaseTextLoader and implements the abstract methods to load and process pages from a PDF file.
     This class will handle the downloading of a PDF file from a given URL if it does not already exist locally.
     It uses PyPDF2 to read the PDF and pymupdf4llm to convert pages to markdown. The processed pages are stored in a list
     of Page objects, which can be optionally published to a Weave dataset.

medrag_multi_modal/retrieval/multi_modal_retrieval.py CHANGED Viewed

@@ -1,23 +1,22 @@
 import os
 from typing import Any, Optional
 import weave
 from byaldi import RAGMultiModalModel
 from PIL import Image
-import wandb
 from ..utils import get_wandb_artifact
 class MultiModalRetriever(weave.Model):
     """
     MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
     This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
     It can be initialized with a pre-trained model or from a specified W&B artifact. The class
     also provides methods to index new data and to predict/retrieve documents based on a query.
     !!! example "Indexing Data"
         ```python
         import wandb
@@ -31,14 +30,14 @@ class MultiModalRetriever(weave.Model):
             index_name="grays-anatomy",
         )
         ```
     !!! example "Retrieving Documents"
         ```python
         import weave
         import wandb
         from medrag_multi_modal.retrieval import MultiModalRetriever
         weave.init(project_name="ml-colabs/medrag-multi-modal")
         retriever = MultiModalRetriever.from_artifact(
             index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
@@ -54,6 +53,7 @@ class MultiModalRetriever(weave.Model):
     Attributes:
         model_name (str): The name of the model to be used for retrieval.
     """
     model_name: str
     _docs_retrieval_model: Optional[RAGMultiModalModel] = None
     _metadata: Optional[dict] = None

 import os
 from typing import Any, Optional
+import wandb
 import weave
 from byaldi import RAGMultiModalModel
 from PIL import Image
 from ..utils import get_wandb_artifact
 class MultiModalRetriever(weave.Model):
     """
     MultiModalRetriever is a class that facilitates the retrieval of page images using ColPali.
     This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
     It can be initialized with a pre-trained model or from a specified W&B artifact. The class
     also provides methods to index new data and to predict/retrieve documents based on a query.
     !!! example "Indexing Data"
         ```python
         import wandb
             index_name="grays-anatomy",
         )
         ```
     !!! example "Retrieving Documents"
         ```python
         import weave
         import wandb
         from medrag_multi_modal.retrieval import MultiModalRetriever
         weave.init(project_name="ml-colabs/medrag-multi-modal")
         retriever = MultiModalRetriever.from_artifact(
             index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
     Attributes:
         model_name (str): The name of the model to be used for retrieval.
     """
     model_name: str
     _docs_retrieval_model: Optional[RAGMultiModalModel] = None
     _metadata: Optional[dict] = None