mratanusarkar committed on
Commit
bf0f2e5
·
1 Parent(s): 331f289

add: example usage for marker and pdf2img loaders

Browse files
medrag_multi_modal/document_loader/image_loader/marker_img_loader.py CHANGED
@@ -15,6 +15,33 @@ class MarkerImageLoader(BaseImageLoader):
15
  This class provides functionality to extract images from a PDF file using the marker library,
16
  and optionally publish these images to a WandB artifact.
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  Args:
19
  url (str): The URL of the PDF document.
20
  document_name (str): The name of the document.
 
15
  This class provides functionality to extract images from a PDF file using the marker library,
16
  and optionally publish these images to a WandB artifact.
17
 
18
+ !!! example "Example Usage"
19
+ ```python
20
+ import asyncio
21
+
22
+ import weave
23
+
24
+ import wandb
25
+ from medrag_multi_modal.document_loader.image_loader import MarkerImageLoader
26
+
27
+ weave.init(project_name="ml-colabs/medrag-multi-modal")
28
+ wandb.init(project="medrag-multi-modal", entity="ml-colabs")
29
+ url = "https://archive.org/download/GraysAnatomy41E2015PDF/Grays%20Anatomy-41%20E%20%282015%29%20%5BPDF%5D.pdf"
30
+ loader = MarkerImageLoader(
31
+ url=url,
32
+ document_name="Gray's Anatomy",
33
+ document_file_path="grays_anatomy.pdf",
34
+ )
35
+ asyncio.run(
36
+ loader.load_data(
37
+ start_page=31,
38
+ end_page=36,
39
+ wandb_artifact_name="grays-anatomy-images",
40
+ cleanup=False,
41
+ )
42
+ )
43
+ ```
44
+
45
  Args:
46
  url (str): The URL of the PDF document.
47
  document_name (str): The name of the document.
medrag_multi_modal/document_loader/image_loader/pdf2image_img_loader.py CHANGED
@@ -15,6 +15,33 @@ class PDF2ImageLoader(BaseImageLoader):
15
  and optionally publish these images to a WandB artifact.
16
  In effect, it produces a snapshot image of each page of the PDF.
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  Args:
19
  url (str): The URL of the PDF document.
20
  document_name (str): The name of the document.
 
15
  and optionally publish these images to a WandB artifact.
16
  In effect, it produces a snapshot image of each page of the PDF.
17
 
18
+ !!! example "Example Usage"
19
+ ```python
20
+ import asyncio
21
+
22
+ import weave
23
+
24
+ import wandb
25
+ from medrag_multi_modal.document_loader.image_loader import PDF2ImageLoader
26
+
27
+ weave.init(project_name="ml-colabs/medrag-multi-modal")
28
+ wandb.init(project="medrag-multi-modal", entity="ml-colabs")
29
+ url = "https://archive.org/download/GraysAnatomy41E2015PDF/Grays%20Anatomy-41%20E%20%282015%29%20%5BPDF%5D.pdf"
30
+ loader = PDF2ImageLoader(
31
+ url=url,
32
+ document_name="Gray's Anatomy",
33
+ document_file_path="grays_anatomy.pdf",
34
+ )
35
+ asyncio.run(
36
+ loader.load_data(
37
+ start_page=31,
38
+ end_page=36,
39
+ wandb_artifact_name="grays-anatomy-images",
40
+ cleanup=False,
41
+ )
42
+ )
43
+ ```
44
+
45
  Args:
46
  url (str): The URL of the PDF document.
47
  document_name (str): The name of the document.