geekyrakshit committed
Commit 6c6905f · 1 Parent(s): e197ad0

update: MedQAAssistant + FigureAnnotatorFromPageImage
medrag_multi_modal/assistant/figure_annotation.py CHANGED
@@ -1,12 +1,11 @@
 import os
 from glob import glob
-from typing import Union
+from typing import Optional, Union

 import cv2
 import weave
 from PIL import Image
 from pydantic import BaseModel
-from rich.progress import track

 from ..utils import get_wandb_artifact, read_jsonl_file
 from .llm_client import LLMClient
@@ -23,7 +22,8 @@ class FigureAnnotations(BaseModel):

 class FigureAnnotatorFromPageImage(weave.Model):
     """
-    `FigureAnnotatorFromPageImage` is a class that leverages two LLM clients to annotate figures from a page image of a scientific textbook.
+    `FigureAnnotatorFromPageImage` is a class that leverages two LLM clients to annotate
+    figures from a page image of a scientific textbook.

     !!! example "Example Usage"
         ```python
@@ -39,19 +39,35 @@ class FigureAnnotatorFromPageImage(weave.Model):
         figure_annotator = FigureAnnotatorFromPageImage(
             figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
             structured_output_llm_client=LLMClient(model_name="gpt-4o"),
+            image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
         )
-        annotations = figure_annotator.predict(
-            image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6"
-        )
+        annotations = figure_annotator.predict(page_idx=34)
         ```

-    Attributes:
-        figure_extraction_llm_client (LLMClient): An LLM client used to extract figure annotations from the page image.
-        structured_output_llm_client (LLMClient): An LLM client used to convert the extracted annotations into a structured format.
+    Args:
+        figure_extraction_llm_client (LLMClient): An LLM client used to extract figure annotations
+            from the page image.
+        structured_output_llm_client (LLMClient): An LLM client used to convert the extracted
+            annotations into a structured format.
+        image_artifact_address (Optional[str]): The address of the image artifact containing the
+            page images.
     """

     figure_extraction_llm_client: LLMClient
     structured_output_llm_client: LLMClient
+    _artifact_dir: str
+
+    def __init__(
+        self,
+        figure_extraction_llm_client: LLMClient,
+        structured_output_llm_client: LLMClient,
+        image_artifact_address: Optional[str] = None,
+    ):
+        super().__init__(
+            figure_extraction_llm_client=figure_extraction_llm_client,
+            structured_output_llm_client=structured_output_llm_client,
+        )
+        self._artifact_dir = get_wandb_artifact(image_artifact_address, "dataset")

     @weave.op()
     def annotate_figures(
@@ -92,7 +108,7 @@ Here are some clues you need to follow:
         )

     @weave.op()
-    def predict(self, page_idx: int, image_artifact_address: str):
+    def predict(self, page_idx: int) -> dict[int, list[FigureAnnotation]]:
         """
         Predicts figure annotations for a specific page in a document.

@@ -105,22 +121,23 @@ Here are some clues you need to follow:

         Args:
             page_idx (int): The index of the page to annotate.
-            image_artifact_address (str): The address of the image artifact containing the page images.
+            image_artifact_address (str): The address of the image artifact containing the
+                page images.

         Returns:
-            dict: A dictionary containing the page index as the key and the extracted figure annotations
-                as the value.
+            dict: A dictionary containing the page index as the key and the extracted figure
+                annotations as the value.
         """
-        artifact_dir = get_wandb_artifact(image_artifact_address, "dataset")
-        metadata = read_jsonl_file(os.path.join(artifact_dir, "metadata.jsonl"))
+
+        metadata = read_jsonl_file(os.path.join(self._artifact_dir, "metadata.jsonl"))
         annotations = {}
-        for item in track(metadata, description="Annotating images:"):
+        for item in metadata:
             if item["page_idx"] == page_idx:
                 page_image_file = os.path.join(
-                    artifact_dir, f"page{item['page_idx']}.png"
+                    self._artifact_dir, f"page{item['page_idx']}.png"
                 )
                 figure_image_files = glob(
-                    os.path.join(artifact_dir, f"page{item['page_idx']}_fig*.png")
+                    os.path.join(self._artifact_dir, f"page{item['page_idx']}_fig*.png")
                 )
                 if len(figure_image_files) > 0:
                     page_image = cv2.imread(page_image_file)
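The practical effect of this change for code that uses `FigureAnnotatorFromPageImage`: the W&B image artifact address moves from `predict()` into the constructor (where `get_wandb_artifact` resolves it once into `_artifact_dir`), and `predict()` now takes only a `page_idx`. A minimal before/after sketch of a call site, reusing the model names and artifact address from the docstring example (the surrounding `weave.init`/environment setup is assumed):

```python
from medrag_multi_modal.assistant import FigureAnnotatorFromPageImage, LLMClient

# Before this commit, the artifact address was passed on every call:
# annotations = figure_annotator.predict(
#     page_idx=34,
#     image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
# )

# After this commit, the artifact is bound once at construction time and
# predict() needs only the page index.
figure_annotator = FigureAnnotatorFromPageImage(
    figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
    structured_output_llm_client=LLMClient(model_name="gpt-4o"),
    image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
)
annotations = figure_annotator.predict(page_idx=34)
# annotations maps the page index to the extracted figure annotations, e.g. {34: [...]}
```

Binding the artifact in the constructor is what lets `MedQAAssistant` (below) call the annotator once per retrieved page without threading the artifact address through its own `predict()`.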
medrag_multi_modal/assistant/medqa_assistant.py CHANGED
@@ -1,5 +1,3 @@
-from typing import Optional
-
 import weave

 from ..retrieval import SimilarityMetric
@@ -8,7 +6,50 @@ from .llm_client import LLMClient


 class MedQAAssistant(weave.Model):
-    """Cuming"""
+    """
+    `MedQAAssistant` is a class designed to assist with medical queries by leveraging a
+    language model client, a retriever model, and a figure annotator.
+
+    !!! example "Usage Example"
+        ```python
+        import weave
+        from dotenv import load_dotenv
+
+        from medrag_multi_modal.assistant import (
+            FigureAnnotatorFromPageImage,
+            LLMClient,
+            MedQAAssistant,
+        )
+        from medrag_multi_modal.retrieval import MedCPTRetriever
+
+        load_dotenv()
+        weave.init(project_name="ml-colabs/medrag-multi-modal")
+
+        llm_client = LLMClient(model_name="gemini-1.5-flash")
+
+        retriever=MedCPTRetriever.from_wandb_artifact(
+            chunk_dataset_name="grays-anatomy-chunks:v0",
+            index_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-medcpt:v0",
+        )
+
+        figure_annotator=FigureAnnotatorFromPageImage(
+            figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
+            structured_output_llm_client=LLMClient(model_name="gpt-4o"),
+            image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
+        )
+        medqa_assistant = MedQAAssistant(
+            llm_client=llm_client, retriever=retriever, figure_annotator=figure_annotator
+        )
+        medqa_assistant.predict(query="What is ribosome?")
+        ```
+
+    Args:
+        llm_client (LLMClient): The language model client used to generate responses.
+        retriever (weave.Model): The model used to retrieve relevant chunks of text from a medical document.
+        figure_annotator (FigureAnnotatorFromPageImage): The annotator used to extract figure descriptions from pages.
+        top_k_chunks (int): The number of top chunks to retrieve based on similarity metric.
+        retrieval_similarity_metric (SimilarityMetric): The metric used to measure similarity for retrieval.
+    """

     llm_client: LLMClient
     retriever: weave.Model
@@ -17,7 +58,25 @@ class MedQAAssistant(weave.Model):
     retrieval_similarity_metric: SimilarityMetric = SimilarityMetric.COSINE

     @weave.op()
-    def predict(self, query: str, image_artifact_address: Optional[str] = None) -> str:
+    def predict(self, query: str) -> str:
+        """
+        Generates a response to a medical query by retrieving relevant text chunks and figure descriptions
+        from a medical document and using a language model to generate the final response.
+
+        This function performs the following steps:
+        1. Retrieves relevant text chunks from the medical document based on the query using the retriever model.
+        2. Extracts the text and page indices from the retrieved chunks.
+        3. Retrieves figure descriptions from the pages identified in the previous step using the figure annotator.
+        4. Constructs a system prompt and user prompt combining the query, retrieved text chunks, and figure descriptions.
+        5. Uses the language model client to generate a response based on the constructed prompts.
+        6. Appends the source information (page numbers) to the generated response.
+
+        Args:
+            query (str): The medical query to be answered.
+
+        Returns:
+            str: The generated response to the query, including source information.
+        """
         retrieved_chunks = self.retriever.predict(
             query, top_k=self.top_k_chunks, metric=self.retrieval_similarity_metric
         )
@@ -29,14 +88,13 @@ class MedQAAssistant(weave.Model):
             page_indices.add(int(chunk["page_idx"]))

         figure_descriptions = []
-        if image_artifact_address is not None:
-            for page_idx in page_indices:
-                figure_annotations = self.figure_annotator.predict(
-                    page_idx=page_idx, image_artifact_address=image_artifact_address
-                )
-                figure_descriptions += [
-                    item["figure_description"] for item in figure_annotations[page_idx]
-                ]
+        for page_idx in page_indices:
+            figure_annotations = self.figure_annotator.predict(page_idx=page_idx)[
+                page_idx
+            ]
+            figure_descriptions += [
+                item["figure_description"] for item in figure_annotations
+            ]

         system_prompt = """
         You are an expert in medical science. You are given a query and a list of chunks from a medical document.
@@ -46,5 +104,5 @@ class MedQAAssistant(weave.Model):
             user_prompt=[query, *retrieved_chunk_texts, *figure_descriptions],
         )
         page_numbers = ", ".join([str(int(page_idx) + 1) for page_idx in page_indices])
-        response += f"\n\n**Source:** {'Pages' if len(page_numbers) > 1 else 'Page'} {page_numbers} from Gray's Anatomy"
+        response += f"\n\n**Source:** {'Pages' if len(page_indices) > 1 else 'Page'} {page_numbers} from Gray's Anatomy"
         return response
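The corresponding call-site change for `MedQAAssistant`: `predict()` no longer accepts an optional `image_artifact_address`, and figure descriptions are now always fetched for every retrieved page through the annotator configured above. The source footer also pluralizes correctly now, since it checks `len(page_indices)` rather than the length of the joined `page_numbers` string. A minimal sketch of the migration, assuming the `medqa_assistant` object constructed in the docstring example:

```python
# Before this commit, figure descriptions were only used when an artifact
# address was passed explicitly:
# response = medqa_assistant.predict(
#     query="What is ribosome?",
#     image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
# )

# After this commit, the artifact address lives on FigureAnnotatorFromPageImage,
# so the query is the only argument.
response = medqa_assistant.predict(query="What is ribosome?")
print(response)  # answer text, followed by "**Source:** Page(s) ... from Gray's Anatomy"
```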