Spaces:

geekyrakshit
/

medrag

Runtime error

App Files Files Community

geekyrakshit committed on Oct 24, 2024

Commit

ceaeef3

1 Parent(s): 7934a8e

update: FigureAnnotatorFromPageImage

Browse files

Files changed (3) hide show

medrag_multi_modal/assistant/__init__.py +2 -2
medrag_multi_modal/assistant/figure_annotation.py +4 -1
medrag_multi_modal/assistant/llm_client.py +50 -2

medrag_multi_modal/assistant/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from .figure_annotation import FigureAnnotator
 from .llm_client import ClientType, LLMClient
 from .medqa_assistant import MedQAAssistant
-__all__ = ["LLMClient", "ClientType", "MedQAAssistant", "FigureAnnotator"]

+from .figure_annotation import FigureAnnotatorFromPageImage
 from .llm_client import ClientType, LLMClient
 from .medqa_assistant import MedQAAssistant
+__all__ = ["LLMClient", "ClientType", "MedQAAssistant", "FigureAnnotatorFromPageImage"]

medrag_multi_modal/assistant/figure_annotation.py CHANGED Viewed

@@ -10,7 +10,7 @@ from ..utils import get_wandb_artifact, read_jsonl_file
 from .llm_client import LLMClient
-class FigureAnnotator(weave.Model):
     llm_client: LLMClient
     @weave.op()
@@ -24,6 +24,7 @@ You are presented with a page from a scientific textbook.
 You are to first identify the number of figures in the image.
 Then you are to identify the figure IDs associated with each figure in the image.
 Then, you are to extract the exact figure descriptions from the image.
 Here are some clues you need to follow:
 1. Figure IDs are unique identifiers for each figure in the image.
@@ -33,6 +34,8 @@ Here are some clues you need to follow:
 5. The text in the image is written in English and is present in a two-column format.
 6. There is a clear distinction between the figure caption and the regular text in the image in the form of extra white space.
 7. There might be multiple figures present in the image.
 """,
             user_prompt=[page_image],
         )

 from .llm_client import LLMClient
+class FigureAnnotatorFromPageImage(weave.Model):
     llm_client: LLMClient
     @weave.op()
 You are to first identify the number of figures in the image.
 Then you are to identify the figure IDs associated with each figure in the image.
 Then, you are to extract the exact figure descriptions from the image.
+You need to output the figure IDs and descriptions in a structured manner as a JSON object.
 Here are some clues you need to follow:
 1. Figure IDs are unique identifiers for each figure in the image.
 5. The text in the image is written in English and is present in a two-column format.
 6. There is a clear distinction between the figure caption and the regular text in the image in the form of extra white space.
 7. There might be multiple figures present in the image.
+8. The figures may or may not have a distinct border against a white background.
+9. There might be multiple figures present in the image. You are to carefully identify all the figures in the image.
 """,
             user_prompt=[page_image],
         )

medrag_multi_modal/assistant/llm_client.py CHANGED Viewed

@@ -14,11 +14,59 @@ class ClientType(str, Enum):
     MISTRAL = "mistral"
 class LLMClient(weave.Model):
     model_name: str
-    client_type: ClientType
-    def __init__(self, model_name: str, client_type: ClientType):
         super().__init__(model_name=model_name, client_type=client_type)
     @weave.op()

     MISTRAL = "mistral"
+GOOGLE_MODELS = [
+    "gemini-1.0-pro-latest",
+    "gemini-1.0-pro",
+    "gemini-pro",
+    "gemini-1.0-pro-001",
+    "gemini-1.0-pro-vision-latest",
+    "gemini-pro-vision",
+    "gemini-1.5-pro-latest",
+    "gemini-1.5-pro-001",
+    "gemini-1.5-pro-002",
+    "gemini-1.5-pro",
+    "gemini-1.5-pro-exp-0801",
+    "gemini-1.5-pro-exp-0827",
+    "gemini-1.5-flash-latest",
+    "gemini-1.5-flash-001",
+    "gemini-1.5-flash-001-tuning",
+    "gemini-1.5-flash",
+    "gemini-1.5-flash-exp-0827",
+    "gemini-1.5-flash-002",
+    "gemini-1.5-flash-8b",
+    "gemini-1.5-flash-8b-001",
+    "gemini-1.5-flash-8b-latest",
+    "gemini-1.5-flash-8b-exp-0827",
+    "gemini-1.5-flash-8b-exp-0924",
+]
+MISTRAL_MODELS = [
+    "ministral-3b-latest",
+    "ministral-8b-latest",
+    "mistral-large-latest",
+    "mistral-small-latest",
+    "codestral-latest",
+    "pixtral-12b-2409",
+    "open-mistral-nemo",
+    "open-codestral-mamba",
+    "open-mistral-7b",
+    "open-mixtral-8x7b",
+    "open-mixtral-8x22b",
+]
 class LLMClient(weave.Model):
     model_name: str
+    client_type: Optional[ClientType]
+    def __init__(self, model_name: str, client_type: Optional[ClientType] = None):
+        if client_type is None:
+            if model_name in GOOGLE_MODELS:
+                client_type = ClientType.GEMINI
+            elif model_name in MISTRAL_MODELS:
+                client_type = ClientType.MISTRAL
+            else:
+                raise ValueError(f"Invalid model name: {model_name}")
         super().__init__(model_name=model_name, client_type=client_type)
     @weave.op()