prithivMLmods committed on
Commit
b22fc2d
·
verified ·
1 Parent(s): 58a89c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -96,10 +96,10 @@ def generate_image(model_name: str, text: str, image: Image.Image,
96
  """
97
  Generates responses using the selected model for image input.
98
  """
99
- if model_name == "VIREX-062225-8B-exp":
100
  processor = processor_m
101
  model = model_m
102
- elif model_name == "DREX-062225-8B-exp":
103
  processor = processor_x
104
  model = model_x
105
  elif model_name == "olmOCR-7B-0225":
@@ -152,10 +152,10 @@ def generate_video(model_name: str, text: str, video_path: str,
152
  """
153
  Generates responses using the selected model for video input.
154
  """
155
- if model_name == "VIREX-062225-8B-exp":
156
  processor = processor_m
157
  model = model_m
158
- elif model_name == "DREX-062225-8B-exp":
159
  processor = processor_x
160
  model = model_x
161
  elif model_name == "olmOCR-7B-0225":
@@ -275,15 +275,15 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
275
  markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
276
 
277
  model_choice = gr.Radio(
278
- choices=["DREX-062225-8B-exp", "VIREX-062225-8B-exp", "Typhoon-OCR-3B", "olmOCR-7B-0225"],
279
  label="Select Model",
280
- value="DREX-062225-8B-exp"
281
  )
282
 
283
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Doc-VLMs/discussions)")
284
- gr.Markdown("> [DREX-062225-8B-exp](https://huggingface.co/prithivMLmods/DREX-062225-exp): the drex-062225-exp (document retrieval and extraction expert) model is a specialized fine-tuned version of docscopeocr-7b-050425-exp, optimized for document retrieval, content extraction, and analysis recognition. built on top of the qwen2.5-vl architecture.")
285
- gr.Markdown("> [VIREX-062225-8B-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
286
- gr.Markdown("> [typhoon-ocr-3b](https://huggingface.co/scb10x/typhoon-ocr-3b): a bilingual document parsing model built specifically for real-world documents in thai and english, inspired by models like olmocr, based on qwen2.5-vl-instruction. this model is intended to be used with a specific prompt only.")
287
  gr.Markdown("> [olmOCR-7B-0225](https://huggingface.co/allenai/olmOCR-7B-0225-preview): the olmocr-7b-0225-preview model is based on qwen2-vl-7b, optimized for document-level optical character recognition (ocr), long-context vision-language understanding, and accurate image-to-text conversion with mathematical latex formatting. designed with a focus on high-fidelity visual-textual comprehension.")
288
  gr.Markdown(">⚠️note: all the models in space are not guaranteed to perform well in video inference use cases.")
289
 
 
96
  """
97
  Generates responses using the selected model for image input.
98
  """
99
+ if model_name == "VIREX-062225-7B-exp":
100
  processor = processor_m
101
  model = model_m
102
+ elif model_name == "DREX-062225-7B-exp":
103
  processor = processor_x
104
  model = model_x
105
  elif model_name == "olmOCR-7B-0225":
 
152
  """
153
  Generates responses using the selected model for video input.
154
  """
155
+ if model_name == "VIREX-062225-7B-exp":
156
  processor = processor_m
157
  model = model_m
158
+ elif model_name == "DREX-062225-7B-exp":
159
  processor = processor_x
160
  model = model_x
161
  elif model_name == "olmOCR-7B-0225":
 
275
  markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
276
 
277
  model_choice = gr.Radio(
278
+ choices=["DREX-062225-7B-exp", "olmOCR-7B-0225-preview", "VIREX-062225-7B-exp", "Typhoon-OCR-3B"],
279
  label="Select Model",
280
+ value="DREX-062225-7B-exp"
281
  )
282
 
283
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Doc-VLMs/discussions)")
284
+ gr.Markdown("> [DREX-062225-7B-exp](https://huggingface.co/prithivMLmods/DREX-062225-exp): the drex-062225-exp (document retrieval and extraction expert) model is a specialized fine-tuned version of docscopeocr-7b-050425-exp, optimized for document retrieval, content extraction, and analysis recognition. built on top of the qwen2.5-vl architecture.")
285
+ gr.Markdown("> [VIREX-062225-7B-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
286
+ gr.Markdown("> [Typhoon-OCR-3B](https://huggingface.co/scb10x/typhoon-ocr-3b): a bilingual document parsing model built specifically for real-world documents in thai and english, inspired by models like olmocr, based on qwen2.5-vl-instruction. this model is intended to be used with a specific prompt only.")
287
  gr.Markdown("> [olmOCR-7B-0225](https://huggingface.co/allenai/olmOCR-7B-0225-preview): the olmocr-7b-0225-preview model is based on qwen2-vl-7b, optimized for document-level optical character recognition (ocr), long-context vision-language understanding, and accurate image-to-text conversion with mathematical latex formatting. designed with a focus on high-fidelity visual-textual comprehension.")
288
  gr.Markdown(">⚠️note: all the models in space are not guaranteed to perform well in video inference use cases.")
289