prithivMLmods committed
Commit 481fc63 (verified) · Parent: b34f4aa

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -198,9 +198,9 @@ def generate_video(model_name: str, text: str, video_path: str,
 
 # Define examples for image and video inference
 image_examples = [
-    ["Convert this page to doc [text].", "images/3.png"],
+    ["Convert this page to doc [text] precisely.", "images/3.png"],
     ["Convert this page to doc [text] precisely.", "images/4.png"],
-    ["Convert this page to doc [text].", "images/1.png"],
+    ["Convert this page to doc [text] precisely.", "images/1.png"],
     ["Convert chart to OTSL.", "images/2.png"]
 
 ]
@@ -257,8 +257,8 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     )
 
     gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Doc-VLMs/discussions)")
-    gr.Markdown("> [VIREX-062225-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
     gr.Markdown("> [DREX-062225-exp](https://huggingface.co/prithivMLmods/DREX-062225-exp): the drex-062225-exp (document retrieval and extraction expert) model is a specialized fine-tuned version of docscopeocr-7b-050425-exp, optimized for document retrieval, content extraction, and analysis recognition. built on top of the qwen2.5-vl architecture.")
+    gr.Markdown("> [VIREX-062225-exp](https://huggingface.co/prithivMLmods/VIREX-062225-exp): the virex-062225-exp (video information retrieval and extraction expert - experimental) model is a fine-tuned version of qwen2.5-vl-7b-instruct, specifically optimized for advanced video understanding, image comprehension, sense of reasoning, and natural language decision-making through cot reasoning.")
     gr.Markdown("> [olmOCR-7B-0225](https://huggingface.co/allenai/olmOCR-7B-0225-preview): the olmocr-7b-0225-preview model is based on qwen2-vl-7b, optimized for document-level optical character recognition (ocr), long-context vision-language understanding, and accurate image-to-text conversion with mathematical latex formatting. designed with a focus on high-fidelity visual-textual comprehension.")
 
     image_submit.click(
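
For context on the lines being edited: the image_examples list and the gr.Markdown model notes sit inside a standard Gradio Blocks app, where each inner list of image_examples maps positionally onto the app's input components. The following is a minimal sketch of that wiring, not the Space's actual code; the component names, the placeholder generate_image function, and the omission of css/theme are assumptions for illustration.

# Minimal, self-contained sketch of how an examples list like image_examples
# is typically wired into a Gradio Blocks UI (placeholder logic, not the Space's code).
import gradio as gr

image_examples = [
    ["Convert this page to doc [text] precisely.", "images/3.png"],
    ["Convert this page to doc [text] precisely.", "images/4.png"],
    ["Convert this page to doc [text] precisely.", "images/1.png"],
    ["Convert chart to OTSL.", "images/2.png"],
]

def generate_image(text, image_path):
    # Placeholder for the Space's model inference; just echoes the inputs here.
    return f"{text} -> {image_path}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Query")
    image = gr.Image(type="filepath", label="Image")       # example paths above are repo-relative placeholders
    output = gr.Textbox(label="Output")
    image_submit = gr.Button("Submit")

    # Each inner list of image_examples fills [prompt, image] in order when clicked.
    gr.Examples(examples=image_examples, inputs=[prompt, image])

    image_submit.click(fn=generate_image, inputs=[prompt, image], outputs=output)

if __name__ == "__main__":
    demo.launch()

In this layout, tweaking a prompt string in image_examples (as this commit does) only changes what the example rows pre-fill into the Query box; the inference path is untouched, which matches the +3/-3 line diff above.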