Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,5 +1,7 @@
 import os
-
+# Disable chunked prefill and asynchronous output before importing vllm.
+os.environ["VLLM_ENABLE_CHUNKED_PREFILL"] = "False"
+os.environ["VLLM_ENABLE_ASYNC_OUTPUT"] = "False"
 
 import re
 import uuid
@@ -86,16 +88,16 @@ def load_system_prompt(repo_id: str, filename: str) -> str:
 
 # Model details (adjust as needed)
 MODEL_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
-# Load the system prompt from HF Hub (
+# Load the system prompt from HF Hub (ensure SYSTEM_PROMPT.txt exists in the repo)
 SYSTEM_PROMPT = load_system_prompt(MODEL_ID, "SYSTEM_PROMPT.txt")
-#
+# Alternatively, you can hardcode the system prompt:
 # SYSTEM_PROMPT = "You are a conversational agent that always answers straight to the point, and ends with an ASCII cat."
 
-# Set the device explicitly
+# Set the device explicitly.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Initialize the Mistral LLM via vllm.
-# The
+# The enforce_eager flag ensures synchronous (eager) output.
 llm = LLM(model=MODEL_ID, tokenizer_mode="mistral", device=device, enforce_eager=True)
 
 # -----------------------------------------------------------------------------
@@ -124,7 +126,7 @@ def generate(
         {"role": "system", "content": SYSTEM_PROMPT}
     ]
 
-    # Check if any file is provided
+    # Check if any file is provided.
     video_extensions = (".mp4", ".mov", ".avi", ".mkv", ".webm")
     if files:
         # If any file is a video, use video inference branch.
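Note: the second hunk's context line references load_system_prompt(repo_id, filename), whose body is not shown in this diff. As a sketch only (the actual helper in app.py may differ), a function with that signature could download the prompt file from the Hub with huggingface_hub and read it:

from huggingface_hub import hf_hub_download

def load_system_prompt(repo_id: str, filename: str) -> str:
    # Sketch: fetch the prompt file (e.g. SYSTEM_PROMPT.txt) from the model repo
    # on the Hugging Face Hub and return its contents as a plain string.
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    with open(path, "r", encoding="utf-8") as f:
        return f.read()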
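The last hunk only shows the entry to the file-handling logic in generate(). One plausible way the video branch could be selected, given the video_extensions tuple above (the helper name below is hypothetical, not taken from the diff):

def is_video_file(path: str, video_extensions=(".mp4", ".mov", ".avi", ".mkv", ".webm")) -> bool:
    # Hypothetical helper: treat a file as a video if its name ends with a
    # known video extension; generate() would route such files to video inference.
    return path.lower().endswith(video_extensions)

# Example usage inside generate():
# use_video_branch = any(is_video_file(f) for f in files)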