Update app/utils.py
app/utils.py    CHANGED    +20 -7
@@ -18,22 +18,35 @@ class OCRModel:
 
     def initialize(self):
         try:
-            logger.info("Initializing OCR model...")
 
-
-
+            logger.info("Initializing OCR model...")
+            # Try loading with use_fast=False if the fast tokenizer fails
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    'stepfun-ai/GOT-OCR-2.0-hf',
+                    trust_remote_code=True,
+                    use_fast=False  # Try with slow tokenizer
+                )
+            except Exception as e:
+                logger.warning(f"Fast tokenizer failed, trying alternative: {str(e)}")
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    'stepfun-ai/GOT-OCR-2.0-hf',
+                    trust_remote_code=True,
+                    use_fast=False
+                )
+
             self.model = AutoModel.from_pretrained(
                 'stepfun-ai/GOT-OCR-2.0-hf',
                 trust_remote_code=True,
                 low_cpu_mem_usage=True,
-                device_map='
+                device_map='auto',  # Let transformers decide the best device
                 use_safetensors=True,
                 pad_token_id=self.tokenizer.eos_token_id
             )
 
-            #
-            self.device = "cuda"
-            self.model = self.model.eval().
+            # Let the model decide device placement
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model = self.model.eval().to(self.device)
 
             logger.info("Model initialization completed successfully")
 
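For reference, a minimal standalone sketch of the initialization path after this change follows. The model id, the from_pretrained keyword arguments, and the device selection are taken from the diff above; the class scaffolding, the logging setup, the MODEL_ID constant, and the outer except clause are assumptions, since they sit outside the visible hunk. The hf_device_map guard is also an addition of this sketch, because manually moving a model that accelerate has already dispatched via device_map='auto' can be rejected; the commit itself calls .to() unconditionally.

import logging

import torch
from transformers import AutoModel, AutoTokenizer

logger = logging.getLogger(__name__)

MODEL_ID = 'stepfun-ai/GOT-OCR-2.0-hf'


class OCRModel:
    """Illustrative wrapper; the Space's real class lives in app/utils.py."""

    def __init__(self):
        self.tokenizer = None
        self.model = None
        self.device = None

    def initialize(self):
        try:
            logger.info("Initializing OCR model...")

            # Tokenizer: load the slow tokenizer and retry once on failure,
            # mirroring the try/except added in this commit.
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    MODEL_ID,
                    trust_remote_code=True,
                    use_fast=False
                )
            except Exception as e:
                logger.warning(f"Tokenizer load failed, retrying: {str(e)}")
                self.tokenizer = AutoTokenizer.from_pretrained(
                    MODEL_ID,
                    trust_remote_code=True,
                    use_fast=False
                )

            # Model weights: device_map='auto' lets transformers/accelerate
            # choose the placement; low_cpu_mem_usage keeps peak RAM down.
            self.model = AutoModel.from_pretrained(
                MODEL_ID,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                device_map='auto',
                use_safetensors=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

            # Prefer CUDA when available, otherwise fall back to CPU.
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model = self.model.eval()
            # Guard (not in the commit): only move the model manually if
            # accelerate did not already dispatch it via device_map.
            if getattr(self.model, "hf_device_map", None) is None:
                self.model = self.model.to(self.device)

            logger.info("Model initialization completed successfully")
        except Exception as e:
            # The real handler is outside the hunk shown above; this one is
            # assumed so the sketch is self-contained.
            logger.error(f"Model initialization failed: {str(e)}")
            raise

A caller would then run something like ocr = OCRModel(); ocr.initialize() once at startup and reuse the loaded model and tokenizer for every request.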