Tonic committed on
Commit
cbbac88
·
unverified ·
1 Parent(s): dfd3463

continues to remove cache parameter

Browse files
Files changed (1) hide show
  1. app.py +251 -45
app.py CHANGED
@@ -49,6 +49,37 @@ warnings.filterwarnings("ignore", message="Setting `pad_token_id` to `eos_token_
49
  warnings.filterwarnings("ignore", message="The attention mask is not set and cannot be inferred")
50
  warnings.filterwarnings("ignore", message="The `seen_tokens` attribute is deprecated")
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  class ModelCacheManager:
53
  """
54
  Manages model cache to prevent DynamicCache errors
@@ -58,7 +89,10 @@ class ModelCacheManager:
58
  self._clear_all_caches()
59
 
60
  def _clear_all_caches(self):
61
- """Clear all possible caches"""
 
 
 
62
  # Clear model cache
63
  if hasattr(self.model, 'clear_cache'):
64
  try:
@@ -72,6 +106,23 @@ class ModelCacheManager:
72
  except:
73
  pass
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # Clear transformers cache based on version
76
  try:
77
  if USE_LEGACY_CACHE:
@@ -94,6 +145,15 @@ class ModelCacheManager:
94
  GenerationConfig.clear_cache()
95
  except:
96
  pass
 
 
 
 
 
 
 
 
 
97
  except:
98
  pass
99
 
@@ -104,6 +164,13 @@ class ModelCacheManager:
104
  torch.cuda.empty_cache()
105
  except:
106
  pass
 
 
 
 
 
 
 
107
 
108
  def safe_call(self, method_name, *args, **kwargs):
109
  """Safely call model methods with cache management"""
@@ -167,6 +234,58 @@ class ModelCacheManager:
167
  except Exception as e:
168
  # Fallback to direct call
169
  return self.direct_call(method_name, *args, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  def initialize_model_safely():
172
  """
@@ -264,49 +383,109 @@ def direct_model_call(model, method_name, *args, **kwargs):
264
 
265
  def safe_model_call_with_dynamic_cache_fix(model, method_name, *args, **kwargs):
266
  """
267
- Safe model call that handles DynamicCache errors specifically
268
  """
 
269
  try:
270
- return direct_model_call(model, method_name, *args, **kwargs)
271
- except AttributeError as e:
272
- if "get_max_length" in str(e) and "DynamicCache" in str(e):
273
- # This is the specific DynamicCache error we need to handle
274
- print("DynamicCache error detected, applying workaround...")
 
275
 
276
- # Try to clear any existing cache
277
  try:
278
- if hasattr(model, 'clear_cache'):
279
- model.clear_cache()
280
- # Also try to clear transformers cache
281
  import torch
282
  if torch.cuda.is_available():
283
  torch.cuda.empty_cache()
284
  except:
285
  pass
286
 
287
- # Try the call again with minimal parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  try:
289
- # Create minimal kwargs with only essential parameters
290
- minimal_kwargs = {}
291
- essential_params = ['ocr_type', 'render', 'save_render_file', 'ocr_box', 'ocr_color']
292
- for key, value in kwargs.items():
293
- if key in essential_params and 'cache' not in key.lower():
294
- minimal_kwargs[key] = value
295
 
296
- method = getattr(model, method_name)
297
- return method(*args, **minimal_kwargs)
298
- except Exception as retry_error:
299
- # If still failing, try with even more minimal approach
300
  try:
301
- # Try with only the most basic parameters
302
- basic_kwargs = {}
303
- if 'ocr_type' in kwargs:
304
- basic_kwargs['ocr_type'] = kwargs['ocr_type']
305
 
306
- method = getattr(model, method_name)
307
- return method(*args, **basic_kwargs)
308
- except Exception as final_error:
309
- raise Exception(f"DynamicCache workaround failed: {str(final_error)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  else:
311
  # Re-raise if it's not the DynamicCache error
312
  raise e
@@ -319,6 +498,9 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
319
  """
320
  Process image with OCR using ZeroGPU-compatible approach
321
  """
 
 
 
322
  if image is None:
323
  return "Error: No image provided", None, None
324
 
@@ -345,22 +527,22 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
345
  else:
346
  return "Error: Unsupported image format", None, None
347
 
348
- # Use safe model calls with DynamicCache error handling
349
  try:
350
  if task == "Plain Text OCR":
351
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='ocr')
352
  return res, None, unique_id
353
  else:
354
  if task == "Format Text OCR":
355
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
356
  elif task == "Fine-grained OCR (Box)":
357
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
358
  elif task == "Fine-grained OCR (Color)":
359
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
360
  elif task == "Multi-crop OCR":
361
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat_crop', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
362
  elif task == "Render Formatted OCR":
363
- res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
364
 
365
  if os.path.exists(result_path):
366
  with open(result_path, 'r') as f:
@@ -369,22 +551,22 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
369
  else:
370
  return res, None, unique_id
371
  except Exception as e:
372
- # If safe call fails, try with cache manager as fallback
373
  try:
374
  if task == "Plain Text OCR":
375
- res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='ocr')
376
  return res, None, unique_id
377
  else:
378
  if task == "Format Text OCR":
379
- res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
380
  elif task == "Fine-grained OCR (Box)":
381
- res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
382
  elif task == "Fine-grained OCR (Color)":
383
- res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
384
  elif task == "Multi-crop OCR":
385
- res = cache_manager.safe_call('chat_crop', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
386
  elif task == "Render Formatted OCR":
387
- res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
388
 
389
  if os.path.exists(result_path):
390
  with open(result_path, 'r') as f:
@@ -393,7 +575,31 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
393
  else:
394
  return res, None, unique_id
395
  except Exception as fallback_error:
396
- return f"Error: {str(fallback_error)}", None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
  except Exception as e:
399
  return f"Error: {str(e)}", None, None
 
49
  warnings.filterwarnings("ignore", message="The attention mask is not set and cannot be inferred")
50
  warnings.filterwarnings("ignore", message="The `seen_tokens` attribute is deprecated")
51
 
52
def global_cache_clear():
    """Best-effort clearing of GPU and transformers caches; never raises.

    Each step runs independently, so a failure in one of them (for example
    torch not being importable) no longer skips the steps that follow:

    1. Release PyTorch's cached CUDA memory when CUDA is available.
    2. Call ``transformers.cache_utils.clear_cache`` if it can be imported.
    3. Call ``DynamicCache.clear_all`` if the class exposes such a method.
    4. Run a garbage-collection pass.

    Returns:
        None.
    """
    # Clear torch cache
    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception as e:
        print(f"Global cache clear warning: {str(e)}")

    # Clear transformers cache
    # NOTE(review): it is not confirmed that transformers.cache_utils exports
    # `clear_cache`; if it does not, this step is silently a no-op.
    try:
        from transformers.cache_utils import clear_cache

        clear_cache()
    except Exception:
        pass

    # Clear any DynamicCache instances (only when a class-level hook exists)
    try:
        from transformers.cache_utils import DynamicCache

        if hasattr(DynamicCache, 'clear_all'):
            DynamicCache.clear_all()
    except Exception:
        pass

    # Force garbage collection
    try:
        import gc

        gc.collect()
    except Exception as e:
        print(f"Global cache clear warning: {str(e)}")
82
+
83
  class ModelCacheManager:
84
  """
85
  Manages model cache to prevent DynamicCache errors
 
89
  self._clear_all_caches()
90
 
91
  def _clear_all_caches(self):
92
+ """Clear all possible caches including DynamicCache"""
93
+ # Use global cache clearing first
94
+ global_cache_clear()
95
+
96
  # Clear model cache
97
  if hasattr(self.model, 'clear_cache'):
98
  try:
 
106
  except:
107
  pass
108
 
109
+ # Clear any generation cache
110
+ try:
111
+ if hasattr(self.model, 'generation_config'):
112
+ if hasattr(self.model.generation_config, 'clear_cache'):
113
+ self.model.generation_config.clear_cache()
114
+ except:
115
+ pass
116
+
117
+ # Clear any cache attributes that might cause DynamicCache issues
118
+ cache_attrs = ['cache', '_cache', 'past_key_values', 'use_cache', '_past_key_values']
119
+ for attr in cache_attrs:
120
+ if hasattr(self.model, attr):
121
+ try:
122
+ delattr(self.model, attr)
123
+ except:
124
+ pass
125
+
126
  # Clear transformers cache based on version
127
  try:
128
  if USE_LEGACY_CACHE:
 
145
  GenerationConfig.clear_cache()
146
  except:
147
  pass
148
+
149
+ # Try to clear DynamicCache specifically
150
+ try:
151
+ from transformers.cache_utils import DynamicCache
152
+ # Clear any global DynamicCache instances
153
+ if hasattr(DynamicCache, 'clear_all'):
154
+ DynamicCache.clear_all()
155
+ except:
156
+ pass
157
  except:
158
  pass
159
 
 
164
  torch.cuda.empty_cache()
165
  except:
166
  pass
167
+
168
+ # Force garbage collection
169
+ try:
170
+ import gc
171
+ gc.collect()
172
+ except:
173
+ pass
174
 
175
  def safe_call(self, method_name, *args, **kwargs):
176
  """Safely call model methods with cache management"""
 
234
  except Exception as e:
235
  # Fallback to direct call
236
  return self.direct_call(method_name, *args, **kwargs)
237
+
238
def dynamic_cache_safe_call(self, method_name, *args, **kwargs):
    """Call a model method with cache-related state temporarily stripped.

    Cache-related attributes found on ``self.model`` are saved and deleted,
    ``self._clear_all_caches()`` is invoked, and the method is called with
    only the whitelisted OCR keyword arguments. The saved attributes are
    restored in a ``finally`` block, so the model is left intact even when
    the call raises.

    Args:
        method_name: Name of the method to look up on ``self.model``.
        *args: Positional arguments forwarded unchanged.
        **kwargs: Keyword arguments; only ``ocr_type``, ``render``,
            ``save_render_file``, ``ocr_box`` and ``ocr_color`` are forwarded.

    Returns:
        Whatever the model method returns.

    Raises:
        Exception: If the DynamicCache ``get_max_length`` error occurred and
            the retry without keyword arguments also failed.
        AttributeError: Any other attribute error, re-raised unchanged.
    """
    cache_attrs = ['cache', '_cache', 'past_key_values', 'use_cache', '_past_key_values']
    essential_params = ['ocr_type', 'render', 'save_render_file', 'ocr_box', 'ocr_color']
    original_attrs = {}

    try:
        try:
            # Store and remove cache-related attributes
            for attr in cache_attrs:
                if hasattr(self.model, attr):
                    original_attrs[attr] = getattr(self.model, attr)
                    try:
                        delattr(self.model, attr)
                    except Exception:
                        # Best effort: e.g. class-level attributes cannot be
                        # deleted from the instance.
                        pass

            # Clear all caches
            self._clear_all_caches()

            # Forward only the whitelisted keyword arguments
            minimal_kwargs = {
                key: value for key, value in kwargs.items()
                if key in essential_params
            }

            method = getattr(self.model, method_name)
            return method(*args, **minimal_kwargs)

        except AttributeError as e:
            if "get_max_length" in str(e) and "DynamicCache" in str(e):
                # If DynamicCache error still occurs, try with no parameters
                try:
                    method = getattr(self.model, method_name)
                    return method(*args)
                except Exception as final_error:
                    raise Exception(
                        f"DynamicCache safe call failed: {str(final_error)}"
                    ) from final_error
            raise
    finally:
        # Restore original attributes on every exit path, not just success
        for attr, value in original_attrs.items():
            try:
                setattr(self.model, attr, value)
            except Exception:
                pass
289
 
290
  def initialize_model_safely():
291
  """
 
383
 
384
  def safe_model_call_with_dynamic_cache_fix(model, method_name, *args, **kwargs):
385
  """
386
+ Comprehensive safe model call that handles DynamicCache errors with multiple fallback strategies
387
  """
388
+ # Strategy 1: Try with complete cache clearing and minimal parameters
389
  try:
390
+ # Clear all possible caches first
391
+ try:
392
+ if hasattr(model, 'clear_cache'):
393
+ model.clear_cache()
394
+ if hasattr(model, '_clear_cache'):
395
+ model._clear_cache()
396
 
397
+ # Clear transformers cache
398
  try:
 
 
 
399
  import torch
400
  if torch.cuda.is_available():
401
  torch.cuda.empty_cache()
402
  except:
403
  pass
404
 
405
+ # Clear any generation cache
406
+ try:
407
+ if hasattr(model, 'generation_config'):
408
+ if hasattr(model.generation_config, 'clear_cache'):
409
+ model.generation_config.clear_cache()
410
+ except:
411
+ pass
412
+ except:
413
+ pass
414
+
415
+ # Create minimal kwargs with only essential parameters
416
+ minimal_kwargs = {}
417
+ essential_params = ['ocr_type', 'render', 'save_render_file', 'ocr_box', 'ocr_color']
418
+ for key, value in kwargs.items():
419
+ if key in essential_params and 'cache' not in key.lower():
420
+ minimal_kwargs[key] = value
421
+
422
+ method = getattr(model, method_name)
423
+ return method(*args, **minimal_kwargs)
424
+
425
+ except AttributeError as e:
426
+ if "get_max_length" in str(e) and "DynamicCache" in str(e):
427
+ print("DynamicCache error detected, applying comprehensive workaround...")
428
+
429
+ # Strategy 2: Try with model cache manager
430
  try:
431
+ return cache_manager.direct_call(method_name, *args, **kwargs)
432
+ except Exception as cache_error:
433
+ print(f"Cache manager failed: {str(cache_error)}")
 
 
 
434
 
435
+ # Strategy 3: Try with legacy cache handling
 
 
 
436
  try:
437
+ return cache_manager.legacy_call(method_name, *args, **kwargs)
438
+ except Exception as legacy_error:
439
+ print(f"Legacy cache handling failed: {str(legacy_error)}")
 
440
 
441
+ # Strategy 4: Try with completely stripped parameters
442
+ try:
443
+ # Remove ALL parameters except the most basic ones
444
+ stripped_kwargs = {}
445
+ if 'ocr_type' in kwargs:
446
+ stripped_kwargs['ocr_type'] = kwargs['ocr_type']
447
+
448
+ method = getattr(model, method_name)
449
+ return method(*args, **stripped_kwargs)
450
+ except Exception as stripped_error:
451
+ print(f"Stripped parameters failed: {str(stripped_error)}")
452
+
453
+ # Strategy 5: Try with monkey patching to bypass cache
454
+ try:
455
+ # Temporarily disable cache-related attributes
456
+ original_attrs = {}
457
+
458
+ # Store original attributes that might cause issues
459
+ for attr_name in ['cache', '_cache', 'past_key_values', 'use_cache']:
460
+ if hasattr(model, attr_name):
461
+ original_attrs[attr_name] = getattr(model, attr_name)
462
+ try:
463
+ delattr(model, attr_name)
464
+ except:
465
+ pass
466
+
467
+ # Try the call
468
+ method = getattr(model, method_name)
469
+ result = method(*args, **stripped_kwargs)
470
+
471
+ # Restore original attributes
472
+ for attr_name, value in original_attrs.items():
473
+ try:
474
+ setattr(model, attr_name, value)
475
+ except:
476
+ pass
477
+
478
+ return result
479
+
480
+ except Exception as monkey_error:
481
+ print(f"Monkey patching failed: {str(monkey_error)}")
482
+
483
+ # Strategy 6: Final fallback - try with no parameters at all
484
+ try:
485
+ method = getattr(model, method_name)
486
+ return method(*args)
487
+ except Exception as final_error:
488
+ raise Exception(f"All DynamicCache workarounds failed. Last error: {str(final_error)}")
489
  else:
490
  # Re-raise if it's not the DynamicCache error
491
  raise e
 
498
  """
499
  Process image with OCR using ZeroGPU-compatible approach
500
  """
501
+ # Clear global cache at the start to prevent DynamicCache issues
502
+ global_cache_clear()
503
+
504
  if image is None:
505
  return "Error: No image provided", None, None
506
 
 
527
  else:
528
  return "Error: Unsupported image format", None, None
529
 
530
+ # Use specialized DynamicCache-safe model calls
531
  try:
532
  if task == "Plain Text OCR":
533
+ res = cache_manager.dynamic_cache_safe_call('chat', tokenizer, image_path, ocr_type='ocr')
534
  return res, None, unique_id
535
  else:
536
  if task == "Format Text OCR":
537
+ res = cache_manager.dynamic_cache_safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
538
  elif task == "Fine-grained OCR (Box)":
539
+ res = cache_manager.dynamic_cache_safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
540
  elif task == "Fine-grained OCR (Color)":
541
+ res = cache_manager.dynamic_cache_safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
542
  elif task == "Multi-crop OCR":
543
+ res = cache_manager.dynamic_cache_safe_call('chat_crop', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
544
  elif task == "Render Formatted OCR":
545
+ res = cache_manager.dynamic_cache_safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
546
 
547
  if os.path.exists(result_path):
548
  with open(result_path, 'r') as f:
 
551
  else:
552
  return res, None, unique_id
553
  except Exception as e:
554
+ # If dynamic cache safe call fails, try with comprehensive workaround
555
  try:
556
  if task == "Plain Text OCR":
557
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='ocr')
558
  return res, None, unique_id
559
  else:
560
  if task == "Format Text OCR":
561
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
562
  elif task == "Fine-grained OCR (Box)":
563
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
564
  elif task == "Fine-grained OCR (Color)":
565
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
566
  elif task == "Multi-crop OCR":
567
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat_crop', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
568
  elif task == "Render Formatted OCR":
569
+ res = safe_model_call_with_dynamic_cache_fix(model, 'chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
570
 
571
  if os.path.exists(result_path):
572
  with open(result_path, 'r') as f:
 
575
  else:
576
  return res, None, unique_id
577
  except Exception as fallback_error:
578
+ # Final fallback to basic cache manager
579
+ try:
580
+ if task == "Plain Text OCR":
581
+ res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='ocr')
582
+ return res, None, unique_id
583
+ else:
584
+ if task == "Format Text OCR":
585
+ res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
586
+ elif task == "Fine-grained OCR (Box)":
587
+ res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
588
+ elif task == "Fine-grained OCR (Color)":
589
+ res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
590
+ elif task == "Multi-crop OCR":
591
+ res = cache_manager.safe_call('chat_crop', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
592
+ elif task == "Render Formatted OCR":
593
+ res = cache_manager.safe_call('chat', tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
594
+
595
+ if os.path.exists(result_path):
596
+ with open(result_path, 'r') as f:
597
+ html_content = f.read()
598
+ return res, html_content, unique_id
599
+ else:
600
+ return res, None, unique_id
601
+ except Exception as final_error:
602
+ return f"Error: {str(final_error)}", None, None
603
 
604
  except Exception as e:
605
  return f"Error: {str(e)}", None, None