AK1239 committed on
Commit
86e2f15
·
1 Parent(s): b2fe716

reverted back to old version

Browse files
Files changed (1) hide show
  1. app/main.py +4 -16
app/main.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- from transformers import pipeline, BitsAndBytesConfig
3
  import torch
4
  import nltk
5
  from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
@@ -418,10 +418,7 @@ async def startup_event():
418
  logger = logging.getLogger(__name__)
419
  logger.info("Starting application initialization...")
420
 
421
- # Set PyTorch memory management settings
422
- os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
423
-
424
- # Check if CUDA is available and has enough memory
425
  device = "cuda" if torch.cuda.is_available() else "cpu"
426
  logger.info(f"Using device: {device}")
427
 
@@ -441,24 +438,15 @@ async def startup_event():
441
  logger.error(f"Error downloading NLTK data: {str(e)}")
442
  raise Exception(f"Failed to initialize application: {str(e)}")
443
 
444
- # Initialize the model and index with quantization
445
  try:
446
- # Configure 8-bit quantization
447
- quantization_config = BitsAndBytesConfig(
448
- load_in_8bit=True,
449
- llm_int8_threshold=6.0, # Default threshold for good balance of performance/accuracy
450
- llm_int8_skip_modules=None, # No modules to skip
451
- llm_int8_enable_fp32_cpu_offload=True # Enable CPU offloading if needed
452
- )
453
-
454
  app.state.pipe = pipeline(
455
  "text-generation",
456
  model=MODEL_ID,
457
  trust_remote_code=True,
458
  token=HUGGINGFACE_TOKEN,
459
  device_map="auto",
460
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
461
- quantization_config=quantization_config # Add quantization config
462
  )
463
 
464
  faiss_index, documents, embedding_model = await load_or_create_index()
 
1
  import os
2
+ from transformers import pipeline
3
  import torch
4
  import nltk
5
  from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
 
418
  logger = logging.getLogger(__name__)
419
  logger.info("Starting application initialization...")
420
 
421
+ # Check if CUDA is available
 
 
 
422
  device = "cuda" if torch.cuda.is_available() else "cpu"
423
  logger.info(f"Using device: {device}")
424
 
 
438
  logger.error(f"Error downloading NLTK data: {str(e)}")
439
  raise Exception(f"Failed to initialize application: {str(e)}")
440
 
441
+ # Initialize the model and index
442
  try:
 
 
 
 
 
 
 
 
443
  app.state.pipe = pipeline(
444
  "text-generation",
445
  model=MODEL_ID,
446
  trust_remote_code=True,
447
  token=HUGGINGFACE_TOKEN,
448
  device_map="auto",
449
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32
 
450
  )
451
 
452
  faiss_index, documents, embedding_model = await load_or_create_index()