Spaces:

stem-content-ai-project
/

content-pipeline

Sleeping

App Files Community

AK1239 committed on Mar 13

Commit

86e2f15

1 Parent(s): b2fe716

reverted back to old version

Browse files

Files changed (1) hide show

app/main.py +4 -16

app/main.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from transformers import pipeline, BitsAndBytesConfig
 import torch
 import nltk
 from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
@@ -418,10 +418,7 @@ async def startup_event():
     logger = logging.getLogger(__name__)
     logger.info("Starting application initialization...")
-    # Set PyTorch memory management settings
-    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
-    # Check if CUDA is available and has enough memory
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
@@ -441,24 +438,15 @@ async def startup_event():
         logger.error(f"Error downloading NLTK data: {str(e)}")
         raise Exception(f"Failed to initialize application: {str(e)}")
-    # Initialize the model and index with quantization
     try:
-        # Configure 8-bit quantization
-        quantization_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            llm_int8_threshold=6.0,  # Default threshold for good balance of performance/accuracy
-            llm_int8_skip_modules=None,  # No modules to skip
-            llm_int8_enable_fp32_cpu_offload=True  # Enable CPU offloading if needed
-        )
         app.state.pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             trust_remote_code=True,
             token=HUGGINGFACE_TOKEN,
             device_map="auto",
-            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-            quantization_config=quantization_config  # Add quantization config
         )
         faiss_index, documents, embedding_model = await load_or_create_index()

 import os
+from transformers import pipeline
 import torch
 import nltk
 from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
     logger = logging.getLogger(__name__)
     logger.info("Starting application initialization...")
+    # Check if CUDA is available
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
         logger.error(f"Error downloading NLTK data: {str(e)}")
         raise Exception(f"Failed to initialize application: {str(e)}")
+    # Initialize the model and index
     try:
         app.state.pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             trust_remote_code=True,
             token=HUGGINGFACE_TOKEN,
             device_map="auto",
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32
         )
         faiss_index, documents, embedding_model = await load_or_create_index()