# NOTE(review): the original file began with stray non-Python text
# ("Spaces:" / "Runtime error" x2) — apparently pasted from an error
# report or table export; converted to a comment so the module parses.
# Third-party dependencies (the original lines carried trailing " | |"
# table-cell artifacts that made every line a syntax error — removed).
import torch
from fastapi import FastAPI, HTTPException
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Application singleton; load_model() attaches the model, adapter, and
# tokenizer to app.state for reuse by request handlers.
app = FastAPI()
async def load_model():
    """Load the 4-bit quantized base model, PEFT adapter, and tokenizer
    into ``app.state`` so request handlers can reuse them.

    NOTE(review): no ``@app.on_event("startup")`` decorator or lifespan
    registration is visible in this chunk — confirm this coroutine is
    actually wired up elsewhere, otherwise it never runs.

    Raises:
        HTTPException: status 500 carrying the underlying error message
            if any of the three load steps fails.
    """
    try:
        # NF4 4-bit quantization keeps the 8B model within a single-GPU
        # memory budget; double quantization also quantizes the
        # quantization constants for a small extra saving.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
        )

        # Base model; device_map="auto" lets accelerate place layers
        # across the available devices automatically.
        app.state.base_model = AutoModelForCausalLM.from_pretrained(
            "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit",
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )

        # Attach the fine-tuned PEFT (LoRA) adapter on top of the base model.
        app.state.model = PeftModel.from_pretrained(
            app.state.base_model,
            "LAWSA07/medical_fine_tuned_deepseekR1",
        )

        # Tokenizer from the same base checkpoint so the vocabulary
        # matches the model's embedding table.
        app.state.tokenizer = AutoTokenizer.from_pretrained(
            "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit"
        )
    except Exception as e:
        # Chain the cause (`from e`) so the original traceback survives.
        # NOTE(review): HTTPException only maps to an HTTP response inside
        # a request handler; if this runs at startup, a RuntimeError (and
        # letting the app fail to boot) may be more appropriate — confirm.
        raise HTTPException(
            status_code=500,
            detail=f"Model loading failed: {str(e)}",
        ) from e