Spaces:

0llheaven
/

FT_Llama

Runtime error

File size: 9,587 Bytes

import spaces
import os
import sys
import subprocess

def install_packages():
    """Install the Unsloth packages with pip, using the running interpreter.

    ``unsloth-zoo`` is installed first, then Unsloth itself from its GitHub
    repository with ``--no-deps``.

    Raises:
        subprocess.CalledProcessError: if a pip invocation exits non-zero;
            any remaining install is then not attempted.
    """
    pip_install = [sys.executable, "-m", "pip", "install"]
    requirement_args = (
        ["unsloth-zoo"],
        ["--no-deps", "git+https://github.com/unslothai/unsloth.git"],
    )
    for extra_args in requirement_args:
        subprocess.check_call(pip_install + extra_args)

# Best-effort dependency bootstrap, executed at import time: if the install
# fails, the error is only printed and the script carries on.
try:
    install_packages()
except Exception as e:
    print(f"Failed to install packages: {e}")


###
# # Add these lines at the top of the code, before importing torch
# import os
# # Set all env variables before importing torch
# os.environ['TORCH_LOGS'] = '+dynamo'
# os.environ['TORCHDYNAMO_VERBOSE'] = '1'
# os.environ['TORCH_INDUCTOR_BACKEND'] = 'CUDA'
# # os.environ['NVIDIA_VISIBLE_DEVICES'] = ''  # may need to check whether this is necessary

# os.environ['TORCHDYNAMO_DEBUG'] = '1'  # added to show debug info
###

import warnings
import torch

# Change 1: add the dynamo settings before importing unsloth
# torch._dynamo.config.suppress_errors = True
# torch._dynamo.config.verbose = False
# torch._inductor.config.fallback_random = True  # added this line

# torch._inductor.config.triton.cudagraphs = False
# torch._inductor.config.disable_kernel_cache = True

 
from transformers import TextStreamer
import gradio as gr
from huggingface_hub import login
from PIL import Image

warnings.filterwarnings('ignore')

model = None
tokenizer = None

# Authenticate against the Hugging Face Hub when a token is supplied through
# the environment; without one, only a warning is printed.
if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
else:
    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])

# @spaces.GPU
# def load_model():
#     global model, tokenizer
#     print("กำลังโหลดโมเดล...")
#     try:
#         from unsloth import FastVisionModel
#         # โหลด base model และ tokenizer แบบพื้นฐาน
#         base_model, tokenizer = FastVisionModel.from_pretrained(
#             "unsloth/Llama-3.2-11B-Vision-Instruct"
#         )
        
#         print("โหลด base model และ tokenizer สำเร็จ")
        
#         # โหลดโมเดล fine-tuned แบบพื้นฐาน
#         from transformers import AutoModelForVision2Seq
#         model = AutoModelForVision2Seq.from_pretrained(
#             "Aekanun/Llama-3.2-11B-Vision-Instruct-XRay"
#         ).to('cuda')
        
#         print("โหลดโมเดลสำเร็จ!")
#         return True
#     except Exception as e:
#         print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
#         import traceback
#         traceback.print_exc()  # เพิ่มการแสดง stack trace
#         return False

###@spaces.GPU
def load_model():
    """Load the fine-tuned vision model into the module-level ``model``.

    The checkpoint ``0llheaven/Llama-3.2-11B-Vision-Radiology-mini`` is loaded
    4-bit quantized, with ``device_map="auto"`` and ``torch.float16`` as the
    torch dtype.

    Returns:
        bool: True when the model was loaded; False when any exception was
        raised while importing or loading (the message and the traceback are
        printed instead of being propagated).
    """
    global model
    print("กำลังโหลดโมเดล...")
    try:
        # Imported inside the try so that an import failure is reported
        # through the same False return as a failed download/load.
        from transformers import AutoModelForVision2Seq, BitsAndBytesConfig

        print("กำลังโหลดโมเดล fine-tuned...")
        model = AutoModelForVision2Seq.from_pretrained(
            "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
            # Passing ``load_in_4bit`` directly to from_pretrained is
            # deprecated; the quantization settings belong in a
            # BitsAndBytesConfig handed over as ``quantization_config``.
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map="auto",
            torch_dtype=torch.float16,
        )

        print("โหลดโมเดลสำเร็จ!")
        return True

    except Exception as e:
        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

def _get_processor():
    """Return the processor used to build model inputs, loading it on first use.

    Only the processor of ``unsloth/Llama-3.2-11B-Vision-Instruct`` is
    fetched (no model weights) and it is kept in the module-level
    ``tokenizer`` so later calls in the same process reuse it.
    NOTE(review): whether this cache survives between requests depends on how
    the ``@spaces.GPU`` worker process is managed - confirm on the Space.
    """
    global tokenizer
    if tokenizer is None:
        from transformers import AutoProcessor

        print("กำลังโหลด tokenizer...")
        tokenizer = AutoProcessor.from_pretrained(
            "unsloth/Llama-3.2-11B-Vision-Instruct"
        )
    return tokenizer


@spaces.GPU(duration=120)
def process_image(image):
    """Generate a radiology description for an uploaded image.

    Args:
        image: a ``PIL.Image.Image``, or an array accepted by
            ``Image.fromarray``; ``None`` when nothing was uploaded.

    Returns:
        str: the generated description (prompt excluded), a request to upload
        an image when ``image`` is None, or an error message when
        preprocessing or generation raised.
    """
    global model

    # Reject an empty upload before doing any model or processor work.
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    from unsloth import FastVisionModel

    # Switch the already loaded model to Unsloth's inference mode.
    FastVisionModel.for_inference(model)

    # The previous implementation reloaded the entire base model on every
    # request only to obtain its tokenizer; the processor alone is enough.
    processor = _get_processor()

    print("\nใน process_image():")
    print("Type of model:", type(model))
    print("A. Type of tokenizer:", type(processor))
    if processor is not None:
        print("B. Available methods:", dir(processor))

    try:
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        print("0. Image info:", type(image), image.size)
        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction}
            ]}
        ]
        print("1. Messages:", messages)

        print("2. Tokenizer type:", type(processor))
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        print("3. Chat template success:", input_text[:100])
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        print("3. Tokenizer inputs:", inputs.keys())

        text_streamer = TextStreamer(processor, skip_prompt=True)
        outputs = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=256,
            use_cache=True,
            temperature=1.5,
            min_p=0.1
        )

        # generate() returns the prompt tokens followed by the new tokens;
        # decode only the new ones so the caption does not echo the prompt
        # (mirrors skip_prompt=True on the streamer).
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_length:]
        return processor.decode(generated_ids, skip_special_tokens=True).strip()

    except Exception as e:
        # Keep the traceback in the logs, as load_model() does, while the
        # user only sees the short message.
        import traceback
        traceback.print_exc()
        return f"เกิดข้อผิดพลาด: {str(e)}"

# The interface is only built when the model loaded successfully; otherwise
# nothing is defined or launched here.
if load_model():
    demo = gr.Interface(
        process_image,                                  # fn
        gr.Image(label="Upload Image", type="pil"),     # inputs
        gr.Textbox(label="Generated Caption"),          # outputs
        title="Medical Vision Analysis",
    )

    # Start the server only when executed as a script, not when imported.
    if __name__ == "__main__":
        demo.launch()