import spaces

import os
import sys
import subprocess


def install_packages():
    # Install unsloth at runtime; --no-deps keeps the Space's pinned
    # torch/transformers stack intact.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth-zoo"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps", "git+https://github.com/unslothai/unsloth.git"])


try:
    install_packages()
except Exception as e:
    print(f"Failed to install packages: {e}")


import warnings
import torch

from transformers import TextStreamer
import gradio as gr
from huggingface_hub import login
from PIL import Image

warnings.filterwarnings('ignore')

# `model` is populated by load_model(); the processor/tokenizer is loaded
# inside process_image() instead, so this global stays unused.
model = None
tokenizer = None

if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
    print("Logging in to the Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")
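

# load_model() runs once at import time and caches the fine-tuned checkpoint in
# the module-level `model`. Loading in 4-bit relies on bitsandbytes, which
# assumes a CUDA device is available when this runs.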
def load_model():
    global model
    print("Loading model...")
    try:
        from transformers import AutoModelForVision2Seq
        print("Loading fine-tuned model...")

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = AutoModelForVision2Seq.from_pretrained(
            "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
            load_in_4bit=True,
            device_map=device,
            torch_dtype=torch.float16,
        )

        print("Model loaded successfully!")
        return True

    except Exception as e:
        print(f"Error loading the model: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
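

# If a future transformers release drops the bare load_in_4bit flag, an
# equivalent sketch (assuming bitsandbytes is installed) would be:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForVision2Seq.from_pretrained(
#       "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map=device,
#   )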


@spaces.GPU(duration=120)
def process_image(image):
    global model

    if image is None:
        return "Please upload an image"

    from unsloth import FastVisionModel

    # Switch the model into unsloth's optimized inference mode.
    FastVisionModel.for_inference(model)

    print("Loading tokenizer...")
    # The processor ("tokenizer") comes from the base model, which shares its
    # chat template and image preprocessing with the fine-tuned checkpoint.
    base_model, tokenizer = FastVisionModel.from_pretrained(
        "unsloth/Llama-3.2-11B-Vision-Instruct",
        use_gradient_checkpointing="unsloth",
    )

    print("\nIn process_image():")
    print("Type of model:", type(model))
    print("A. Type of tokenizer:", type(tokenizer))
    if tokenizer is not None:
        print("B. Available methods:", dir(tokenizer))

    try:
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        print("0. Image info:", type(image), image.size)
        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction},
            ]},
        ]

        print("1. Messages:", messages)

        print("2. Tokenizer type:", type(tokenizer))
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        print("3. Chat template success:", input_text[:100])
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        print("4. Tokenizer inputs:", inputs.keys())

        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
        outputs = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=256,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )

        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    except Exception as e:
        return f"Error: {str(e)}"
if load_model():
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=gr.Textbox(label="Generated Caption"),
        title="Medical Vision Analysis",
    )

    if __name__ == "__main__":
        demo.launch()
else:
    print("Model failed to load; the app will not launch.")
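
# When run directly (e.g. `python app.py`, as Hugging Face Spaces does), this
# starts the Gradio server; a CUDA GPU is assumed for the 4-bit load above.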