import spaces
import os
import sys
import subprocess

def install_packages():
    # Install unsloth at startup; on Hugging Face Spaces this runs once
    # when the app boots.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth-zoo"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps", "git+https://github.com/unslothai/unsloth.git"])

try:
    install_packages()
except Exception as e:
    print(f"Failed to install packages: {e}")
###
# # Optional: add these lines at the top of the file, before importing torch,
# # to get verbose dynamo/inductor logs when debugging compile errors.
# import os
# # Set all env variables before importing torch
# os.environ['TORCH_LOGS'] = '+dynamo'
# os.environ['TORCHDYNAMO_VERBOSE'] = '1'
# os.environ['TORCH_INDUCTOR_BACKEND'] = 'CUDA'
# # os.environ['NVIDIA_VISIBLE_DEVICES'] = ''  # check whether this is actually needed
# os.environ['TORCHDYNAMO_DEBUG'] = '1'  # extra debug info
###
import warnings
import torch

# Optional: dynamo/inductor settings to apply before importing unsloth.
# torch._dynamo.config.suppress_errors = True
# torch._dynamo.config.verbose = False
# torch._inductor.config.fallback_random = True
# torch._inductor.config.triton.cudagraphs = False
# torch._inductor.config.disable_kernel_cache = True

from transformers import TextStreamer
import gradio as gr
from huggingface_hub import login
from PIL import Image

warnings.filterwarnings('ignore')
model = None
tokenizer = None

if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
    print("Logging in to the Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")
# @spaces.GPU  # re-enable when running on a ZeroGPU Space
def load_model():
    global model
    print("Loading model...")
    try:
        from transformers import AutoModelForVision2Seq
        # Load the fine-tuned radiology model in 4-bit to save GPU memory.
        print("Loading fine-tuned model...")
        model = AutoModelForVision2Seq.from_pretrained(
            "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
            load_in_4bit=True,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        print("Model loaded successfully!")
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
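# Note: `load_in_4bit=True` is the older shorthand; recent transformers
# releases prefer an explicit BitsAndBytesConfig. A minimal sketch of the
# equivalent call, assuming bitsandbytes is installed:
#
# from transformers import BitsAndBytesConfig
# model = AutoModelForVision2Seq.from_pretrained(
#     "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
#     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#     device_map="auto",
#     torch_dtype=torch.float16,
# )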
def process_image(image):
    global model, tokenizer

    # Put the model into unsloth's inference mode and, on the first call,
    # load the processor ("tokenizer") from the base model.
    from unsloth import FastVisionModel
    FastVisionModel.for_inference(model)
    if tokenizer is None:
        print("Loading tokenizer...")
        _base_model, tokenizer = FastVisionModel.from_pretrained(
            "unsloth/Llama-3.2-11B-Vision-Instruct",
            use_gradient_checkpointing="unsloth",
        )

    print("\nIn process_image():")
    print("Type of model:", type(model))
    print("Type of tokenizer:", type(tokenizer))
    if image is None:
        return "Please upload an image"

    try:
        # Gradio may hand us a numpy array; normalize to a PIL image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        print("Image info:", type(image), image.size)

        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction},
            ]}
        ]
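        # The {"type": "image"} placeholder tells the chat template where the
        # image tokens should be inserted relative to the instruction text.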
        # Build the prompt from the chat template, then tokenize image + text.
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        print("Chat template output:", input_text[:100])
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        print("Tokenizer inputs:", inputs.keys())
        # Stream tokens to stdout while generating, then return the decoded text.
        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
        outputs = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=256,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    except Exception as e:
        return f"Error: {str(e)}"
if load_model():
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=gr.Textbox(label="Generated Caption"),
        title="Medical Vision Analysis",
    )

    if __name__ == "__main__":
        demo.launch()
else:
    print("Model failed to load; not starting the Gradio app.")
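# To run locally (assuming this file is saved as app.py, with a CUDA GPU and
# the dependencies above installed):
#   python app.py
# then open the URL Gradio prints, typically http://127.0.0.1:7860.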