import spaces
import os
import sys
import subprocess

def install_packages():
    # Install unsloth at startup; on Hugging Face Spaces this runs once
    # when the app boots.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth-zoo"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps", "git+https://github.com/unslothai/unsloth.git"])

try:
    install_packages()
except Exception as e:
    print(f"Failed to install packages: {e}")
###
# # Optional: add these lines at the top of the file, before importing torch,
# # to get verbose dynamo/inductor logs when debugging compile errors.
# import os
# # Set all env variables before importing torch
# os.environ['TORCH_LOGS'] = '+dynamo'
# os.environ['TORCHDYNAMO_VERBOSE'] = '1'
# os.environ['TORCH_INDUCTOR_BACKEND'] = 'CUDA'
# # os.environ['NVIDIA_VISIBLE_DEVICES'] = ''  # check whether this is actually needed
# os.environ['TORCHDYNAMO_DEBUG'] = '1'  # extra debug info
###
import warnings
import torch

# Optional: dynamo/inductor settings to apply before importing unsloth.
# torch._dynamo.config.suppress_errors = True
# torch._dynamo.config.verbose = False
# torch._inductor.config.fallback_random = True
# torch._inductor.config.triton.cudagraphs = False
# torch._inductor.config.disable_kernel_cache = True

from transformers import TextStreamer
import gradio as gr
from huggingface_hub import login
from PIL import Image

warnings.filterwarnings('ignore')
model = None
tokenizer = None

if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
    print("Logging in to the Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")
# @spaces.GPU  # re-enable when running on a ZeroGPU Space
def load_model():
    global model
    print("Loading model...")
    try:
        from transformers import AutoModelForVision2Seq
        # Load the fine-tuned radiology model in 4-bit to save GPU memory.
        print("Loading fine-tuned model...")
        model = AutoModelForVision2Seq.from_pretrained(
            "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
            load_in_4bit=True,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        print("Model loaded successfully!")
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
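# Note: `load_in_4bit=True` is the older shorthand; recent transformers
# releases prefer an explicit BitsAndBytesConfig. A minimal sketch of the
# equivalent call, assuming bitsandbytes is installed:
#
# from transformers import BitsAndBytesConfig
# model = AutoModelForVision2Seq.from_pretrained(
#     "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
#     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#     device_map="auto",
#     torch_dtype=torch.float16,
# )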
def process_image(image):
    global model, tokenizer

    # Put the model into unsloth's inference mode and, on the first call,
    # load the processor ("tokenizer") from the base model.
    from unsloth import FastVisionModel
    FastVisionModel.for_inference(model)
    if tokenizer is None:
        print("Loading tokenizer...")
        _base_model, tokenizer = FastVisionModel.from_pretrained(
            "unsloth/Llama-3.2-11B-Vision-Instruct",
            use_gradient_checkpointing="unsloth",
        )

    print("\nIn process_image():")
    print("Type of model:", type(model))
    print("Type of tokenizer:", type(tokenizer))
    if image is None:
        return "Please upload an image"

    try:
        # Gradio may hand us a numpy array; normalize to a PIL image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        print("Image info:", type(image), image.size)

        instruction = "You are an expert radiographer. Describe accurately what you see in this image."
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction},
            ]}
        ]
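        # The {"type": "image"} placeholder tells the chat template where the
        # image tokens should be inserted relative to the instruction text.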
        # Build the prompt from the chat template, then tokenize image + text.
        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        print("Chat template output:", input_text[:100])
        inputs = tokenizer(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")
        print("Tokenizer inputs:", inputs.keys())
        # Stream tokens to stdout while generating, then return the decoded text.
        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
        outputs = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=256,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    except Exception as e:
        return f"Error: {str(e)}"
if load_model():
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=gr.Textbox(label="Generated Caption"),
        title="Medical Vision Analysis",
    )

    if __name__ == "__main__":
        demo.launch()
else:
    print("Model failed to load; not starting the Gradio app.")
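# To run locally (assuming this file is saved as app.py, with a CUDA GPU and
# the dependencies above installed):
#   python app.py
# then open the URL Gradio prints, typically http://127.0.0.1:7860.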