Update app.py
app.py CHANGED
@@ -12,31 +12,9 @@ try:
 except Exception as e:
     print(f"Failed to install packages: {e}")
 
-
-###
-# # Add these lines at the top of the file, before importing torch
-# import os
-# # Set all the env variables before importing torch
-# os.environ['TORCH_LOGS'] = '+dynamo'
-# os.environ['TORCHDYNAMO_VERBOSE'] = '1'
-# os.environ['TORCH_INDUCTOR_BACKEND'] = 'CUDA'
-# # os.environ['NVIDIA_VISIBLE_DEVICES'] = ''  # may need to check whether this is necessary
-
-# os.environ['TORCHDYNAMO_DEBUG'] = '1'  # added to view debug info
-###
-
 import warnings
 import torch
 
-# Change 1: configure dynamo before importing unsloth
-# torch._dynamo.config.suppress_errors = True
-# torch._dynamo.config.verbose = False
-# torch._inductor.config.fallback_random = True  # added this line
-
-# torch._inductor.config.triton.cudagraphs = False
-# torch._inductor.config.disable_kernel_cache = True
-
-
 from transformers import TextStreamer
 import gradio as gr
 from huggingface_hub import login
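A note on the deleted debug block above: TORCH_LOGS and TORCHDYNAMO_VERBOSE only take effect if they are in the environment before torch is first imported, which is what the deleted comments were trying to ensure. A minimal sketch of that ordering, kept as a reference and using only the values that appear in the deleted lines:

    import os
    # These must be set before the first `import torch`; setting them
    # afterwards is too late for torch's logging setup to pick them up.
    os.environ['TORCH_LOGS'] = '+dynamo'       # enable TorchDynamo log output
    os.environ['TORCHDYNAMO_VERBOSE'] = '1'    # verbose dynamo error traces

    import torch  # import only after the environment is prepared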
@@ -53,99 +31,19 @@ if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
 else:
     print("Warning: HUGGING_FACE_HUB_TOKEN not found")
 
-# @spaces.GPU
-# def load_model():
-#     global model, tokenizer
-#     print("Loading model...")
-#     try:
-#         from unsloth import FastVisionModel
-#         # Load the base model and tokenizer the plain way
-#         base_model, tokenizer = FastVisionModel.from_pretrained(
-#             "unsloth/Llama-3.2-11B-Vision-Instruct"
-#         )
-
-#         print("Loaded the base model and tokenizer successfully")
-
-#         # Load the fine-tuned model the plain way
-#         from transformers import AutoModelForVision2Seq
-#         model = AutoModelForVision2Seq.from_pretrained(
-#             "Aekanun/Llama-3.2-11B-Vision-Instruct-XRay"
-#         ).to('cuda')
-
-#         print("Model loaded successfully!")
-#         return True
-#     except Exception as e:
-#         print(f"Error while loading the model: {str(e)}")
-#         import traceback
-#         traceback.print_exc()  # added to print the stack trace
-#         return False
-
 ###@spaces.GPU
 def load_model():
     global model
     print("Loading model...")
     try:
-        # Load the tokenizer from the base model
-        # from unsloth import FastVisionModel
-        # from transformers import AutoTokenizer
-        # print("Loading tokenizer...")
-        # base_model, _tokenizer = FastVisionModel.from_pretrained(
-        #     "unsloth/Llama-3.2-11B-Vision-Instruct",
-        #     use_gradient_checkpointing = "unsloth",
-        #     device_map="auto"  ### added here
-        # )
-
-        # tokenizer = _tokenizer  # assign straight to the global variable
-        # print(f"2. Tokenizer type: {type(tokenizer)}")
-        # print(f"3. Methods available on the tokenizer: {dir(tokenizer)}")
-        # print("4. Global tokenizer after assignment:", type(tokenizer))  # check the value
-
-        # print("Loaded the base model and tokenizer; loading the fine-tuned model...")
-
-        # # Load the fine-tuned model
-        # from transformers import AutoModelForVision2Seq
-        # print("Loading the fine-tuned model...")
-        # model = AutoModelForVision2Seq.from_pretrained(
-        #     "Aekanun/Llama-3.2-11B-Vision-Instruct-XRay",
-        #     device_map="auto",  ### added here
-        #     ###load_in_4bit=True,
-        #     torch_dtype=torch.float16
-        # ).to('cuda')
-
-        # FastVisionModel.for_inference(model)
-        # print("Model loaded successfully!")
-        # return True
         from transformers import AutoModelForVision2Seq
-        ### import torch
-
-        # print("Loading tokenizer...")
-        # # tokenizer = AutoTokenizer.from_pretrained(
-        # #     "unsloth/Llama-3.2-11B-Vision-Instruct",
-        # #     trust_remote_code=True
-        # # )
-        # tokenizer = AutoTokenizer.from_pretrained(
-        #     "meta-llama/Llama-3.2-11B-Vision-Instruct",
-        #     trust_remote_code=True,
-        #     use_auth_token=True
-        # )
-
-        # print(f"2. Tokenizer type: {type(tokenizer)}")
-        # print(f"3. Methods available on the tokenizer: {dir(tokenizer)}")
-        # print("4. Global tokenizer after assignment:", type(tokenizer))
-
-        # print("Tokenizer loaded; loading the fine-tuned model...")
-
-        # Load the fine-tuned model
         print("Loading the fine-tuned model...")
-
-
-        # device_map="auto",
-        # torch_dtype=torch.float16
-        # ).to('cuda')
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
         model = AutoModelForVision2Seq.from_pretrained(
             "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
             load_in_4bit = True,
-            device_map=
+            device_map=device,
             torch_dtype = torch.float16
         )
 
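The substantive fix in this hunk: the previous revision ended the call with a dangling `device_map=` (old line 148), a SyntaxError that prevented app.py from being imported at all; the commit completes the argument with a detected device. One caveat: `load_in_4bit=True` goes through bitsandbytes, whose 4-bit kernels are CUDA-only, so the `cpu` fallback would still fail at load time. A sketch of the same load with an explicit quantization config (assumes a transformers version that ships BitsAndBytesConfig, and a CUDA GPU):

    import torch
    from transformers import AutoModelForVision2Seq, BitsAndBytesConfig

    # Pick the device up front; bitsandbytes 4-bit weights require CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = AutoModelForVision2Seq.from_pretrained(
        "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
        # Explicit config instead of the bare load_in_4bit=True kwarg,
        # which recent transformers releases steer away from.
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map=device,
        torch_dtype=torch.float16,
    )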
@@ -160,10 +58,6 @@ def load_model():
 
 @spaces.GPU(duration=120)
 def process_image(image):
-    # # Moved here
-    # import os
-    # os.environ['TORCH_LOGS'] = '+dynamo'
-    # os.environ['TORCHDYNAMO_VERBOSE'] = '1'
     global model
 
     ### Load the tokenizer from the base model
@@ -178,8 +72,6 @@ def process_image(image):
         use_gradient_checkpointing = "unsloth",
         ### device_map="auto"  ### added here
     )
-    ###
-
 
     print("\nIn process_image():")
     print("Type of model:", type(model))
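Since process_image runs on every Gradio request, keeping the FastVisionModel.from_pretrained call inside it re-loads an 11B base checkpoint per image. A hypothetical memoization wrapper (the names here are illustrative, not from the diff) that loads the base tokenizer/processor once and reuses it across calls:

    _cached_tokenizer = None  # hypothetical module-level cache

    def get_tokenizer():
        """Load the base tokenizer/processor on first use, then reuse it."""
        global _cached_tokenizer
        if _cached_tokenizer is None:
            from unsloth import FastVisionModel
            _, _cached_tokenizer = FastVisionModel.from_pretrained(
                "unsloth/Llama-3.2-11B-Vision-Instruct",
                use_gradient_checkpointing="unsloth",
            )
        return _cached_tokenizer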
@@ -203,13 +95,6 @@ def process_image(image):
         ]}
     ]
 
-    # input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-    # inputs = tokenizer(
-    #     image,
-    #     input_text,
-    #     add_special_tokens=False,
-    #     return_tensors="pt",
-    # ).to("cuda")
     print("1. Messages:", messages)
 
     print("2. Tokenizer type:", type(tokenizer))
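The deleted comments in this last hunk outline the usual Llama 3.2 Vision inference path: apply the chat template, then call the processor with the image and the templated text together. If the debug prints are later replaced with real generation, the flow would look roughly like this sketch (assuming `tokenizer` is the vision processor returned by FastVisionModel and `image` is a PIL image, as elsewhere in the file; max_new_tokens is an illustrative value):

    # Build model inputs from the chat messages plus the input image.
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,                     # PIL image from the Gradio component
        input_text,                # chat-templated prompt string
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    output_ids = model.generate(**inputs, max_new_tokens=256)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))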