ginigen-lora-backup

Paused

App Files Files Community

openfree committed 30 days ago

Commit

b79c514

verified ·

1 Parent(s): ce47142

Update app.py

Browse files

Files changed (1) hide show

app.py +409 -390

app.py CHANGED Viewed

@@ -5,32 +5,6 @@ from huggingface_hub import whoami, HfApi
 from fastapi import FastAPI
 from starlette.middleware.sessions import SessionMiddleware
 import sys
-# ai-toolkit이 없으면 설치
-if not os.path.exists("ai-toolkit"):
-    subprocess.run("git clone https://github.com/ostris/ai-toolkit.git", shell=True)
-    subprocess.run("cd ai-toolkit && git submodule update --init --recursive", shell=True)
-# ai-toolkit 경로 추가
-toolkit_path = os.path.join(os.getcwd(), "ai-toolkit")
-sys.path.append(toolkit_path)
-# 필요한 패키지 설치
-subprocess.run("pip install -r ai-toolkit/requirements.txt", shell=True)
-is_spaces = True if os.environ.get("SPACE_ID") else False
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-import sys
-from dotenv import load_dotenv
-load_dotenv()
-# Add the current working directory to the Python path
-sys.path.insert(0, os.getcwd())
 import gradio as gr
 from PIL import Image
 import torch
@@ -40,91 +14,129 @@ import json
 import yaml
 from slugify import slugify
 from transformers import AutoProcessor, AutoModelForCausalLM
-# Gradio app 설정
 app = FastAPI()
 app.add_middleware(SessionMiddleware, secret_key="your-secret-key")
-if not is_spaces:
-    sys.path.insert(0, "ai-toolkit")
-    from toolkit.job import get_job
-    gr.OAuthProfile = None
-    gr.OAuthToken = None
 MAX_IMAGES = 150
-# Hugging Face 토큰 설정
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN environment variable is not set")
-if is_spaces:
-    subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-    import spaces
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN
-# HF API 초기화
 api = HfApi(token=HF_TOKEN)
 def load_captioning(uploaded_files, concept_sentence):
     uploaded_images = [file for file in uploaded_files if not file.endswith('.txt')]
     txt_files = [file for file in uploaded_files if file.endswith('.txt')]
     txt_files_dict = {os.path.splitext(os.path.basename(txt_file))[0]: txt_file for txt_file in txt_files}
     updates = []
     if len(uploaded_images) <= 1:
         raise gr.Error(
-            "Please upload at least 2 images to train your model (the ideal number with default settings is between 4-30)"
         )
     elif len(uploaded_images) > MAX_IMAGES:
         raise gr.Error(f"For now, only {MAX_IMAGES} or less images are allowed for training")
-    # Update for the captioning_area
-    # for _ in range(3):
     updates.append(gr.update(visible=True))
-    # Update visibility and image for each captioning row and image
     for i in range(1, MAX_IMAGES + 1):
-        # Determine if the current row and image should be visible
         visible = i <= len(uploaded_images)
-        # Update visibility of the captioning row
         updates.append(gr.update(visible=visible))
-        # Update for image component - display image if available, otherwise hide
         image_value = uploaded_images[i - 1] if visible else None
         updates.append(gr.update(value=image_value, visible=visible))
         corresponding_caption = False
-        if(image_value):
             base_name = os.path.splitext(os.path.basename(image_value))[0]
-            print(base_name)
-            print(image_value)
             if base_name in txt_files_dict:
-                print("entrou")
                 with open(txt_files_dict[base_name], 'r') as file:
                     corresponding_caption = file.read()
-        # Update value of captioning area
         text_value = corresponding_caption if visible and corresponding_caption else "[trigger]" if visible and concept_sentence else None
         updates.append(gr.update(value=text_value, visible=visible))
-    # Update for the sample caption area
     updates.append(gr.update(visible=True))
-    # Update prompt samples
     updates.append(gr.update(placeholder=f'A portrait of person in a bustling cafe {concept_sentence}', value=f'A person in a bustling cafe {concept_sentence}'))
     updates.append(gr.update(placeholder=f"A mountainous landscape in the style of {concept_sentence}"))
     updates.append(gr.update(placeholder=f"A {concept_sentence} in a mall"))
     return updates
 def hide_captioning():
     return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
-def create_dataset(*inputs):
-    print("Creating dataset")
-    images = inputs[0]
     destination_folder = str(f"datasets/{uuid.uuid4()}")
     if not os.path.exists(destination_folder):
         os.makedirs(destination_folder)
@@ -132,63 +144,225 @@ def create_dataset(*inputs):
     jsonl_file_path = os.path.join(destination_folder, "metadata.jsonl")
     with open(jsonl_file_path, "a") as jsonl_file:
         for index, image in enumerate(images):
-            new_image_path = shutil.copy(image, destination_folder)
-            original_caption = inputs[index + 1]
-            file_name = os.path.basename(new_image_path)
-            data = {"file_name": file_name, "prompt": original_caption}
-            jsonl_file.write(json.dumps(data) + "\n")
     return destination_folder
 def run_captioning(images, concept_sentence, *captions):
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    torch_dtype = torch.float16
-    model = AutoModelForCausalLM.from_pretrained(
-        "microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True
-    ).to(device)
-    processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
-    captions = list(captions)
-    for i, image_path in enumerate(images):
-        print(captions[i])
-        if isinstance(image_path, str):  # If image is a file path
-            image = Image.open(image_path).convert("RGB")
-        prompt = "<DETAILED_CAPTION>"
-        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
-        generated_ids = model.generate(
-            input_ids=inputs["input_ids"], pixel_values=inputs["pixel_values"], max_new_tokens=1024, num_beams=3
         )
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        parsed_answer = processor.post_process_generation(
-            generated_text, task=prompt, image_size=(image.width, image.height)
-        )
-        caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
-        if concept_sentence:
-            caption_text = f"{caption_text} [trigger]"
-        captions[i] = caption_text
-        yield captions
-    model.to("cpu")
-    del model
-    del processor
-if is_spaces:
-    run_captioning = spaces.GPU()(run_captioning)
-def recursive_update(d, u):
-    for k, v in u.items():
-        if isinstance(v, dict) and v:
-            d[k] = recursive_update(d.get(k, {}), v)
-        else:
-            d[k] = v
-    return d
 def start_training(
     lora_name,
@@ -204,6 +378,7 @@ def start_training(
     use_more_advanced_options,
     more_advanced_options,
 ):
     if not lora_name:
         raise gr.Error("You forgot to insert your LoRA name! This name has to be unique.")
@@ -215,33 +390,18 @@ def start_training(
     print("Started training")
     slugged_lora_name = slugify(lora_name)
-    # Load the default config
-    with open("train_lora_flux_24gb.yaml", "r") as f:
-        config = yaml.safe_load(f)
-    # dev 모델 설정
-    config["config"]["name"] = slugged_lora_name
-    config["config"]["process"][0]["model"]["name_or_path"] = "black-forest-labs/FLUX.1-dev"
-    config["config"]["process"][0]["model"]["assistant_lora_path"] = None  # adapter 없이 설정
-    config["config"]["process"][0]["model"]["low_vram"] = False
-    config["config"]["process"][0]["train"]["skip_first_sample"] = True
     config["config"]["process"][0]["train"]["steps"] = int(steps)
     config["config"]["process"][0]["train"]["lr"] = float(lr)
     config["config"]["process"][0]["network"]["linear"] = int(rank)
     config["config"]["process"][0]["network"]["linear_alpha"] = int(rank)
     config["config"]["process"][0]["datasets"][0]["folder_path"] = dataset_folder
-    config["config"]["process"][0]["save"]["push_to_hub"] = True
-    config["config"]["process"][0]["save"]["hf_repo_id"] = f"{username}/{slugged_lora_name}"
-    config["config"]["process"][0]["save"]["hf_private"] = True
-    config["config"]["process"][0]["save"]["hf_token"] = HF_TOKEN
-    config["config"]["process"][0]["sample"]["sample_steps"] = 28
-    if concept_sentence:
-        config["config"]["process"][0]["trigger_word"] = concept_sentence
     if sample_1 or sample_2 or sample_3:
-        config["config"]["process"][0]["train"]["disable_sampling"] = False
-        config["config"]["process"][0]["sample"]["sample_every"] = steps
         config["config"]["process"][0]["sample"]["prompts"] = []
         if sample_1:
             config["config"]["process"][0]["sample"]["prompts"].append(sample_1)
@@ -252,100 +412,46 @@ def start_training(
     else:
         config["config"]["process"][0]["train"]["disable_sampling"] = True
-    if(use_more_advanced_options):
-        more_advanced_options_dict = yaml.safe_load(more_advanced_options)
-        config["config"]["process"][0] = recursive_update(config["config"]["process"][0], more_advanced_options_dict)
-        print(config)
     try:
-        # Save the updated config
-        random_config_name = str(uuid.uuid4())
         os.makedirs("tmp", exist_ok=True)
-        config_path = f"tmp/{random_config_name}-{slugged_lora_name}.yaml"
         with open(config_path, "w") as f:
             yaml.dump(config, f)
-        # 직접 로컬 GPU에서 학습 실행
-        from toolkit.job import get_job
-        job = get_job(config_path)
-        job.run()
-        job.cleanup()
     except Exception as e:
         raise gr.Error(f"Training failed: {str(e)}")
-    return f"""# Training completed successfully!
-    ## Your model is available at: <a href='https://huggingface.co/{username}/{slugged_lora_name}'>{username}/{slugged_lora_name}</a>"""
-def update_pricing(steps):
-    try:
-        seconds_per_iteration = 7.54
-        total_seconds = (steps * seconds_per_iteration) + 240
-        cost_per_second = 0.80/60/60
-        cost = round(cost_per_second * total_seconds, 2)
-        cost_preview = f'''To train this LoRA, a paid L4 GPU will be hooked under the hood during training and then removed once finished.
-        ### Estimated to cost <b>< US$ {str(cost)}</b> for {round(int(total_seconds)/60, 2)} minutes with your current train settings <small>({int(steps)} iterations at {seconds_per_iteration}s/it)</small>'''
-        return gr.update(visible=True), cost_preview, gr.update(visible=False), gr.update(visible=True)
-    except:
-        return gr.update(visible=False), "", gr.update(visible=False), gr.update(visible=True)
-def swap_base_model(model):
-    return gr.update(visible=True) if model == "[dev] (high quality model, non-commercial license)" else gr.update(visible=False)
-config_yaml = '''
-device: cuda:0
-model:
-  is_flux: true
-  quantize: true
-network:
-  linear: 16 #it will overcome the 'rank' parameter
-  linear_alpha: 16 #you can have an alpha different than the ranking if you'd like
-  type: lora
-sample:
-  guidance_scale: 3.5
-  height: 1024
-  neg: '' #doesn't work for FLUX
-  sample_every: 1000
-  sample_steps: 28
-  sampler: flowmatch
-  seed: 42
-  walk_seed: true
-  width: 1024
-save:
-  dtype: float16
-  hf_private: true
-  max_step_saves_to_keep: 4
-  push_to_hub: true
-  save_every: 10000
-train:
-  batch_size: 1
-  dtype: bf16
-  ema_config:
-    ema_decay: 0.99
-    use_ema: true
-  gradient_accumulation_steps: 1
-  gradient_checkpointing: true
-  noise_scheduler: flowmatch
-  optimizer: adamw8bit #options: prodigy, dadaptation, adamw, adamw8bit, lion, lion8bit
-  train_text_encoder: false #probably doesn't work for flux
-  train_unet: true
-'''
 custom_theme = gr.themes.Base(
     primary_hue="indigo",
     secondary_hue="slate",
     neutral_hue="slate",
 ).set(
-    # 기본 배경 및 보더
     background_fill_primary="#1a1a1a",
     background_fill_secondary="#2d2d2d",
     border_color_primary="#404040",
-    # 버튼 스타일
     button_primary_background_fill="#4F46E5",
     button_primary_background_fill_dark="#4338CA",
     button_primary_background_fill_hover="#6366F1",
@@ -360,7 +466,6 @@ custom_theme = gr.themes.Base(
     button_secondary_text_color="white",
     button_secondary_text_color_dark="white",
-    # 블록 스타일
     block_background_fill="#2d2d2d",
     block_background_fill_dark="#1F2937",
     block_label_background_fill="#4F46E5",
@@ -370,31 +475,18 @@ custom_theme = gr.themes.Base(
     block_title_text_color="white",
     block_title_text_color_dark="white",
-    # 입력 필드 스타일
     input_background_fill="#374151",
     input_background_fill_dark="#1F2937",
     input_border_color="#4B5563",
     input_border_color_dark="#374151",
     input_placeholder_color="#9CA3AF",
     input_placeholder_color_dark="#6B7280",
-    # 그림자 효과
-    shadow_spread="8px",
-    shadow_inset="0px 2px 4px 0px rgba(0,0,0,0.1)",
-    # 컨테이너 스타일
-    panel_background_fill="#2d2d2d",
-    panel_background_fill_dark="#1F2937",
-    # 보더 스타일
-    border_color_accent="#4F46E5",
-    border_color_accent_dark="#4338CA"
 )
-css='''
-/* 기본 스타일 */
 h1 {
-    font-size: 3em;
     text-align: center;
     margin-bottom: 0.5em;
     color: white !important;
@@ -406,193 +498,67 @@ h3 {
     color: white !important;
 }
-/* Markdown 텍스트 스타일 */
-.markdown {
     color: white !important;
 }
-.markdown h1,
-.markdown h2,
-.markdown h3,
-.markdown h4,
-.markdown h5,
-.markdown h6,
-.markdown p {
-    color: white !important;
-}
-/* 컴포넌트 스타일 */
-.container {
-    max-width: 1200px;
-    margin: 0 auto;
-    padding: 20px;
-}
-/* 입력 필드 스타일 */
-.input-group {
-    background: var(--block-background-fill);
-    padding: 15px;
-    border-radius: 12px;
-    margin-bottom: 20px;
-    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-}
-/* 모든 입력 필드 텍스트 색상 */
-input, textarea, .gradio-textbox input, .gradio-textbox textarea, .gradio-number input {
     color: white !important;
 }
-/* 라벨 텍스트 스타일 */
-label, .label-text {
-    color: white !important;
-}
-/* 라디오 버튼 텍스트 */
-.gradio-radio label span {
-    color: white !important;
-}
-/* 체크박스 텍스트 */
-.gradio-checkbox label span {
-    color: white !important;
-}
-/* 버튼 스타일 */
-.button {
-    height: 40px;
-    border-radius: 8px;
     transition: all 0.3s ease;
-    color: white !important;
 }
-.button:hover {
     transform: translateY(-2px);
     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
 }
-/* 이미지 업로드 영역 */
 .image-upload-area {
-    border: 2px dashed var(--input-border-color);
     border-radius: 12px;
     padding: 20px;
     text-align: center;
     margin-bottom: 20px;
-    color: white !important;
-}
-/* 캡션 영역 */
-.caption-area {
-    background: var(--block-background-fill);
-    padding: 15px;
-    border-radius: 12px;
-    margin-top: 20px;
-    color: white !important;
 }
 .caption-row {
     display: flex;
     align-items: center;
     margin-bottom: 10px;
     gap: 10px;
 }
-/* 고급 옵션 영역 */
-.advanced-options {
-    background: var(--block-background-fill);
-    padding: 15px;
-    border-radius: 12px;
-    margin-top: 20px;
-    color: white !important;
-}
-/* 진행 상태 표시 */
-.progress-area {
-    background: var(--block-background-fill);
-    padding: 15px;
-    border-radius: 12px;
-    margin-top: 20px;
-    text-align: center;
-    color: white !important;
-}
-/* 플레이스홀더 텍스트 */
-::placeholder {
-    color: rgba(255, 255, 255, 0.5) !important;
-}
-/* 코드 에디터 텍스트 */
-.gradio-code {
-    color: white !important;
-}
-/* 아코디언 텍스트 */
-.gradio-accordion .label-wrap {
-    color: white !important;
-}
-/* 반응형 디자인 */
-@media (max-width: 768px) {
-    .caption-row {
-        flex-direction: column;
-    }
-}
-/* 스크롤바 스타일 */
-::-webkit-scrollbar {
-    width: 8px;
-}
-::-webkit-scrollbar-track {
-    background: var(--background-fill-primary);
-    border-radius: 4px;
-}
-::-webkit-scrollbar-thumb {
-    background: var(--primary-500);
-    border-radius: 4px;
-}
-::-webkit-scrollbar-thumb:hover {
-    background: var(--primary-600);
-}
-/* 모든 텍스트 입력 요소 */
-.gradio-container input[type="text"],
-.gradio-container textarea,
-.gradio-container .input-text,
-.gradio-container .input-textarea {
-    color: white !important;
-}
-/* 드롭다운 텍스트 */
-select, option {
-    color: white !important;
-}
-/* 버튼 텍스트 */
-button {
-    color: white !important;
-}
 '''
-# Gradio 앱 수정
 with gr.Blocks(theme=custom_theme, css=css) as demo:
     gr.Markdown(
-    """# 🆔 Gini LoRA 학습
-    ### 1)LoRA 이름 영어로 '입력' 2)트리거 단어 영어로 '입력'  3)기본 모델 '클릭' 4)이미지(최소 2장~최대 150장 미만) '업로드' 5)비전 인식 LLM 라벨링 '클릭' 6)START 클릭""",
-    elem_classes=["markdown"]
-    )
     with gr.Tab("Train"):
-        with gr.Column(elem_classes="container"):
-            # LoRA 설정 그룹
-            with gr.Group(elem_classes="input-group"):
                 with gr.Row():
                     lora_name = gr.Textbox(
                         label="LoRA 이름",
                         info="고유한 이름이어야 합니다",
-                        placeholder="예: Persian Miniature Painting style, Cat Toy"
                     )
                     concept_sentence = gr.Textbox(
                         label="트리거 단어/문장",
@@ -604,12 +570,11 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
                 which_model = gr.Radio(
                     ["고퀄리티 맞춤 학습 모델"],
                     label="기본 모델",
-                    value="[dev] (high quality model)"
                 )
-            # 이미지 업로드 영역
             with gr.Group(visible=True, elem_classes="image-upload-area") as image_upload:
                 with gr.Row():
                     images = gr.File(
                         file_types=["image", ".txt"],
@@ -623,8 +588,8 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
                         with gr.Column():
                             gr.Markdown(
                                 """# 이미지 라벨링
-    <p style="margin-top:0"> 비전인식 LLM이 이미지를 인식하여 자동으로 라벨링(이미지 인식을 위한 필수 설명). [trigger] '트리거 워드'는 학습한 모델을 실행하는 고유 키값 /trigger word.</p>
-    """, elem_classes="group_padding")
                             do_captioning = gr.Button("비전 인식 LLM 자동 라벨링")
                             output_components = [captioning_area]
                             caption_list = []
@@ -651,16 +616,55 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
                                 output_components.append(locals()[f"caption_{i}"])
                                 caption_list.append(locals()[f"caption_{i}"])
             with gr.Accordion("Advanced options", open=False):
                 steps = gr.Number(label="Steps", value=1000, minimum=1, maximum=10000, step=1)
                 lr = gr.Number(label="Learning Rate", value=4e-4, minimum=1e-6, maximum=1e-3, step=1e-6)
                 rank = gr.Number(label="LoRA Rank", value=16, minimum=4, maximum=128, step=4)
                 with gr.Accordion("Even more advanced options", open=False):
-                    if(is_spaces):
-                        gr.Markdown("Attention: changing this parameters may make your training fail or go out-of-memory if training on Spaces. Only change settings here it if you know what you are doing. Beware that training is done in an L4 GPU with 24GB of RAM")
                     use_more_advanced_options = gr.Checkbox(label="Use more advanced options", value=False)
-                    more_advanced_options = gr.Code(config_yaml, language="yaml")
             with gr.Accordion("Sample prompts (optional)", visible=False) as sample:
                 gr.Markdown(
                     "Include sample prompts to test out your trained model. Don't forget to include your trigger word/sentence (optional)"
@@ -668,20 +672,28 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
                 sample_1 = gr.Textbox(label="Test prompt 1")
                 sample_2 = gr.Textbox(label="Test prompt 2")
                 sample_3 = gr.Textbox(label="Test prompt 3")
             with gr.Group(visible=False) as cost_preview:
                 cost_preview_info = gr.Markdown(elem_id="cost_preview_info", elem_classes="group_padding")
                 payment_update = gr.Button("I have set up a payment method", visible=False)
             output_components.append(sample)
             output_components.append(sample_1)
             output_components.append(sample_2)
             output_components.append(sample_3)
-            start = gr.Button("START 클릭('약 25~30분 후 학습이 종료되고 완료 메시지가 출력됩니다.)'", visible=False)
         progress_area = gr.Markdown("")
     dataset_folder = gr.State()
     images.upload(
         load_captioning,
         inputs=[images, concept_sentence],
@@ -707,14 +719,17 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
         outputs=[cost_preview, cost_preview_info, payment_update, start]
     )
-    gr.on(
-        triggers=[steps.change],
-        fn=update_pricing,
         inputs=[steps],
         outputs=[cost_preview, cost_preview_info, payment_update, start]
     )
-    start.click(fn=create_dataset, inputs=[images] + caption_list, outputs=dataset_folder).then(
         fn=start_training,
         inputs=[
             lora_name,
@@ -733,8 +748,12 @@ with gr.Blocks(theme=custom_theme, css=css) as demo:
         outputs=progress_area,
     )
-    do_captioning.click(fn=run_captioning, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, auth=("gini", "pick"), show_error=True)

 from fastapi import FastAPI
 from starlette.middleware.sessions import SessionMiddleware
 import sys
 import gradio as gr
 from PIL import Image
 import torch
 import yaml
 from slugify import slugify
 from transformers import AutoProcessor, AutoModelForCausalLM
+import numpy as np
# Set environment variables
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Check if we're running on HF Spaces
is_spaces = bool(os.environ.get("SPACE_ID"))

# FastAPI app setup
app = FastAPI()
# Session cookies are signed with this key, so it must not be a value that is
# published in the source. Prefer an operator-supplied SESSION_SECRET_KEY; the
# random fallback is safe but changes on every restart, which invalidates
# existing sessions (and differs between worker processes).
SESSION_SECRET_KEY = os.environ.get("SESSION_SECRET_KEY") or os.urandom(32).hex()
app.add_middleware(SessionMiddleware, secret_key=SESSION_SECRET_KEY)

# Constants
MAX_IMAGES = 150

# Hugging Face token setup: fail fast at import time when the token is missing.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")
os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN

# Initialize HF API
api = HfApi(token=HF_TOKEN)
+# Create default train config
def get_default_train_config(lora_name, username, trigger_word=None):
    """Assemble the baseline training configuration for one LoRA run.

    Args:
        lora_name: Human-readable LoRA name; its slugified form becomes the
            config name and the repository name.
        username: Hub account under which the repository id is built.
        trigger_word: Optional trigger word; stored on the process entry
            only when truthy.

    Returns:
        A nested dict with a single entry under ``config.process``. Note that
        the module-level ``HF_TOKEN`` is embedded under ``save.hf_token``, so
        the returned dict (and anything it is serialized to) holds a secret.
    """
    slug = slugify(lora_name)

    model_section = {
        "name_or_path": "black-forest-labs/FLUX.1-dev",
        "assistant_lora_path": None,
        "low_vram": False,
    }
    network_section = {
        "linear": 16,
        "linear_alpha": 16,
    }
    train_section = {
        "skip_first_sample": True,
        "steps": 1000,
        "lr": 4e-4,
        "disable_sampling": False,
    }
    # The dataset folder is left blank here and filled in by the caller.
    dataset_entries = [{"folder_path": ""}]
    save_section = {
        "push_to_hub": True,
        "hf_repo_id": f"{username}/{slug}",
        "hf_private": True,
        "hf_token": HF_TOKEN,
    }
    sample_section = {
        "sample_steps": 28,
        "sample_every": 1000,
        "prompts": [],
    }

    process_entry = {
        "model": model_section,
        "network": network_section,
        "train": train_section,
        "datasets": dataset_entries,
        "save": save_section,
        "sample": sample_section,
    }
    if trigger_word:
        process_entry["trigger_word"] = trigger_word

    return {"config": {"name": slug, "process": [process_entry]}}
+# Helper functions
 def load_captioning(uploaded_files, concept_sentence):
     """Build the Gradio updates that reveal the captioning UI for an upload.

     Files ending in ``.txt`` are treated as caption sidecars and matched to
     images by base name; every other file is treated as an image.

     Args:
         uploaded_files: Paths of the uploaded files (images and optional
             ``.txt`` captions). ``None`` is treated as an empty upload.
         concept_sentence: Trigger word/sentence; when truthy, images without
             a sidecar caption are pre-filled with ``"[trigger]"``.

     Returns:
         A list of ``gr.update`` objects: one for the captioning area, then a
         (row, image, caption) triple for each of the ``MAX_IMAGES`` slots,
         then one for the sample area and three for the sample prompts.

     Raises:
         gr.Error: If fewer than 2 or more than ``MAX_IMAGES`` images are
             uploaded.
     """
     # A missing upload should produce the friendly "at least 2 images" error
     # below rather than a TypeError from iterating None.
     uploaded_files = uploaded_files or []
     uploaded_images = [path for path in uploaded_files if not path.endswith('.txt')]
     caption_files = {
         os.path.splitext(os.path.basename(path))[0]: path
         for path in uploaded_files
         if path.endswith('.txt')
     }

     if len(uploaded_images) <= 1:
         raise gr.Error(
             "Please upload at least 2 images to train your model (the ideal number is between 4-30)"
         )
     elif len(uploaded_images) > MAX_IMAGES:
         raise gr.Error(f"For now, only {MAX_IMAGES} or less images are allowed for training")

     # Update captioning area visibility
     updates = [gr.update(visible=True)]

     # Update individual captioning rows
     for slot in range(1, MAX_IMAGES + 1):
         visible = slot <= len(uploaded_images)
         updates.append(gr.update(visible=visible))

         image_value = uploaded_images[slot - 1] if visible else None
         updates.append(gr.update(value=image_value, visible=visible))

         sidecar_caption = None
         if image_value:
             base_name = os.path.splitext(os.path.basename(image_value))[0]
             if base_name in caption_files:
                 # Read captions as UTF-8 explicitly so non-ASCII text does
                 # not depend on the platform's default encoding.
                 with open(caption_files[base_name], 'r', encoding='utf-8') as caption_file:
                     sidecar_caption = caption_file.read()

         # An empty sidecar file falls through to the trigger placeholder.
         if visible and sidecar_caption:
             text_value = sidecar_caption
         elif visible and concept_sentence:
             text_value = "[trigger]"
         else:
             text_value = None
         updates.append(gr.update(value=text_value, visible=visible))

     # Update sample caption area
     updates.append(gr.update(visible=True))
     updates.append(gr.update(placeholder=f'A portrait of person in a bustling cafe {concept_sentence}', value=f'A person in a bustling cafe {concept_sentence}'))
     updates.append(gr.update(placeholder=f"A mountainous landscape in the style of {concept_sentence}"))
     updates.append(gr.update(placeholder=f"A {concept_sentence} in a mall"))
     return updates
 def hide_captioning():
     """Return a 4-tuple of Gradio updates, each setting ``visible=False``."""
     hidden_updates = [gr.update(visible=False) for _ in range(4)]
     return tuple(hidden_updates)
def create_dataset(images, *captions):
    """Copy the uploaded images into a new dataset folder with their captions.

    A folder ``datasets/<uuid4>`` is created relative to the current working
    directory. Each image is copied into it and one JSON line
    ``{"file_name": ..., "prompt": ...}`` per image is written to
    ``metadata.jsonl``.

    Args:
        images: Uploaded file paths. May also contain ``.txt`` caption
            sidecars, which are ignored here. ``None`` is treated as empty.
        *captions: Caption values, positionally aligned with the uploaded
            files after ``.txt`` entries are removed.

    Returns:
        The path of the created dataset folder.

    Raises:
        IndexError: If there are fewer captions than images.
    """
    destination_folder = f"datasets/{uuid.uuid4()}"
    os.makedirs(destination_folder, exist_ok=True)

    # Caption slots are allocated for non-.txt uploads only (see
    # load_captioning), so sidecar files must be dropped BEFORE enumerating;
    # otherwise every caption after the first sidecar is paired with the wrong
    # file and the sidecars themselves end up in the dataset.
    image_paths = [
        path for path in (images or [])
        if not (isinstance(path, str) and path.endswith(".txt"))
    ]

    jsonl_file_path = os.path.join(destination_folder, "metadata.jsonl")
    with open(jsonl_file_path, "a", encoding="utf-8") as jsonl_file:
        for index, image in enumerate(image_paths):
            if not image:  # Skip None values but keep their caption slot
                continue
            new_image_path = shutil.copy(image, destination_folder)
            data = {
                "file_name": os.path.basename(new_image_path),
                "prompt": captions[index],
            }
            jsonl_file.write(json.dumps(data) + "\n")
    return destination_folder
 def run_captioning(images, concept_sentence, *captions):
     """Caption the uploaded images with Florence-2, yielding after each one.

     This is a generator: after every successfully captioned image it yields
     the full, updated caption list so the UI can refresh progressively.

     Args:
         images: Uploaded file paths (``.txt`` sidecars are ignored) or PIL
             images. ``None`` is treated as empty.
         concept_sentence: When truthy, ``" [trigger]"`` is appended to each
             generated caption.
         *captions: Current caption values, positionally aligned with the
             uploaded files after ``.txt`` entries are removed.

     Yields:
         The list of captions with the entries generated so far filled in.

     Raises:
         gr.Error: If model loading or caption generation fails.
     """
     captions = list(captions)
     # Caption slots are allocated for non-.txt uploads only (see
     # load_captioning); drop sidecars first so index i addresses the caption
     # box that belongs to the image being processed.
     image_paths = [
         path for path in (images or [])
         if not (isinstance(path, str) and path.endswith(".txt"))
     ]

     model = None
     processor = None
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         torch_dtype = torch.float16

         # Load model and processor
         model = AutoModelForCausalLM.from_pretrained(
             "microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True
         ).to(device)
         processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)

         prompt = "<DETAILED_CAPTION>"
         for i, image_path in enumerate(image_paths):
             if not image_path:  # Skip None values
                 continue

             if isinstance(image_path, str):  # If image is a file path
                 try:
                     image = Image.open(image_path).convert("RGB")
                 except Exception as e:
                     print(f"Error opening image {image_path}: {e}")
                     continue
             elif isinstance(image_path, Image.Image):
                 image = image_path.convert("RGB")
             else:
                 # Without this branch `image` would be unbound (or silently
                 # reuse the previous iteration's image) for unknown inputs.
                 print(f"Skipping unsupported image input: {image_path!r}")
                 continue

             inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
             generated_ids = model.generate(
                 input_ids=inputs["input_ids"],
                 pixel_values=inputs["pixel_values"],
                 max_new_tokens=1024,
                 num_beams=3
             )
             generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
             parsed_answer = processor.post_process_generation(
                 generated_text, task=prompt, image_size=(image.width, image.height)
             )
             caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
             if concept_sentence:
                 caption_text = f"{caption_text} [trigger]"
             captions[i] = caption_text
             yield captions
     except Exception as e:
         print(f"Error in captioning: {e}")
         raise gr.Error(f"Captioning failed: {str(e)}") from e
     finally:
         # Clean up to free memory on every exit path: normal completion, a
         # failure mid-run, or the consumer closing the generator early.
         if model is not None:
             model.to("cpu")
         model = None
         processor = None
         torch.cuda.empty_cache()
def update_pricing(steps):
    """Build the training-time estimate shown for the current step count.

    The estimate is ``steps * 7.54`` seconds plus a fixed 240 seconds,
    reported in minutes.

    Args:
        steps: Number of training steps from the UI; may be ``None`` or
            otherwise non-numeric while the field is being edited.

    Returns:
        A 4-tuple: an update for the preview group's visibility, the preview
        markdown text, an update hiding the payment button, and an update
        showing the start button. When ``steps`` cannot be used for the
        calculation the preview group is hidden and the text is empty.
    """
    seconds_per_iteration = 7.54
    try:
        total_seconds = (steps * seconds_per_iteration) + 240
        cost_preview = f'''To train this LoRA, a paid L4 GPU will be used during training.
        ### Estimated to take <b>~{round(int(total_seconds)/60, 2)} minutes</b> with your current settings <small>({int(steps)} iterations)</small>'''
    except (TypeError, ValueError, OverflowError):
        # None/str steps raise TypeError; NaN and infinity raise ValueError /
        # OverflowError from int(). Anything else is a real bug and should
        # surface instead of being swallowed.
        return gr.update(visible=False), "", gr.update(visible=False), gr.update(visible=True)
    return gr.update(visible=True), cost_preview, gr.update(visible=False), gr.update(visible=True)
+def run_training_process(config_path):
+    """Run LoRA training in a child Python process and return the target repo id.
+
+    A standalone training script is written to ``direct_train_lora.py`` in the
+    current working directory (overwritten on every call) and executed with the
+    interpreter that runs this app, passing ``config_path`` as its only
+    command-line argument. The script's stdout is printed once it finishes.
+
+    Args:
+        config_path: Path to the YAML training config. It must contain
+            ``config -> process[0] -> save -> hf_repo_id``.
+
+    Returns:
+        The ``hf_repo_id`` value read back from the config file.
+
+    Raises:
+        Exception: If writing the script, running it, or reading the config
+            fails. A non-zero exit status of the script is reported together
+            with the script's captured stderr.
+    """
+    try:
+        # This is a simplified placeholder for the actual training code
+        # Instead of using the ai-toolkit which is causing errors, we'll implement our own training logic
+        # Call to a direct training script that doesn't require the problematic dependencies
+        # NOTE(review): the generated script defines process_batch() and builds a
+        # `dataset` list but uses neither, while its data collator reads
+        # 'input_ids' / 'attention_mask' -- confirm the script can actually train.
+        script_path = os.path.join(os.getcwd(), "direct_train_lora.py")
+        with open(script_path, "w") as f:
+            f.write("""
+import os
+import sys
+import yaml
+import torch
+from peft import LoraConfig, get_peft_model
+from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
+from datasets import load_dataset
+import json
+def train_lora(config_path):
+    # Load config
+    with open(config_path, 'r') as f:
+        config = yaml.safe_load(f)
+    process_config = config['config']['process'][0]
+    # Get basic parameters
+    model_name = process_config['model']['name_or_path']
+    lora_rank = process_config['network']['linear']
+    steps = process_config['train']['steps']
+    lr = process_config['train']['lr']
+    dataset_path = process_config['datasets'][0]['folder_path']
+    repo_id = process_config['save']['hf_repo_id']
+    hf_token = process_config['save']['hf_token']
+    # Load dataset
+    dataset = []
+    with open(os.path.join(dataset_path, "metadata.jsonl"), 'r') as f:
+        for line in f:
+            data = json.loads(line)
+            image_path = os.path.join(dataset_path, data['file_name'])
+            prompt = data['prompt']
+            dataset.append({"image_path": image_path, "text": prompt})
+    # Load base model
+    print(f"Loading model {model_name}")
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+        use_auth_token=hf_token
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
+    # Configure LoRA
+    lora_config = LoraConfig(
+        r=lora_rank,
+        lora_alpha=lora_rank,
+        target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
+        lora_dropout=0.05,
+        bias="none",
+        task_type="CAUSAL_LM"
+    )
+    # Apply LoRA
+    model = get_peft_model(model, lora_config)
+    # Training parameters
+    training_args = TrainingArguments(
+        output_dir=f"./lora_train/{repo_id.split('/')[-1]}",
+        num_train_epochs=3,
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=4,
+        learning_rate=lr,
+        max_steps=steps,
+        fp16=True,
+        logging_steps=10,
+        save_steps=steps // 2,
+        push_to_hub=True,
+        hub_model_id=repo_id,
+        hub_token=hf_token,
+    )
+    # Simple dataset preparation
+    def process_batch(examples):
+        return tokenizer(
+            examples["text"],
+            padding="max_length",
+            truncation=True,
+            max_length=256
         )
+    # Convert dataset to huggingface format
+    train_dataset = load_dataset('json', data_files={'train': dataset_path + '/metadata.jsonl'})['train']
+    # Set up trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
+                                     'attention_mask': torch.stack([f['attention_mask'] for f in data])},
+    )
+    # Train
+    print("Starting training...")
+    trainer.train()
+    # Save and push to hub
+    model.save_pretrained(f"./lora_final/{repo_id.split('/')[-1]}")
+    tokenizer.save_pretrained(f"./lora_final/{repo_id.split('/')[-1]}")
+    if process_config['save']['push_to_hub']:
+        model.push_to_hub(repo_id, use_auth_token=hf_token)
+        tokenizer.push_to_hub(repo_id, use_auth_token=hf_token)
+    print(f"Training completed! Model saved to {repo_id}")
+    return repo_id
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        train_lora(sys.argv[1])
+    else:
+        print("Please provide config path")
+""")
+        # Deliberately no check=True: with it, a failing script raises
+        # CalledProcessError before the exit status is inspected, so the
+        # branch below could never run and the captured stderr was lost.
+        result = subprocess.run([sys.executable, script_path, config_path],
+                                 capture_output=True, text=True)
+        print(result.stdout)
+        if result.returncode != 0:
+            raise Exception(f"Training script failed: {result.stderr}")
+        # Extract repo ID from config
+        with open(config_path, "r") as f:
+            config = yaml.safe_load(f)
+        repo_id = config["config"]["process"][0]["save"]["hf_repo_id"]
+        return repo_id
+    except Exception as e:
+        # Chain the original exception so the full traceback stays available.
+        raise Exception(f"Training process failed: {str(e)}") from e
 def start_training(
     lora_name,
     use_more_advanced_options,
     more_advanced_options,
 ):
+    """Start the LoRA training process"""
     if not lora_name:
         raise gr.Error("You forgot to insert your LoRA name! This name has to be unique.")
     print("Started training")
     slugged_lora_name = slugify(lora_name)
+    # Get base config
+    config = get_default_train_config(lora_name, username, concept_sentence)
+    # Update config with form values
     config["config"]["process"][0]["train"]["steps"] = int(steps)
     config["config"]["process"][0]["train"]["lr"] = float(lr)
     config["config"]["process"][0]["network"]["linear"] = int(rank)
     config["config"]["process"][0]["network"]["linear_alpha"] = int(rank)
     config["config"]["process"][0]["datasets"][0]["folder_path"] = dataset_folder
+    # Add sample prompts if provided
     if sample_1 or sample_2 or sample_3:
         config["config"]["process"][0]["sample"]["prompts"] = []
         if sample_1:
             config["config"]["process"][0]["sample"]["prompts"].append(sample_1)
     else:
         config["config"]["process"][0]["train"]["disable_sampling"] = True
+    # Apply advanced options if enabled
+    if use_more_advanced_options:
+        try:
+            more_advanced_options_dict = yaml.safe_load(more_advanced_options)
+            def recursive_update(d, u):
+                for k, v in u.items():
+                    if isinstance(v, dict) and v:
+                        d[k] = recursive_update(d.get(k, {}), v)
+                    else:
+                        d[k] = v
+                return d
+            config["config"]["process"][0] = recursive_update(config["config"]["process"][0], more_advanced_options_dict)
+        except Exception as e:
+            raise gr.Error(f"Error in advanced options: {str(e)}")
     try:
+        # Save the config
         os.makedirs("tmp", exist_ok=True)
+        config_path = f"tmp/{uuid.uuid4()}-{slugged_lora_name}.yaml"
         with open(config_path, "w") as f:
             yaml.dump(config, f)
+        # Run training process
+        repo_id = run_training_process(config_path)
+        return f"""# Training completed successfully!
+        ## Your model is available at: <a href='https://huggingface.co/{repo_id}'>{repo_id}</a>"""
     except Exception as e:
         raise gr.Error(f"Training failed: {str(e)}")
+# UI Theme and CSS
 custom_theme = gr.themes.Base(
     primary_hue="indigo",
     secondary_hue="slate",
     neutral_hue="slate",
 ).set(
     background_fill_primary="#1a1a1a",
     background_fill_secondary="#2d2d2d",
     border_color_primary="#404040",
     button_primary_background_fill="#4F46E5",
     button_primary_background_fill_dark="#4338CA",
     button_primary_background_fill_hover="#6366F1",
     button_secondary_text_color="white",
     button_secondary_text_color_dark="white",
     block_background_fill="#2d2d2d",
     block_background_fill_dark="#1F2937",
     block_label_background_fill="#4F46E5",
     block_title_text_color="white",
     block_title_text_color_dark="white",
     input_background_fill="#374151",
     input_background_fill_dark="#1F2937",
     input_border_color="#4B5563",
     input_border_color_dark="#374151",
     input_placeholder_color="#9CA3AF",
     input_placeholder_color_dark="#6B7280",
 )
+css = '''
+/* Base styles */
 h1 {
+    font-size: 2.5em;
     text-align: center;
     margin-bottom: 0.5em;
     color: white !important;
     color: white !important;
 }
+/* Ensure all text is white */
+.markdown, .markdown h1, .markdown h2, .markdown h3,
+.markdown h4, .markdown h5, .markdown h6, .markdown p,
+label, .label-text, .gradio-radio label span, .gradio-checkbox label span,
+input, textarea, .gradio-textbox input, .gradio-textbox textarea,
+.gradio-number input, select, option, button {
     color: white !important;
 }
+/* Input style improvements */
+input[type="text"], textarea, .input-text, .input-textarea {
+    background-color: #374151 !important;
+    border-color: #4B5563 !important;
     color: white !important;
 }
+/* Button styling */
+button {
     transition: all 0.3s ease;
 }
+button:hover {
     transform: translateY(-2px);
     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
 }
+/* Image area */
 .image-upload-area {
+    border: 2px dashed #4B5563;
     border-radius: 12px;
     padding: 20px;
     text-align: center;
     margin-bottom: 20px;
 }
+/* Caption rows */
 .caption-row {
     display: flex;
     align-items: center;
     margin-bottom: 10px;
     gap: 10px;
 }
 '''
+# Gradio UI
 with gr.Blocks(theme=custom_theme, css=css) as demo:
     gr.Markdown(
+        """# 🆔 Gini LoRA 학습
+        ### 1) LoRA 이름 입력 2) 트리거 단어 입력 3) 이미지 업로드(2-30장 권장) 4) 비전 인식 LLM 라벨링 5) START 클릭""",
+        elem_classes=["markdown"]
+    )
     with gr.Tab("Train"):
+        with gr.Column():
+            # LoRA settings
+            with gr.Group():
                 with gr.Row():
                     lora_name = gr.Textbox(
                         label="LoRA 이름",
                         info="고유한 이름이어야 합니다",
+                        placeholder="예: Persian Miniature Style, Cat Toy"
                     )
                     concept_sentence = gr.Textbox(
                         label="트리거 단어/문장",
                 which_model = gr.Radio(
                     ["고퀄리티 맞춤 학습 모델"],
                     label="기본 모델",
+                    value="고퀄리티 맞춤 학습 모델"
                 )
+            # Image upload
             with gr.Group(visible=True, elem_classes="image-upload-area") as image_upload:
                 with gr.Row():
                     images = gr.File(
                         file_types=["image", ".txt"],
                         with gr.Column():
                             gr.Markdown(
                                 """# 이미지 라벨링
+<p style="margin-top:0"> 비전인식 LLM이 이미지를 인식하여 자동으로 라벨링(이미지 인식을 위한 필수 설명). [trigger] '트리거 워드'는 학습한 모델을 실행하는 고유 키값</p>
+""", elem_classes="group_padding")
                             do_captioning = gr.Button("비전 인식 LLM 자동 라벨링")
                             output_components = [captioning_area]
                             caption_list = []
                                 output_components.append(locals()[f"caption_{i}"])
                                 caption_list.append(locals()[f"caption_{i}"])
+            # Advanced settings
             with gr.Accordion("Advanced options", open=False):
                 steps = gr.Number(label="Steps", value=1000, minimum=1, maximum=10000, step=1)
                 lr = gr.Number(label="Learning Rate", value=4e-4, minimum=1e-6, maximum=1e-3, step=1e-6)
                 rank = gr.Number(label="LoRA Rank", value=16, minimum=4, maximum=128, step=4)
                 with gr.Accordion("Even more advanced options", open=False):
                     use_more_advanced_options = gr.Checkbox(label="Use more advanced options", value=False)
+                    more_advanced_options = gr.Code(
+                        value="""
+device: cuda:0
+model:
+  is_flux: true
+  quantize: true
+network:
+  linear: 16
+  linear_alpha: 16
+  type: lora
+sample:
+  guidance_scale: 3.5
+  height: 1024
+  neg: ''
+  sample_steps: 28
+  sampler: flowmatch
+  seed: 42
+  walk_seed: true
+  width: 1024
+save:
+  dtype: float16
+  hf_private: true
+  max_step_saves_to_keep: 4
+  push_to_hub: true
+  save_every: 10000
+train:
+  batch_size: 1
+  dtype: bf16
+  ema_config:
+    ema_decay: 0.99
+    use_ema: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: true
+  noise_scheduler: flowmatch
+  optimizer: adamw8bit
+  train_text_encoder: false
+  train_unet: true
+""",
+                        language="yaml"
+                    )
+            # Sample prompts
             with gr.Accordion("Sample prompts (optional)", visible=False) as sample:
                 gr.Markdown(
                     "Include sample prompts to test out your trained model. Don't forget to include your trigger word/sentence (optional)"
                 sample_1 = gr.Textbox(label="Test prompt 1")
                 sample_2 = gr.Textbox(label="Test prompt 2")
                 sample_3 = gr.Textbox(label="Test prompt 3")
+            # Cost preview
             with gr.Group(visible=False) as cost_preview:
                 cost_preview_info = gr.Markdown(elem_id="cost_preview_info", elem_classes="group_padding")
                 payment_update = gr.Button("I have set up a payment method", visible=False)
+            # Add the sample-prompt components to the shared output list
             output_components.append(sample)
             output_components.append(sample_1)
             output_components.append(sample_2)
             output_components.append(sample_3)
+            # Start button
+            start = gr.Button("START 클릭 ('약 15-20분 후 학습이 종료되고 완료 메시지가 출력됩니다')", visible=False)
+        # Progress status
         progress_area = gr.Markdown("")
+    # State variable
     dataset_folder = gr.State()
+    # Event bindings
     images.upload(
         load_captioning,
         inputs=[images, concept_sentence],
         outputs=[cost_preview, cost_preview_info, payment_update, start]
     )
+    steps.change(
+        update_pricing,
         inputs=[steps],
         outputs=[cost_preview, cost_preview_info, payment_update, start]
     )
+    start.click(
+        fn=create_dataset,
+        inputs=[images] + caption_list,
+        outputs=dataset_folder
+    ).then(
         fn=start_training,
         inputs=[
             lora_name,
         outputs=progress_area,
     )
+    do_captioning.click(
+        fn=run_captioning,
+        inputs=[images, concept_sentence] + caption_list,
+        outputs=caption_list
+    )
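+# Launch the app
+# NOTE(review): the basic-auth username/password passed to launch() are
+# hard-coded in source; consider reading them from environment variables.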
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, auth=("gini", "pick"), show_error=True)