import os
import subprocess
from typing import Union
from huggingface_hub import whoami, HfApi
from fastapi import FastAPI
from starlette.middleware.sessions import SessionMiddleware
import sys
import gradio as gr
from PIL import Image
import torch
import uuid
import shutil
import json
import yaml
from slugify import slugify
from transformers import AutoProcessor, AutoModelForCausalLM
import numpy as np
# Set environment variables
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Check if we're running on HF Spaces
is_spaces = bool(os.environ.get("SPACE_ID"))
# FastAPI app setup
app = FastAPI()
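# NOTE: "your-secret-key" is a placeholder; in production the session secret should come from an environment variable or Space secret.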
app.add_middleware(SessionMiddleware, secret_key="your-secret-key")
# Constants
MAX_IMAGES = 150
# Hugging Face token setup
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
raise ValueError("HF_TOKEN environment variable is not set")
os.environ["HUGGING_FACE_HUB_TOKEN"] = HF_TOKEN
# Initialize HF API
api = HfApi(token=HF_TOKEN)
# Create default train config
def get_default_train_config(lora_name, username, trigger_word=None):
"""Generate a default training configuration"""
slugged_lora_name = slugify(lora_name)
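# A single "process" entry holds everything the training step reads: base model, LoRA network size (rank/alpha), train schedule, dataset folder, Hub save target, and sample prompts.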
config = {
"config": {
"name": slugged_lora_name,
"process": [{
"model": {
"name_or_path": "black-forest-labs/FLUX.1-dev",
"assistant_lora_path": None,
"low_vram": False,
},
"network": {
"linear": 16,
"linear_alpha": 16
},
"train": {
"skip_first_sample": True,
"steps": 1000,
"lr": 4e-4,
"disable_sampling": False
},
"datasets": [{
"folder_path": "", # Will be filled later
}],
"save": {
"push_to_hub": True,
"hf_repo_id": f"{username}/{slugged_lora_name}",
"hf_private": True,
"hf_token": HF_TOKEN
},
"sample": {
"sample_steps": 28,
"sample_every": 1000,
"prompts": []
}
}]
}
}
if trigger_word:
config["config"]["process"][0]["trigger_word"] = trigger_word
return config
# Helper functions
def load_captioning(uploaded_files, concept_sentence):
"""Load images and prepare captioning UI"""
uploaded_images = [file for file in uploaded_files if not file.endswith('.txt')]
txt_files = [file for file in uploaded_files if file.endswith('.txt')]
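# Captions can be supplied alongside images as .txt files; map each caption file by the image basename it belongs to.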
txt_files_dict = {os.path.splitext(os.path.basename(txt_file))[0]: txt_file for txt_file in txt_files}
updates = []
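# "updates" must stay in the same order as the UI's output_components: the captioning area, then (row, image, caption) per slot, then the sample accordion and its three prompt boxes.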
if len(uploaded_images) <= 1:
raise gr.Error(
"Please upload at least 2 images to train your model (the ideal number is between 4-30)"
)
elif len(uploaded_images) > MAX_IMAGES:
raise gr.Error(f"For now, only {MAX_IMAGES} or less images are allowed for training")
# Update captioning area visibility
updates.append(gr.update(visible=True))
# Update individual captioning rows
for i in range(1, MAX_IMAGES + 1):
visible = i <= len(uploaded_images)
updates.append(gr.update(visible=visible))
image_value = uploaded_images[i - 1] if visible else None
updates.append(gr.update(value=image_value, visible=visible))
corresponding_caption = False
if image_value:
base_name = os.path.splitext(os.path.basename(image_value))[0]
if base_name in txt_files_dict:
with open(txt_files_dict[base_name], 'r') as file:
corresponding_caption = file.read()
text_value = corresponding_caption if visible and corresponding_caption else "[trigger]" if visible and concept_sentence else None
updates.append(gr.update(value=text_value, visible=visible))
# Update sample caption area
updates.append(gr.update(visible=True))
updates.append(gr.update(placeholder=f'A portrait of a person in a bustling cafe {concept_sentence}', value=f'A person in a bustling cafe {concept_sentence}'))
updates.append(gr.update(placeholder=f"A mountainous landscape in the style of {concept_sentence}"))
updates.append(gr.update(placeholder=f"A {concept_sentence} in a mall"))
return updates
def hide_captioning():
"""Hide captioning UI elements"""
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
def create_dataset(images, *captions):
"""Create dataset directory with images and captions"""
destination_folder = f"datasets/{uuid.uuid4()}"
os.makedirs(destination_folder, exist_ok=True)
jsonl_file_path = os.path.join(destination_folder, "metadata.jsonl")
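# metadata.jsonl gets one JSON object per image: {"file_name": ..., "prompt": ...}, the format the training script reads back.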
with open(jsonl_file_path, "a") as jsonl_file:
for index, image in enumerate(images):
if image: # Skip None values
new_image_path = shutil.copy(image, destination_folder)
caption = captions[index]
file_name = os.path.basename(new_image_path)
data = {"file_name": file_name, "prompt": caption}
jsonl_file.write(json.dumps(data) + "\n")
return destination_folder
def run_captioning(images, concept_sentence, *captions):
"""Run automatic captioning using Microsoft Florence model"""
try:
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16
# Load model and processor
model = AutoModelForCausalLM.from_pretrained(
"microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True
).to(device)
processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
captions = list(captions)
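# Gradio passes the current caption textboxes as a tuple; copy to a list so entries can be overwritten in place.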
for i, image_path in enumerate(images):
if not image_path: # Skip None values
continue
if isinstance(image_path, str): # If image is a file path
try:
image = Image.open(image_path).convert("RGB")
except Exception as e:
print(f"Error opening image {image_path}: {e}")
continue
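# "<DETAILED_CAPTION>" is a Florence-2 task prompt; post_process_generation below parses the raw output for that task.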
prompt = "<DETAILED_CAPTION>"
inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
generated_ids = model.generate(
input_ids=inputs["input_ids"],
pixel_values=inputs["pixel_values"],
max_new_tokens=1024,
num_beams=3
)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
parsed_answer = processor.post_process_generation(
generated_text, task=prompt, image_size=(image.width, image.height)
)
caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
if concept_sentence:
caption_text = f"{caption_text} [trigger]"
captions[i] = caption_text
yield captions
# Clean up to free memory
model.to("cpu")
del model
del processor
torch.cuda.empty_cache()
except Exception as e:
print(f"Error in captioning: {e}")
raise gr.Error(f"Captioning failed: {str(e)}")
def update_pricing(steps):
"""Update estimated cost based on training steps"""
try:
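# Rough estimate: ~7.54 s per training step plus ~4 minutes of fixed overhead, billed at $0.80/hour for an L4 GPU (the dollar cost is computed for reference; only the time is shown).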
seconds_per_iteration = 7.54
total_seconds = (steps * seconds_per_iteration) + 240
cost_per_second = 0.80/60/60
cost = round(cost_per_second * total_seconds, 2)
cost_preview = f'''To train this LoRA, a paid L4 GPU will be used during training.
### Estimated to take <b>~{round(int(total_seconds)/60, 2)} minutes</b> with your current settings <small>({int(steps)} iterations)</small>'''
return gr.update(visible=True), cost_preview, gr.update(visible=False), gr.update(visible=True)
except Exception:
return gr.update(visible=False), "", gr.update(visible=False), gr.update(visible=True)
def run_training_process(config_path):
"""Run the actual training process"""
try:
# This is a simplified placeholder for the actual training code
# Instead of using the ai-toolkit which is causing errors, we'll implement our own training logic
# Call to a direct training script that doesn't require the problematic dependencies
script_path = os.path.join(os.getcwd(), "direct_train_lora.py")
with open(script_path, "w") as f:
f.write("""
import os
import sys
import yaml
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
import json
def train_lora(config_path):
# Load config
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
process_config = config['config']['process'][0]
# Get basic parameters
model_name = process_config['model']['name_or_path']
lora_rank = process_config['network']['linear']
steps = process_config['train']['steps']
lr = process_config['train']['lr']
dataset_path = process_config['datasets'][0]['folder_path']
repo_id = process_config['save']['hf_repo_id']
hf_token = process_config['save']['hf_token']
# Load dataset
dataset = []
with open(os.path.join(dataset_path, "metadata.jsonl"), 'r') as f:
for line in f:
data = json.loads(line)
image_path = os.path.join(dataset_path, data['file_name'])
prompt = data['prompt']
dataset.append({"image_path": image_path, "text": prompt})
# Load base model
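# NOTE: name_or_path points at a FLUX diffusion checkpoint, while AutoModelForCausalLM only fits text models;
# this generated script is the simplified text-LoRA placeholder, not a real FLUX LoRA trainer.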
print(f"Loading model {model_name}")
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map="auto",
trust_remote_code=True,
use_auth_token=hf_token
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
# Configure LoRA
lora_config = LoraConfig(
r=lora_rank,
lora_alpha=lora_rank,
target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM"
)
# Apply LoRA
model = get_peft_model(model, lora_config)
# Training parameters
training_args = TrainingArguments(
output_dir=f"./lora_train/{repo_id.split('/')[-1]}",
num_train_epochs=3,
per_device_train_batch_size=1,
gradient_accumulation_steps=4,
learning_rate=lr,
max_steps=steps,
fp16=True,
logging_steps=10,
save_steps=steps // 2,
push_to_hub=True,
hub_model_id=repo_id,
hub_token=hf_token,
)
# Simple dataset preparation: tokenize the prompt column so the collator below receives input_ids/attention_mask
def process_batch(examples):
return tokenizer(
examples["prompt"],
padding="max_length",
truncation=True,
max_length=256
)
# Convert dataset to huggingface format and tokenize it
train_dataset = load_dataset('json', data_files={'train': dataset_path + '/metadata.jsonl'})['train']
train_dataset = train_dataset.map(process_batch, batched=True, remove_columns=train_dataset.column_names)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
# Set up trainer; labels mirror input_ids so the causal-LM loss can be computed
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
'attention_mask': torch.stack([f['attention_mask'] for f in data]),
'labels': torch.stack([f['input_ids'] for f in data])},
)
# Train
print("Starting training...")
trainer.train()
# Save and push to hub
model.save_pretrained(f"./lora_final/{repo_id.split('/')[-1]}")
tokenizer.save_pretrained(f"./lora_final/{repo_id.split('/')[-1]}")
if process_config['save']['push_to_hub']:
model.push_to_hub(repo_id, use_auth_token=hf_token)
tokenizer.push_to_hub(repo_id, use_auth_token=hf_token)
print(f"Training completed! Model saved to {repo_id}")
return repo_id
if __name__ == "__main__":
if len(sys.argv) > 1:
train_lora(sys.argv[1])
else:
print("Please provide config path")
""")
result = subprocess.run([sys.executable, script_path, config_path],
capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
raise Exception(f"Training script failed: {result.stderr}")
# Extract repo ID from config
with open(config_path, "r") as f:
config = yaml.safe_load(f)
repo_id = config["config"]["process"][0]["save"]["hf_repo_id"]
return repo_id
except Exception as e:
raise Exception(f"Training process failed: {str(e)}")
def start_training(
lora_name,
concept_sentence,
which_model,
steps,
lr,
rank,
dataset_folder,
sample_1,
sample_2,
sample_3,
use_more_advanced_options,
more_advanced_options,
):
"""Start the LoRA training process"""
if not lora_name:
raise gr.Error("You forgot to insert your LoRA name! This name has to be unique.")
try:
username = whoami(token=HF_TOKEN)["name"]
except Exception:
raise gr.Error("Failed to get username. Please check your HF_TOKEN.")
print("Started training")
slugged_lora_name = slugify(lora_name)
# Get base config
config = get_default_train_config(lora_name, username, concept_sentence)
# Update config with form values
config["config"]["process"][0]["train"]["steps"] = int(steps)
config["config"]["process"][0]["train"]["lr"] = float(lr)
config["config"]["process"][0]["network"]["linear"] = int(rank)
config["config"]["process"][0]["network"]["linear_alpha"] = int(rank)
config["config"]["process"][0]["datasets"][0]["folder_path"] = dataset_folder
# Add sample prompts if provided
if sample_1 or sample_2 or sample_3:
config["config"]["process"][0]["sample"]["prompts"] = []
if sample_1:
config["config"]["process"][0]["sample"]["prompts"].append(sample_1)
if sample_2:
config["config"]["process"][0]["sample"]["prompts"].append(sample_2)
if sample_3:
config["config"]["process"][0]["sample"]["prompts"].append(sample_3)
else:
config["config"]["process"][0]["train"]["disable_sampling"] = True
# Apply advanced options if enabled
if use_more_advanced_options:
try:
more_advanced_options_dict = yaml.safe_load(more_advanced_options)
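# Deep-merge the user-supplied YAML over the defaults: nested dicts are merged key by key, scalars and lists are replaced outright.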
def recursive_update(d, u):
for k, v in u.items():
if isinstance(v, dict) and v:
d[k] = recursive_update(d.get(k, {}), v)
else:
d[k] = v
return d
config["config"]["process"][0] = recursive_update(config["config"]["process"][0], more_advanced_options_dict)
except Exception as e:
raise gr.Error(f"Error in advanced options: {str(e)}")
try:
# Save the config
os.makedirs("tmp", exist_ok=True)
config_path = f"tmp/{uuid.uuid4()}-{slugged_lora_name}.yaml"
with open(config_path, "w") as f:
yaml.dump(config, f)
# Run training process
repo_id = run_training_process(config_path)
return f"""# Training completed successfully!
## Your model is available at: <a href='https://huggingface.co/{repo_id}'>{repo_id}</a>"""
except Exception as e:
raise gr.Error(f"Training failed: {str(e)}")
# UI Theme and CSS
custom_theme = gr.themes.Base(
primary_hue="indigo",
secondary_hue="slate",
neutral_hue="slate",
).set(
background_fill_primary="#1a1a1a",
background_fill_secondary="#2d2d2d",
border_color_primary="#404040",
button_primary_background_fill="#4F46E5",
button_primary_background_fill_dark="#4338CA",
button_primary_background_fill_hover="#6366F1",
button_primary_border_color="#4F46E5",
button_primary_border_color_dark="#4338CA",
button_primary_text_color="white",
button_primary_text_color_dark="white",
button_secondary_background_fill="#374151",
button_secondary_background_fill_dark="#1F2937",
button_secondary_background_fill_hover="#4B5563",
button_secondary_text_color="white",
button_secondary_text_color_dark="white",
block_background_fill="#2d2d2d",
block_background_fill_dark="#1F2937",
block_label_background_fill="#4F46E5",
block_label_background_fill_dark="#4338CA",
block_label_text_color="white",
block_label_text_color_dark="white",
block_title_text_color="white",
block_title_text_color_dark="white",
input_background_fill="#374151",
input_background_fill_dark="#1F2937",
input_border_color="#4B5563",
input_border_color_dark="#374151",
input_placeholder_color="#9CA3AF",
input_placeholder_color_dark="#6B7280",
)
css = '''
/* Base styles */
h1 {
font-size: 2.5em;
text-align: center;
margin-bottom: 0.5em;
color: white !important;
}
h3 {
margin-top: 0;
font-size: 1.2em;
color: white !important;
}
/* Ensure all text is white */
.markdown, .markdown h1, .markdown h2, .markdown h3,
.markdown h4, .markdown h5, .markdown h6, .markdown p,
label, .label-text, .gradio-radio label span, .gradio-checkbox label span,
input, textarea, .gradio-textbox input, .gradio-textbox textarea,
.gradio-number input, select, option, button {
color: white !important;
}
/* Input style improvements */
input[type="text"], textarea, .input-text, .input-textarea {
background-color: #374151 !important;
border-color: #4B5563 !important;
color: white !important;
}
/* Button styling */
button {
transition: all 0.3s ease;
}
button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Image area */
.image-upload-area {
border: 2px dashed #4B5563;
border-radius: 12px;
padding: 20px;
text-align: center;
margin-bottom: 20px;
}
/* Caption rows */
.caption-row {
display: flex;
align-items: center;
margin-bottom: 10px;
gap: 10px;
}
'''
# Gradio UI
with gr.Blocks(theme=custom_theme, css=css) as demo:
gr.Markdown(
"""# ๐Ÿ†” Gini LoRA ํ•™์Šต
### 1) LoRA ์ด๋ฆ„ ์ž…๋ ฅ 2) ํŠธ๋ฆฌ๊ฑฐ ๋‹จ์–ด ์ž…๋ ฅ 3) ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ(2-30์žฅ ๊ถŒ์žฅ) 4) ๋น„์ „ ์ธ์‹ LLM ๋ผ๋ฒจ๋ง 5) START ํด๋ฆญ""",
elem_classes=["markdown"]
)
with gr.Tab("Train"):
with gr.Column():
# LoRA settings
with gr.Group():
with gr.Row():
lora_name = gr.Textbox(
label="LoRA ์ด๋ฆ„",
info="๊ณ ์œ ํ•œ ์ด๋ฆ„์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค",
placeholder="์˜ˆ: Persian Miniature Style, Cat Toy"
)
concept_sentence = gr.Textbox(
label="ํŠธ๋ฆฌ๊ฑฐ ๋‹จ์–ด/๋ฌธ์žฅ",
info="์‚ฌ์šฉํ•  ํŠธ๋ฆฌ๊ฑฐ ๋‹จ์–ด๋‚˜ ๋ฌธ์žฅ",
placeholder="p3rs0n์ด๋‚˜ trtcrd๊ฐ™์€ ํŠน์ดํ•œ ๋‹จ์–ด, ๋˜๋Š” 'in the style of CNSTLL'๊ฐ™์€ ๋ฌธ์žฅ"
)
model_warning = gr.Markdown(visible=False)
which_model = gr.Radio(
["๊ณ ํ€„๋ฆฌํ‹ฐ ๋งž์ถค ํ•™์Šต ๋ชจ๋ธ"],
label="๊ธฐ๋ณธ ๋ชจ๋ธ",
value="๊ณ ํ€„๋ฆฌํ‹ฐ ๋งž์ถค ํ•™์Šต ๋ชจ๋ธ"
)
# Image upload
with gr.Group(visible=True, elem_classes="image-upload-area") as image_upload:
with gr.Row():
images = gr.File(
file_types=["image", ".txt"],
label="Upload your images",
file_count="multiple",
interactive=True,
visible=True,
scale=1,
)
with gr.Column(scale=3, visible=False) as captioning_area:
with gr.Column():
gr.Markdown(
"""# ์ด๋ฏธ์ง€ ๋ผ๋ฒจ๋ง
<p style="margin-top:0"> ๋น„์ „์ธ์‹ LLM์ด ์ด๋ฏธ์ง€๋ฅผ ์ธ์‹ํ•˜์—ฌ ์ž๋™์œผ๋กœ ๋ผ๋ฒจ๋ง(์ด๋ฏธ์ง€ ์ธ์‹์„ ์œ„ํ•œ ํ•„์ˆ˜ ์„ค๋ช…). [trigger] 'ํŠธ๋ฆฌ๊ฑฐ ์›Œ๋“œ'๋Š” ํ•™์Šตํ•œ ๋ชจ๋ธ์„ ์‹คํ–‰ํ•˜๋Š” ๊ณ ์œ  ํ‚ค๊ฐ’</p>
""", elem_classes="group_padding")
do_captioning = gr.Button("Auto-label with vision LLM")
output_components = [captioning_area]
caption_list = []
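# Pre-create MAX_IMAGES hidden (row, image, caption) slots; load_captioning toggles their visibility and fills them in the same order.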
for i in range(1, MAX_IMAGES + 1):
locals()[f"captioning_row_{i}"] = gr.Row(visible=False)
with locals()[f"captioning_row_{i}"]:
locals()[f"image_{i}"] = gr.Image(
type="filepath",
width=111,
height=111,
min_width=111,
interactive=False,
scale=2,
show_label=False,
show_share_button=False,
show_download_button=False,
)
locals()[f"caption_{i}"] = gr.Textbox(
label=f"Caption {i}", scale=15, interactive=True
)
output_components.append(locals()[f"captioning_row_{i}"])
output_components.append(locals()[f"image_{i}"])
output_components.append(locals()[f"caption_{i}"])
caption_list.append(locals()[f"caption_{i}"])
# Advanced settings
with gr.Accordion("Advanced options", open=False):
steps = gr.Number(label="Steps", value=1000, minimum=1, maximum=10000, step=1)
lr = gr.Number(label="Learning Rate", value=4e-4, minimum=1e-6, maximum=1e-3, step=1e-6)
rank = gr.Number(label="LoRA Rank", value=16, minimum=4, maximum=128, step=4)
with gr.Accordion("Even more advanced options", open=False):
use_more_advanced_options = gr.Checkbox(label="Use more advanced options", value=False)
more_advanced_options = gr.Code(
value="""
device: cuda:0
model:
is_flux: true
quantize: true
network:
linear: 16
linear_alpha: 16
type: lora
sample:
guidance_scale: 3.5
height: 1024
neg: ''
sample_steps: 28
sampler: flowmatch
seed: 42
walk_seed: true
width: 1024
save:
dtype: float16
hf_private: true
max_step_saves_to_keep: 4
push_to_hub: true
save_every: 10000
train:
batch_size: 1
dtype: bf16
ema_config:
ema_decay: 0.99
use_ema: true
gradient_accumulation_steps: 1
gradient_checkpointing: true
noise_scheduler: flowmatch
optimizer: adamw8bit
train_text_encoder: false
train_unet: true
""",
language="yaml"
)
# Sample prompts
with gr.Accordion("Sample prompts (optional)", visible=False) as sample:
gr.Markdown(
"Include sample prompts to test out your trained model. Don't forget to include your trigger word/sentence (optional)"
)
sample_1 = gr.Textbox(label="Test prompt 1")
sample_2 = gr.Textbox(label="Test prompt 2")
sample_3 = gr.Textbox(label="Test prompt 3")
# Cost estimate
with gr.Group(visible=False) as cost_preview:
cost_preview_info = gr.Markdown(elem_id="cost_preview_info", elem_classes="group_padding")
payment_update = gr.Button("I have set up a payment method", visible=False)
# Add the sample prompt components to the captioning outputs
output_components.append(sample)
output_components.append(sample_1)
output_components.append(sample_2)
output_components.append(sample_3)
# Start button
start = gr.Button("START ํด๋ฆญ ('์•ฝ 15-20๋ถ„ ํ›„ ํ•™์Šต์ด ์ข…๋ฃŒ๋˜๊ณ  ์™„๋ฃŒ ๋ฉ”์‹œ์ง€๊ฐ€ ์ถœ๋ ฅ๋ฉ๋‹ˆ๋‹ค')", visible=False)
# Progress status
progress_area = gr.Markdown("")
# State: holds the dataset folder path passed from create_dataset to start_training
dataset_folder = gr.State()
# Event bindings
images.upload(
load_captioning,
inputs=[images, concept_sentence],
outputs=output_components
).then(
update_pricing,
inputs=[steps],
outputs=[cost_preview, cost_preview_info, payment_update, start]
)
images.clear(
hide_captioning,
outputs=[captioning_area, cost_preview, sample, start]
)
images.delete(
load_captioning,
inputs=[images, concept_sentence],
outputs=output_components
).then(
update_pricing,
inputs=[steps],
outputs=[cost_preview, cost_preview_info, payment_update, start]
)
steps.change(
update_pricing,
inputs=[steps],
outputs=[cost_preview, cost_preview_info, payment_update, start]
)
start.click(
fn=create_dataset,
inputs=[images] + caption_list,
outputs=dataset_folder
).then(
fn=start_training,
inputs=[
lora_name,
concept_sentence,
which_model,
steps,
lr,
rank,
dataset_folder,
sample_1,
sample_2,
sample_3,
use_more_advanced_options,
more_advanced_options
],
outputs=progress_area,
)
do_captioning.click(
fn=run_captioning,
inputs=[images, concept_sentence] + caption_list,
outputs=caption_list
)
# Launch the app
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860, auth=("gini", "pick"), show_error=True)