# (Hugging Face web page residue — commented out so the file is valid Python)
# taoki's picture
# Upload app.py
# dd99792 verified
# raw
# history blame
# 4.35 kB
# coding=utf-8
#
# Copyright 2024 Toshihiko Aoki
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os.path
import torch
from diffusers import AutoPipelineForText2Image, LCMScheduler
from llama_cpp import Llama
import gradio as gr
# Command-line configuration; `args` is read by the rest of the module.
parser = argparse.ArgumentParser(description='Japanese translation and hallucinations for SD')
parser.add_argument('--gguf_path',
                    type=str,
                    default=None,
                    help='load gguf filepath')
parser.add_argument('--sd_model_name',
                    type=str,
                    default="segmind/SSD-1B",
                    help='sd model HF name')
parser.add_argument('--sd_adapter_name',
                    type=str,
                    default="latent-consistency/lcm-lora-ssd-1b",
                    help='sd lora adaptor HF name')
parser.add_argument('--cpu',
                    action='store_true',
                    help='force use cpu (intel).')
parser.add_argument('--share',
                    action='store_true',
                    # Fixed: help text was copy-pasted from --cpu ("force use cpu.")
                    help='launch gradio with a public share link.')
parser.add_argument('--openvino_path',
                    type=str,
                    default=None,
                    # Fixed typo: "openvio" -> "openvino"
                    help='load openvino model filepath')
args = parser.parse_args()
# Module-level configuration derived from the CLI flags above.
llm_model_path = args.gguf_path        # None -> default gguf is downloaded below
sd_model_name = args.sd_model_name
sd_adapter_name = args.sd_adapter_name
# CUDA is used only when available AND not explicitly disabled via --cpu.
use_cuda = torch.cuda.is_available() and not args.cpu
# Generation defaults. NOTE(review): width/height are defined but not passed
# to the pipeline in prompt2img below — confirm whether that is intentional.
width = 512
height = 512
num_inference_steps = 4                # LCM-style pipelines need very few steps
guidance_scale = 1.0
# Build the text-to-image pipeline: OpenVINO LCM engine on CPU,
# diffusers AutoPipeline (+ optional LCM LoRA) otherwise.
if args.cpu:
    if args.openvino_path is None:
        openvino_dir = './sd-1.5-lcm-openvino'
        if not os.path.exists(openvino_dir):
            from huggingface_hub import snapshot_download
            download_folder = snapshot_download(repo_id="Intel/sd-1.5-lcm-openvino")
            import shutil
            # BUG FIX: the destination previously had a stray trailing quote
            # ("./sd-1.5-lcm-openvino'"), so the snapshot was copied to a
            # directory that never matched the path assigned below.
            shutil.copytree(download_folder, openvino_dir)
        args.openvino_path = openvino_dir
    from openvino_pipe import LatentConsistencyEngine
    pipe = LatentConsistencyEngine(
        args.openvino_path
    )
else:
    pipe = AutoPipelineForText2Image.from_pretrained(
        sd_model_name,
        # fp16 only makes sense on GPU; keep fp32 for CPU execution.
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    )
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    if use_cuda:
        pipe.enable_xformers_memory_efficient_attention()
        # NOTE(review): enable_model_cpu_offload() and an explicit .to("cuda")
        # are normally mutually exclusive in diffusers — confirm which is intended.
        pipe.enable_model_cpu_offload()
        pipe.to("cuda")
    if sd_adapter_name is not None:
        pipe.load_lora_weights(sd_adapter_name)
        if use_cuda:
            pipe.fuse_lora()
# Load the translation LLM; fetch the default gguf when none was supplied.
if llm_model_path is None:
    from huggingface_hub import hf_hub_download
    llm_model_path = hf_hub_download(
        repo_id="taoki/llm-jp-1.3b-v1.0-staircaptions-FT",
        filename="llm-jp-1.3b-v1.0_staircaptions-FT_Q4_K_S.gguf",
    )
# NOTE(review): -1 conventionally means "offload all layers" in llama-cpp-python;
# presumably a no-op on a CPU-only build — confirm the CPU-path intent.
gpu_layer_count = 25 if use_cuda else -1
llm = Llama(
    model_path=llm_model_path,
    n_gpu_layers=gpu_layer_count,
)
def ja2prompt(ja_prompt):
    """Turn a Japanese caption into an SD prompt using the local gguf LLM.

    Returns the raw completion text of the first choice.
    """
    instruction = f"### Instruction:\n{ja_prompt}\n### Response:\n"
    completion = llm(instruction, max_tokens=128)
    return completion['choices'][0]['text']
def prompt2img(sd_prompt):
    """Generate one image for *sd_prompt* with the module-level pipeline.

    Consistency fix: pass the configured module-level ``guidance_scale``
    (same value, 1.0) instead of a duplicated literal.
    """
    return pipe(
        sd_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
# Gradio UI: Japanese text -> [倉換] -> SD prompt -> [η”Ÿζˆ] -> image.
with gr.Blocks(title="tiny sd web-ui") as demo:
    # Fixed: dropped the pointless f-prefix (no placeholders in the literal).
    gr.Markdown("## Japanese translation and hallucinations for Stable Diffusion")
    with gr.Row():
        with gr.Column(scale=3):
            ja = gr.Text(label="ζ—₯本θͺž")        # Japanese input
            translate = gr.Button("倉換")       # "Convert" (translate)
            prompt = gr.Text(label="γƒ—γƒ­γƒ³γƒ—γƒˆ")  # generated SD prompt
        with gr.Column(scale=2):
            result = gr.Image()
            t2i = gr.Button("η”Ÿζˆ")             # "Generate"
    translate.click(ja2prompt, ja, prompt)
    t2i.click(prompt2img, prompt, result)
if args.share:
    # Public share link; bind on all interfaces so the tunnel can reach it.
    demo.launch(share=True, server_name="0.0.0.0")
else:
    demo.launch()