prithivMLmods committed
Commit 78be7e8 · verified · 1 Parent(s): f4bb0af

Update app.py

Files changed (1)
  1. app.py +200 -242
app.py CHANGED
@@ -4,6 +4,7 @@ import uuid
 import time
 import asyncio
 from threading import Thread
 
 import gradio as gr
 import spaces
@@ -12,6 +13,7 @@ import numpy as np
 from PIL import Image
 import cv2
 
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
@@ -20,179 +22,26 @@ from transformers import (
     AutoProcessor,
 )
 from transformers.image_utils import load_image
-from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
 # ---------------------------
-# Global Settings & Utilities
 # ---------------------------
-
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-def save_image(img: Image.Image) -> str:
-    """Save a PIL image with a unique filename and return the path."""
-    unique_name = str(uuid.uuid4()) + ".png"
-    img.save(unique_name)
-    return unique_name
-
-def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-    MAX_SEED = np.iinfo(np.int32).max
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    return seed
-
-def progress_bar_html(label: str) -> str:
-    """Returns an HTML snippet for a thin progress bar with a label."""
-    return f'''
-    <div style="display: flex; align-items: center;">
-        <span style="margin-right: 10px; font-size: 14px;">{label}</span>
-        <div style="width: 110px; height: 5px; background-color: #FFF0F5; border-radius: 2px; overflow: hidden;">
-            <div style="width: 100%; height: 100%; background-color: #FF69B4; animation: loading 1.5s linear infinite;"></div>
-        </div>
-    </div>
-    <style>
-    @keyframes loading {{
-        0% {{ transform: translateX(-100%); }}
-        100% {{ transform: translateX(100%); }}
-    }}
-    </style>
-    '''
-
-# Helper function for the chat interface
-def apply_chat_template_for_text(conversation, add_generation_prompt=True):
-    """
-    Concatenates a conversation (list of dict with keys "role" and "content")
-    into a single string prompt. If add_generation_prompt is True, appends "assistant:".
-    """
-    prompt = ""
-    for msg in conversation:
-        prompt += f"{msg['role']}: {msg['content']}\n"
-    if add_generation_prompt:
-        prompt += "assistant:"
-    return prompt
-
-def clean_chat_history(chat_history):
-    """
-    Filter out any chat entries whose "content" is not a string.
-    """
-    cleaned = []
-    for msg in chat_history:
-        if isinstance(msg, dict) and isinstance(msg.get("content"), str):
-            cleaned.append(msg)
-    return cleaned
 
 # ---------------------------
-# 1. Chat Interface Tab
 # ---------------------------
-# Uses a text-only model: DeepHermes-3-Llama-3-3B-Preview-abliterated
 
-model_id_text = "prithivMLmods/DeepHermes-3-Llama-3-3B-Preview-abliterated"
-tokenizer = AutoTokenizer.from_pretrained(model_id_text)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id_text,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-)
-model.eval()
-
-@spaces.GPU
-def chat_generate(input_text: str, chat_history: list, max_new_tokens: int, temperature: float, top_p: float, top_k: int, repetition_penalty: float):
-    """
-    Chat generation using a text-only model.
-    """
-    # Prepare conversation by cleaning history and appending the new user message.
-    conversation = clean_chat_history(chat_history)
-    conversation.append({"role": "user", "content": input_text})
-
-    # Instead of tokenizer.apply_chat_template, we use our helper to generate a prompt.
-    prompt_text = apply_chat_template_for_text(conversation, add_generation_prompt=True)
-    input_ids = tokenizer(prompt_text, return_tensors="pt").input_ids
-    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-    input_ids = input_ids.to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = {
-        "input_ids": input_ids,
-        "streamer": streamer,
-        "max_new_tokens": max_new_tokens,
-        "do_sample": True,
-        "top_p": top_p,
-        "top_k": top_k,
-        "temperature": temperature,
-        "num_beams": 1,
-        "repetition_penalty": repetition_penalty,
-    }
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    outputs = []
-    # Collect the generated text from the streamer.
-    for new_text in streamer:
-        outputs.append(new_text)
-    final_response = "".join(outputs)
-    # Append assistant reply to conversation.
-    updated_history = conversation + [{"role": "assistant", "content": final_response}]
-    return final_response, updated_history
-
-# ---------------------------
-# 2. Qwen 2 VL OCR Tab
-# ---------------------------
-# Uses Qwen2VL OCR model for multimodal input (text + image)
-
-MODEL_ID_QWEN = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
-processor = AutoProcessor.from_pretrained(MODEL_ID_QWEN, trust_remote_code=True)
-model_m = Qwen2VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_QWEN,
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to("cuda").eval()
-
-@spaces.GPU
-def generate_qwen_ocr(input_text: str, image):
-    """
-    Uses the Qwen2VL OCR model to process an image along with text.
-    """
-    if image is None:
-        return "No image provided."
-    # Build message with system and user content.
-    messages = [
-        {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
-        {"role": "user", "content": [{"type": "text", "text": input_text}, {"type": "image", "image": image}]}
-    ]
-    # Use the processor's chat template.
-    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to("cuda")
-    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = {
-        **inputs,
-        "streamer": streamer,
-        "max_new_tokens": DEFAULT_MAX_NEW_TOKENS,
-        "do_sample": True,
-        "temperature": 0.6,
-        "top_p": 0.9,
-        "top_k": 50,
-        "repetition_penalty": 1.2,
-    }
-    thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
-    thread.start()
-    outputs = []
-    for new_text in streamer:
-        outputs.append(new_text.replace("<|im_end|>", ""))
-    final_response = "".join(outputs)
-    return final_response
-
-# ---------------------------
-# 3. Image Gen LoRA Tab
-# ---------------------------
-# Uses the SDXL pipeline with LoRA options.
-
-MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # set your SDXL model path via env variable
-MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
-USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
-ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
-BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))
 
 sd_pipe = StableDiffusionXLPipeline.from_pretrained(
     MODEL_ID_SD,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
@@ -202,12 +51,26 @@ sd_pipe = StableDiffusionXLPipeline.from_pretrained(
 sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
 if torch.cuda.is_available():
     sd_pipe.text_encoder = sd_pipe.text_encoder.half()
 if USE_TORCH_COMPILE:
     sd_pipe.compile()
 if ENABLE_CPU_OFFLOAD:
     sd_pipe.enable_model_cpu_offload()
 
-# LoRA options dictionary.
 LORA_OPTIONS = {
     "Realism (face/character)👦🏻": ("prithivMLmods/Canopus-Realism-LoRA", "Canopus-Realism-LoRA.safetensors", "rlms"),
     "Pixar (art/toons)🙀": ("prithivMLmods/Canopus-Pixar-Art", "Canopus-Pixar-Art.safetensors", "pixar"),
@@ -224,7 +87,6 @@ LORA_OPTIONS = {
     "Art Minimalistic (paint/semireal)🎨": ("prithivMLmods/Canopus-Art-Medium-LoRA", "Canopus-Art-Medium-LoRA.safetensors", "mdm"),
 }
 
-# Style options.
 style_list = [
     {
         "name": "3840 x 2160",
@@ -251,102 +113,198 @@ styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 DEFAULT_STYLE_NAME = "3840 x 2160"
 STYLE_NAMES = list(styles.keys())
 
-def apply_style(style_name: str, positive: str, negative: str = ""):
     if style_name in styles:
         p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
     else:
         p, n = styles[DEFAULT_STYLE_NAME]
-    return p.replace("{prompt}", positive), n + (negative if negative else "")
 
-@spaces.GPU
-def generate_image_lora(prompt: str, negative_prompt: str, use_negative_prompt: bool, seed: int, width: int, height: int, guidance_scale: float, randomize_seed: bool, style_name: str, lora_model: str):
     seed = int(randomize_seed_fn(seed, randomize_seed))
     positive_prompt, effective_negative_prompt = apply_style(style_name, prompt, negative_prompt)
     if not use_negative_prompt:
         effective_negative_prompt = ""
-    # Set the desired LoRA adapter.
     model_name, weight_name, adapter_name = LORA_OPTIONS[lora_model]
-    sd_pipe.set_adapters(adapter_name)
-    # Generate image(s)
-    options = {
-        "prompt": [positive_prompt],
-        "negative_prompt": [effective_negative_prompt],
-        "width": width,
-        "height": height,
-        "guidance_scale": guidance_scale,
-        "num_inference_steps": 20,
-        "num_images_per_prompt": 1,
-        "cross_attention_kwargs": {"scale": 0.65},
-        "output_type": "pil",
-    }
-    outputs = sd_pipe(**options)
-    images = outputs.images
-    image_paths = [save_image(img) for img in images]
     return image_paths, seed
 
 # ---------------------------
-# Build Gradio Interface with Three Tabs
 # ---------------------------
-with gr.Blocks(css=".gradio-container {max-width: 900px; margin: auto;}") as demo:
-    gr.Markdown("## Multi-Functional Demo: Chat Interface | Qwen 2 VL OCR | Image Gen LoRA")
-
-    with gr.Tabs():
-        # Tab 1: Chat Interface
-        with gr.Tab("Chat Interface"):
-            chat_output = gr.Chatbot(label="Chat Conversation")
-            with gr.Row():
-                chat_inp = gr.Textbox(label="Enter your message", placeholder="Type your message here...", lines=2)
-                send_btn = gr.Button("Send")
-            with gr.Row():
-                max_tokens_slider = gr.Slider(label="Max New Tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
-                temperature_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
-                top_p_slider = gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
-                top_k_slider = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
-                rep_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
-            state = gr.State([])
-
-            def chat_step(user_message, history, max_tokens, temp, top_p, top_k, rep_penalty):
-                response, updated_history = chat_generate(user_message, history, max_tokens, temp, top_p, top_k, rep_penalty)
-                return updated_history, updated_history
-
-            send_btn.click(chat_step,
-                           inputs=[chat_inp, state, max_tokens_slider, temperature_slider, top_p_slider, top_k_slider, rep_penalty_slider],
-                           outputs=[chat_output, state])
-            chat_inp.submit(chat_step,
-                            inputs=[chat_inp, state, max_tokens_slider, temperature_slider, top_p_slider, top_k_slider, rep_penalty_slider],
-                            outputs=[chat_output, state])
-
-        # Tab 2: Qwen 2 VL OCR
-        with gr.Tab("Qwen 2 VL OCR"):
-            gr.Markdown("Upload an image and enter a prompt. The model will return OCR/extraction or descriptive text from the image.")
-            ocr_inp = gr.Textbox(label="Enter prompt", placeholder="Describe what you want to extract...", lines=2)
-            image_inp = gr.Image(label="Upload Image", type="pil")
-            ocr_output = gr.Textbox(label="Output", placeholder="Model output will appear here...", lines=5)
-            ocr_btn = gr.Button("Run Qwen 2 VL OCR")
-            ocr_btn.click(generate_qwen_ocr, inputs=[ocr_inp, image_inp], outputs=ocr_output)
-
-        # Tab 3: Image Gen LoRA
-        with gr.Tab("Image Gen LoRA"):
-            gr.Markdown("Generate images with SDXL using various LoRA models and quality styles.")
-            with gr.Row():
-                prompt_img = gr.Textbox(label="Prompt", placeholder="Enter prompt for image generation...", lines=2)
-                negative_prompt_img = gr.Textbox(label="Negative Prompt", placeholder="(optional) negative prompt", lines=2)
-                use_neg_checkbox = gr.Checkbox(label="Use Negative Prompt", value=True)
-            with gr.Row():
-                seed_slider = gr.Slider(label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, value=0)
-                randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
-            with gr.Row():
-                width_slider = gr.Slider(label="Width", minimum=512, maximum=2048, step=8, value=1024)
-                height_slider = gr.Slider(label="Height", minimum=512, maximum=2048, step=8, value=1024)
-            guidance_slider = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=20.0, step=0.1, value=3.0)
-            style_radio = gr.Radio(label="Quality Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
-            lora_dropdown = gr.Dropdown(label="LoRA Selection", choices=list(LORA_OPTIONS.keys()), value="Realism (face/character)👦🏻")
-            img_output = gr.Gallery(label="Generated Images", columns=1, preview=True)
-            seed_output = gr.Number(label="Used Seed")
-            run_img_btn = gr.Button("Generate Image")
-            run_img_btn.click(generate_image_lora,
-                              inputs=[prompt_img, negative_prompt_img, use_neg_checkbox, seed_slider, width_slider, height_slider, guidance_slider, randomize_seed_checkbox, style_radio, lora_dropdown],
-                              outputs=[img_output, seed_output])
-
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=True)
 
 import time
 import asyncio
 from threading import Thread
+from typing import Tuple
 
 import gradio as gr
 import spaces
 from PIL import Image
 import cv2
 
+from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     AutoProcessor,
 )
 from transformers.image_utils import load_image
 
 # ---------------------------
+# Global Settings and Devices
 # ---------------------------
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+MAX_SEED = np.iinfo(np.int32).max
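+# MAX_SEED caps the seed slider in the UI; randomize_seed_fn() below samples from [0, MAX_SEED].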
 
 # ---------------------------
+# IMAGE GEN LORA TAB: SDXL Gen with LoRA Options
 # ---------------------------
 
+# Resolve the SDXL checkpoint (env override with a default fallback)
+MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # Path from env variable
+if MODEL_ID_SD is None:
+    MODEL_ID_SD = "SG161222/RealVisXL_V4.0_Lightning"  # default fallback
 
+# Load SDXL pipeline (use GPU if available)
 sd_pipe = StableDiffusionXLPipeline.from_pretrained(
     MODEL_ID_SD,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
 if torch.cuda.is_available():
     sd_pipe.text_encoder = sd_pipe.text_encoder.half()
+
+# Optional: compile or offload if desired
+USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
+ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 if USE_TORCH_COMPILE:
     sd_pipe.compile()
 if ENABLE_CPU_OFFLOAD:
     sd_pipe.enable_model_cpu_offload()
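+# Note: StableDiffusionXLPipeline exposes no .compile() method; if USE_TORCH_COMPILE is set,
+# the usual pattern is sd_pipe.unet = torch.compile(sd_pipe.unet, mode="reduce-overhead", fullgraph=True).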
 
+def save_image(img: Image.Image) -> str:
+    unique_name = str(uuid.uuid4()) + ".png"
+    img.save(unique_name)
+    return unique_name
+
+def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    return seed
+
+# LoRA options and style definitions
 LORA_OPTIONS = {
     "Realism (face/character)👦🏻": ("prithivMLmods/Canopus-Realism-LoRA", "Canopus-Realism-LoRA.safetensors", "rlms"),
     "Pixar (art/toons)🙀": ("prithivMLmods/Canopus-Pixar-Art", "Canopus-Pixar-Art.safetensors", "pixar"),
     "Art Minimalistic (paint/semireal)🎨": ("prithivMLmods/Canopus-Art-Medium-LoRA", "Canopus-Art-Medium-LoRA.safetensors", "mdm"),
 }
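+# Each LORA_OPTIONS value is (Hub repo id, weights filename, adapter name); generate_image_lora() unpacks it in that order.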
 
 style_list = [
     {
         "name": "3840 x 2160",
 
 DEFAULT_STYLE_NAME = "3840 x 2160"
 STYLE_NAMES = list(styles.keys())
 
+def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
     if style_name in styles:
         p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
     else:
         p, n = styles[DEFAULT_STYLE_NAME]
+    return p.replace("{prompt}", positive), n + negative
 
+@spaces.GPU(duration=180, enable_queue=True)
+def generate_image_lora(
+    prompt: str,
+    negative_prompt: str = "",
+    use_negative_prompt: bool = True,
+    seed: int = 0,
+    width: int = 1024,
+    height: int = 1024,
+    guidance_scale: float = 3,
+    randomize_seed: bool = False,
+    style_name: str = DEFAULT_STYLE_NAME,
+    lora_model: str = "Realism (face/character)👦🏻",
+    progress=gr.Progress(track_tqdm=True),
+):
     seed = int(randomize_seed_fn(seed, randomize_seed))
     positive_prompt, effective_negative_prompt = apply_style(style_name, prompt, negative_prompt)
     if not use_negative_prompt:
         effective_negative_prompt = ""
+    # Set LoRA adapter based on selection
     model_name, weight_name, adapter_name = LORA_OPTIONS[lora_model]
+    sd_pipe.load_lora_weights(model_name, weight_name=weight_name, adapter_name=adapter_name)
+    sd_pipe.to(device)
+
+    outputs = sd_pipe(
+        prompt=positive_prompt,
+        negative_prompt=effective_negative_prompt,
+        width=width,
+        height=height,
+        guidance_scale=guidance_scale,
+        num_inference_steps=20,
+        num_images_per_prompt=1,
+        cross_attention_kwargs={"scale": 0.65},
+        output_type="pil",
+    )
+    image_paths = [save_image(img) for img in outputs.images]
     return image_paths, seed
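+# Quick smoke test outside the UI (hypothetical prompt), e.g.:
+#   paths, used_seed = generate_image_lora("portrait photo, golden hour", randomize_seed=True)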
 
 # ---------------------------
+# Qwen 2 VL OCR TAB
 # ---------------------------
+MODEL_ID_QWEN = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+processor = AutoProcessor.from_pretrained(MODEL_ID_QWEN, trust_remote_code=True)
+model_m = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_QWEN,
+    trust_remote_code=True,
+    torch_dtype=torch.float16
+).to("cuda" if torch.cuda.is_available() else "cpu").eval()
+
+@spaces.GPU
+def qwen2vl_ocr_generate(
+    prompt: str,
+    file: list,
+    max_new_tokens: int = 1024,
+    temperature: float = 0.6,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+):
+    # This tab assumes the user supplies one or more images for OCR.
+    images = []
+    if file:
+        # Load the image(s) using the transformers helper.
+        for f in file:
+            images.append(load_image(f))
+    # Build the chat message: all images first, then the text prompt.
+    messages = [{
+        "role": "user",
+        "content": [
+            *[{"type": "image", "image": image} for image in images],
+            {"type": "text", "text": prompt},
+        ]
+    }]
+    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = processor(text=[prompt_full], images=images if images else None, return_tensors="pt", padding=True).to("cuda" if torch.cuda.is_available() else "cpu")
+    # Use non-streaming generation for simplicity
+    output_ids = model_m.generate(
+        **inputs,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+    )
+    # Decode only the newly generated tokens, not the echoed prompt.
+    new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
+    final_response = processor.tokenizer.decode(new_tokens[0], skip_special_tokens=True)
+    return final_response
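+# load_image() accepts local paths or URLs, so the function also works outside the UI, e.g.:
+#   qwen2vl_ocr_generate("Extract all text from the image.", ["sample.png"])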
 
+# ---------------------------
+# CHAT INTERFACE TAB (Text-only)
+# ---------------------------
+# Load text-only model and tokenizer
+model_id_text = "prithivMLmods/FastThink-0.5B-Tiny"
+tokenizer = AutoTokenizer.from_pretrained(model_id_text)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id_text,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model.eval()
 
+def chat_generate(prompt: str, max_new_tokens: int = 1024, temperature: float = 0.6,
+                  top_p: float = 0.9, top_k: int = 50, repetition_penalty: float = 1.2):
+    # For simplicity, use basic generation without streaming.
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+    input_ids = input_ids.to(model.device)
+    output_ids = model.generate(
+        input_ids=input_ids,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+    )
+    # Return only the newly generated tokens, not the echoed prompt.
+    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+    return response
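+# Note: the prompt is passed to the tokenizer as plain text; for chat-tuned checkpoints one
+# would usually wrap it first, e.g.:
+#   prompt = tokenizer.apply_chat_template([{"role": "user", "content": user_text}],
+#                                          tokenize=False, add_generation_prompt=True)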
 
+# ---------------------------
+# GRADIO INTERFACE WITH TABS
+# ---------------------------
+with gr.Blocks(title="Multi-Modal Playground") as demo:
+    gr.Markdown("# Multi-Modal Playground")
+
+    with gr.Tab("Image Gen LoRA"):
+        gr.Markdown("## Generate Images using SDXL + LoRA")
+        with gr.Row():
+            prompt_img = gr.Textbox(label="Prompt", placeholder="Enter your image prompt here")
+            negative_prompt_img = gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt (optional)", lines=2)
+        with gr.Row():
+            use_negative = gr.Checkbox(label="Use Negative Prompt", value=True)
+            seed_img = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+        with gr.Row():
+            width_img = gr.Slider(label="Width", minimum=512, maximum=2048, step=8, value=1024)
+            height_img = gr.Slider(label="Height", minimum=512, maximum=2048, step=8, value=1024)
+        with gr.Row():
+            guidance_scale_img = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=20.0, step=0.1, value=3.0)
+        with gr.Row():
+            style_selection = gr.Radio(choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, label="Quality Style")
+            lora_selection = gr.Dropdown(choices=list(LORA_OPTIONS.keys()), value="Realism (face/character)👦🏻", label="LoRA Selection")
+        run_img = gr.Button("Generate Image")
+        gallery = gr.Gallery(label="Generated Images", columns=1)  # .style() was removed in Gradio 4; layout kwargs go on the component
+        output_seed = gr.Number(label="Seed Used")
+        run_img.click(
+            generate_image_lora,
+            inputs=[prompt_img, negative_prompt_img, use_negative, seed_img, width_img, height_img, guidance_scale_img,
+                    randomize_seed, style_selection, lora_selection],
+            outputs=[gallery, output_seed]
+        )
+
+    with gr.Tab("Qwen 2 VL OCR"):
+        gr.Markdown("## Extract and Generate Text from Images (OCR)")
+        with gr.Row():
+            prompt_ocr = gr.Textbox(label="OCR Prompt", placeholder="Enter instructions for OCR/text extraction")
+            file_ocr = gr.File(label="Upload Image", file_types=["image"], file_count="multiple")
+        run_ocr = gr.Button("Run OCR")
+        output_ocr = gr.Textbox(label="OCR Output")
+        run_ocr.click(
+            qwen2vl_ocr_generate,
+            inputs=[prompt_ocr, file_ocr],
+            outputs=output_ocr
+        )
+
+    with gr.Tab("Chat Interface"):
+        gr.Markdown("## Chat with the Text-Only Model")
+        chat_input = gr.Textbox(label="Enter your message", placeholder="Say something...")
+        max_tokens_chat = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
+        temperature_chat = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
+        top_p_chat = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
+        top_k_chat = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
+        rep_penalty_chat = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
+        run_chat = gr.Button("Send")
+        chat_output = gr.Textbox(label="Response")
+        run_chat.click(
+            chat_generate,
+            inputs=[chat_input, max_tokens_chat, temperature_chat, top_p_chat, top_k_chat, rep_penalty_chat],
+            outputs=chat_output
+        )
+
+    gr.Markdown("**Adjust parameters in each tab as needed.**")
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=True)