prithivMLmods committed on
Commit 71b21b4 · verified
1 Parent(s): 7f0b259

Update app.py

Files changed (1)
  1. app.py +345 -285
app.py CHANGED
@@ -1,285 +1,345 @@
- import os
- import random
- import uuid
- from typing import Tuple
- import gradio as gr
- import numpy as np
- from PIL import Image
- import spaces
- import torch
- from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-
- def save_image(img):
-     unique_name = str(uuid.uuid4()) + ".png"
-     img.save(unique_name)
-     return unique_name
-
- def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     return seed
-
- MAX_SEED = np.iinfo(np.int32).max
- USE_TORCH_COMPILE = 0
- ENABLE_CPU_OFFLOAD = 0
-
- if torch.cuda.is_available():
-     pipe = StableDiffusionXLPipeline.from_pretrained(
-         "SG161222/RealVisXL_V4.0_Lightning",
-         torch_dtype=torch.float16,
-         use_safetensors=True,
-     )
-     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
-
-     LORA_OPTIONS = {
-         "Realism (face/character)👦🏻": ("prithivMLmods/Canopus-Realism-LoRA", "Canopus-Realism-LoRA.safetensors", "rlms"),
-         "Pixar (art/toons)🙀": ("prithivMLmods/Canopus-Pixar-Art", "Canopus-Pixar-Art.safetensors", "pixar"),
-         "Photoshoot (camera/film)📸": ("prithivMLmods/Canopus-Photo-Shoot-Mini-LoRA", "Canopus-Photo-Shoot-Mini-LoRA.safetensors", "photo"),
-         "Clothing (hoodies/pant/shirts)👔": ("prithivMLmods/Canopus-Clothing-Adp-LoRA", "Canopus-Dress-Clothing-LoRA.safetensors", "clth"),
-         "Interior Architecture (house/hotel)🏠": ("prithivMLmods/Canopus-Interior-Architecture-0.1", "Canopus-Interior-Architecture-0.1δ.safetensors", "arch"),
-         "Fashion Product (wearing/usable)👜": ("prithivMLmods/Canopus-Fashion-Product-Dilation", "Canopus-Fashion-Product-Dilation.safetensors", "fashion"),
-         "Minimalistic Image (minimal/detailed)🏞️": ("prithivMLmods/Pegasi-Minimalist-Image-Style", "Pegasi-Minimalist-Image-Style.safetensors", "minimalist"),
-         "Modern Clothing (trend/new)👕": ("prithivMLmods/Canopus-Modern-Clothing-Design", "Canopus-Modern-Clothing-Design.safetensors", "mdrnclth"),
-         "Animaliea (farm/wild)🫎": ("prithivMLmods/Canopus-Animaliea-Artism", "Canopus-Animaliea-Artism.safetensors", "Animaliea"),
-         "Liquid Wallpaper (minimal/illustration)🖼️": ("prithivMLmods/Canopus-Liquid-Wallpaper-Art", "Canopus-Liquid-Wallpaper-Minimalize-LoRA.safetensors", "liquid"),
-         "Canes Cars (realistic/futurecars)🚘": ("prithivMLmods/Canes-Cars-Model-LoRA", "Canes-Cars-Model-LoRA.safetensors", "car"),
-         "Pencil Art (characteristic/creative)✏️": ("prithivMLmods/Canopus-Pencil-Art-LoRA", "Canopus-Pencil-Art-LoRA.safetensors", "Pencil Art"),
-         "Art Minimalistic (paint/semireal)🎨": ("prithivMLmods/Canopus-Art-Medium-LoRA", "Canopus-Art-Medium-LoRA.safetensors", "mdm"),
-
-     }
-
-     for model_name, weight_name, adapter_name in LORA_OPTIONS.values():
-         pipe.load_lora_weights(model_name, weight_name=weight_name, adapter_name=adapter_name)
-     pipe.to("cuda")
-
- style_list = [
-     {
-         "name": "3840 x 2160",
-         "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "2560 x 1440",
-         "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "HD+",
-         "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
-         "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
-     },
-     {
-         "name": "Style Zero",
-         "prompt": "{prompt}",
-         "negative_prompt": "",
-     },
- ]
-
- styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
-
- DEFAULT_STYLE_NAME = "3840 x 2160"
- STYLE_NAMES = list(styles.keys())
-
- def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
-     if style_name in styles:
-         p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-     else:
-         p, n = styles[DEFAULT_STYLE_NAME]
-
-     if not negative:
-         negative = ""
-     return p.replace("{prompt}", positive), n + negative
-
- @spaces.GPU(duration=180, enable_queue=True)
- def generate(
-     prompt: str,
-     negative_prompt: str = "",
-     use_negative_prompt: bool = False,
-     seed: int = 0,
-     width: int = 1024,
-     height: int = 1024,
-     guidance_scale: float = 3,
-     randomize_seed: bool = False,
-     style_name: str = DEFAULT_STYLE_NAME,
-     lora_model: str = "Realism (face/character)👦🏻",
-     progress=gr.Progress(track_tqdm=True),
- ):
-     seed = int(randomize_seed_fn(seed, randomize_seed))
-
-     positive_prompt, effective_negative_prompt = apply_style(style_name, prompt, negative_prompt)
-
-     if not use_negative_prompt:
-         effective_negative_prompt = ""  # type: ignore
-
-     model_name, weight_name, adapter_name = LORA_OPTIONS[lora_model]
-     pipe.set_adapters(adapter_name)
-
-     images = pipe(
-         prompt=positive_prompt,
-         negative_prompt=effective_negative_prompt,
-         width=width,
-         height=height,
-         guidance_scale=guidance_scale,
-         num_inference_steps=20,
-         num_images_per_prompt=1,
-         cross_attention_kwargs={"scale": 0.65},
-         output_type="pil",
-     ).images
-     image_paths = [save_image(img) for img in images]
-     return image_paths, seed
-
- examples = [
-     "realism, man in the style of dark beige and brown, uhd image, youthful protagonists, nonrepresentational",
-     "pixar, a young man with light brown wavy hair and light brown eyes sitting in an armchair and looking directly at the camera, pixar style, disney pixar, office background",
-     "hoodie, front view, capture a urban style, superman hoodie, technical materials, fabric small point label on text blue theory, with a raised collar, fabric is a light yellow, low angle to capture the hoodies form and detailing, f/5.6 to focus on the hoodies craftsmanship, solid grey background, studio light setting, with batman logo.",
- ]
-
-
- css = '''
- .gradio-container{max-width: 888px !important}
- h1{text-align:center}
- .submit-btn {
-   background-color: #ecde2c !important;
-   color: white !important;
- }
- .submit-btn:hover {
-   background-color: #ffec00 !important;
- }
- '''
-
- def load_predefined_images():
-     predefined_images = [
-         "assets/1.png",
-         "assets/2.png",
-         "assets/3.png",
-         "assets/4.png",
-         "assets/5.png",
-         "assets/6.png",
-         "assets/7.png",
-         "assets/8.png",
-         "assets/9.png",
-     ]
-     return predefined_images
-
- with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
-     with gr.Row():
-         with gr.Column(scale=1):
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt with resp. tag!",
-                 container=False,
-             )
-             run_button = gr.Button("Generate as (1024 x 1024)🎃", scale=0, elem_classes="submit-btn")
-
-             with gr.Row(visible=True):
-                 model_choice = gr.Dropdown(
-                     label="LoRA Selection",
-                     choices=list(LORA_OPTIONS.keys()),
-                     value="Realism (face/character)👦🏻")
-
-             with gr.Accordion("Advanced options", open=True):
-                 use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True, visible=True)
-                 negative_prompt = gr.Text(
-                     label="Negative prompt",
-                     lines=4,
-                     max_lines=6,
-                     value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
-                     placeholder="Enter a negative prompt",
-                     visible=True,
-                 )
-                 with gr.Row():
-                     seed = gr.Slider(
-                         label="Seed",
-                         minimum=0,
-                         maximum=MAX_SEED,
-                         step=1,
-                         value=0,
-                         visible=True
-                     )
-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                 with gr.Row(visible=True):
-                     width = gr.Slider(
-                         label="Width",
-                         minimum=512,
-                         maximum=2048,
-                         step=8,
-                         value=1024,
-                     )
-                     height = gr.Slider(
-                         label="Height",
-                         minimum=512,
-                         maximum=2048,
-                         step=8,
-                         value=1024,
-                     )
-
-                 guidance_scale = gr.Slider(
-                     label="Guidance Scale",
-                     minimum=0.1,
-                     maximum=20.0,
-                     step=0.1,
-                     value=3.0,
-                 )
-
-                 style_selection = gr.Radio(
-                     show_label=True,
-                     container=True,
-                     interactive=True,
-                     choices=STYLE_NAMES,
-                     value=DEFAULT_STYLE_NAME,
-                     label="Quality Style",
-                 )
-
-         with gr.Column(scale=2):
-             result = gr.Gallery(label="Result", columns=1, preview=True, show_label=False)
-
-             gr.Examples(
-                 examples=examples,
-                 inputs=prompt,
-                 outputs=[result, seed],
-                 fn=generate,
-                 cache_examples=False,
-             )
-
-             predefined_gallery = gr.Gallery(
-                 label="Image Gallery",
-                 columns=3,
-                 show_label=False,
-                 value=load_predefined_images()
-             )
-
-     use_negative_prompt.change(
-         fn=lambda x: gr.update(visible=x),
-         inputs=use_negative_prompt,
-         outputs=negative_prompt,
-         api_name=False,
-     )
-
-     gr.on(
-         triggers=[
-             prompt.submit,
-             negative_prompt.submit,
-             run_button.click,
-         ],
-         fn=generate,
-         inputs=[
-             prompt,
-             negative_prompt,
-             use_negative_prompt,
-             seed,
-             width,
-             height,
-             guidance_scale,
-             randomize_seed,
-             style_selection,
-             model_choice,
-         ],
-         outputs=[result, seed],
-         api_name="run",
-     )
-
- if __name__ == "__main__":
-     demo.queue(max_size=30).launch()
+ import os
+ import random
+ import uuid
+ import json
+ import time
+ import asyncio
+ from threading import Thread
+
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from PIL import Image
+ import edge_tts
+
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     TextIteratorStreamer,
+     Qwen2VLForConditionalGeneration,
+     AutoProcessor,
+ )
+ from transformers.image_utils import load_image
+ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+
+ DESCRIPTION = """
+ # Gen Vision 💬
+ """
+
+ css = '''
+ h1 {
+   text-align: center;
+   display: block;
+ }
+
+ #duplicate-button {
+   margin: auto;
+   color: #fff;
+   background: #1565c0;
+   border-radius: 100vh;
+ }
+ '''
+
+ MAX_MAX_NEW_TOKENS = 2048
+ DEFAULT_MAX_NEW_TOKENS = 1024
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
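+ # Note: the models below place themselves (device_map="auto" / .to("cuda")).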
+
+ # ------------------------------
+ # Text Generation Models & TTS
+ # ------------------------------
+
+ # Load text-only model and tokenizer for text generation
+ model_id = "prithivMLmods/FastThink-0.5B-Tiny"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.bfloat16,
+ )
+ model.eval()
+
+ TTS_VOICES = [
+     "en-US-JennyNeural",  # @tts1
+     "en-US-GuyNeural",    # @tts2
+ ]
+
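+ # Load the Qwen2-VL OCR model and processor for multimodal (image + text) chat.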
+ MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+ processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
+     MODEL_ID,
+     trust_remote_code=True,
+     torch_dtype=torch.float16
+ ).to("cuda").eval()
+
+ async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
+     """Convert text to speech using Edge TTS and save as MP3."""
+     communicate = edge_tts.Communicate(text, voice)
+     await communicate.save(output_file)
+     return output_file
+
+ def clean_chat_history(chat_history):
+     """
+     Filter out any chat entries whose "content" is not a string.
+     This helps prevent errors when concatenating previous messages.
+     """
+     cleaned = []
+     for msg in chat_history:
+         if isinstance(msg, dict) and isinstance(msg.get("content"), str):
+             cleaned.append(msg)
+     return cleaned
+
+ # ------------------------------
+ # New Image Generation Pipeline
+ # ------------------------------
+
+ MAX_SEED = np.iinfo(np.int32).max
+ USE_TORCH_COMPILE = False
+ ENABLE_CPU_OFFLOAD = False
+
+ if torch.cuda.is_available():
+     pipe = StableDiffusionXLPipeline.from_pretrained(
+         "SG161222/RealVisXL_V4.0_Lightning",
+         torch_dtype=torch.float16,
+         use_safetensors=True,
+     )
+     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+     # LoRA options with one example for each.
+     LORA_OPTIONS = {
+         "Realism": ("prithivMLmods/Canopus-Realism-LoRA", "Canopus-Realism-LoRA.safetensors", "rlms"),
+         "Pixar": ("prithivMLmods/Canopus-Pixar-Art", "Canopus-Pixar-Art.safetensors", "pixar"),
+         "Photoshoot": ("prithivMLmods/Canopus-Photo-Shoot-Mini-LoRA", "Canopus-Photo-Shoot-Mini-LoRA.safetensors", "photo"),
+         "Clothing": ("prithivMLmods/Canopus-Clothing-Adp-LoRA", "Canopus-Dress-Clothing-LoRA.safetensors", "clth"),
+         "Interior": ("prithivMLmods/Canopus-Interior-Architecture-0.1", "Canopus-Interior-Architecture-0.1δ.safetensors", "arch"),
+         "Fashion": ("prithivMLmods/Canopus-Fashion-Product-Dilation", "Canopus-Fashion-Product-Dilation.safetensors", "fashion"),
+         "Minimalistic": ("prithivMLmods/Pegasi-Minimalist-Image-Style", "Pegasi-Minimalist-Image-Style.safetensors", "minimalist"),
+         "Modern": ("prithivMLmods/Canopus-Modern-Clothing-Design", "Canopus-Modern-Clothing-Design.safetensors", "mdrnclth"),
+         "Animaliea": ("prithivMLmods/Canopus-Animaliea-Artism", "Canopus-Animaliea-Artism.safetensors", "Animaliea"),
+         "Wallpaper": ("prithivMLmods/Canopus-Liquid-Wallpaper-Art", "Canopus-Liquid-Wallpaper-Minimalize-LoRA.safetensors", "liquid"),
+         "Cars": ("prithivMLmods/Canes-Cars-Model-LoRA", "Canes-Cars-Model-LoRA.safetensors", "car"),
+         "PencilArt": ("prithivMLmods/Canopus-Pencil-Art-LoRA", "Canopus-Pencil-Art-LoRA.safetensors", "Pencil Art"),
+         "ArtMinimalistic": ("prithivMLmods/Canopus-Art-Medium-LoRA", "Canopus-Art-Medium-LoRA.safetensors", "mdm"),
+     }
+
+     # Load all LoRA weights
+     for model_name, weight_name, adapter_name in LORA_OPTIONS.values():
+         pipe.load_lora_weights(model_name, weight_name=weight_name, adapter_name=adapter_name)
+     pipe.to("cuda")
+
+ def save_image(img: Image.Image) -> str:
+     """Save a PIL image with a unique filename and return the path."""
+     unique_name = str(uuid.uuid4()) + ".png"
+     img.save(unique_name)
+     return unique_name
+
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed
+
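+ # Allow up to 180 seconds of GPU time for a single image-generation call.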
+ @spaces.GPU(duration=180, enable_queue=True)
+ def generate_image(
+     prompt: str,
+     negative_prompt: str = "",
+     seed: int = 0,
+     width: int = 1024,
+     height: int = 1024,
+     guidance_scale: float = 3.0,
+     randomize_seed: bool = True,
+     lora_model: str = "Realism",
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     seed = int(randomize_seed_fn(seed, randomize_seed))
+     effective_negative_prompt = negative_prompt  # Use provided negative prompt if any
+     model_name, weight_name, adapter_name = LORA_OPTIONS[lora_model]
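+     # Activate only the LoRA adapter that matches the requested style.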
+     pipe.set_adapters(adapter_name)
+     outputs = pipe(
+         prompt=prompt,
+         negative_prompt=effective_negative_prompt,
+         width=width,
+         height=height,
+         guidance_scale=guidance_scale,
+         num_inference_steps=20,
+         num_images_per_prompt=1,
+         cross_attention_kwargs={"scale": 0.65},
+         output_type="pil",
+     )
+     images = outputs.images
+     image_paths = [save_image(img) for img in images]
+     return image_paths, seed
+
+ # ------------------------------
+ # QwQ Edge Chat Interface
+ # ------------------------------
+
+ @spaces.GPU
+ def generate(
+     input_dict: dict,
+     chat_history: list[dict],
+     max_new_tokens: int = 1024,
+     temperature: float = 0.6,
+     top_p: float = 0.9,
+     top_k: int = 50,
+     repetition_penalty: float = 1.2,
+ ):
+     """
+     Generates chatbot responses with support for multimodal input, TTS, and image generation.
+     Special commands:
+     - "@tts1" or "@tts2": triggers text-to-speech.
+     - "@<lora_command>": triggers image generation using the new LoRA pipeline.
+       Available commands (case-insensitive): @realism, @pixar, @photoshoot, @clothing, @interior, @fashion,
+       @minimalistic, @modern, @animaliea, @wallpaper, @cars, @pencilart, @artminimalistic.
+     """
+     text = input_dict["text"]
+     files = input_dict.get("files", [])
+
+     # Check for image generation command based on LoRA tags.
+     # Build a mapping with lowercase keys.
+     lora_mapping = {key.lower(): key for key in LORA_OPTIONS}
+     for key_lower, key in lora_mapping.items():
+         command_tag = "@" + key_lower
+         if text.strip().lower().startswith(command_tag):
+             prompt_text = text.strip()[len(command_tag):].strip()
+             yield f"Generating image with {key} style..."
+             image_paths, used_seed = generate_image(
+                 prompt=prompt_text,
+                 negative_prompt="",
+                 seed=1,
+                 width=1024,
+                 height=1024,
+                 guidance_scale=3,
+                 randomize_seed=True,
+                 lora_model=key,
+             )
+             yield gr.Image(image_paths[0])
+             return
+
+     # Check for TTS command (@tts1 or @tts2)
+     tts_prefix = "@tts"
+     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
+     voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
+
+     if is_tts and voice_index:
+         voice = TTS_VOICES[voice_index - 1]
+         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
+         # Clear previous chat history for a fresh TTS request.
+         conversation = [{"role": "user", "content": text}]
+     else:
+         voice = None
+         # Remove any stray @tts tags and build the conversation history.
+         text = text.replace(tts_prefix, "").strip()
+         conversation = clean_chat_history(chat_history)
+         conversation.append({"role": "user", "content": text})
+
+     if files:
+         if len(files) > 1:
+             images = [load_image(image) for image in files]
+         elif len(files) == 1:
+             images = [load_image(files[0])]
+         else:
+             images = []
+         messages = [{
+             "role": "user",
+             "content": [
+                 *[{"type": "image", "image": image} for image in images],
+                 {"type": "text", "text": text},
+             ]
+         }]
+         prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+         inputs = processor(text=[prompt], images=images, return_tensors="pt", padding=True).to("cuda")
+         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+         generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
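+         # Run generation on a background thread; the streamer yields partial text as it is produced.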
+         thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
+         thread.start()
+
+         buffer = ""
+         yield "Thinking..."
+         for new_text in streamer:
+             buffer += new_text
+             buffer = buffer.replace("<|im_end|>", "")
+             time.sleep(0.01)
+             yield buffer
+     else:
+         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+             gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+         input_ids = input_ids.to(model.device)
+         streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+         generation_kwargs = {
+             "input_ids": input_ids,
+             "streamer": streamer,
+             "max_new_tokens": max_new_tokens,
+             "do_sample": True,
+             "top_p": top_p,
+             "top_k": top_k,
+             "temperature": temperature,
+             "num_beams": 1,
+             "repetition_penalty": repetition_penalty,
+         }
+         t = Thread(target=model.generate, kwargs=generation_kwargs)
+         t.start()
+
+         outputs = []
+         for new_text in streamer:
+             outputs.append(new_text)
+             yield "".join(outputs)
+
+         final_response = "".join(outputs)
+         yield final_response
+
+         # If TTS was requested, convert the final response to speech.
+         if is_tts and voice:
+             output_file = asyncio.run(text_to_speech(final_response, voice))
+             yield gr.Audio(output_file, autoplay=True)
+
+ # ------------------------------
+ # Sample Examples
+ # ------------------------------
+
+ # The examples include a text generation example, two TTS examples, and one sample for each LoRA command.
+ examples = [
+     ["Python Program for Array Rotation"],
+     ["@tts1 Who is Nikola Tesla, and why did he die?"],
+     ["@realism A futuristic cityscape with neon lights"],
+     ["@pixar A whimsical scene featuring a playful robot in a vibrant setting"],
+     ["@photoshoot A portrait of a person with dramatic lighting"],
+     ["@clothing Fashionable streetwear in an urban environment"],
+     ["@interior A modern living room interior with minimalist design"],
+     ["@fashion A runway model in haute couture"],
+     ["@minimalistic A simple and elegant design of a serene landscape"],
+     ["@modern A contemporary art piece with abstract geometric shapes"],
+     ["@animaliea A cute animal portrait with vibrant colors"],
+     ["@wallpaper A scenic mountain range perfect for a desktop wallpaper"],
+     ["@cars A sleek sports car cruising on a city street"],
+     ["@pencilart A detailed pencil sketch of a historic building"],
+     ["@artminimalistic An artistic minimalist composition with subtle tones"],
+     ["@tts2 What causes rainbows to form?"],
+ ]
+
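+ # Wire everything into a single multimodal ChatInterface; the sliders expose the text-decoding parameters.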
+ demo = gr.ChatInterface(
+     fn=generate,
+     additional_inputs=[
+         gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
+         gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
+         gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
+         gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
+         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
+     ],
+     examples=examples,
+     cache_examples=False,
+     type="messages",
+     description=DESCRIPTION,
+     css=css,
+     fill_height=True,
+     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
+     stop_btn="Stop Generation",
+     multimodal=True,
+ )
+
+ if __name__ == "__main__":
+     # To create a public link, set share=True in launch().
+     demo.queue(max_size=20).launch(share=True)
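
For reference, a minimal standalone sketch of the "@<lora_command>" dispatch that generate() implements: the leading tag selects the LoRA style and the rest of the message becomes the image prompt. The three-key LORA_OPTIONS stub below is a hypothetical stand-in for the full dict in app.py; only the keys matter here.

LORA_OPTIONS = {"Realism": None, "Pixar": None, "PencilArt": None}  # stub: app.py maps each key to a LoRA repo

def parse_lora_command(text: str):
    """Return (lora_key, prompt) if text starts with a known @tag, else None."""
    lora_mapping = {key.lower(): key for key in LORA_OPTIONS}
    stripped = text.strip()
    for key_lower, key in lora_mapping.items():
        command_tag = "@" + key_lower
        if stripped.lower().startswith(command_tag):
            return key, stripped[len(command_tag):].strip()
    return None

print(parse_lora_command("@pixar A playful robot"))  # -> ('Pixar', 'A playful robot')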