ginipick committed
Commit e005050 · verified · 1 Parent(s): 759747e

Update app.py

Files changed (1):
  app.py +60 -37
app.py CHANGED
@@ -4,7 +4,7 @@ import json
 import os
 import re
 from datetime import datetime
-from huggingface_hub import InferenceClient
+from openai import OpenAI
 import subprocess
 import torch
 from PIL import Image
@@ -13,8 +13,8 @@ from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
 # The install step can be skipped; run it only when needed
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-# Set up the Hugging Face token
-huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+# Initialize the OpenAI client
+client = OpenAI()
 
 # Add a translation model
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
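
The bare `OpenAI()` constructor reads its key from the `OPENAI_API_KEY` environment variable, so the Space now needs that secret in place of the removed `HUGGINGFACE_TOKEN`. A minimal sketch of an equivalent explicit setup (the fail-fast guard is a suggestion, not part of the commit):

```python
import os
from openai import OpenAI

# OpenAI() with no arguments falls back to the OPENAI_API_KEY env var;
# passing api_key explicitly makes the dependency visible and easy to check.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY secret for this Space")

client = OpenAI(api_key=api_key)
```
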
@@ -314,19 +314,14 @@ class PromptGenerator:
             return f"{prompt}, {caption}"
         return prompt
 
-class HuggingFaceInferenceNode:
+class OpenAIGenerationNode:
     def __init__(self):
-        self.clients = {
-            "Mixtral": InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"),
-            "Mistral": InferenceClient("mistralai/Mistral-7B-Instruct-v0.3"),
-            "Llama 3": InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct"),
-            "Mistral-Nemo": InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
-        }
+        self.client = OpenAI()
         self.prompts_dir = "./prompts"
         os.makedirs(self.prompts_dir, exist_ok=True)
 
     def save_prompt(self, prompt):
-        filename_text = "hf_" + prompt.split(',')[0].strip()
+        filename_text = "openai_" + prompt.split(',')[0].strip()
         filename_text = re.sub(r'[^\w\-_\. ]', '_', filename_text)
         filename_text = filename_text[:30]
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
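
For reference, the renamed `save_prompt` derives its filename stem like this (illustrative input; the regex and 30-character slice are the ones above):

```python
import re

prompt = "cinematic portrait: neon alley, rain"
stem = "openai_" + prompt.split(',')[0].strip()  # "openai_cinematic portrait: neon alley"
stem = re.sub(r'[^\w\-_\. ]', '_', stem)[:30]    # ':' becomes '_', then truncate
print(stem)  # openai_cinematic portrait_ neo
```
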
@@ -340,7 +335,13 @@ class HuggingFaceInferenceNode:
 
     def generate(self, model, input_text, happy_talk, compress, compression_level, poster, custom_base_prompt=""):
         try:
-            client = self.clients[model]
+            # OpenAI serves different models, so map the UI names appropriately
+            model_mapping = {
+                "Mixtral": "gpt-4", "Mistral": "gpt-4-turbo",
+                "Llama 3": "gpt-4.1-mini", "Mistral-Nemo": "gpt-4-turbo"
+            }
+
+            openai_model = model_mapping.get(model, "gpt-4.1-mini")  # default is gpt-4.1-mini
 
             default_happy_prompt = """Create a detailed visually descriptive caption of this description, which will be used as a prompt for a text to image AI system (caption only, no instructions like "create an image"). Remove any mention of digital artwork or artwork style. Give detailed visual descriptions of the character(s), including ethnicity, skin tone, expression etc. Imagine using keywords for a still for someone who has aphantasia. Describe the image style, e.g. any photographic or art styles / techniques utilized. Make sure to fully describe all aspects of the cinematography, with abundant technical details and visual descriptions. If there is more than one image, combine the elements and characters from all of the images creatively into a single cohesive composition with a single background, inventing an interaction between the characters. Be creative in combining the characters into a single cohesive scene. Focus on two primary characters (or one) and describe an interesting interaction between them, such as a hug, a kiss, a fight, giving an object, an emotional reaction / interaction. If there is more than one background in the images, pick the most appropriate one. Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph. If you feel the images are inappropriate, invent a new scene / characters inspired by these. Additionally, incorporate a specific movie director's visual style and describe the lighting setup in detail, including the type, color, and placement of light sources to create the desired mood and atmosphere. Always frame the scene, including details about the film grain, color grading, and any artifacts or characteristics specific."""
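
Because the lookup uses `dict.get` with a default, any dropdown value missing from `model_mapping` silently falls back to `gpt-4.1-mini`:

```python
model_mapping = {
    "Mixtral": "gpt-4",
    "Mistral": "gpt-4-turbo",
    "Llama 3": "gpt-4.1-mini",
    "Mistral-Nemo": "gpt-4-turbo",
}

print(model_mapping.get("Mixtral", "gpt-4.1-mini"))    # gpt-4
print(model_mapping.get("unknown", "gpt-4.1-mini"))    # gpt-4.1-mini (fallback)
```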
 
@@ -372,24 +373,46 @@ You are allowed to make up film and branding names, and do them like 80's, 90's
                 char_limit = compression_chars[compression_level]
                 base_prompt += f" Compress the output to be concise while retaining key visual details. MAX OUTPUT SIZE no more than {char_limit} characters."
 
-            messages = f"system\nYou are a helpful assistant. Try your best to give best response possible to user."
-            messages += f"\nuser\n{base_prompt}\nDescription: {input_text}\nassistant\n"
-
-            stream = client.text_generation(messages, max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False)
-            output = ""
-            for response in stream:
-                if not response.token.text == "":
-                    output += response.token.text
-
-            # Remove specific tokens based on the model
-            if model == "Llama 3":
-                output = output.rstrip("")
-            elif model == "Mistral":
-                output = output.rstrip("</s>")
-            elif model == "Mistral-Nemo":
-                output = output.rstrip("</s>")
+            # Use the OpenAI Responses API request format
+            response = self.client.responses.create(
+                model=openai_model,
+                input=[
+                    {
+                        "role": "system",
+                        "content": [
+                            {
+                                "type": "input_text",
+                                "text": "You are a helpful assistant. Try your best to give best response possible to user."
+                            }
+                        ]
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "input_text",
+                                "text": f"{base_prompt}\nDescription: {input_text}"
+                            }
+                        ]
+                    }
+                ],
+                text={
+                    "format": {
+                        "type": "text"
+                    }
+                },
+                reasoning={},
+                tools=[],
+                temperature=1,
+                max_output_tokens=2048,
+                top_p=1,
+                store=True
+            )
 
-            # Clean up the output
+            # Extract the response text (Response objects expose output_text)
+            output = response.output_text
+
+            # Clean up the output
             if ": " in output:
                 output = output.split(": ", 1)[1].strip()
             elif output.lower().startswith("here"):
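
Reduced to a standalone sketch, the new request looks like this (model name and prompt text are placeholders; `output_text` is the SDK's convenience property for the concatenated text of a Response):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Minimal Responses API call mirroring the structure used above
response = client.responses.create(
    model="gpt-4.1-mini",  # placeholder; the app passes the mapped model here
    input=[
        {"role": "system",
         "content": [{"type": "input_text", "text": "You are a helpful assistant."}]},
        {"role": "user",
         "content": [{"type": "input_text", "text": "Describe a rainy street at night."}]},
    ],
    max_output_tokens=256,
)

# output_text joins all text items in response.output;
# Response objects have no .content attribute.
print(response.output_text)
```
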
@@ -411,7 +434,7 @@ footer {
 
 def create_interface():
     prompt_generator = PromptGenerator()
-    huggingface_node = HuggingFaceInferenceNode()
+    openai_node = OpenAIGenerationNode()
 
     with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
 
@@ -473,14 +496,15 @@ def create_interface():
                 clip_g_output = gr.Textbox(label="CLIP G Output", visible=True)
 
             with gr.Column(scale=2):
-                with gr.Accordion("Prompt Generation with LLM", open=False):
-                    model = gr.Dropdown(["Mixtral", "Mistral", "Llama 3", "Mistral-Nemo"], label="Model", value="Llama 3")
+                with gr.Accordion("Prompt Generation with OpenAI", open=True):
+                    model = gr.Dropdown(["Mixtral", "Mistral", "Llama 3", "Mistral-Nemo"], label="Model Type", value="Llama 3")
+                    gr.HTML("<small>Note: Mixtral → gpt-4, Mistral → gpt-4-turbo, Llama 3 → gpt-4.1-mini, Mistral-Nemo → gpt-4-turbo</small>")
                     happy_talk = gr.Checkbox(label="Happy Talk", value=True)
                     compress = gr.Checkbox(label="Compress", value=True)
                     compression_level = gr.Radio(["soft", "medium", "hard"], label="Compression Level", value="hard")
                     poster = gr.Checkbox(label="Poster", value=False)
                     custom_base_prompt = gr.Textbox(label="Custom Base Prompt", lines=5)
-                    generate_text_button = gr.Button("Generate Prompt with LLM")
+                    generate_text_button = gr.Button("Generate Prompt with OpenAI")
                     text_output = gr.Textbox(label="Generated Text", lines=10)
 
             def create_caption(image):
@@ -498,7 +522,7 @@ def create_interface():
             prompt_generator.generate_prompt,
             inputs=[seed, custom, subject, artform, photo_type, body_types, default_tags, roles, hairstyles,
                     additional_details, photography_styles, device, photographer, artist, digital_artform,
-                    place, lighting, clothing, composition, pose, background],
+                    place, lighting, clothing, composition, pose, background, input_image],
             outputs=[output, gr.Number(visible=False), t5xxl_output, clip_l_output, clip_g_output]
         )
 
@@ -509,7 +533,7 @@ def create_interface():
         )
 
         generate_text_button.click(
-            huggingface_node.generate,
+            openai_node.generate,
             inputs=[model, output, happy_talk, compress, compression_level, poster, custom_base_prompt],
             outputs=text_output
         )
@@ -551,5 +575,4 @@ def create_interface():
 
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch()
-
+    demo.launch()
 
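A quick way to exercise the new node outside the Gradio UI (a hypothetical smoke test; it assumes app.py is importable from the working directory and that OPENAI_API_KEY is set):

```python
# Hypothetical smoke test for OpenAIGenerationNode; argument values are illustrative.
from app import OpenAIGenerationNode

node = OpenAIGenerationNode()
text = node.generate(
    model="Llama 3",  # mapped to "gpt-4.1-mini" by model_mapping
    input_text="a lighthouse at dawn, long exposure",
    happy_talk=True,
    compress=True,
    compression_level="hard",
    poster=False,
)
print(text)
```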