linoyts HF Staff committed on
Commit
38fcaae
·
verified ·
1 Parent(s): 9d1698f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -53
app.py CHANGED
@@ -12,7 +12,7 @@ from huggingface_hub import InferenceClient
12
  import math
13
 
14
  # --- Prompt Enhancement using Hugging Face InferenceClient ---
15
- def polish_prompt_hf(original_prompt, system_prompt):
16
  """
17
  Rewrites the prompt using a Hugging Face InferenceClient.
18
  """
@@ -25,19 +25,52 @@ def polish_prompt_hf(original_prompt, system_prompt):
25
  try:
26
  # Initialize the client
27
  client = InferenceClient(
28
- provider="cerebras",
29
  api_key=api_key,
30
  )
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Format the messages for the chat completions API
33
  messages = [
34
  {"role": "system", "content": system_prompt},
35
- {"role": "user", "content": original_prompt}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ]
37
 
 
38
  # Call the API
39
  completion = client.chat.completions.create(
40
- model="Qwen/Qwen3-235B-A22B-Instruct-2507",
41
  messages=messages,
42
  )
43
 
@@ -70,58 +103,96 @@ def polish_prompt(prompt, img):
70
  Main function to polish prompts for image editing using HF inference.
71
  """
72
  SYSTEM_PROMPT = '''
73
- # Edit Instruction Rewriter
74
- You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
 
75
 
76
  Please strictly follow the rewriting rules below:
77
 
78
  ## 1. General Principles
79
- - Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
80
- - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
81
- - Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
82
- - All added objects or modifications must align with the logic and style of the edited input image's overall scene.
83
-
84
- ## 2. Task Type Handling Rules
85
- ### 1. Add, Delete, Replace Tasks
86
- - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
87
- - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
88
- > Original: "Add an animal"
89
- > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
90
- - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
91
- - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
92
-
93
- ### 2. Text Editing Tasks
94
- - All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
95
- - **For text replacement tasks, always use the fixed template:**
96
- - Replace "xx" to "yy".
97
- - Replace the xx bounding box to "yy".
98
- - If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
99
- > Original: "Add a line of text" (poster)
100
- > Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
101
- - Specify text position, color, and layout in a concise way.
102
-
103
- ### 3. Human Editing Tasks
104
- - Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
105
- - If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
106
- - **For expression changes, they must be natural and subtle, never exaggerated.**
107
- - If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
108
- - For background change tasks, emphasize maintaining subject consistency at first.
109
- - Example:
110
- > Original: "Change the person's hat"
111
- > Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"
112
-
113
- ### 4. Style Transformation or Enhancement Tasks
114
- - If a style is specified, describe it concisely with key visual traits. For example:
115
- > Original: "Disco style"
116
- > Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
117
- - If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
118
- - **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
119
- - If there are other changes, place the style description at the end.
120
-
121
- ## 3. Rationality and Logic Checks
122
- - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
123
- - Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  # Output Format
126
  Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
127
  '''
@@ -130,8 +201,7 @@ Return only the rewritten instruction text directly, without JSON formatting or
130
  # but keeping the interface consistent
131
  full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
132
 
133
- return polish_prompt_hf(full_prompt, SYSTEM_PROMPT)
134
-
135
  # --- Model Loading ---
136
  dtype = torch.bfloat16
137
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
12
  import math
13
 
14
  # --- Prompt Enhancement using Hugging Face InferenceClient ---
15
+ def polish_prompt_hf(original_prompt, system_prompt, img):
16
  """
17
  Rewrites the prompt using a Hugging Face InferenceClient.
18
  """
 
25
  try:
26
  # Initialize the client
27
  client = InferenceClient(
28
+ provider="nebius",
29
  api_key=api_key,
30
  )
31
 
32
+ # Convert PIL Image to base64 data URL
33
+ image_url = None
34
+ if img is not None:
35
+ # If img is a PIL Image
36
+ if hasattr(img, 'save'): # Check if it's a PIL Image
37
+ buffered = BytesIO()
38
+ img.save(buffered, format="PNG")
39
+ img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
40
+ image_url = f"data:image/png;base64,{img_base64}"
41
+ # If img is already a file path (string)
42
+ elif isinstance(img, str):
43
+ with open(img, "rb") as image_file:
44
+ img_base64 = base64.b64encode(image_file.read()).decode('utf-8')
45
+ image_url = f"data:image/png;base64,{img_base64}"
46
+ else:
47
+ print(f"Warning: Unexpected image type: {type(img)}")
48
+ return original_prompt
49
+
50
  # Format the messages for the chat completions API
51
  messages = [
52
  {"role": "system", "content": system_prompt},
53
+ {
54
+ "role": "user",
55
+ "content": [
56
+ {
57
+ "type": "text",
58
+ "text": original_prompt
59
+ },
60
+ {
61
+ "type": "image_url",
62
+ "image_url": {
63
+ "url": image_url
64
+ }
65
+ }
66
+ ]
67
+ }
68
  ]
69
 
70
+
71
  # Call the API
72
  completion = client.chat.completions.create(
73
+ model="Qwen/Qwen2.5-VL-72B-Instruct",
74
  messages=messages,
75
  )
76
 
 
103
  Main function to polish prompts for image editing using HF inference.
104
  """
105
  SYSTEM_PROMPT = '''
106
+ # Lighting Edit Instruction Rewriter
107
+
108
+ You are a professional lighting edit instruction rewriter. Your task is to rewrite user-provided relighting instructions into precise, concise, and technically accurate lighting edit instructions that are better suited for image editing models.
109
 
110
  Please strictly follow the rewriting rules below:
111
 
112
  ## 1. General Principles
113
+ - **Rewrite the input instruction** to be **concise and technically specific**. Use professional lighting terminology.
114
+ - If the original instruction is contradictory, vague, or technically unfeasible, rewrite it to prioritize physically realistic lighting corrections.
115
+ - Preserve the core intention of the original instruction while enhancing technical accuracy and visual feasibility.
116
+ - All lighting modifications must maintain realistic physics and natural light behavior.
117
+ - **Preserve subject integrity**: Keep facial features, clothing, pose, and other non-lighting elements unchanged unless specifically requested in the original instruction.
118
+
119
+ ## 2. Lighting Task Categories
120
+
121
+ ### 1. Light Direction and Positioning
122
+ - **Specify precise direction**: front-lit, back-lit, side-lit (left/right), top-lit, bottom-lit, three-quarter lighting
123
+ - **Include angle details**: 45-degree side lighting, overhead lighting, low-angle dramatic lighting
124
+ - **For vague instructions like "better lighting"**: analyze current lighting issues and specify improvement (e.g., "Add soft front lighting to reduce harsh shadows on face")
125
+
126
+ ### 2. Light Quality and Characteristics
127
+ - **Hard vs. Soft**: "hard directional lighting with sharp shadows" vs. "soft diffused lighting with gentle shadows"
128
+ - **Intensity**: bright, moderate, dim, dramatic high-contrast, subtle low-contrast
129
+ - **Coverage**: full illumination, selective lighting, spotlight effect, rim lighting, fill lighting
130
+
131
+ ### 3. Color Temperature and Mood
132
+ - **Temperature specification**: warm (3000K-3500K), neutral (4000K-5000K), cool (5500K-6500K), daylight (6500K+)
133
+ - **Mood descriptors**: golden hour warmth, clinical cool lighting, cozy warm ambiance, dramatic cool shadows
134
+ - **Mixed lighting**: "warm key light with cool rim lighting," "daylight from window with warm interior lighting"
135
+
136
+ ### 4. Environmental and Context-Specific Lighting
137
+ - **Time of day**: morning soft light, midday harsh sun, golden hour, blue hour, night artificial lighting
138
+ - **Location-based**: studio lighting setup, natural outdoor lighting, indoor ambient lighting, street lighting
139
+ - **Weather conditions**: overcast soft lighting, direct sunlight, sunset glow, stormy dramatic lighting
140
+
141
+ ### 5. Technical Lighting Setups
142
+ - **Professional terminology**: key light, fill light, rim/hair light, background light, bounce lighting
143
+ - **Studio setups**: Rembrandt lighting, butterfly lighting, split lighting, loop lighting
144
+ - **Multiple sources**: "main soft box from camera right, fill light from left, rim light from behind"
145
+
146
+ ## 3. Instruction Rewriting Examples
147
+
148
+ ### For Basic Lighting Changes:
149
+ - **Input**: "Make it brighter" → **Rewritten**: "Increase overall lighting with soft front illumination, maintain natural shadows"
150
+ - **Input**: "Dramatic lighting" → **Rewritten**: "Add strong side lighting from camera left with deep shadows on right side, high contrast"
151
+
152
+ ### For Direction Changes:
153
+ - **Input**: "Light from behind" → **Rewritten**: "Add rim lighting from behind subject, maintain visibility of facial features with subtle fill light"
154
+ - **Input**: "Window lighting" → **Rewritten**: "Natural daylight from camera left, soft directional lighting mimicking window light"
 
 
 
155
 
156
+ ### For Mood/Atmosphere:
157
+ - **Input**: "Warmer lighting" → **Rewritten**: "Adjust to warm 3200K lighting, golden tone, soft shadows"
158
+ - **Input**: "Studio lighting" → **Rewritten**: "Professional three-point lighting: soft key light camera right, fill light camera left, rim light from behind"
159
+
160
+ ## 4. Technical Considerations and Constraints
161
+
162
+ ### Physical Accuracy:
163
+ - Ensure shadow directions match light source positions
164
+ - Maintain consistent color temperature across the scene
165
+ - Respect surface materials (how light interacts with skin, fabric, metal, etc.)
166
+ - Consider ambient light contribution and bounce lighting
167
+
168
+
169
+ ### Preservation Rules:
170
+ - **Always specify**: "maintain facial features unchanged," "preserve original pose and expression"
171
+ - **For portraits**: "keep skin texture and facial structure identical, only adjust lighting"
172
+ - **For scenes**: "preserve all objects and composition, modify lighting only"
173
+
174
+ ### Quality Standards:
175
+ - **Include resolution/quality terms**: "realistic lighting physics," "natural light falloff," "smooth gradients"
176
+ - **Avoid artifacts**: "no harsh light cutoffs," "natural shadow transitions," "realistic highlight rolloff"
177
+
178
+ ## 5. Common Lighting Scenarios
179
+
180
+ ### Portrait Relighting:
181
+ "Apply soft key lighting from camera right at 45-degree angle, add gentle fill light from left to reduce shadow contrast, maintain natural skin tones and facial features"
182
+
183
+ ### Scene Relighting:
184
+ "Change to golden hour lighting: warm 3000K directional light from camera right, long soft shadows, enhanced ambient warm bounce light"
185
+
186
+ ### Dramatic Relighting:
187
+ "High-contrast lighting setup: strong key light from camera left, minimal fill light, deep shadows on right side, dramatic mood while preserving subject clarity"
188
+
189
+ ### Natural Environment:
190
+ "Simulate overcast daylight: soft diffused lighting from above, minimal shadows, cool 6000K color temperature, even illumination across scene"
191
+
192
+ ## 6. Error Prevention
193
+ - Never specify impossible lighting (e.g., "shadows pointing toward light source")
194
+ - Always include both light addition and shadow consideration
195
+ - Specify color temperature changes when requesting "warm" or "cool" lighting
196
  # Output Format
197
  Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
198
  '''
 
201
  # but keeping the interface consistent
202
  full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
203
 
204
+ return polish_prompt_hf(full_prompt, SYSTEM_PROMPT, img)
 
205
  # --- Model Loading ---
206
  dtype = torch.bfloat16
207
  device = "cuda" if torch.cuda.is_available() else "cpu"