Spaces:

mgbam
/

CingenAI

Running

App Files Files Community

CingenAI / core /prompt_engineering.py

mgbam

Update core/prompt_engineering.py

5876552 verified 19 days ago

raw

history blame

11 kB

	# core/prompt_engineering.py
	import json

	def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
	    """
	    Build the Gemini prompt that splits a story idea into individual scenes.

	    user_idea (str): The story premise, quoted verbatim inside the prompt.
	    genre (str): Genre keyword interpolated into the prompt.
	    mood (str): Mood keyword interpolated into the prompt.
	    num_scenes (int): Number of scenes the model is asked to produce.

	    Returns the prompt text. It lists the eight per-scene fields, shows one
	    example scene object, and asks for a JSON list of `num_scenes` scenes,
	    with `key_action` kept short enough for a video overlay.
	    """
	    # Doubled braces are literal braces in the f-string (the JSON example).
	    breakdown_prompt = f"""
	You are an expert screenwriter and visual storyteller.
	Based on the user's idea: "{user_idea}"
	And considering the genre: "{genre}" and mood: "{mood}"

	Generate a {num_scenes}-scene story breakdown. For each scene, provide:
	1. scene_number (int): Sequential number of the scene.
	2. emotional_beat (str): A short title or phrase capturing the core emotion/theme of this scene (e.g., "Desperate Escape," "Betrayal Revealed," "A Glimmer of Hope").
	3. setting_description (str): Vivid description of the location, atmosphere, and key visual elements (approx 30-50 words).
	4. characters_involved (list of str): Names of characters present and active in the scene.
	5. key_action (str): The single most important event or character action happening in this specific visual moment, described concisely for a potential video overlay (max 15-20 words).
	6. dialogue_snippet (str): A brief, impactful line of dialogue spoken in this scene, if any.
	7. visual_style_suggestion (str): Keywords for the overall visual style of this scene (e.g., "Dark and gritty, high contrast, Blade Runner-esque neon reflections"). This can be influenced by the overall story mood.
	8. camera_angle_suggestion (str): A specific camera shot type or angle suggestion (e.g., "Low-angle shot emphasizing power," "Dutch angle for unease," "Extreme close-up on eyes").

	Output ONLY the JSON object for the list of scenes.
	Example for one scene:
	{{
	"scene_number": 1,
	"emotional_beat": "Tense Standoff",
	"setting_description": "A rain-slicked, neon-drenched alleyway in Neo-Kyoto. Broken holographic advertisements flicker erratically, casting distorted shadows. The air hangs heavy with the smell of ozone and despair.",
	"characters_involved": ["Detective Kaito Tanaka", "Informant (shadowy figure)"],
	"key_action": "Kaito cautiously approaches a nervous informant huddled in the shadows.",
	"dialogue_snippet": "Informant: 'They know you're looking... You're not safe.'",
	"visual_style_suggestion": "Neo-noir, cyberpunk, high contrast, deep shadows, vibrant neon reflections in puddles, film grain.",
	"camera_angle_suggestion": "Medium shot from behind Kaito, focusing on the informant, creating suspense."
	}}

	Provide the full JSON structure for {num_scenes} scenes in a list:
	[
	{{scene1_details...}},
	{{scene2_details...}},
	...
	]
	"""
	    return breakdown_prompt

	def _image_prompt_sentence(text):
	    """Return *text* stripped and ending in terminal punctuation, without doubling an existing mark."""
	    cleaned = "" if text is None else str(text).strip()
	    if cleaned.endswith((".", "!", "?")):
	        return cleaned
	    return f"{cleaned}."


	def _image_prompt_characters(characters_involved, character_definitions):
	    """Return one prompt fragment per character, with its visual description when one is defined."""
	    if isinstance(characters_involved, str):
	        # A bare string would otherwise be iterated one letter at a time.
	        characters_involved = [characters_involved]
	    definitions = character_definitions or {}
	    segments = []
	    for raw_name in characters_involved or []:
	        if raw_name is None:
	            continue
	        name = str(raw_name).strip()
	        if not name:
	            continue
	        # Definitions are keyed by the lower-cased character name.
	        description = definitions.get(name.lower())
	        segments.append(f"{name} (described as: {description})" if description else name)
	    return segments


	def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
	    """
	    Build a detailed DALL-E 3 image prompt from one scene's structured data.

	    scene_data (dict): Details for a single scene. Keys read here are
	        'emotional_beat', 'setting_description', 'key_action',
	        'characters_involved', 'visual_style_suggestion',
	        'camera_angle_suggestion' and 'mood'. Missing, None or empty values
	        fall back to generic defaults.
	    character_definitions (dict): {'character_name_lower': 'description', ...}
	    global_style_reference (str): User-defined global style keywords, appended
	        to the scene's own style suggestion.

	    Returns the prompt as a single line with whitespace normalized.
	    """
	    # `or` (rather than a .get default) also covers keys present with None/"".
	    emotional_beat_title = scene_data.get('emotional_beat') or 'A cinematic scene'
	    setting_desc = scene_data.get('setting_description') or 'A visually interesting setting.'
	    key_action_desc = scene_data.get('key_action') or 'A significant moment unfolds.'
	    scene_specific_style = scene_data.get('visual_style_suggestion') or 'cinematic, photorealistic'
	    camera_instr = scene_data.get('camera_angle_suggestion') or 'eye-level medium shot'
	    lighting_mood = scene_data.get('mood') or 'cinematic'

	    # --- Character Injection ---
	    character_prompt_segments = _image_prompt_characters(
	        scene_data.get('characters_involved'), character_definitions
	    )
	    if not character_prompt_segments:
	        characters_narrative = ""
	        # No characters: avoid a "They ..." sentence with nothing to refer to.
	        action_lead_in = "Key action:"
	    else:
	        if len(character_prompt_segments) == 1:
	            characters_narrative = f"The primary focus is on {character_prompt_segments[0]}."
	        else:
	            characters_narrative = f"The scene prominently features {', '.join(character_prompt_segments[:-1])} and {character_prompt_segments[-1]}."
	        action_lead_in = "They are engaged in the following key action:"
	    # --- End Character Injection ---

	    # --- Style Aggregation ---
	    final_style_directive = str(scene_specific_style).strip()
	    if global_style_reference:  # User's global style preference
	        # Drop the scene style's trailing period so the join does not read "grain., anime".
	        final_style_directive = f"{final_style_directive.rstrip('. ')}, {global_style_reference}"
	    # --- End Style Aggregation ---

	    # DALL-E 3 often works best if you say what kind of image you want first, then the details.
	    prompt_parts = [
	        "Create an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image.",
	        f"The image should depict: '{emotional_beat_title}'.",
	        f"Setting: {_image_prompt_sentence(setting_desc)}",
	        characters_narrative,
	        f"{action_lead_in} {_image_prompt_sentence(key_action_desc)}",
	        f"Visual Style and Atmosphere: {_image_prompt_sentence(final_style_directive)}",
	        f"Camera Composition: {_image_prompt_sentence(camera_instr)}",
	        f"Emphasize: Dramatic lighting (consider {lighting_mood} mood), rich textures, depth of field, and strong atmospheric effects like mist, rain, or dust if appropriate to the setting.",
	        "The overall image must feel like a high-quality still from a major motion picture or a AAA video game.",
	        "Pay close attention to character details if provided, ensuring they are distinct and match their descriptions.",
	    ]

	    return " ".join(" ".join(prompt_parts).split())  # Normalize whitespace

	def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
	    """
	    Build the Gemini prompt that regenerates one scene's script details from user feedback.

	    original_scene_data (dict): The scene to revise; embedded in the prompt as indented JSON.
	    user_feedback (str): The requested change, quoted verbatim inside the prompt.
	    full_story_context: Optional JSON-serializable story data, embedded for
	        reference when truthy.

	    Returns the prompt text. It asks for only the single scene's JSON object,
	    with the same structure and an unchanged 'scene_number'.
	    """
	    # ensure_ascii=False keeps non-ASCII names and dialogue readable for the
	    # model instead of turning them into \uXXXX escape sequences.
	    scene_json = json.dumps(original_scene_data, indent=2, ensure_ascii=False)
	    context_str = f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n{scene_json}\n\n"
	    if full_story_context:
	        story_json = json.dumps(full_story_context, indent=2, ensure_ascii=False)
	        context_str += f"Full story context for reference:\n{story_json}\n\n"

	    return f"""
	You are an expert script doctor and editor.
	{context_str}
	The user wants to modify this specific scene based on the following feedback: "{user_feedback}"

	Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.
	Maintain the exact same JSON structure as the original: (scene_number, emotional_beat, setting_description, characters_involved, key_action, dialogue_snippet, visual_style_suggestion, camera_angle_suggestion).
	The 'scene_number' must remain unchanged.
	The 'key_action' should be a concise descriptive sentence (max 15-20 words) suitable for a brief video overlay.
	If feedback pertains to characters, setting, action, dialogue, style, or camera, update those fields accordingly.
	Ensure the regenerated scene remains coherent with the overall story context if provided.
	Focus on making the changes impactful and clear.
	"""

	def _regen_sentence(text):
	    """Return *text* stripped and ending in terminal punctuation, without doubling an existing mark."""
	    cleaned = "" if text is None else str(text).strip()
	    if cleaned.endswith((".", "!", "?")):
	        return cleaned
	    return f"{cleaned}."


	def _regen_character_names(characters_involved):
	    """Return the scene's character names as a list of non-empty, stripped strings."""
	    if isinstance(characters_involved, str):
	        # A bare string would otherwise be iterated one letter at a time.
	        characters_involved = [characters_involved]
	    names = []
	    for raw_name in characters_involved or []:
	        if raw_name is None:
	            continue
	        name = str(raw_name).strip()
	        if name:
	            names.append(name)
	    return names


	def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
	    """
	    Build the Gemini prompt that rewrites an existing DALL-E 3 image prompt from user feedback.

	    original_image_prompt_text (str): The image prompt used for the previous visual.
	    user_feedback_on_visuals (str): What the user wants changed in the image.
	    scene_data (dict): Scene details summarized as context ('scene_number',
	        'emotional_beat', 'setting_description', 'key_action',
	        'characters_involved', 'visual_style_suggestion',
	        'camera_angle_suggestion').
	    character_definitions (dict): {'character_name_lower': 'description', ...}
	    global_style_reference (str): User-defined global style keywords.

	    Returns the prompt as a single line with whitespace normalized.
	    """
	    # Tolerates a missing, None or bare-string 'characters_involved' value.
	    character_names = _regen_character_names(scene_data.get('characters_involved'))

	    emotional_beat = _regen_sentence(scene_data.get('emotional_beat', ''))
	    setting = _regen_sentence(scene_data.get('setting_description', ''))
	    action = _regen_sentence(scene_data.get('key_action', ''))
	    style_hint = _regen_sentence(scene_data.get('visual_style_suggestion', ''))
	    camera_hint = _regen_sentence(scene_data.get('camera_angle_suggestion', ''))

	    # Reconstruct parts of what the original prompt might have focused on, for context
	    scene_context_summary = (
	        f"Scene Number: {scene_data.get('scene_number', 'N/A')}. "
	        f"Emotional Beat: {emotional_beat} "
	        f"Setting: {setting} "
	        f"Action: {action} "
	        f"Characters: {', '.join(character_names)}. "
	        f"Current Style Hint: {style_hint} "
	        f"Current Camera Hint: {camera_hint}"
	    )

	    definitions = character_definitions or {}
	    character_details_for_prompt = []
	    for name in character_names:
	        # Definitions are keyed by the lower-cased character name.
	        description = definitions.get(name.lower())
	        character_details_for_prompt.append(
	            f"{name} (described as: {description})" if description else name
	        )
	    described_characters = ', '.join(character_details_for_prompt) or 'None specified'
	    characters_narrative = f"Relevant characters and their descriptions: {_regen_sentence(described_characters)}"

	    full_prompt_for_gemini = f"""
	You are an AI assistant specializing in refining image generation prompts for DALL-E 3.
	The user wants to modify a visual concept for a cinematic scene.

	Original Scene Context:
	{scene_context_summary}
	{characters_narrative}
	Global Style Reference (if any): "{global_style_reference}"

	The DALL-E 3 prompt that was used to generate the previous image was:
	"{original_image_prompt_text}"

	The user provided the following feedback on the visual generated by that prompt:
	"{user_feedback_on_visuals}"

	Your task is to generate a new, revised DALL-E 3 prompt that incorporates the user's feedback to achieve the desired visual changes.
	The new prompt should be ultra-detailed, photorealistic, and highly cinematic.
	It should instruct DALL-E 3 to create an image that feels like a high-quality still from a major motion picture or AAA video game.
	Maintain the core elements of the scene (setting, characters, key action) unless the feedback explicitly asks to change them.
	Focus on translating the user's feedback into concrete visual descriptions related to composition, lighting, color, character appearance/pose, atmosphere, etc.
	Ensure character descriptions from the context are respected and reinforced if characters are mentioned.
	The prompt should be a single block of text.

	Output ONLY the new, revised DALL-E 3 prompt string.
	"""
	    # Collapse the template's newlines and indentation into single spaces.
	    return " ".join(full_prompt_for_gemini.split())