Spaces:
Paused
Paused
simplify captions
Browse files
- app.py +5 -2
- baseline_utils.py +28 -0
app.py
CHANGED
@@ -5,7 +5,8 @@ from google.oauth2 import service_account
|
|
5 |
from baseline_utils import (detect_text_in_image,
|
6 |
analyze_writer_image,
|
7 |
generate_video,
|
8 |
-
break_diary_to_scenes
|
|
|
9 |
import os
|
10 |
|
11 |
# Load secrets from Hugging Face Spaces environment
|
@@ -44,7 +45,9 @@ def process_images(diary_image, writer_image):
|
|
44 |
# Generate the video based on the summaries
|
45 |
video_paths = generate_video(scene_list, fps=24)
|
46 |
|
47 |
-
|
|
|
|
|
48 |
|
49 |
|
50 |
# Define the Gradio interface
|
|
|
5 |
from baseline_utils import (detect_text_in_image,
|
6 |
analyze_writer_image,
|
7 |
generate_video,
|
8 |
+
break_diary_to_scenes,
|
9 |
+
scenes_caption)
|
10 |
import os
|
11 |
|
12 |
# Load secrets from Hugging Face Spaces environment
|
|
|
45 |
# Generate the video based on the summaries
|
46 |
video_paths = generate_video(scene_list, fps=24)
|
47 |
|
48 |
+
captions = scenes_caption(scene_list, openai_api_key)
|
49 |
+
|
50 |
+
return video_paths, captions
|
51 |
|
52 |
|
53 |
# Define the Gradio interface
|
baseline_utils.py
CHANGED
@@ -86,6 +86,34 @@ def break_diary_to_scenes(diary_text, writer_description, api_key):
|
|
86 |
return response.choices[0].message.content
|
87 |
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
@spaces.GPU
|
90 |
def generate_video(scene_list, fps=24): # Lower fps
|
91 |
# Load the Zeroscope video generation model
|
|
|
86 |
return response.choices[0].message.content
|
87 |
|
88 |
|
89 |
+
def scenes_caption(scenes, api_key):
    """Generate a short first-person caption for each scene.

    Each scene is sent to the OpenAI chat completions API (model ``gpt-4``)
    with a prompt asking for a caption that starts with 'I am' and stays
    under 10 words.

    Args:
        scenes: Iterable of scene descriptions; each item is interpolated
            into the prompt text.
        api_key: OpenAI API key used to construct the client.

    Returns:
        A list of caption strings, one per scene, in input order. An empty
        list is returned when ``scenes`` is empty or ``None``. A scene for
        which the API returns no text yields an empty string.
    """
    # Nothing to caption: return before building the client so an empty
    # input never requires a valid key or touches the API.
    if not scenes:
        return []

    # Initialize the OpenAI client
    client = openai.Client(api_key=api_key)

    captions = []

    for scene in scenes:
        # Use OpenAI's GPT API to generate a caption for each scene
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "user",
                    "content": f"Given the scene: {scene}, "
                               f"turn this scene into a simple caption starting with 'I am' doing something. "
                               f"Be concise, keeping it under 10 words. Return without any quotation marks."
                }
            ],
            max_tokens=50,  # Limit to a reasonable number of tokens for short captions
            temperature=0.7,  # Adjust creativity level as needed
            n=1
        )
        # ``message.content`` is optional in the SDK; normalise a missing
        # value to "" and drop stray surrounding whitespace/newlines so
        # callers always receive clean strings.
        content = response.choices[0].message.content
        captions.append((content or "").strip())

    return captions
|
115 |
+
|
116 |
+
|
117 |
@spaces.GPU
|
118 |
def generate_video(scene_list, fps=24): # Lower fps
|
119 |
# Load the Zeroscope video generation model
|