jjz5463 committed on
Commit
0575e8c
·
1 Parent(s): f05d3d0

simplify captions

Browse files
Files changed (2) hide show
  1. app.py +5 -2
  2. baseline_utils.py +28 -0
app.py CHANGED
@@ -5,7 +5,8 @@ from google.oauth2 import service_account
5
  from baseline_utils import (detect_text_in_image,
6
  analyze_writer_image,
7
  generate_video,
8
- break_diary_to_scenes)
 
9
  import os
10
 
11
  # Load secrets from Hugging Face Spaces environment
@@ -44,7 +45,9 @@ def process_images(diary_image, writer_image):
44
  # Generate the video based on the summaries
45
  video_paths = generate_video(scene_list, fps=24)
46
 
47
- return video_paths, scene_list
 
 
48
 
49
 
50
  # Define the Gradio interface
 
5
  from baseline_utils import (detect_text_in_image,
6
  analyze_writer_image,
7
  generate_video,
8
+ break_diary_to_scenes,
9
+ scenes_caption)
10
  import os
11
 
12
  # Load secrets from Hugging Face Spaces environment
 
45
  # Generate the video based on the summaries
46
  video_paths = generate_video(scene_list, fps=24)
47
 
48
+ captions = scenes_caption(scene_list, openai_api_key)
49
+
50
+ return video_paths, captions
51
 
52
 
53
  # Define the Gradio interface
baseline_utils.py CHANGED
@@ -86,6 +86,34 @@ def break_diary_to_scenes(diary_text, writer_description, api_key):
86
  return response.choices[0].message.content
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  @spaces.GPU
90
  def generate_video(scene_list, fps=24): # Lower fps
91
  # Load the Zeroscope video generation model
 
86
  return response.choices[0].message.content
87
 
88
 
89
def scenes_caption(scenes, api_key):
    """Generate a short first-person caption for each scene.

    Each scene is sent to the OpenAI chat completions API in its own
    request, asking for a caption that starts with 'I am' and stays under
    10 words.

    Args:
        scenes: Iterable of scene descriptions. A single string is treated
            as one scene rather than being iterated character by character.
        api_key: OpenAI API key used to construct the client.

    Returns:
        A list of caption strings, one per scene, in input order. A
        response that carries no text content yields an empty string.
    """
    # A bare string would otherwise trigger one API request per character.
    if isinstance(scenes, str):
        scenes = [scenes]

    # Initialize the OpenAI client
    client = openai.Client(api_key=api_key)

    captions = []

    for scene in scenes:
        # Use OpenAI's GPT API to generate a caption for each scene
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "user",
                    "content": f"Given the scene: {scene}, "
                               f"turn this scene into a simple caption starting with 'I am' doing something. "
                               f"Be concise, keeping it under 10 words. Return without any quotation marks."
                }
            ],
            max_tokens=50,  # Limit to a reasonable number of tokens for short captions
            temperature=0.7,  # Adjust creativity level as needed
            n=1
        )
        # message.content is optional in the SDK; normalise it so callers
        # always receive a clean string instead of None or padded text.
        caption = response.choices[0].message.content or ""
        captions.append(caption.strip())

    return captions
115
+
116
+
117
  @spaces.GPU
118
  def generate_video(scene_list, fps=24): # Lower fps
119
  # Load the Zeroscope video generation model