Spaces:

jjz5463
/

Diary-AI-Video

Paused

jjz5463 committed on Oct 4, 2024

Commit

0575e8c

1 Parent(s): f05d3d0

simplify captions

Files changed (2) hide show

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ from google.oauth2 import service_account
 from baseline_utils import (detect_text_in_image,
                             analyze_writer_image,
                             generate_video,
-                            break_diary_to_scenes)
 import os
 # Load secrets from Hugging Face Spaces environment
@@ -44,7 +45,9 @@ def process_images(diary_image, writer_image):
     # Generate the video based on the summaries
     video_paths = generate_video(scene_list, fps=24)
-    return video_paths, scene_list
 # Define the Gradio interface

 from baseline_utils import (detect_text_in_image,
                             analyze_writer_image,
                             generate_video,
+                            break_diary_to_scenes,
+                            scenes_caption)
 import os
 # Load secrets from Hugging Face Spaces environment
     # Generate the video based on the summaries
     video_paths = generate_video(scene_list, fps=24)
+    captions = scenes_caption(scene_list, openai_api_key)
+    return video_paths, captions
 # Define the Gradio interface

baseline_utils.py CHANGED Viewed

@@ -86,6 +86,34 @@ def break_diary_to_scenes(diary_text, writer_description, api_key):
     return response.choices[0].message.content
 @spaces.GPU
 def generate_video(scene_list, fps=24):  # Lower fps
     # Load the Zeroscope video generation model

     return response.choices[0].message.content
+def scenes_caption(scenes, api_key):
+    # Produce one short first-person caption per scene using the OpenAI chat API.
+    #
+    # scenes:  iterable of scene descriptions; each item is interpolated into
+    #          the prompt with an f-string (presumably plain strings -- confirm
+    #          against what the caller passes as scene_list).
+    # api_key: OpenAI API key handed to the client constructor.
+    #
+    # Returns a list holding one caption per scene, in the same order as
+    # `scenes`. One API request is issued per scene, sequentially.
+    # Initialize the OpenAI client
+    client = openai.Client(api_key=api_key)
+    captions = []
+    for scene in scenes:
+        # Use OpenAI's GPT API to generate a caption for each scene
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {
+                    # Single user message: the prompt asks for an "I am ..."
+                    # caption under 10 words, without quotation marks.
+                    "role": "user",
+                    "content": f"Given the scene: {scene}, "
+                               f"turn this scene into a simple caption starting with 'I am' doing something. "
+                               f"Be concise, keeping it under 10 words. Return without any quotation marks."
+                }
+            ],
+            max_tokens=50,  # Limit to a reasonable number of tokens for short captions
+            temperature=0.7,  # Adjust creativity level as needed
+            n=1  # Request a single completion; only choices[0] is read below
+        )
+        # Append the generated caption to the list
+        # NOTE(review): the message content is stored exactly as returned
+        # (no whitespace stripping, no check for a missing/None content).
+        captions.append(response.choices[0].message.content)
+    return captions
 @spaces.GPU
 def generate_video(scene_list, fps=24):  # Lower fps
     # Load the Zeroscope video generation model