Spaces:

tournas
/

storytelling_assistant

Running

App Files Files Community

tournas committed on Feb 15

Commit

fca69f9

verified ·

1 Parent(s): 7b64947

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -20

app.py CHANGED Viewed

@@ -10,9 +10,8 @@ from ultralytics import YOLO
 from gtts import gTTS
 from PIL import Image
 from nltk.tokenize import sent_tokenize
-import spaces
-# Set device (use GPU if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load environment variables
@@ -23,22 +22,24 @@ if not api_key:
 # Initialize OpenAI client
 client = OpenAI(api_key=api_key)
-# Load YOLO model
-yolo_model = YOLO("yolov8s.pt")
-# Load Stable Diffusion pipeline
-stable_diffusion = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32)
-stable_diffusion.to(device)
 # Download NLTK data
 nltk.download("punkt")
-# Load summarization pipeline
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if device == "cuda" else -1)
-@spaces.GPU
 # Function to detect objects in an image
-def detect_objects(image_path):
     results = yolo_model(image_path)
     detected_objects = []
     for r in results:
@@ -52,20 +53,20 @@ def detect_objects(image_path):
 def generate_story(detected_objects):
     story_prompt = f"Write a short story based on the following objects: {', '.join(detected_objects)}"
     response = client.chat.completions.create(
-        model="gpt-4o-mini",  # Use GPT-4 or GPT-3.5-turbo
         messages=[{"role": "user", "content": story_prompt}],
         max_tokens=200
     )
     return response.choices[0].message.content.strip()
 # Function to summarize the story into scenes
-def summarize_story(story):
     summary = summarizer(story, max_length=100, do_sample=False)[0]['summary_text']
     scenes = sent_tokenize(summary)
     return scenes
 # Function to generate images for each scene
-def generate_images(story):
     scenes = summarize_story(story)
     prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
     images = []
@@ -88,8 +89,13 @@ def full_pipeline(image):
         image_path = f"temp_{uuid.uuid4().hex}.jpg"
         image.save(image_path)
         # Detect objects in the image
-        detected_objects = detect_objects(image_path)
         if not detected_objects:
             return "No objects detected. Please upload a different image.", "", [], None
@@ -99,12 +105,12 @@ def full_pipeline(image):
             return "Failed to generate a story. Please try again.", "", [], None
         # Summarize the story into scenes
-        scenes = summarize_story(story)
         if not scenes:
             return story, "No scenes extracted.", [], None
         # Generate images for each scene
-        images = generate_images(story)
         if not images:
             return story, "\n".join(scenes), [], None
@@ -119,7 +125,7 @@ def full_pipeline(image):
     except Exception as e:
         return f"An error occurred: {str(e)}", "", [], None
-# Gradio UI
 demo = gr.Interface(
     fn=full_pipeline,
     inputs=gr.Image(type="pil"),
@@ -133,6 +139,9 @@ demo = gr.Interface(
     description="Upload an image, and the AI will detect objects, generate a story, create images, and narrate the story."
 )
 # Launch the app
 if __name__ == "__main__":
     demo.launch()

 from gtts import gTTS
 from PIL import Image
 from nltk.tokenize import sent_tokenize
+# Set device (use GPU if available, but don't initialize CUDA here)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load environment variables
 # Initialize OpenAI client
 client = OpenAI(api_key=api_key)
 # Download NLTK data
 nltk.download("punkt")
+# Lazy-load models to avoid initializing CUDA in the main process
_YOLO_MODEL = None  # process-wide cache, filled on the first call


def load_yolo_model():
    """Return the YOLO detector, constructing it only on the first call.

    The model is still created lazily (nothing is loaded at import time),
    but the instance is kept in a module-level cache so that repeated
    calls reuse it instead of re-reading the ``yolov8s.pt`` weights for
    every request.

    Returns:
        The ``YOLO("yolov8s.pt")`` instance shared by all callers.
    """
    global _YOLO_MODEL
    if _YOLO_MODEL is None:
        _YOLO_MODEL = YOLO("yolov8s.pt")
    return _YOLO_MODEL
_STABLE_DIFFUSION = None  # process-wide cache, filled on the first call


def load_stable_diffusion():
    """Return the Stable Diffusion pipeline, building it only once.

    The pipeline is created lazily on the first call and then cached, so
    later calls do not re-instantiate it or move it to the device again.
    Half precision is used when the module-level ``device`` is ``"cuda"``;
    otherwise the pipeline is loaded in float32.

    Returns:
        The ``StableDiffusionPipeline`` for
        ``runwayml/stable-diffusion-v1-5`` after ``.to(device)``.
    """
    global _STABLE_DIFFUSION
    if _STABLE_DIFFUSION is None:
        dtype = torch.float16 if device == "cuda" else torch.float32
        _STABLE_DIFFUSION = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            torch_dtype=dtype,
        ).to(device)
    return _STABLE_DIFFUSION
_SUMMARIZER = None  # process-wide cache, filled on the first call


def load_summarizer():
    """Return the summarization pipeline, building it only once.

    The pipeline is created lazily on the first call and cached for later
    calls. It is placed on device index 0 when the module-level ``device``
    is ``"cuda"`` and on the CPU (``-1``) otherwise.

    Returns:
        The ``pipeline("summarization", model="facebook/bart-large-cnn")``
        instance shared by all callers.
    """
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=0 if device == "cuda" else -1,
        )
    return _SUMMARIZER
 # Function to detect objects in an image
+def detect_objects(image_path, yolo_model):
     results = yolo_model(image_path)
     detected_objects = []
     for r in results:
 def generate_story(detected_objects):
     """Ask the chat model for a short story built around the given objects.

     Args:
         detected_objects: Iterable of object-name strings; they are joined
             with ", " and embedded in the prompt.

     Returns:
         The story text with surrounding whitespace removed, or an empty
         string when the response carries no text content.
     """
     story_prompt = f"Write a short story based on the following objects: {', '.join(detected_objects)}"
     response = client.chat.completions.create(
         model="gpt-4",
         messages=[{"role": "user", "content": story_prompt}],
         max_tokens=200,
     )
     # The API may return a message whose content is None; calling .strip()
     # on it would raise AttributeError, so map that case to "".
     content = response.choices[0].message.content
     return content.strip() if content else ""
 # Function to summarize the story into scenes
def summarize_story(story, summarizer):
    """Condense a story and split the summary into one scene per sentence.

    Args:
        story: The story text to summarize.
        summarizer: Callable invoked as
            ``summarizer(story, max_length=100, do_sample=False)``; its
            first result must expose a ``"summary_text"`` entry.

    Returns:
        A list of sentence strings from the summary, or an empty list when
        ``story`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the model call and report "no scenes".
    if not story or not story.strip():
        return []
    summary = summarizer(story, max_length=100, do_sample=False)[0]["summary_text"]
    return sent_tokenize(summary)
 # Function to generate images for each scene
+def generate_images(story, stable_diffusion):
     scenes = summarize_story(story)
     prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
     images = []
         image_path = f"temp_{uuid.uuid4().hex}.jpg"
         image.save(image_path)
+        # Lazy-load models
+        yolo_model = load_yolo_model()
+        stable_diffusion = load_stable_diffusion()
+        summarizer = load_summarizer()
         # Detect objects in the image
+        detected_objects = detect_objects(image_path, yolo_model)
         if not detected_objects:
             return "No objects detected. Please upload a different image.", "", [], None
             return "Failed to generate a story. Please try again.", "", [], None
         # Summarize the story into scenes
+        scenes = summarize_story(story, summarizer)
         if not scenes:
             return story, "No scenes extracted.", [], None
         # Generate images for each scene
+        images = generate_images(story, stable_diffusion)
         if not images:
             return story, "\n".join(scenes), [], None
     except Exception as e:
         return f"An error occurred: {str(e)}", "", [], None
+# Gradio UI with queue for long-running tasks
 demo = gr.Interface(
     fn=full_pipeline,
     inputs=gr.Image(type="pil"),
     description="Upload an image, and the AI will detect objects, generate a story, create images, and narrate the story."
 )
+# Enable queue for long-running tasks
+demo.queue()
 # Launch the app
 if __name__ == "__main__":
     demo.launch()