Spaces:

tournas
/

storytelling_assistant

Running

tournas committed on Feb 15

Commit

17a4149

verified ·

1 Parent(s): 0ec87e7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from nltk.tokenize import sent_tokenize
 from IPython.display import Audio
 import spaces
-device = 'cuda'
 api_key = os.getenv("OPENAI_API_KEY")
 if not api_key:
@@ -27,7 +27,7 @@ yolo_model = YOLO("yolov8s.pt")
 stable_diffusion = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
 stable_diffusion.to(device)
 nltk.download("punkt")
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
 @spaces.GPU
 def detect_objects(image_path):
@@ -47,7 +47,7 @@ def generate_story(detected_objects):
         messages=[{"role": "user", "content": story_prompt}],
         max_tokens=200
     )
-    return response.choices[0].message.content
 def summarize_story(story):
     summary = summarizer(story, max_length=100, do_sample=False)[0]['summary_text']
@@ -59,7 +59,7 @@ def generate_images(story):
     prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
     images = []
     for prompt in prompts:
-        image = stable_diffusion(prompt).images[0]
         images.append(image)
     return images
@@ -70,10 +70,12 @@ def text_to_speech(story):
     return audio_file_path
 def full_pipeline(image):
-    detected_objects = detect_objects(image)
     story = generate_story(detected_objects)
     scenes = summarize_story(story)
-    images = generate_images(scenes)
     audio = text_to_speech(story)
     return story, scenes, images, audio

 from IPython.display import Audio
 import spaces
+device = "cuda" if torch.cuda.is_available() else "cpu"
 api_key = os.getenv("OPENAI_API_KEY")
 if not api_key:
 stable_diffusion = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
 stable_diffusion.to(device)
 nltk.download("punkt")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device= 'cuda' if torch.cuda.is_available() else 'cpu')
 @spaces.GPU
 def detect_objects(image_path):
         messages=[{"role": "user", "content": story_prompt}],
         max_tokens=200
     )
+    return response.choices[0].message.content.strip()
 def summarize_story(story):
     summary = summarizer(story, max_length=100, do_sample=False)[0]['summary_text']
     prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
     images = []
     for prompt in prompts:
+        image = stable_diffusion(prompt=prompt).images[0]
         images.append(image)
     return images
     return audio_file_path
 def full_pipeline(image):
+    image_path = "input.jpg"
+    image.save(image_path)
+    detected_objects = detect_objects(image_path)
     story = generate_story(detected_objects)
     scenes = summarize_story(story)
+    images = generate_images(story)
     audio = text_to_speech(story)
     return story, scenes, images, audio