tournas committed
Commit 03e5adf · verified · 1 Parent(s): 2baf3e0

Update app.py

Files changed (1)
  1. app.py +41 -67
app.py CHANGED
@@ -1,105 +1,79 @@
+ import os
  import gradio as gr
  import torch
- from transformers import pipeline
- from gtts import gTTS
- from ultralytics import YOLO
- from openai import OpenAI
  import nltk
- from nltk.tokenize import sent_tokenize
+ from openai import OpenAI
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+ from diffusers import StableDiffusionPipeline
+ from ultralytics import YOLO
+ from gtts import gTTS
  from PIL import Image
- from io import BytesIO
- import os
+ import numpy as np
+ from nltk.tokenize import sent_tokenize
+ from IPython.display import Audio

- # Download the required models
- nltk.download("punkt")
- client = OpenAI()
+ # Make sure the API key exists
+ api_key = os.getenv("OPENAI_API_KEY")
+ if not api_key:
+     raise ValueError("⚠️ OpenAI API Key is missing! Add it as a Secret in Hugging Face Spaces.")
+
+ # OpenAI Client
+ client = OpenAI(api_key=api_key)

  # Load the models
- yolo_model = YOLO("yolov8n.pt")  # YOLOv8
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # Summarization model
- pipe = pipeline("image-to-image", model="runwayml/stable-diffusion-v1-5")  # Stable Diffusion
+ yolo_model = YOLO("yolov8s.pt")
+ stable_diffusion = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+ nltk.download("punkt")
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

- # 1. Object Detection
- def detect_objects(image):
-     image_path = "uploaded_image.jpg"
-     image.save(image_path)
-
+ def detect_objects(image_path):
      results = yolo_model(image_path)
      detected_objects = []
-
      for r in results:
          for box in r.boxes:
              class_id = int(box.cls[0])
              label = yolo_model.names[class_id]
              detected_objects.append(label)
+     return detected_objects

-     return ", ".join(detected_objects)
-
- # 2. Story Generation
  def generate_story(detected_objects):
-     story_prompt = f"Write a short story based on the following objects: {detected_objects}"
-
+     story_prompt = f"Write a short story based on the following objects: {', '.join(detected_objects)}"
      response = client.chat.completions.create(
          model="gpt-4o-mini",
          messages=[{"role": "user", "content": story_prompt}],
          max_tokens=200
      )
-
      return response.choices[0].message.content

- # 3. Summarization and Scene Splitting
  def summarize_story(story):
      summary = summarizer(story, max_length=100, do_sample=False)[0]['summary_text']
      scenes = sent_tokenize(summary)
      return scenes

- # 4. Image Generation
- def generate_images(scenes):
+ def generate_images(story):
+     scenes = summarize_story(story)
+     prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
      images = []
-     for idx, scene in enumerate(scenes):
-         prompt = f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting"
-         image = pipe(prompt).images[0]
-         image_path = f"scene_{idx + 1}.png"
-         image.save(image_path)
-         images.append(image_path)
-
+     for prompt in prompts:
+         image = stable_diffusion(prompt).images[0]
+         images.append(image)
      return images

- # 5. Text-to-Speech
  def text_to_speech(story):
      tts = gTTS(text=story, lang="en", slow=False)
-     audio_path = "story_audio.mp3"
-     tts.save(audio_path)
-     return audio_path
-
- # **Final automated pipeline**
- def full_pipeline(image):
-     detected_objects = detect_objects(image)
-     story = generate_story(detected_objects)
-     scenes = summarize_story(story)
-     images = generate_images(scenes)
-     audio = text_to_speech(story)
-
-     return story, scenes, images, audio
+     audio_file_path = "story_audio.mp3"
+     tts.save(audio_file_path)
+     return audio_file_path

- # **Gradio UI**
- demo = gr.Interface(
-     fn=full_pipeline,
-     inputs=gr.Image(type="pil"),
-     outputs=[
-         gr.Textbox(label="Generated Story"),
-         gr.Textbox(label="Story Scenes"),
-         gr.Gallery(label="Generated Images"),
-         gr.Audio(label="Story Audio"),
-     ],
-     title="AI-Powered Storytelling Assistant",
-     description="Upload an image, and the AI will detect objects, generate a story, create images, and narrate the story."
- )
+ detect_interface = gr.Interface(fn=detect_objects, inputs="image", outputs="text", title="Object Detection")
+ story_interface = gr.Interface(fn=generate_story, inputs="text", outputs="text", title="Story Generation")
+ summary_interface = gr.Interface(fn=summarize_story, inputs="text", outputs="text", title="Story Summarization")
+ image_interface = gr.Interface(fn=generate_images, inputs="text", outputs="image", title="Image Generation")
+ audio_interface = gr.Interface(fn=text_to_speech, inputs="text", outputs="audio", title="Text to Speech")
+
+ # Combine the interfaces into a single Gradio tabbed interface
+ demo = gr.TabbedInterface([detect_interface, story_interface, summary_interface, image_interface, audio_interface],
+                           ["Detect Objects", "Generate Story", "Summarize Story", "Generate Images", "Text to Speech"])

  if __name__ == "__main__":
-     demo.launch()
-
-
-
-
-
+     demo.launch()
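
Note on usage: this commit replaces the single end-to-end gr.Interface (the removed full_pipeline) with five independent tabs, so the image → story → scenes → images → audio chain is no longer wired together in the UI. The sketch below is not part of the commit; it is a minimal example, assuming the functions in the new app.py are importable as a module named "app", of how the same chain could still be run in one call.

# Minimal sketch (not in the commit): chain the new per-tab functions
# end-to-end, mirroring the full_pipeline() that this change removes.
from app import (  # assumes the Space's app.py is importable as "app"
    detect_objects, generate_story, summarize_story,
    generate_images, text_to_speech,
)

def full_pipeline(image_path):
    detected_objects = detect_objects(image_path)   # YOLOv8 labels
    story = generate_story(detected_objects)        # GPT-4o-mini short story
    scenes = summarize_story(story)                 # BART summary split into sentences
    images = generate_images(story)                 # one Stable Diffusion image per scene
    audio_path = text_to_speech(story)              # gTTS narration saved as story_audio.mp3
    return story, scenes, images, audio_path

Separately, torch is imported in the new version but not referenced directly; if the Space has a GPU, the diffusers pipeline can be moved to it with stable_diffusion.to("cuda") so image generation does not run on CPU.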