tournas committed on
Commit 5ed6c3c · verified · 1 parent: e9ab1be

Update app.py

Files changed (1)
  1. app.py +50 -27
app.py CHANGED
@@ -2,7 +2,6 @@ import os
 import gradio as gr
 import torch
 import nltk
-import random
 from openai import OpenAI
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from diffusers import StableDiffusionPipeline
@@ -10,6 +9,8 @@ from ultralytics import YOLO
 from gtts import gTTS
 from PIL import Image
 import numpy as np
+from nltk.tokenize import sent_tokenize
+from IPython.display import Audio
 
 # Make sure the API key exists
 api_key = os.getenv("OPENAI_API_KEY")
@@ -20,57 +21,79 @@ if not api_key:
 client = OpenAI(api_key=api_key)
 
 # Load the models
-print("Loading models...")
-yolo_model = YOLO("yolov8s.pt")  # Object Detection model
-text_generation = pipeline("text-generation", model="gpt2")
+yolo_model = YOLO("yolov8s.pt")
 stable_diffusion = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
 nltk.download("punkt")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
-def generate_story(prompt):
-    response = client.completions.create(
-        model="gpt-4o",
-        prompt=prompt,
-        max_tokens=300
+def detect_objects(image_path):
+    results = yolo_model(image_path)
+    detected_objects = []
+    for r in results:
+        for box in r.boxes:
+            class_id = int(box.cls[0])
+            label = yolo_model.names[class_id]
+            detected_objects.append(label)
+    return detected_objects
+
+def generate_story(detected_objects):
+    story_prompt = f"Write a short story based on the following objects: {', '.join(detected_objects)}"
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": story_prompt}],
+        max_tokens=200
     )
-    return response.choices[0].text.strip()
+    return response.choices[0].message.content
 
-def detect_objects(image):
-    results = yolo_model(image)
-    return results[0].plot()
+def summarize_story(story):
+    summary = summarizer(story, max_length=100, do_sample=False)[0]['summary_text']
+    scenes = sent_tokenize(summary)
+    return scenes
 
-def generate_image(prompt):
-    image = stable_diffusion(prompt).images[0]
-    return image
+def generate_images(story):
+    scenes = summarize_story(story)
+    prompts = [f"Highly detailed, cinematic scene: {scene}, digital art, 4K, realistic lighting" for scene in scenes]
+    images = []
+    for prompt in prompts:
+        image = stable_diffusion(prompt).images[0]
+        images.append(image)
+    return images
 
-def text_to_speech(text):
-    tts = gTTS(text=text, lang="en")
-    tts.save("output.mp3")
-    return "output.mp3"
+def text_to_speech(story):
+    tts = gTTS(text=story, lang="en", slow=False)
+    audio_file_path = "story_audio.mp3"
+    tts.save(audio_file_path)
+    return audio_file_path
 
 demo = gr.Interface(
     fn={
-        "Generate Story": generate_story,
         "Detect Objects": detect_objects,
-        "Generate Image": generate_image,
+        "Generate Story": generate_story,
+        "Summarize Story": summarize_story,
+        "Generate Images": generate_images,
        "Text to Speech": text_to_speech,
     },
     inputs={
-        "Generate Story": gr.Textbox(placeholder="Write a story prompt..."),
         "Detect Objects": gr.Image(type="numpy"),
-        "Generate Image": gr.Textbox(placeholder="Describe an image..."),
+        "Generate Story": gr.Textbox(placeholder="Write a story prompt..."),
+        "Summarize Story": gr.Textbox(placeholder="Enter the generated story..."),
+        "Generate Images": gr.Textbox(placeholder="Enter the story for image generation..."),
         "Text to Speech": gr.Textbox(placeholder="Enter text to convert to speech...")
     },
     outputs={
+        "Detect Objects": "text",
         "Generate Story": "text",
-        "Detect Objects": "image",
-        "Generate Image": "image",
+        "Summarize Story": "text",
+        "Generate Images": "image",
         "Text to Speech": "audio"
     },
     title="AI-Powered Storytelling Assistant",
-    description="An AI assistant combining NLP, Object Detection, Image Generation, and TTS!"
+    description="An AI assistant combining Object Detection, NLP, Image Generation, and TTS!"
 )
 
 if __name__ == "__main__":
     demo.launch()
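
Note on wiring: current Gradio releases expect gr.Interface to receive a single callable for fn (with matching inputs and outputs), not dictionaries keyed by task name, so the interface above is likely to fail at startup. Below is a minimal, hypothetical sketch of one way to expose the new pipeline end to end with gr.Blocks instead; it reuses the functions introduced in this commit (detect_objects, generate_story, generate_images, text_to_speech) and assumes it would replace the gr.Interface block at the bottom of app.py. The wiring itself is an illustration, not part of the commit.

# Hypothetical wiring sketch (not part of this commit): exposes the new
# pipeline through gr.Blocks, since gr.Interface does not accept dicts
# for fn/inputs/outputs. Assumes it is pasted at the bottom of app.py in
# place of the gr.Interface(...) block, so detect_objects, generate_story,
# generate_images, and text_to_speech defined above are in scope.
import gradio as gr

def run_pipeline(image):
    objects = detect_objects(image)        # YOLO labels found in the upload
    story = generate_story(objects)        # short story from gpt-4o-mini
    images = generate_images(story)        # one image per summarized scene
    audio_path = text_to_speech(story)     # path to the narrated MP3
    return ", ".join(objects), story, images, audio_path

with gr.Blocks(title="AI-Powered Storytelling Assistant") as demo:
    image_in = gr.Image(type="numpy", label="Upload an image")
    run_btn = gr.Button("Tell me a story")
    objects_out = gr.Textbox(label="Detected objects")
    story_out = gr.Textbox(label="Story")
    scenes_out = gr.Gallery(label="Scene illustrations")
    audio_out = gr.Audio(label="Narration")
    run_btn.click(
        run_pipeline,
        inputs=image_in,
        outputs=[objects_out, story_out, scenes_out, audio_out],
    )

if __name__ == "__main__":
    demo.launch()

Driving every step from a single image upload also keeps the inputs consistent: as written, the "Generate Story" entry would receive free text from a Textbox even though generate_story expects the list of detected objects.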