Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

8fe6281

verified ·

1 Parent(s): efe4c0f

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -23

app.py CHANGED Viewed

@@ -1,7 +1,11 @@
-# Only the two imports you requested
 import streamlit as st
 from transformers import pipeline
 from PIL import Image
 # Simple image-to-text function
 def img2text(image):
@@ -9,7 +13,7 @@ def img2text(image):
     text = image_to_text(image)[0]["generated_text"]
     return text
-# Simple text-to-story function
 def text2story(text):
     generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     prompt = f"Write a short children's story based on this: {text}. The story should have a clear beginning, middle, and end. Keep it under 150 words. Once upon a time, "
@@ -52,11 +56,25 @@ def text2story(text):
     # If no good ending is found, return as is
     return story_text
-# Simple text-to-audio function
 def text2audio(story_text):
-    synthesizer = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")
-    speech = synthesizer(story_text)
-    return speech
 # Basic Streamlit interface
 st.title("Image to Audio Story")
@@ -70,26 +88,22 @@ if uploaded_file is not None:
     image = Image.open(uploaded_file)
     # Image to Text
-    st.write("Generating caption...")
-    caption = img2text(image)
     st.write(f"Caption: {caption}")
     # Text to Story
-    st.write("Creating story...")
-    story = text2story(caption)
     st.write(f"Story: {story}")
     # Text to Audio
-    st.write("Generating audio...")
-    speech_output = text2audio(story)
-    # Play audio
-    try:
-        if 'audio' in speech_output and 'sampling_rate' in speech_output:
-            st.audio(speech_output['audio'], sample_rate=speech_output['sampling_rate'])
-        elif 'audio_array' in speech_output and 'sampling_rate' in speech_output:
-            st.audio(speech_output['audio_array'], sample_rate=speech_output['sampling_rate'])
-        else:
-            st.write("Audio generated but could not be played.")
-    except Exception as e:
-        st.error(f"Error playing audio: {e}")

+# Imports
 import streamlit as st
 from transformers import pipeline
 from PIL import Image
+import torch
+from gtts import gTTS
+import os
+import tempfile
 # Simple image-to-text function
 def img2text(image):
     text = image_to_text(image)[0]["generated_text"]
     return text
+# Improved text-to-story function with natural ending
 def text2story(text):
     generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     prompt = f"Write a short children's story based on this: {text}. The story should have a clear beginning, middle, and end. Keep it under 150 words. Once upon a time, "
     # If no good ending is found, return as is
     return story_text
+# Updated text-to-audio function using gTTS instead of transformers
 def text2audio(story_text):
     """Convert story text to MP3 audio and return the raw bytes.

     The speech is synthesized with gTTS into a temporary ``.mp3`` file,
     read back into memory, and the temporary file is always removed,
     including when synthesis or reading fails.

     Args:
         story_text: The text to be spoken (English, normal speed).

     Returns:
         The contents of the generated MP3 file as ``bytes``.

     Raises:
         Any exception raised by gTTS or by file I/O propagates unchanged
         to the caller.
     """
     # Reserve a unique path only; the handle is closed on leaving the
     # ``with`` so gTTS can reopen the file by name.
     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
         temp_filename = temp_file.name
     try:
         # Use gTTS to convert text to speech
         tts = gTTS(text=story_text, lang='en', slow=False)
         tts.save(temp_filename)
         # Read the audio file back into memory
         with open(temp_filename, 'rb') as audio_file:
             return audio_file.read()
     finally:
         # delete=False means nothing else removes the file, so clean up
         # here on both the success and the failure path.
         os.unlink(temp_filename)
 # Basic Streamlit interface
 st.title("Image to Audio Story")
     image = Image.open(uploaded_file)
     # Image to Text
+    with st.spinner("Generating caption..."):
+        caption = img2text(image)
     st.write(f"Caption: {caption}")
     # Text to Story
+    with st.spinner("Creating story..."):
+        story = text2story(caption)
     st.write(f"Story: {story}")
     # Text to Audio
+    with st.spinner("Generating audio..."):
+        try:
+            audio_bytes = text2audio(story)
+            # Play audio
+            st.audio(audio_bytes, format='audio/mp3')
+        except Exception as e:
+            st.error(f"Error generating or playing audio: {e}")
+            st.write("Make sure gTTS is installed with: pip install gTTS")