Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Community

CR7CAD committed on Mar 8

Commit

76abf5e

verified ·

1 Parent(s): 3fd88eb

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -43

app.py CHANGED Viewed

@@ -2,8 +2,7 @@
 import streamlit as st
 from transformers import pipeline
 import os
-import numpy as np
-import io
 # function part
 # img2text
@@ -43,52 +42,37 @@ def text2story(text):
     return story_text
-# text2audio - REVISED to use a simpler approach without scipy
 def text2audio(story_text):
     try:
-        # Use the facebook/mms-tts-eng model with fewer features
         synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
-        # For simplicity, we'll limit the text length to avoid timeouts
-        # If text is too long, truncate it to a reasonable length (500 chars ~ 100 words)
-        max_length = 500
-        if len(story_text) > max_length:
-            last_period = story_text[:max_length].rfind('.')
             if last_period > 0:
                 story_text = story_text[:last_period + 1]
             else:
-                story_text = story_text[:max_length]
         # Generate speech
         speech = synthesizer(story_text)
-        # Save the audio to a file instead of using in-memory processing
-        # This avoids needing scipy
-        temp_audio_path = "temp_audio.wav"
-        # Convert numpy array to bytes and save
-        with open(temp_audio_path, "wb") as f:
-            # Assuming the audio is in the right format already
-            np.save(f, speech["audio"])
-        # Read the file back
-        with open(temp_audio_path, "rb") as f:
-            audio_data = f.read()
-        # Clean up
-        try:
-            os.remove(temp_audio_path)
-        except:
-            pass
-        return {
-            "audio": audio_data,
-            "sampling_rate": speech["sampling_rate"]
-        }
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")
-        # No fallback - just return None
         return None
 # Function to save temporary image file
@@ -127,22 +111,19 @@ if uploaded_file is not None:
     # Stage 3: Story to Audio data
     st.text('Generating audio data...')
-    audio_data = text2audio(story)
     # Play button
     if st.button("Play Audio"):
-        if audio_data:
-            st.audio(
-                audio_data["audio"],
-                format="audio/wav",
-                start_time=0,
-                sample_rate=audio_data["sampling_rate"]
-            )
         else:
-            st.error("Failed to generate audio. Please try again.")
-    # Clean up the temporary file
     try:
         os.remove(image_path)
     except:
         pass

 import streamlit as st
 from transformers import pipeline
 import os
+import tempfile
 # function part
 # img2text
     return story_text
+# text2audio - REVISED to handle audio format correctly
 def text2audio(story_text):
     try:
+        # Use a simple, reliable TTS model
         synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
+        # Limit text length to avoid timeouts
+        max_chars = 500
+        if len(story_text) > max_chars:
+            last_period = story_text[:max_chars].rfind('.')
             if last_period > 0:
                 story_text = story_text[:last_period + 1]
             else:
+                story_text = story_text[:max_chars]
         # Generate speech
         speech = synthesizer(story_text)
+        # Create a temporary file with .wav extension
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+        temp_filename = temp_file.name
+        temp_file.close()  # Close the file so we can write to it
+        # Write the raw audio data to the file
+        with open(temp_filename, 'wb') as f:
+            f.write(speech['bytes'])  # Using the 'bytes' field instead of 'audio'
+        return temp_filename
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")
         return None
 # Function to save temporary image file
     # Stage 3: Story to Audio data
     st.text('Generating audio data...')
+    audio_file = text2audio(story)
     # Play button
     if st.button("Play Audio"):
+        if audio_file and os.path.exists(audio_file):
+            # Play the audio file
+            st.audio(audio_file)
         else:
+            st.error("Audio generation failed. Please try again.")
+    # Clean up the temporary files
     try:
         os.remove(image_path)
+        # Don't delete audio file immediately as it might still be playing
     except:
         pass