Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

3fd88eb

verified ·

1 Parent(s): 7df9b81

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -45

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ from transformers import pipeline
 import os
 import numpy as np
 import io
-import scipy.io.wavfile as wavfile
 # function part
 # img2text
@@ -44,65 +43,53 @@ def text2story(text):
     return story_text
# text2audio - synthesize the story chunk by chunk with facebook/mms-tts-eng
def text2audio(story_text, max_chunk_size=200):
    """Convert story text to WAV audio using the facebook/mms-tts-eng model.

    The text is split into word-aligned chunks of at most ``max_chunk_size``
    characters, each chunk is synthesized separately, and the results are
    joined into a single WAV stream.

    Args:
        story_text: The text to narrate.
        max_chunk_size: Maximum characters per synthesis call (default 200).

    Returns:
        A dict with ``"audio"`` (WAV file bytes) and ``"sampling_rate"``.
        If synthesis fails, the error is shown via ``st.error`` and the
        contents of ``fallback_audio.wav`` are returned with a sampling rate
        of 22050; ``None`` is returned when that file cannot be read.
    """
    import textwrap

    try:
        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")

        # textwrap breaks on whitespace and keeps every word, so no text is
        # lost between chunks (a fixed-stride slice trimmed to the last space
        # silently drops the characters after that space).
        chunks = textwrap.wrap(story_text, width=max_chunk_size)
        if not chunks:
            raise ValueError("No text to synthesize")

        audio_arrays = []
        sampling_rate = None
        for chunk in chunks:
            speech = synthesizer(chunk)
            if sampling_rate is None:
                sampling_rate = speech["sampling_rate"]
            # Flatten so chunks of different lengths can be joined end to end.
            # NOTE(review): assumes the pipeline returns mono audio - confirm.
            audio_arrays.append(np.ravel(speech["audio"]))

        combined_audio = np.concatenate(audio_arrays)

        # Encode the samples as a WAV file held in memory.
        wav_buffer = io.BytesIO()
        wavfile.write(wav_buffer, sampling_rate, combined_audio)
        return {
            "audio": wav_buffer.getvalue(),
            "sampling_rate": sampling_rate
        }
    except Exception as e:
        st.error(f"Error generating audio: {str(e)}")
        # Fallback to a pre-recorded audio file if available
        try:
            with open("fallback_audio.wav", "rb") as f:
                return {
                    "audio": f.read(),
                    "sampling_rate": 22050  # Common sample rate
                }
        except OSError:
            return None
 # Function to save temporary image file
 def save_uploaded_image(uploaded_file):

 import os
 import numpy as np
 import io
 # function part
 # img2text
     return story_text
# Encode float audio samples as an in-memory 16-bit PCM WAV file
def _pcm16_wav_bytes(audio, sampling_rate):
    """Return ``audio`` encoded as mono 16-bit PCM WAV file bytes.

    Args:
        audio: Array-like of float samples; size-1 dimensions are squeezed.
        sampling_rate: Frame rate written to the WAV header.

    Raises:
        ValueError: If the audio is not one-dimensional after squeezing.
    """
    import wave  # stdlib; imported here so the block needs no new file-level import

    samples = np.atleast_1d(np.squeeze(np.asarray(audio, dtype=np.float32)))
    if samples.ndim != 1:
        raise ValueError(f"Expected mono audio, got array of shape {samples.shape}")

    # NOTE(review): assumes samples are floats in [-1, 1] - confirm against
    # the model output. Clipping prevents integer wrap-around on conversion.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(pcm.tobytes())
    return buffer.getvalue()


# text2audio - synthesize narration and return playable WAV bytes (no scipy)
def text2audio(story_text, max_length=500):
    """Convert story text to WAV audio using the facebook/mms-tts-eng model.

    Text longer than ``max_length`` characters is cut at the last period
    inside that window (or hard-truncated when there is none) to keep
    synthesis short.

    Args:
        story_text: The text to narrate.
        max_length: Maximum number of characters to synthesize (default 500).

    Returns:
        A dict with ``"audio"`` (WAV file bytes) and ``"sampling_rate"``, or
        ``None`` if synthesis fails; the error is shown via ``st.error``.
    """
    try:
        synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")

        if len(story_text) > max_length:
            window = story_text[:max_length]
            last_period = window.rfind('.')
            # Prefer ending on a full sentence; otherwise cut at the limit.
            story_text = window[:last_period + 1] if last_period > 0 else window

        speech = synthesizer(story_text)

        # Encode in memory as real WAV data. np.save would emit NumPy's .npy
        # format, and a fixed temp file name could collide between sessions.
        audio_data = _pcm16_wav_bytes(speech["audio"], speech["sampling_rate"])
        return {
            "audio": audio_data,
            "sampling_rate": speech["sampling_rate"]
        }
    except Exception as e:
        st.error(f"Error generating audio: {str(e)}")
        # No fallback - just return None
        return None
 # Function to save temporary image file
 def save_uploaded_image(uploaded_file):