Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Community

CR7CAD committed on Mar 8

Commit

6706f05

verified ·

1 Parent(s): e5f2129

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -6

app.py CHANGED Viewed

@@ -43,9 +43,10 @@ def text2story(text):
     return story_text
 # text2audio - REVISED to handle audio format correctly
 def text2audio(story_text):
     try:
-        # Use a simple, reliable TTS model
         synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
         # Limit text length to avoid timeouts
@@ -60,15 +61,41 @@ def text2audio(story_text):
         # Generate speech
         speech = synthesizer(story_text)
-        # Create a temporary file with .wav extension
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_filename = temp_file.name
-        temp_file.close()  # Close the file so we can write to it
-        # Write the raw audio data to the file
         with open(temp_filename, 'wb') as f:
-            f.write(speech['bytes'])  # Using the 'bytes' field instead of 'audio'
         return temp_filename
     except Exception as e:

     return story_text
 # text2audio - REVISED to handle audio format correctly
+# text2audio - REVISED with proper audio field handling
 def text2audio(story_text):
     try:
+        # Use the facebook TTS model
         synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
         # Limit text length to avoid timeouts
         # Generate speech
         speech = synthesizer(story_text)
+        # DEBUG: Print the keys in the speech output to understand its structure
+        st.write(f"Speech output keys: {list(speech.keys())}")
+        # Create a temporary WAV file
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
         temp_filename = temp_file.name
+        temp_file.close()
+        # Write the audio data to the temporary file
+        # The key is likely 'audio' or 'raw' rather than 'bytes'
         with open(temp_filename, 'wb') as f:
+            # Try to write using the correct key from the output
+            if 'audio' in speech and isinstance(speech['audio'], (bytes, bytearray)):
+                f.write(speech['audio'])
+            elif 'raw' in speech and isinstance(speech['raw'], (bytes, bytearray)):
+                f.write(speech['raw'])
+            elif 'wav' in speech and isinstance(speech['wav'], (bytes, bytearray)):
+                f.write(speech['wav'])
+            elif 'audio' in speech and hasattr(speech['audio'], 'tobytes'):
+                # It might be a numpy array
+                f.write(speech['audio'].tobytes())
+            else:
+                # Try the first value that looks like audio data
+                for key, value in speech.items():
+                    if isinstance(value, (bytes, bytearray)) or (
+                            hasattr(value, 'tobytes') and len(value) > 1000):
+                        if hasattr(value, 'tobytes'):
+                            f.write(value.tobytes())
+                        else:
+                            f.write(value)
+                        st.write(f"Used key: {key} for audio data")
+                        break
+                else:
+                    raise ValueError(f"No suitable audio data found in keys: {list(speech.keys())}")
         return temp_filename
     except Exception as e: