CR7CAD committed on
Commit
6706f05
·
verified ·
1 Parent(s): e5f2129

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -6
app.py CHANGED
@@ -43,9 +43,10 @@ def text2story(text):
43
  return story_text
44
 
45
  # text2audio - REVISED to handle audio format correctly
 
46
  def text2audio(story_text):
47
  try:
48
- # Use a simple, reliable TTS model
49
  synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
50
 
51
  # Limit text length to avoid timeouts
@@ -60,15 +61,41 @@ def text2audio(story_text):
60
  # Generate speech
61
  speech = synthesizer(story_text)
62
 
63
- # Create a temporary file with .wav extension
 
 
 
64
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
65
  temp_filename = temp_file.name
66
- temp_file.close() # Close the file so we can write to it
67
 
68
- # Write the raw audio data to the file
 
69
  with open(temp_filename, 'wb') as f:
70
- f.write(speech['bytes']) # Using the 'bytes' field instead of 'audio'
71
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  return temp_filename
73
 
74
  except Exception as e:
 
43
  return story_text
44
 
45
  # text2audio - REVISED to handle audio format correctly
46
+ # text2audio - REVISED with proper audio field handling
47
  def text2audio(story_text):
48
  try:
49
+ # Use the facebook TTS model
50
  synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
51
 
52
  # Limit text length to avoid timeouts
 
61
  # Generate speech
62
  speech = synthesizer(story_text)
63
 
64
+ # DEBUG: Print the keys in the speech output to understand its structure
65
+ st.write(f"Speech output keys: {list(speech.keys())}")
66
+
67
+ # Create a temporary WAV file
68
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
69
  temp_filename = temp_file.name
70
+ temp_file.close()
71
 
72
+ # Write the audio data to the temporary file
73
+ # The key is likely 'audio' or 'raw' rather than 'bytes'
74
  with open(temp_filename, 'wb') as f:
75
+ # Try to write using the correct key from the output
76
+ if 'audio' in speech and isinstance(speech['audio'], (bytes, bytearray)):
77
+ f.write(speech['audio'])
78
+ elif 'raw' in speech and isinstance(speech['raw'], (bytes, bytearray)):
79
+ f.write(speech['raw'])
80
+ elif 'wav' in speech and isinstance(speech['wav'], (bytes, bytearray)):
81
+ f.write(speech['wav'])
82
+ elif 'audio' in speech and hasattr(speech['audio'], 'tobytes'):
83
+ # It might be a numpy array
84
+ f.write(speech['audio'].tobytes())
85
+ else:
86
+ # Try the first value that looks like audio data
87
+ for key, value in speech.items():
88
+ if isinstance(value, (bytes, bytearray)) or (
89
+ hasattr(value, 'tobytes') and len(value) > 1000):
90
+ if hasattr(value, 'tobytes'):
91
+ f.write(value.tobytes())
92
+ else:
93
+ f.write(value)
94
+ st.write(f"Used key: {key} for audio data")
95
+ break
96
+ else:
97
+ raise ValueError(f"No suitable audio data found in keys: {list(speech.keys())}")
98
+
99
  return temp_filename
100
 
101
  except Exception as e: