CR7CAD committed on
Commit
cd9e32e
·
verified ·
1 Parent(s): 6706f05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -31
app.py CHANGED
@@ -46,8 +46,8 @@ def text2story(text):
46
  # text2audio - REVISED with proper audio field handling
47
  def text2audio(story_text):
48
  try:
49
- # Use the facebook TTS model
50
- synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
51
 
52
  # Limit text length to avoid timeouts
53
  max_chars = 500
@@ -61,45 +61,32 @@ def text2audio(story_text):
61
  # Generate speech
62
  speech = synthesizer(story_text)
63
 
64
- # DEBUG: Print the keys in the speech output to understand its structure
65
- st.write(f"Speech output keys: {list(speech.keys())}")
66
-
67
  # Create a temporary WAV file
68
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
69
  temp_filename = temp_file.name
70
  temp_file.close()
71
 
72
- # Write the audio data to the temporary file
73
- # The key is likely 'audio' or 'raw' rather than 'bytes'
74
- with open(temp_filename, 'wb') as f:
75
- # Try to write using the correct key from the output
76
- if 'audio' in speech and isinstance(speech['audio'], (bytes, bytearray)):
77
- f.write(speech['audio'])
78
- elif 'raw' in speech and isinstance(speech['raw'], (bytes, bytearray)):
79
- f.write(speech['raw'])
80
- elif 'wav' in speech and isinstance(speech['wav'], (bytes, bytearray)):
81
- f.write(speech['wav'])
82
- elif 'audio' in speech and hasattr(speech['audio'], 'tobytes'):
83
- # It might be a numpy array
84
- f.write(speech['audio'].tobytes())
85
- else:
86
- # Try the first value that looks like audio data
87
- for key, value in speech.items():
88
- if isinstance(value, (bytes, bytearray)) or (
89
- hasattr(value, 'tobytes') and len(value) > 1000):
90
- if hasattr(value, 'tobytes'):
91
- f.write(value.tobytes())
92
- else:
93
- f.write(value)
94
- st.write(f"Used key: {key} for audio data")
95
- break
96
- else:
97
- raise ValueError(f"No suitable audio data found in keys: {list(speech.keys())}")
98
 
99
  return temp_filename
100
 
101
  except Exception as e:
102
  st.error(f"Error generating audio: {str(e)}")
 
 
103
  return None
104
 
105
  # Function to save temporary image file
 
46
  # text2audio - REVISED with proper audio field handling
47
  def text2audio(story_text):
48
  try:
49
+ # Use the MeloTTS model which has better audio quality
50
+ synthesizer = pipeline("text-to-speech", model="myshell-ai/MeloTTS-English")
51
 
52
  # Limit text length to avoid timeouts
53
  max_chars = 500
 
61
  # Generate speech
62
  speech = synthesizer(story_text)
63
 
 
 
 
64
  # Create a temporary WAV file
65
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
66
  temp_filename = temp_file.name
67
  temp_file.close()
68
 
69
+ # Debug: Show what keys are available in the speech output
70
+ st.write(f"Speech output keys: {list(speech.keys())}")
71
+
72
+ # Write the audio data to the temporary file - MeloTTS should have audio and sampling_rate
73
+ if 'audio' in speech and 'sampling_rate' in speech:
74
+ # Convert numpy array to WAV file
75
+ scipy.io.wavfile.write(
76
+ temp_filename,
77
+ speech['sampling_rate'],
78
+ speech['audio'].astype(np.float32)
79
+ )
80
+ st.write("Audio successfully written to file")
81
+ else:
82
+ raise ValueError(f"Expected 'audio' and 'sampling_rate' in output, but got: {list(speech.keys())}")
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  return temp_filename
85
 
86
  except Exception as e:
87
  st.error(f"Error generating audio: {str(e)}")
88
+ import traceback
89
+ st.error(traceback.format_exc())
90
  return None
91
 
92
  # Function to save temporary image file