CR7CAD committed on
Commit
83842b8
·
verified ·
1 Parent(s): b1c6cd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -54
app.py CHANGED
@@ -12,42 +12,10 @@ def img2text(image_path):
12
  return text
13
 
14
  # text2story
15
- def text2story(text):
16
- # Using a smaller text generation model
17
- generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
18
-
19
- # Create a prompt for the story generation
20
- prompt = f"Write a fun children's story based on this: {text}. Once upon a time, "
21
-
22
- # Generate the story
23
- story_result = generator(
24
- prompt,
25
- max_length=150,
26
- num_return_sequences=1,
27
- temperature=0.7,
28
- top_k=50,
29
- top_p=0.95,
30
- do_sample=True
31
- )
32
-
33
- # Extract the generated text
34
- story_text = story_result[0]['generated_text']
35
- story_text = story_text.replace(prompt, "Once upon a time, ")
36
-
37
- # Make sure the story is at least 100 words
38
- words = story_text.split()
39
- if len(words) > 100:
40
- # Simply truncate to 100 words
41
- story_text = " ".join(words[:100])
42
-
43
- return story_text
44
-
45
- # text2audio - REVISED to handle audio format correctly
46
- # text2audio - REVISED with proper audio field handling
47
  def text2audio(story_text):
48
  try:
49
- # Use the MeloTTS model which has better audio quality
50
- synthesizer = pipeline("text-to-speech", model="capleaf/viXTTS")
51
 
52
  # Limit text length to avoid timeouts
53
  max_chars = 500
@@ -59,29 +27,13 @@ def text2audio(story_text):
59
  story_text = story_text[:max_chars]
60
 
61
  # Generate speech
 
62
  speech = synthesizer(story_text)
63
-
64
- # Create a temporary WAV file
65
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
66
- temp_filename = temp_file.name
67
- temp_file.close()
68
-
69
- # Debug: Show what keys are available in the speech output
70
  st.write(f"Speech output keys: {list(speech.keys())}")
71
 
72
- # Write the audio data to the temporary file - MeloTTS should have audio and sampling_rate
73
- if 'audio' in speech and 'sampling_rate' in speech:
74
- # Convert numpy array to WAV file
75
- scipy.io.wavfile.write(
76
- temp_filename,
77
- speech['sampling_rate'],
78
- speech['audio'].astype(np.float32)
79
- )
80
- st.write("Audio successfully written to file")
81
- else:
82
- raise ValueError(f"Expected 'audio' and 'sampling_rate' in output, but got: {list(speech.keys())}")
83
-
84
- return temp_filename
85
 
86
  except Exception as e:
87
  st.error(f"Error generating audio: {str(e)}")
 
12
  return text
13
 
14
  # text2story
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def text2audio(story_text):
16
  try:
17
+ # Use the HelpingAI TTS model as requested
18
+ synthesizer = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")
19
 
20
  # Limit text length to avoid timeouts
21
  max_chars = 500
 
27
  story_text = story_text[:max_chars]
28
 
29
  # Generate speech
30
+ st.write("Generating audio...")
31
  speech = synthesizer(story_text)
 
 
 
 
 
 
 
32
  st.write(f"Speech output keys: {list(speech.keys())}")
33
 
34
+ # We'll pass the audio data directly to Streamlit instead of saving to a file
35
+ # This works because Streamlit's st.audio() can take raw audio data
36
+ return speech
 
 
 
 
 
 
 
 
 
 
37
 
38
  except Exception as e:
39
  st.error(f"Error generating audio: {str(e)}")