Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

83842b8

verified ·

1 Parent(s): b1c6cd6

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -54

app.py CHANGED Viewed

@@ -12,42 +12,10 @@ def img2text(image_path):
     return text
 # text2story
-def text2story(text):
-    # Using a smaller text generation model
-    generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-    # Create a prompt for the story generation
-    prompt = f"Write a fun children's story based on this: {text}. Once upon a time, "
-    # Generate the story
-    story_result = generator(
-        prompt,
-        max_length=150,
-        num_return_sequences=1,
-        temperature=0.7,
-        top_k=50,
-        top_p=0.95,
-        do_sample=True
-    )
-    # Extract the generated text
-    story_text = story_result[0]['generated_text']
-    story_text = story_text.replace(prompt, "Once upon a time, ")
-    # Make sure the story is at most 100 words
-    words = story_text.split()
-    if len(words) > 100:
-        # Simply truncate to 100 words
-        story_text = " ".join(words[:100])
-    return story_text
-# text2audio - REVISED to handle audio format correctly
-# text2audio - REVISED with proper audio field handling
 def text2audio(story_text):
     try:
-        # Use the viXTTS text-to-speech model
-        synthesizer = pipeline("text-to-speech", model="capleaf/viXTTS")
         # Limit text length to avoid timeouts
         max_chars = 500
@@ -59,29 +27,13 @@ def text2audio(story_text):
                 story_text = story_text[:max_chars]
         # Generate speech
         speech = synthesizer(story_text)
-        # Create a temporary WAV file
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-        temp_filename = temp_file.name
-        temp_file.close()
-        # Debug: Show what keys are available in the speech output
         st.write(f"Speech output keys: {list(speech.keys())}")
-        # Write the audio data to the temporary file - MeloTTS should have audio and sampling_rate
-        if 'audio' in speech and 'sampling_rate' in speech:
-            # Convert numpy array to WAV file
-            scipy.io.wavfile.write(
-                temp_filename,
-                speech['sampling_rate'],
-                speech['audio'].astype(np.float32)
-            )
-            st.write("Audio successfully written to file")
-        else:
-            raise ValueError(f"Expected 'audio' and 'sampling_rate' in output, but got: {list(speech.keys())}")
-        return temp_filename
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")

     return text
 # text2story
 def text2audio(story_text):
     try:
+        # Use the HelpingAI TTS model as requested
+        synthesizer = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")
         # Limit text length to avoid timeouts
         max_chars = 500
                 story_text = story_text[:max_chars]
         # Generate speech
+        st.write("Generating audio...")
         speech = synthesizer(story_text)
         st.write(f"Speech output keys: {list(speech.keys())}")
+        # We'll pass the audio data directly to Streamlit instead of saving to a file
+        # This works because Streamlit's st.audio() can take raw audio data
+        return speech
     except Exception as e:
         st.error(f"Error generating audio: {str(e)}")