Spaces:

pm6six
/

audio

Sleeping

App Files Files Community

pm6six committed on Nov 20, 2024

Commit

569b020

verified ·

1 Parent(s): 9266b68

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -29

app.py CHANGED Viewed

@@ -1,17 +1,8 @@
 import streamlit as st
-from io import BytesIO
-from urllib.request import urlopen
-import librosa
 from transformers import pipeline
-model = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
-import pyttsx3  # For text-to-speech
-# Load Qwen2Audio model and processor
-processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
-model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct", device_map="auto")
-tts_engine = pyttsx3.init()
 # Streamlit app UI
 st.title("Text-to-Audio App")
@@ -19,23 +10,21 @@ st.text("This app generates audio from text input using Hugging Face models.")
 # User input
 text_input = st.text_area("Enter some text for the model:")
-if st.button("Generate Audio"):
-    conversation = [{"role": "user", "content": text_input}]
-    # Preprocess conversation
-    text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-    inputs = processor(text=text, return_tensors="pt", padding=True)
-    inputs.input_ids = inputs.input_ids.to("cuda")
-    # Generate response
-    generate_ids = model.generate(**inputs, max_length=256)
-    generate_ids = generate_ids[:, inputs.input_ids.size(1):]
-    # Decode response
-    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-    st.text(f"Model Response: {response}")
-    # Convert response to speech
-    tts_engine.say(response)
-    tts_engine.runAndWait()
-    st.success("Audio generated and played!")

 import streamlit as st
 from transformers import pipeline
+# Initialize text-to-speech model (small lightweight model)
+tts_model = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
 # Streamlit app UI
 st.title("Text-to-Audio App")
 # User input
 text_input = st.text_area("Enter some text for the model:")
+if st.button("Generate Audio"):
+    if not text_input.strip():
+        st.error("Please enter some text!")
+    else:
+        # Generate response
+        st.text("Generating audio response...")
+        tts_audio = tts_model(text_input)
+        # The transformers text-to-speech pipeline returns a dict holding the
+        # raw waveform under "audio" and its rate under "sampling_rate"; it has
+        # no "wav" key, and the samples are not WAV-encoded bytes, so they
+        # cannot be written straight into a .wav file.
+        waveform = tts_audio["audio"]
+        sample_rate = tts_audio["sampling_rate"]
+        # Display audio response. Handing the waveform to Streamlit together
+        # with its sample rate lets Streamlit do the encoding and avoids a
+        # fixed on-disk filename that every session would share.
+        st.audio(waveform, sample_rate=sample_rate)
+        st.success("Audio generated successfully!")