pm6six committed on
Commit
569b020
·
verified ·
1 Parent(s): 9266b68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -29
app.py CHANGED
@@ -1,17 +1,8 @@
1
  import streamlit as st
2
- from io import BytesIO
3
- from urllib.request import urlopen
4
- import librosa
5
  from transformers import pipeline
6
- model = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
7
 
8
-
9
- import pyttsx3 # For text-to-speech
10
-
11
- # Load Qwen2Audio model and processor
12
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
13
- model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct", device_map="auto")
14
- tts_engine = pyttsx3.init()
15
 
16
  # Streamlit app UI
17
  st.title("Text-to-Audio App")
@@ -19,23 +10,21 @@ st.text("This app generates audio from text input using Hugging Face models.")
19
 
20
  # User input
21
  text_input = st.text_area("Enter some text for the model:")
22
- if st.button("Generate Audio"):
23
- conversation = [{"role": "user", "content": text_input}]
24
 
25
- # Preprocess conversation
26
- text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
27
- inputs = processor(text=text, return_tensors="pt", padding=True)
28
- inputs.input_ids = inputs.input_ids.to("cuda")
29
-
30
- # Generate response
31
- generate_ids = model.generate(**inputs, max_length=256)
32
- generate_ids = generate_ids[:, inputs.input_ids.size(1):]
33
-
34
- # Decode response
35
- response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
36
- st.text(f"Model Response: {response}")
 
 
 
 
37
 
38
- # Convert response to speech
39
- tts_engine.say(response)
40
- tts_engine.runAndWait()
41
- st.success("Audio generated and played!")
 
1
  import streamlit as st
 
 
 
2
  from transformers import pipeline
 
3
 
4
+ # Initialize text-to-speech model (small lightweight model)
5
+ tts_model = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
 
 
 
 
 
6
 
7
  # Streamlit app UI
8
  st.title("Text-to-Audio App")
 
10
 
11
  # User input
12
  text_input = st.text_area("Enter some text for the model:")
 
 
13
 
14
+ if st.button("Generate Audio"):
15
+ if not text_input.strip():
16
+ st.error("Please enter some text!")
17
+ else:
18
+ # Generate response
19
+ st.text("Generating audio response...")
20
+ tts_audio = tts_model(text_input)
21
+
22
+ # Save the audio output
23
+ audio_file = "response.wav"
24
+ with open(audio_file, "wb") as f:
25
+ f.write(tts_audio["wav"])
26
+
27
+ # Display audio response
28
+ st.audio(audio_file, format="audio/wav")
29
+ st.success("Audio generated successfully!")
30