Beehzod committed on
Commit
87269ae
·
verified ·
1 Parent(s): dbf5729

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -75
app.py CHANGED
@@ -1,90 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
- from transformers import SeamlessM4Tv2Model, AutoProcessor
3
  import torch
4
  import numpy as np
5
  from scipy.io.wavfile import write
6
- import re
7
  from io import BytesIO
8
 
9
- # Load the processor and model
10
- processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
11
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
12
 
 
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  model.to(device)
15
 
16
- # Number to words function for Uzbek
17
- number_words = {
18
- 0: "nol", 1: "bir", 2: "ikki", 3: "uch", 4: "to'rt", 5: "besh", 6: "olti", 7: "yetti", 8: "sakkiz", 9: "to'qqiz",
19
- 10: "o'n", 11: "o'n bir", 12: "o'n ikki", 13: "o'n uch", 14: "o'n to'rt", 15: "o'n besh", 16: "o'n oltı", 17: "o'n yetti",
20
- 18: "o'n sakkiz", 19: "o'n toqqiz", 20: "yigirma", 30: "o'ttiz", 40: "qirq", 50: "ellik", 60: "oltmish", 70: "yetmish",
21
- 80: "sakson", 90: "to'qson", 100: "yuz", 1000: "ming", 1000000: "million"
22
- }
23
-
24
- def number_to_words(number):
25
- if number < 20:
26
- return number_words[number]
27
- elif number < 100:
28
- tens, unit = divmod(number, 10)
29
- return number_words[tens * 10] + (" " + number_words[unit] if unit else "")
30
- elif number < 1000:
31
- hundreds, remainder = divmod(number, 100)
32
- return (number_words[hundreds] + " yuz" if hundreds > 1 else "yuz") + (" " + number_to_words(remainder) if remainder else "")
33
- elif number < 1000000:
34
- thousands, remainder = divmod(number, 1000)
35
- return (number_to_words(thousands) + " ming" if thousands > 1 else "ming") + (" " + number_to_words(remainder) if remainder else "")
36
- elif number < 1000000000:
37
- millions, remainder = divmod(number, 1000000)
38
- return number_to_words(millions) + " million" + (" " + number_to_words(remainder) if remainder else "")
39
- elif number < 1000000000000:
40
- billions, remainder = divmod(number, 1000000000)
41
- return number_to_words(billions) + " milliard" + (" " + number_to_words(remainder) if remainder else "")
42
- else:
43
- return str(number)
44
-
45
- def replace_numbers_with_words(text):
46
- def replace(match):
47
- number = int(match.group())
48
- return number_to_words(number)
49
- result = re.sub(r'\b\d+\b', replace, text)
50
- return result
51
-
52
- # Replacements
53
- replacements = [
54
- ("bo‘ladi", "bo'ladi"),
55
- ("yog‘ingarchilik", "yog'ingarchilik"),
56
- ]
57
-
58
- def cleanup_text(text):
59
- for src, dst in replacements:
60
- text = text.replace(src, dst)
61
- return text
62
-
63
- # Streamlit App
64
- st.title("Text-to-Speech using Seamless M4T Model")
65
-
66
- # User Input
67
- user_input = st.text_area("Enter the text for speech generation", height=200)
68
-
69
- # Process the text and generate speech
70
- if st.button("Generate Speech"):
71
- if user_input.strip():
72
- # Apply text transformations
73
- converted_text = replace_numbers_with_words(user_input)
74
- cleaned_text = cleanup_text(converted_text)
75
-
76
- # Process input for model
77
- inputs = processor(text=cleaned_text, src_lang="uzn", return_tensors="pt").to(device)
78
-
79
- # Generate audio from text
80
  audio_array_from_text = model.generate(**inputs, tgt_lang="uzn")[0].cpu().numpy().squeeze()
81
 
82
- # Save to BytesIO
83
- audio_io = BytesIO()
84
- write(audio_io, 16000, audio_array_from_text.astype(np.float32))
85
- audio_io.seek(0)
86
 
87
- # Provide audio for playback
88
- st.audio(audio_io, format='audio/wav')
89
  else:
90
- st.warning("Please enter some text to generate speech.")
 
1
+ # import streamlit as st
2
+ # from transformers import SeamlessM4Tv2Model, AutoProcessor
3
+ # import torch
4
+ # import numpy as np
5
+ # from scipy.io.wavfile import write
6
+ # import re
7
+ # from io import BytesIO
8
+
9
+ # # Load the processor and model
10
+ # processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
11
+ # model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
12
+
13
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
+ # model.to(device)
15
+
16
+ # # Number to words function for Uzbek
17
+ # number_words = {
18
+ # 0: "nol", 1: "bir", 2: "ikki", 3: "uch", 4: "to'rt", 5: "besh", 6: "olti", 7: "yetti", 8: "sakkiz", 9: "to'qqiz",
19
+ # 10: "o'n", 11: "o'n bir", 12: "o'n ikki", 13: "o'n uch", 14: "o'n to'rt", 15: "o'n besh", 16: "o'n olti", 17: "o'n yetti",
20
+ # 18: "o'n sakkiz", 19: "o'n to'qqiz", 20: "yigirma", 30: "o'ttiz", 40: "qirq", 50: "ellik", 60: "oltmish", 70: "yetmish",
21
+ # 80: "sakson", 90: "to'qson", 100: "yuz", 1000: "ming", 1000000: "million"
22
+ # }
23
+
24
+ # def number_to_words(number):
25
+ # if number < 20:
26
+ # return number_words[number]
27
+ # elif number < 100:
28
+ # tens, unit = divmod(number, 10)
29
+ # return number_words[tens * 10] + (" " + number_words[unit] if unit else "")
30
+ # elif number < 1000:
31
+ # hundreds, remainder = divmod(number, 100)
32
+ # return (number_words[hundreds] + " yuz" if hundreds > 1 else "yuz") + (" " + number_to_words(remainder) if remainder else "")
33
+ # elif number < 1000000:
34
+ # thousands, remainder = divmod(number, 1000)
35
+ # return (number_to_words(thousands) + " ming" if thousands > 1 else "ming") + (" " + number_to_words(remainder) if remainder else "")
36
+ # elif number < 1000000000:
37
+ # millions, remainder = divmod(number, 1000000)
38
+ # return number_to_words(millions) + " million" + (" " + number_to_words(remainder) if remainder else "")
39
+ # elif number < 1000000000000:
40
+ # billions, remainder = divmod(number, 1000000000)
41
+ # return number_to_words(billions) + " milliard" + (" " + number_to_words(remainder) if remainder else "")
42
+ # else:
43
+ # return str(number)
44
+
45
+ # def replace_numbers_with_words(text):
46
+ # def replace(match):
47
+ # number = int(match.group())
48
+ # return number_to_words(number)
49
+ # result = re.sub(r'\b\d+\b', replace, text)
50
+ # return result
51
+
52
+ # # Replacements
53
+ # replacements = [
54
+ # ("bo‘ladi", "bo'ladi"),
55
+ # ("yog‘ingarchilik", "yog'ingarchilik"),
56
+ # ]
57
+
58
+ # def cleanup_text(text):
59
+ # for src, dst in replacements:
60
+ # text = text.replace(src, dst)
61
+ # return text
62
+
63
+ # # Streamlit App
64
+ # st.title("Text-to-Speech using Seamless M4T Model")
65
+
66
+ # # User Input
67
+ # user_input = st.text_area("Enter the text for speech generation", height=200)
68
+
69
+ # # Process the text and generate speech
70
+ # if st.button("Generate Speech"):
71
+ # if user_input.strip():
72
+ # # Apply text transformations
73
+ # converted_text = replace_numbers_with_words(user_input)
74
+ # cleaned_text = cleanup_text(converted_text)
75
+
76
+ # # Process input for model
77
+ # inputs = processor(text=cleaned_text, src_lang="uzn", return_tensors="pt").to(device)
78
+
79
+ # # Generate audio from text
80
+ # audio_array_from_text = model.generate(**inputs, tgt_lang="uzn")[0].cpu().numpy().squeeze()
81
+
82
+ # # Save to BytesIO
83
+ # audio_io = BytesIO()
84
+ # write(audio_io, 16000, audio_array_from_text.astype(np.float32))
85
+ # audio_io.seek(0)
86
+
87
+ # # Provide audio for playback
88
+ # st.audio(audio_io, format='audio/wav')
89
+ # else:
90
+ # st.warning("Please enter some text to generate speech.")
91
  import streamlit as st
92
+ from transformers import SeamlessM4TTokenizer, SeamlessM4Tv2Model
93
  import torch
94
  import numpy as np
95
  from scipy.io.wavfile import write
 
96
  from io import BytesIO
97
 
98
# Checkpoint shared by the tokenizer and the model.
MODEL_NAME = "facebook/seamless-m4t-v2-large"

# Used only if the model config does not expose `sampling_rate`.
DEFAULT_SAMPLE_RATE = 16000


@st.cache_resource
def _load_tts_components(model_name):
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction, so loading the checkpoint at module level would reload it
    on every button click. Caching the loaded objects as a shared resource
    avoids that.

    Args:
        model_name: Hugging Face checkpoint identifier to load.

    Returns:
        A ``(tokenizer, model, device)`` tuple; the model has already been
        moved to ``device``.
    """
    # Prefer CUDA when available, otherwise fall back to CPU.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loaded_tokenizer = SeamlessM4TTokenizer.from_pretrained(model_name)
    loaded_model = SeamlessM4Tv2Model.from_pretrained(model_name)
    loaded_model.to(target_device)
    return loaded_tokenizer, loaded_model, target_device


# Module-level names kept so the rest of the script reads as before.
tokenizer, model, device = _load_tts_components(MODEL_NAME)

# Streamlit title
st.title("Text-to-Speech with Seamless M4T Model")

# Input text field
text = st.text_area("Enter text for audio generation", "Nutq texnologiyasining til qamrovini kengaytirish...")

# Button to generate audio
if st.button("Generate Audio"):
    # Reject whitespace-only input as well as an empty string.
    if text.strip():
        # Tokenize the Uzbek input text and move the tensors to the model's device.
        inputs = tokenizer(text=text, src_lang="uzn", return_tensors="pt").to(device)

        # Generate audio from the model; element [0] of the output is the waveform.
        audio_array_from_text = model.generate(**inputs, tgt_lang="uzn")[0].cpu().numpy().squeeze()

        # Take the sample rate from the model config rather than hard-coding it,
        # so the WAV header matches the audio the model actually produced.
        sample_rate = getattr(model.config, "sampling_rate", DEFAULT_SAMPLE_RATE)

        # Save the audio as a .wav file in memory
        audio_file = BytesIO()
        write(audio_file, sample_rate, audio_array_from_text.astype(np.float32))
        audio_file.seek(0)  # Reset the pointer to the start of the file

        # Display the audio player in the Streamlit app
        st.audio(audio_file, format="audio/wav")
    else:
        st.warning("Please enter text to generate audio.")