Aspik101 committed on
Commit 1c75b42 · 1 Parent(s): d9e436a

Update app.py

Files changed (1)
  1. app.py +135 -46
app.py CHANGED
@@ -78,55 +78,144 @@ print("DiffusionPipeline Loaded!")
 model_audio_gen = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small").to("cuda")
 processor_audio_gen = AutoProcessor.from_pretrained("facebook/musicgen-small")
 
+##### Chat with LLAMA ####
+
+def _load_model_tokenizer():
+    model_id = 'tangger/Qwen-7B-Chat'
+    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
+    return model, tokenizer
+
+model, tokenizer = _load_model_tokenizer()
+
+# Meant to override gr.Chatbot.postprocess so messages render as HTML
+# (note: it is never actually attached in this commit).
+def postprocess(self, y):
+    if y is None:
+        return []
+    for i, (message, response) in enumerate(y):
+        y[i] = (
+            None if message is None else mdtex2html.convert(message),
+            None if response is None else mdtex2html.convert(response),
+        )
+    return y
+
+def _parse_text(text):
+    # HTML-escape model output; fenced ``` blocks become <pre><code>.
+    lines = text.split("\n")
+    lines = [line for line in lines if line != ""]
+    count = 0
+    for i, line in enumerate(lines):
+        if "```" in line:
+            count += 1
+            items = line.split("`")
+            if count % 2 == 1:
+                lines[i] = f'<pre><code class="language-{items[-1]}">'
+            else:
+                lines[i] = "<br></code></pre>"
+        else:
+            if i > 0:
+                if count % 2 == 1:
+                    line = line.replace("`", r"\`")
+                    line = line.replace("<", "&lt;")
+                    line = line.replace(">", "&gt;")
+                    line = line.replace(" ", "&nbsp;")
+                    line = line.replace("*", "&ast;")
+                    line = line.replace("_", "&lowbar;")
+                    line = line.replace("-", "&#45;")
+                    line = line.replace(".", "&#46;")
+                    line = line.replace("!", "&#33;")
+                    line = line.replace("(", "&#40;")
+                    line = line.replace(")", "&#41;")
+                    line = line.replace("$", "&#36;")
+                lines[i] = "<br>" + line
+    text = "".join(lines)
+    return text
+
+def predict(_query, _chatbot, _task_history):
+    print(f"User: {_parse_text(_query)}")
+    _chatbot.append((_parse_text(_query), ""))
+    full_response = ""
+    for response in model.chat_stream(tokenizer, _query, history=_task_history, system="Jesteś asystentem AI. Odpowiadaj zawsze w języku polskim"):
+        _chatbot[-1] = (_parse_text(_query), _parse_text(response))
+        yield _chatbot
+        full_response = _parse_text(response)
+    print(f"History: {_task_history}")
+    _task_history.append((_query, full_response))
+    print(f"Qwen-7B-Chat: {_parse_text(full_response)}")
+
+def read_text(text):
+    print("___Text to read aloud!")
+    # tokenizer_tss and tts_model are the TTS components loaded earlier in app.py.
+    inputs = tokenizer_tss(text, return_tensors="pt").to("cuda")
+    with torch.no_grad():
+        output = tts_model(**inputs).waveform.squeeze().cpu().numpy()
+    sf.write('temp_file.wav', output, tts_model.config.sampling_rate)
+    return 'temp_file.wav'
+
+def update_audio(text):
+    # The synthesized reply is always written to the same file.
+    return 'temp_file.wav'
+
+def translate(audio):
+    print("__Sending the recording to Whisper!")
+    transcription = whisper_model.transcribe(audio, language="pl")
+    return transcription["text"]
+
+# Renamed from a second `predict` definition, which would have shadowed the
+# text variant above and broken the text submit button and regenerate().
+def predict_audio(audio, _chatbot, _task_history):
+    # Use translate() to turn the recording into text.
+    _query = translate(audio)
+    print(f"____User: {_parse_text(_query)}")
+    _chatbot.append((_parse_text(_query), ""))
+    full_response = ""
+    for response in model.chat_stream(tokenizer,
+                                      _query,
+                                      history=_task_history,
+                                      system="Jesteś asystentem AI. Odpowiadaj zawsze w języku polskim. Odpowiadaj krótko."):
+        _chatbot[-1] = (_parse_text(_query), _parse_text(response))
+        yield _chatbot
+        full_response = _parse_text(response)
+    print(f"____History: {_task_history}")
+    _task_history.append((_query, full_response))
+    print(f"__Qwen-7B-Chat: {_parse_text(full_response)}")
+    read_text(_parse_text(full_response))  # synthesize the spoken reply
+
+def regenerate(_chatbot, _task_history):
+    if not _task_history:
+        yield _chatbot
+        return
+    item = _task_history.pop(-1)
+    _chatbot.pop(-1)
+    yield from predict(item[0], _chatbot, _task_history)
+
 with gr.Blocks() as chat_demo:
-    chatbot = gr.Chatbot()
-    audio_input = gr.Audio(source="microphone", type="filepath", show_label=False)
-    submit_audio = gr.Button("Submit Audio")
-    clear = gr.Button("Clear")
+    chatbot = gr.Chatbot(label='Llama Voice Chatbot', elem_classes="control-height")
+    query = gr.Textbox(lines=2, label='Input')
+    task_history = gr.State([])
     audio_output = gr.Audio('temp_file.wav', label="Generated Audio (wav)", type='filepath', autoplay=False)
 
-    def translate(audio):
-        print("__Sending the recording to Whisper!")
-        transcription = whisper_model.transcribe(audio, language="pl")
-        return transcription["text"]
-
-    def read_text(text):
-        print("Here is the text to read!", text[-1][-1])
-        inputs = tokenizer(text[-1][-1], return_tensors="pt").to("cuda")
-        with torch.no_grad():
-            output = tts_model(**inputs).waveform.squeeze().cpu().numpy()
-        sf.write('temp_file.wav', output, tts_model.config.sampling_rate)
-        return 'temp_file.wav'
-
-    def user(audio_data, history):
-        if audio_data:
-            user_message = translate(audio_data)
-            print("USER!:")
-            print("", history + [[user_message, None]])
-            return history + [[user_message, None]]
-
-    def parse_history(hist):
-        history_ = ""
-        for q, a in hist:
-            history_ += f"<user>: {q} \n"
-            if a:
-                history_ += f"<assistant>: {a} \n"
-        return history_
-
-    def bot(history):
-        print(f"When: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}")
-        prompt = f"Jesteś AI assystentem. Odpowiadaj krótko i po polsku. {parse_history(history)}. <assistant>:"
-        stream = llm(prompt, **params)
-        history[-1][1] = ""
-        answer_save = ""
-        for character in stream:
-            history[-1][1] += character
-            answer_save += character
-            time.sleep(0.005)
-            yield history
-
-    submit_audio.click(user, [audio_input, chatbot], [chatbot], queue=False).then(bot, chatbot, chatbot).then(read_text, chatbot, audio_output)
-    clear.click(lambda: None, None, chatbot, queue=False)
+    with gr.Row():
+        submit_btn = gr.Button("🚀 Wyślij tekst")
+
+    with gr.Row():
+        audio_upload = gr.Audio(source="microphone", type="filepath", show_label=False)
+        submit_audio_btn = gr.Button("🎙️ Wyślij audio")
+
+    submit_btn.click(predict, [query, chatbot, task_history], [chatbot], show_progress=True)
+    # Wired to predict_audio (the renamed duplicate `predict`) so audio input
+    # takes the speech path while text input keeps the handler above.
+    submit_audio_btn.click(predict_audio, [audio_upload, chatbot, task_history], [chatbot], show_progress=True).then(update_audio, chatbot, audio_output)
+
+chat_demo.queue().launch(share=False)
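Note: the new voice path chains three stages that already exist in this file: Whisper transcription (`translate`), Qwen's streaming chat API (`model.chat_stream`), and speech synthesis (`read_text`). A minimal console sketch of one turn, reusing the objects defined in the diff above; `voice_turn` is a hypothetical helper for illustration, not part of this commit:

```python
def voice_turn(audio_path, task_history):
    # Speech -> text with Whisper, Polish forced as in translate().
    query = whisper_model.transcribe(audio_path, language="pl")["text"]

    # Stream the Qwen-7B-Chat reply and keep the final chunk.
    full_response = ""
    for response in model.chat_stream(tokenizer, query, history=task_history,
                                      system="Jesteś asystentem AI. Odpowiadaj krótko."):
        full_response = response

    task_history.append((query, full_response))
    # Text -> speech; read_text() writes and returns 'temp_file.wav'.
    return read_text(full_response)
```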
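The TTS objects themselves (`tokenizer_tss`, `tts_model`) are loaded outside the lines shown here. Their use of `.waveform` and `config.sampling_rate` matches the transformers VITS interface, so a standalone sanity check could look like the following; the `facebook/mms-tts-pol` checkpoint is an assumption, not something this diff confirms:

```python
import torch
import soundfile as sf
from transformers import AutoTokenizer, VitsModel

# Assumed Polish VITS checkpoint; the app's actual TTS model is defined elsewhere.
tts_model = VitsModel.from_pretrained("facebook/mms-tts-pol").to("cuda")
tokenizer_tss = AutoTokenizer.from_pretrained("facebook/mms-tts-pol")

inputs = tokenizer_tss("Dzień dobry, jak mogę pomóc?", return_tensors="pt").to("cuda")
with torch.no_grad():
    waveform = tts_model(**inputs).waveform.squeeze().cpu().numpy()
sf.write("temp_file.wav", waveform, tts_model.config.sampling_rate)
```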
 
 
 ##### Audio Gen ####