Ahmed0011 committed · verified
Commit a6a0b43 · Parent(s): 499ffe6

Update app.py

Files changed (1): app.py (+330 -143)
app.py CHANGED
@@ -1,3 +1,190 @@
+ import gradio as gr
+ import edge_tts
+ import asyncio
+ import tempfile
+ import numpy as np
+ import soxr
+ from pydub import AudioSegment
+ import torch
+ import sentencepiece as spm
+ import onnxruntime as ort
+ from huggingface_hub import hf_hub_download, InferenceClient
+ import requests
+ from bs4 import BeautifulSoup
+ import urllib.parse  # quote_plus lives in the submodule; bare "import urllib" does not reliably expose it
+ import random
+ import speech_recognition as sr
+
+ theme = gr.themes.Soft(
+     primary_hue="blue",
+     secondary_hue="orange")
+
+ # List of user agents to choose from for requests
+ _useragent_list = [
+     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
+     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
+     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
+ ]
+
+ def get_useragent():
+     """Returns a random user agent from the list."""
+     return random.choice(_useragent_list)
+
+ def extract_text_from_webpage(html_content):
+     """Extracts visible text from HTML content using BeautifulSoup."""
+     soup = BeautifulSoup(html_content, "html.parser")
+     # Remove unwanted tags
+     for tag in soup(["script", "style", "header", "footer", "nav"]):
+         tag.extract()
+     # Get the remaining visible text
+     visible_text = soup.get_text(strip=True)
+     return visible_text
+
+ def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
+     """Performs a Google search and returns the results."""
+     escaped_term = urllib.parse.quote_plus(term)  # currently unused; requests URL-encodes params itself
+     start = 0
+     all_results = []
+
+     # Fetch results in batches
+     while start < num_results:
+         resp = requests.get(
+             url="https://www.google.com/search",
+             headers={"User-Agent": get_useragent()},  # Set random user agent
+             params={
+                 "q": term,
+                 "num": num_results - start,  # Number of results to fetch in this batch
+                 "hl": lang,
+                 "start": start,
+                 "safe": safe,
+             },
+             timeout=timeout,
+             verify=ssl_verify,
+         )
+         resp.raise_for_status()  # Raise an exception if the request fails
+
+         soup = BeautifulSoup(resp.text, "html.parser")
+         result_block = soup.find_all("div", attrs={"class": "g"})
+
+         # If no results, continue to the next batch
+         if not result_block:
+             start += 1
+             continue
+
+         # Extract link and text from each result
+         for result in result_block:
+             link = result.find("a", href=True)
+             if link:
+                 link = link["href"]
+                 try:
+                     # Fetch webpage content
+                     webpage = requests.get(link, headers={"User-Agent": get_useragent()})
+                     webpage.raise_for_status()
+                     # Extract visible text from webpage
+                     visible_text = extract_text_from_webpage(webpage.text)
+                     all_results.append({"link": link, "text": visible_text})
+                 except requests.exceptions.RequestException as e:
+                     # Handle errors fetching or processing webpage
+                     print(f"Error fetching or processing {link}: {e}")
+                     all_results.append({"link": link, "text": None})
+             else:
+                 all_results.append({"link": None, "text": None})
+
+         start += len(result_block)  # Update starting index for next batch
+
+     return all_results
+
+ # Speech Recognition Model Configuration
+ model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
+ sample_rate = 16000
+
+ # Download preprocessor, encoder and tokenizer
+ preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
+ encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
+ tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
+
+ # Mistral Model Configuration
+ client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+ system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
+
+ def resample(audio_fp32, sr):
+     return soxr.resample(audio_fp32, sr, sample_rate)
+
+ def to_float32(audio_buffer):
+     return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
+
+ def transcribe(audio_path):
+     audio_file = AudioSegment.from_file(audio_path)
+     sr = audio_file.frame_rate
+     audio_buffer = np.array(audio_file.get_array_of_samples())
+
+     audio_fp32 = to_float32(audio_buffer)
+     audio_16k = resample(audio_fp32, sr)
+
+     input_signal = torch.tensor(audio_16k).unsqueeze(0)
+     length = torch.tensor(len(audio_16k)).unsqueeze(0)
+     processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
+
+     logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
+
+     blank_id = tokenizer.vocab_size()
+     decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
+     text = tokenizer.decode_ids(decoded_prediction)
+
+     return text
+
+ def model(text, web_search):
+     """Queries Mixtral, optionally grounding the prompt in web search results."""
+     if web_search:
+         web_results = search(text)
+         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
+         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
+     else:
+         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
+     stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+     return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+
+ async def respond(audio, web_search):
+     user = transcribe(audio)
+     reply = model(user, web_search)
+     communicate = edge_tts.Communicate(reply)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+         tmp_path = tmp_file.name
+     await communicate.save(tmp_path)
+     return tmp_path
+
+ def listen_for_speech(web_search):
+     """Records one utterance from the host's microphone and runs the full pipeline on it."""
+     recognizer = sr.Recognizer()
+     with sr.Microphone() as source:  # requires PyAudio and a microphone on the host machine
+         print("Listening for speech...")
+         audio_data = recognizer.listen(source)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+         tmp_path = tmp_file.name
+     with open(tmp_path, 'wb') as f:
+         f.write(audio_data.get_wav_data())
+     return asyncio.run(respond(tmp_path, web_search))
+
+ with gr.Blocks(theme=theme) as demo:
+     with gr.Row():
+         web_search = gr.Checkbox(label="Web Search", value=False)
+     output = gr.Audio(label="AI", autoplay=True)
+     # gr.Blocks has no add_listener method; a button click is the closest
+     # supported trigger for the server-side capture above.
+     listen_btn = gr.Button("Listen")
+     listen_btn.click(fn=listen_for_speech, inputs=[web_search], outputs=[output])
+
+ if __name__ == "__main__":
+     demo.queue(max_size=200).launch()
+
+
  # import gradio as gr
  # import edge_tts
  # import asyncio
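
The main change in this hunk swaps Gradio's browser-microphone input for server-side capture via the SpeechRecognition package. A minimal standalone sketch of just that capture step, assuming PyAudio is installed and the machine running the app has a microphone (hosted Spaces typically do not); the function name and timeout are illustrative, not from the commit:

    import speech_recognition as sr

    def record_utterance(path="capture.wav", timeout=10):
        recognizer = sr.Recognizer()
        with sr.Microphone() as source:          # opens the default input device
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            audio_data = recognizer.listen(source, timeout=timeout)
        with open(path, "wb") as f:
            f.write(audio_data.get_wav_data())   # PCM WAV bytes of the utterance
        return path
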
@@ -170,174 +357,174 @@
  # demo.queue(max_size=200).launch()


- import gradio as gr
- import edge_tts
- import asyncio
- import tempfile
- import numpy as np
- import soxr
- from pydub import AudioSegment
- import torch
- import sentencepiece as spm
- import onnxruntime as ort
- from huggingface_hub import hf_hub_download, InferenceClient
- import requests
- from bs4 import BeautifulSoup
- import urllib
- import random
-
- theme = gr.themes.Soft(
-     primary_hue="blue",
-     secondary_hue="orange")
-
-
- # List of user agents to choose from for requests
- _useragent_list = [
-     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
-     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
-     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
-     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
-     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
-     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
-     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
- ]
-
- def get_useragent():
-     """Returns a random user agent from the list."""
-     return random.choice(_useragent_list)
-
- def extract_text_from_webpage(html_content):
-     """Extracts visible text from HTML content using BeautifulSoup."""
-     soup = BeautifulSoup(html_content, "html.parser")
-     # Remove unwanted tags
-     for tag in soup(["script", "style", "header", "footer", "nav"]):
-         tag.extract()
-     # Get the remaining visible text
-     visible_text = soup.get_text(strip=True)
-     return visible_text
-
- def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
-     """Performs a Google search and returns the results."""
-     escaped_term = urllib.parse.quote_plus(term)
-     start = 0
-     all_results = []
-
-     # Fetch results in batches
-     while start < num_results:
-         resp = requests.get(
-             url="https://www.google.com/search",
-             headers={"User-Agent": get_useragent()},  # Set random user agent
-             params={
-                 "q": term,
-                 "num": num_results - start,  # Number of results to fetch in this batch
-                 "hl": lang,
-                 "start": start,
-                 "safe": safe,
-             },
-             timeout=timeout,
-             verify=ssl_verify,
-         )
-         resp.raise_for_status()  # Raise an exception if request fails
-
-         soup = BeautifulSoup(resp.text, "html.parser")
-         result_block = soup.find_all("div", attrs={"class": "g"})
-
-         # If no results, continue to the next batch
-         if not result_block:
-             start += 1
-             continue
-
-         # Extract link and text from each result
-         for result in result_block:
-             link = result.find("a", href=True)
-             if link:
-                 link = link["href"]
-                 try:
-                     # Fetch webpage content
-                     webpage = requests.get(link, headers={"User-Agent": get_useragent()})
-                     webpage.raise_for_status()
-                     # Extract visible text from webpage
-                     visible_text = extract_text_from_webpage(webpage.text)
-                     all_results.append({"link": link, "text": visible_text})
-                 except requests.exceptions.RequestException as e:
-                     # Handle errors fetching or processing webpage
-                     print(f"Error fetching or processing {link}: {e}")
-                     all_results.append({"link": link, "text": None})
-             else:
-                 all_results.append({"link": None, "text": None})
-
-         start += len(result_block)  # Update starting index for next batch
-
-     return all_results
-
- # Speech Recognition Model Configuration
- model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
- sample_rate = 16000
-
- # Download preprocessor, encoder and tokenizer
- preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
- encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
- tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
-
- # Mistral Model Configuration
- client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
- system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
-
- def resample(audio_fp32, sr):
-     return soxr.resample(audio_fp32, sr, sample_rate)
-
- def to_float32(audio_buffer):
-     return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
-
- def transcribe(audio_path):
-     audio_file = AudioSegment.from_file(audio_path)
-     sr = audio_file.frame_rate
-     audio_buffer = np.array(audio_file.get_array_of_samples())
-
-     audio_fp32 = to_float32(audio_buffer)
-     audio_16k = resample(audio_fp32, sr)
-
-     input_signal = torch.tensor(audio_16k).unsqueeze(0)
-     length = torch.tensor(len(audio_16k)).unsqueeze(0)
-     processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
-
-     logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
-
-     blank_id = tokenizer.vocab_size()
-     decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
-     text = tokenizer.decode_ids(decoded_prediction)
-
-     return text
-
- def model(text, web_search):
-     if web_search is True:
-         """Performs a web search, feeds the results to a language model, and returns the answer."""
-         web_results = search(text)
-         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
-         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
-         stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
-         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
-     else:
-         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-         stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
-         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
-
- async def respond(audio, web_search):
-     user = transcribe(audio)
-     reply = model(user, web_search)
-     communicate = edge_tts.Communicate(reply)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-         tmp_path = tmp_file.name
-     await communicate.save(tmp_path)
-     return tmp_path
-
- with gr.Blocks(theme=theme) as demo:
-     with gr.Row():
-         web_search = gr.Checkbox(label="Web Search", value=False)
-         input = gr.Audio(label="User Input", sources="microphone", type="filepath")
-     output = gr.Audio(label="AI", autoplay=True)
-     gr.Interface(fn=respond, inputs=[input, web_search], outputs=[output], live=True)
-
- if __name__ == "__main__":
-     demo.queue(max_size=200).launch()
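
The transcription path is identical in both versions: pydub decodes the file, to_float32/resample normalize it to 16 kHz float32, the TorchScript preprocessor computes features, the ONNX encoder returns per-frame logits, and the argmax IDs are detokenized after dropping the CTC blank (tokenizer.vocab_size() is the blank ID by convention here). A conventional greedy CTC decoder also collapses consecutive repeats before dropping blanks; the version in the file relies on the model rarely emitting back-to-back duplicates. A hedged sketch of the fuller decode, reusing the names above:

    def ctc_greedy_decode(logits, blank_id):
        """Greedy CTC: collapse runs of the same ID, then drop blanks."""
        ids = logits.argmax(axis=1).tolist()
        decoded, prev = [], None
        for token_id in ids:
            if token_id != prev and token_id != blank_id:
                decoded.append(token_id)
            prev = token_id
        return decoded  # feed to tokenizer.decode_ids(...)
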
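For reference, to_float32() scales samples by the dtype's positive maximum, so 16-bit audio lands in roughly [-1.00003, 1.0]. A quick worked check (values rounded):

    import numpy as np

    buf = np.array([0, 16384, -32768], dtype=np.int16)
    # np.iinfo(np.int16).max == 32767  ->  [0.0, ~0.50002, ~-1.00003]
    print(np.divide(buf, np.iinfo(buf.dtype).max, dtype=np.float32))
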
+ # import gradio as gr
+ # import edge_tts
+ # import asyncio
+ # import tempfile
+ # import numpy as np
+ # import soxr
+ # from pydub import AudioSegment
+ # import torch
+ # import sentencepiece as spm
+ # import onnxruntime as ort
+ # from huggingface_hub import hf_hub_download, InferenceClient
+ # import requests
+ # from bs4 import BeautifulSoup
+ # import urllib
+ # import random
+
+ # theme = gr.themes.Soft(
+ #     primary_hue="blue",
+ #     secondary_hue="orange")
+
+
+ # # List of user agents to choose from for requests
+ # _useragent_list = [
+ #     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
+ #     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+ #     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+ #     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ #     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+ #     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
+ #     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
+ # ]
+
+ # def get_useragent():
+ #     """Returns a random user agent from the list."""
+ #     return random.choice(_useragent_list)
+
+ # def extract_text_from_webpage(html_content):
+ #     """Extracts visible text from HTML content using BeautifulSoup."""
+ #     soup = BeautifulSoup(html_content, "html.parser")
+ #     # Remove unwanted tags
+ #     for tag in soup(["script", "style", "header", "footer", "nav"]):
+ #         tag.extract()
+ #     # Get the remaining visible text
+ #     visible_text = soup.get_text(strip=True)
+ #     return visible_text
+
+ # def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
+ #     """Performs a Google search and returns the results."""
+ #     escaped_term = urllib.parse.quote_plus(term)
+ #     start = 0
+ #     all_results = []
+
+ #     # Fetch results in batches
+ #     while start < num_results:
+ #         resp = requests.get(
+ #             url="https://www.google.com/search",
+ #             headers={"User-Agent": get_useragent()},  # Set random user agent
+ #             params={
+ #                 "q": term,
+ #                 "num": num_results - start,  # Number of results to fetch in this batch
+ #                 "hl": lang,
+ #                 "start": start,
+ #                 "safe": safe,
+ #             },
+ #             timeout=timeout,
+ #             verify=ssl_verify,
+ #         )
+ #         resp.raise_for_status()  # Raise an exception if request fails
+
+ #         soup = BeautifulSoup(resp.text, "html.parser")
+ #         result_block = soup.find_all("div", attrs={"class": "g"})
+
+ #         # If no results, continue to the next batch
+ #         if not result_block:
+ #             start += 1
+ #             continue
+
+ #         # Extract link and text from each result
+ #         for result in result_block:
+ #             link = result.find("a", href=True)
+ #             if link:
+ #                 link = link["href"]
+ #                 try:
+ #                     # Fetch webpage content
+ #                     webpage = requests.get(link, headers={"User-Agent": get_useragent()})
+ #                     webpage.raise_for_status()
+ #                     # Extract visible text from webpage
+ #                     visible_text = extract_text_from_webpage(webpage.text)
+ #                     all_results.append({"link": link, "text": visible_text})
+ #                 except requests.exceptions.RequestException as e:
+ #                     # Handle errors fetching or processing webpage
+ #                     print(f"Error fetching or processing {link}: {e}")
+ #                     all_results.append({"link": link, "text": None})
+ #             else:
+ #                 all_results.append({"link": None, "text": None})
+
+ #         start += len(result_block)  # Update starting index for next batch
+
+ #     return all_results
+
+ # # Speech Recognition Model Configuration
+ # model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
+ # sample_rate = 16000
+
+ # # Download preprocessor, encoder and tokenizer
+ # preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
+ # encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
+ # tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
+
+ # # Mistral Model Configuration
+ # client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+ # system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
+
+ # def resample(audio_fp32, sr):
+ #     return soxr.resample(audio_fp32, sr, sample_rate)
+
+ # def to_float32(audio_buffer):
+ #     return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
+
+ # def transcribe(audio_path):
+ #     audio_file = AudioSegment.from_file(audio_path)
+ #     sr = audio_file.frame_rate
+ #     audio_buffer = np.array(audio_file.get_array_of_samples())
+
+ #     audio_fp32 = to_float32(audio_buffer)
+ #     audio_16k = resample(audio_fp32, sr)
+
+ #     input_signal = torch.tensor(audio_16k).unsqueeze(0)
+ #     length = torch.tensor(len(audio_16k)).unsqueeze(0)
+ #     processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
+
+ #     logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
+
+ #     blank_id = tokenizer.vocab_size()
+ #     decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
+ #     text = tokenizer.decode_ids(decoded_prediction)
+
+ #     return text
+
+ # def model(text, web_search):
+ #     if web_search is True:
+ #         """Performs a web search, feeds the results to a language model, and returns the answer."""
+ #         web_results = search(text)
+ #         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
+ #         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
+ #         stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+ #         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+ #     else:
+ #         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
+ #         stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+ #         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+
+ # async def respond(audio, web_search):
+ #     user = transcribe(audio)
+ #     reply = model(user, web_search)
+ #     communicate = edge_tts.Communicate(reply)
+ #     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+ #         tmp_path = tmp_file.name
+ #     await communicate.save(tmp_path)
+ #     return tmp_path
+
+ # with gr.Blocks(theme=theme) as demo:
+ #     with gr.Row():
+ #         web_search = gr.Checkbox(label="Web Search", value=False)
+ #         input = gr.Audio(label="User Input", sources="microphone", type="filepath")
+ #     output = gr.Audio(label="AI", autoplay=True)
+ #     gr.Interface(fn=respond, inputs=[input, web_search], outputs=[output], live=True)
+
+ # if __name__ == "__main__":
+ #     demo.queue(max_size=200).launch()
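
A hypothetical usage check for the search() helper in the active code, assuming outbound network access; the query string is arbitrary. Google changes its result markup frequently, and when no "div.g" blocks are present the function returns an empty list:

    results = search("gradio blocks tutorial", num_results=2)
    for res in results:
        preview = (res["text"] or "")[:100]
        print(res["link"], "->", preview)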