Ahmed0011 committed on
Commit
434ed70
·
verified ·
1 Parent(s): e4bac04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -107
app.py CHANGED
@@ -1,7 +1,6 @@
1
 
2
  import gradio as gr
3
  import edge_tts
4
- import asyncio
5
  import tempfile
6
  import numpy as np
7
  import soxr
@@ -10,95 +9,11 @@ import torch
10
  import sentencepiece as spm
11
  import onnxruntime as ort
12
  from huggingface_hub import hf_hub_download, InferenceClient
13
- import requests
14
- from bs4 import BeautifulSoup
15
- import urllib
16
- import random
17
 
18
  theme = gr.themes.Soft(
19
  primary_hue="blue",
20
  secondary_hue="orange")
21
 
22
-
23
- # List of user agents to choose from for requests
24
- _useragent_list = [
25
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
26
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
27
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
28
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
29
- 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
30
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
31
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
32
- ]
33
-
34
- def get_useragent():
35
- """Returns a random user agent from the list."""
36
- return random.choice(_useragent_list)
37
-
38
- def extract_text_from_webpage(html_content):
39
- """Extracts visible text from HTML content using BeautifulSoup."""
40
- soup = BeautifulSoup(html_content, "html.parser")
41
- # Remove unwanted tags
42
- for tag in soup(["script", "style", "header", "footer", "nav"]):
43
- tag.extract()
44
- # Get the remaining visible text
45
- visible_text = soup.get_text(strip=True)
46
- return visible_text
47
-
48
- def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
49
- """Performs a Google search and returns the results."""
50
- escaped_term = urllib.parse.quote_plus(term)
51
- start = 0
52
- all_results = []
53
-
54
- # Fetch results in batches
55
- while start < num_results:
56
- resp = requests.get(
57
- url="https://www.google.com/search",
58
- headers={"User-Agent": get_useragent()}, # Set random user agent
59
- params={
60
- "q": term,
61
- "num": num_results - start, # Number of results to fetch in this batch
62
- "hl": lang,
63
- "start": start,
64
- "safe": safe,
65
- },
66
- timeout=timeout,
67
- verify=ssl_verify,
68
- )
69
- resp.raise_for_status() # Raise an exception if request fails
70
-
71
- soup = BeautifulSoup(resp.text, "html.parser")
72
- result_block = soup.find_all("div", attrs={"class": "g"})
73
-
74
- # If no results, continue to the next batch
75
- if not result_block:
76
- start += 1
77
- continue
78
-
79
- # Extract link and text from each result
80
- for result in result_block:
81
- link = result.find("a", href=True)
82
- if link:
83
- link = link["href"]
84
- try:
85
- # Fetch webpage content
86
- webpage = requests.get(link, headers={"User-Agent": get_useragent()})
87
- webpage.raise_for_status()
88
- # Extract visible text from webpage
89
- visible_text = extract_text_from_webpage(webpage.text)
90
- all_results.append({"link": link, "text": visible_text})
91
- except requests.exceptions.RequestException as e:
92
- # Handle errors fetching or processing webpage
93
- print(f"Error fetching or processing {link}: {e}")
94
- all_results.append({"link": link, "text": None})
95
- else:
96
- all_results.append({"link": None, "text": None})
97
-
98
- start += len(result_block) # Update starting index for next batch
99
-
100
- return all_results
101
-
102
  # Speech Recognition Model Configuration
103
  model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
104
  sample_rate = 16000
@@ -118,7 +33,7 @@ def resample(audio_fp32, sr):
118
  def to_float32(audio_buffer):
119
  return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
120
 
121
- def transcribe(audio_path):
122
  audio_file = AudioSegment.from_file(audio_path)
123
  sr = audio_file.frame_rate
124
  audio_buffer = np.array(audio_file.get_array_of_samples())
@@ -138,22 +53,14 @@ def transcribe(audio_path):
138
 
139
  return text
140
 
141
- def model(text, web_search):
142
- if web_search is True:
143
- """Performs a web search, feeds the results to a language model, and returns the answer."""
144
- web_results = search(text)
145
- web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
146
- formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
147
- stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
148
- return "".join([response.token.text for response in stream if response.token.text != "</s>"])
149
- else:
150
- formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
151
- stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
152
- return "".join([response.token.text for response in stream if response.token.text != "</s>"])
153
-
154
- async def respond(audio, web_search):
155
- user = transcribe(audio)
156
- reply = model(user, web_search)
157
  communicate = edge_tts.Communicate(reply)
158
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
159
  tmp_path = tmp_file.name
@@ -161,11 +68,9 @@ async def respond(audio, web_search):
161
  return tmp_path
162
 
163
  with gr.Blocks(theme=theme) as demo:
164
- with gr.Row():
165
- web_search = gr.Checkbox(label="Web Search", value=False)
166
- input = gr.Audio(label="User Input", sources="microphone", type="filepath")
167
- output = gr.Audio(label="AI", autoplay=True)
168
- gr.Interface(fn=respond, inputs=[input, web_search], outputs=[output], live=True)
169
 
170
  if __name__ == "__main__":
171
  demo.queue(max_size=200).launch()
 
1
 
2
  import gradio as gr
3
  import edge_tts
 
4
  import tempfile
5
  import numpy as np
6
  import soxr
 
9
  import sentencepiece as spm
10
  import onnxruntime as ort
11
  from huggingface_hub import hf_hub_download, InferenceClient
 
 
 
 
12
 
13
  theme = gr.themes.Soft(
14
  primary_hue="blue",
15
  secondary_hue="orange")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Speech Recognition Model Configuration
18
  model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
19
  sample_rate = 16000
 
33
  def to_float32(audio_buffer):
34
  return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
35
 
36
+ async def transcribe(audio_path):
37
  audio_file = AudioSegment.from_file(audio_path)
38
  sr = audio_file.frame_rate
39
  audio_buffer = np.array(audio_file.get_array_of_samples())
 
53
 
54
  return text
55
 
56
+ async def model(text):
57
+ formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
58
+ stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
59
+ return "".join([response.token.text for response in stream if response.token.text != "</s>"])
60
+
61
+ async def respond(audio):
62
+ user = await transcribe(audio)
63
+ reply = await model(user)
 
 
 
 
 
 
 
 
64
  communicate = edge_tts.Communicate(reply)
65
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
66
  tmp_path = tmp_file.name
 
68
  return tmp_path
69
 
70
  with gr.Blocks(theme=theme) as demo:
71
+ input = gr.Audio(label="User Input", sources="microphone", type="filepath")
72
+ output = gr.Audio(label="AI", autoplay=True)
73
+ gr.Interface(fn=respond, inputs=[input], outputs=[output], live=True)
 
 
74
 
75
  if __name__ == "__main__":
76
  demo.queue(max_size=200).launch()