pratham0011 committed (verified)
Commit 252fde6 · Parent: 3e4baba

Upload 6 files
Files changed (5):
  1. app.py +103 -96
  2. requirements.txt +0 -0
  3. services/qwen.py +16 -18
  4. services/search.py +121 -85
  5. services/whisper.py +44 -20
app.py CHANGED
@@ -1,96 +1,103 @@
- import asyncio
- import logging
- import gradio as gr
-
- from services.qwen import respond
-
-
- logger = logging.getLogger(__name__)
-
- # Track conversation state
- conversation_history = []
-
- def clear_conversation():
-     global conversation_history
-     conversation_history = []
-     return [], None
-
- def sync_respond(audio, text_input, do_search, history):
-     if not audio and not text_input:
-         return None, history
-
-     logger.info(f"Processing request with search enabled: {do_search}")
-     result = asyncio.run(respond(audio, text_input, do_search, history))
-     audio_path, response_text = result
-
-     if audio:
-         user_message = {"role": "user", "content": "Voice message"}
-     else:
-         user_message = {"role": "user", "content": text_input}
-
-     assistant_message = {"role": "assistant", "content": response_text}
-     history.extend([user_message, assistant_message])
-
-     return audio_path, history
-
- # Build Gradio interface
- with gr.Blocks(theme=gr.themes.Soft()) as interface:
-     gr.Markdown(
-         """
-         <div style="text-align: center; margin-bottom: 1rem;">
-             <h1 style="font-weight: bold;">ConversAI: AI Voice & Chat Assistant</h1>
-         </div>
-         """,
-         show_label=False
-     )
-
-     # Input components (left column)
-     with gr.Row():
-         with gr.Column(scale=2):
-             audio_input = gr.Audio(
-                 label="Your Voice Input",
-                 type="filepath",
-                 sources=["microphone"]
-             )
-             text_input = gr.Textbox(
-                 label="Or Type Your Message",
-                 placeholder="Type here..."
-             )
-             search_checkbox = gr.Checkbox(
-                 label="Enable web search",
-                 value=False
-             )
-             clear_btn = gr.Button("Clear Chat")
-
-         # Output components (right column)
-         with gr.Column(scale=3):
-             chatbot = gr.Chatbot(label="Conversation", type="messages")
-             audio_output = gr.Audio(
-                 label="AI Voice Response",
-                 type="filepath",
-                 autoplay=True
-             )
-
-     # Define input event handlers
-     input_events = [
-         audio_input.change(
-             fn=sync_respond,
-             inputs=[audio_input, text_input, search_checkbox, chatbot],
-             outputs=[audio_output, chatbot]
-         ),
-         text_input.submit(
-             fn=sync_respond,
-             inputs=[audio_input, text_input, search_checkbox, chatbot],
-             outputs=[audio_output, chatbot]
-         )
-     ]
-
-     # Clear chat button handler
-     clear_btn.click(
-         fn=clear_conversation,
-         outputs=[chatbot, audio_output]
-     )
-
- # Start server
- if __name__ == "__main__":
-     interface.launch(debug=True)
+ import asyncio
+ import logging
+ import gradio as gr
+
+ from services.qwen import respond
+
+
+ logger = logging.getLogger(__name__)
+
+ # Track conversation state
+ conversation_history = []
+
+ def clear_conversation():
+     global conversation_history
+     conversation_history = []
+     return [], None
+
+ def sync_respond(audio, text_input, do_search, history):
+     if not audio and not text_input:
+         return None, history
+
+     logger.info(f"Processing request with search enabled: {do_search}")
+     result = asyncio.run(respond(audio, text_input, do_search, history))
+     audio_path, response_text = result
+
+     if audio:
+         user_message = {"role": "user", "content": "Voice message"}
+     else:
+         user_message = {"role": "user", "content": text_input}
+
+     assistant_message = {"role": "assistant", "content": response_text}
+     history.extend([user_message, assistant_message])
+
+     return audio_path, history
+
+ # Build Gradio interface
+ with gr.Blocks(
+     theme=gr.themes.Soft(),
+     css=""".message { font-family: "Times New Roman", Times, serif !important;}"""
+ ) as interface:
+     gr.Markdown(
+         """
+         <div style="text-align: center; margin-bottom: 1rem;">
+             <h1 style="font-weight: bold;">ConversAI: AI Voice & Chat Assistant</h1>
+         </div>
+         """,
+         show_label=False
+     )
+
+     # Input components (left column)
+     with gr.Row():
+         with gr.Column(scale=2):
+             audio_input = gr.Audio(
+                 label="Your Voice Input",
+                 type="filepath",
+                 sources=["microphone"]
+             )
+             text_input = gr.Textbox(
+                 label="Or Type Your Message",
+                 placeholder="Type here..."
+             )
+             search_checkbox = gr.Checkbox(
+                 label="Enable web search",
+                 value=False
+             )
+             clear_btn = gr.Button("Clear Chat")
+
+         # Output components (right column)
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(label="Conversation", type="messages")
+             audio_output = gr.Audio(
+                 label="AI Voice Response",
+                 type="filepath",
+                 autoplay=True
+             )
+
+     # Define input event handlers
+     input_events = [
+         audio_input.change(
+             fn=sync_respond,
+             inputs=[audio_input, text_input, search_checkbox, chatbot],
+             outputs=[audio_output, chatbot]
+         ),
+         text_input.submit(
+             fn=sync_respond,
+             inputs=[audio_input, text_input, search_checkbox, chatbot],
+             outputs=[audio_output, chatbot]
+         )
+     ]
+
+     # Clear chat button handler
+     clear_btn.click(
+         fn=clear_conversation,
+         outputs=[chatbot, audio_output]
+     )
+
+ # Start server
+ if __name__ == "__main__":
+     interface.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         debug=True
+     )
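
Review note: both input events funnel into `sync_respond`, which bridges Gradio's synchronous callbacks into the async `respond` coroutine via `asyncio.run`. A minimal sketch of that bridge, with a hypothetical stub standing in for `services.qwen.respond` so it runs standalone:

```python
import asyncio

async def respond(audio, text, do_search, history):
    # Hypothetical stub; the real coroutine lives in services/qwen.py.
    return None, f"Echo: {text}"

def sync_respond(audio, text_input, do_search, history):
    # asyncio.run creates a fresh event loop per call, which is safe here
    # because Gradio invokes the callback outside any running loop.
    audio_path, reply = asyncio.run(respond(audio, text_input, do_search, history))
    history = history + [
        {"role": "user", "content": "Voice message" if audio else text_input},
        {"role": "assistant", "content": reply},
    ]
    return audio_path, history

print(sync_respond(None, "hello", False, []))
```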
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
services/qwen.py CHANGED
@@ -2,10 +2,10 @@ import logging
  from typing import List, Dict, Optional, Tuple
 
  import torch
- # from transformers import pipeline
- from huggingface_hub import InferenceClient
+ from transformers import pipeline
+ from transformers import pipeline
 
- from config.config import token, SYSTEM_PROMPT
+ from config.config import token, device, SYSTEM_PROMPT
  from services.whisper import generate_speech, transcribe
  from services.search import WebSearcher
 
@@ -19,12 +19,13 @@ model_kwargs = {
      "torch_dtype": torch.float32,
      'use_cache': True
  }
- client = InferenceClient(
+ client = pipeline(
+     "text-generation",
      model="Qwen/Qwen2.5-0.5B-Instruct",
-     token=token
-     # trust_remote_code=True,
-     # device=device,
-     # model_kwargs=model_kwargs
+     token=token,
+     trust_remote_code=True,
+     device=device,
+     model_kwargs=model_kwargs
  )
 
  async def respond(
@@ -64,27 +65,24 @@ async def respond(
      if results:
          search_context = "Based on search results:\n"
          for result in results:
-             snippet = result['content'][:5000].strip()
+             snippet = result['content'][:500].strip()
              search_context += f"{snippet}\n"
          prompt = prompt.replace(SYSTEM_PROMPT, f"{SYSTEM_PROMPT}\n{search_context}")
 
      # Generate response
-     reply = client.text_generation(
+     reply = client(
          prompt,
-         max_new_tokens=300,
+         max_new_tokens=400,
          do_sample=True,
          temperature=0.7,
          top_p=0.9,
-         return_full_text=False
+         num_return_sequences=1
      )
 
      # Extract and clean assistant response
-     assistant_response = reply  # Reply is already the generated text string
-     if "<|im_start|>assistant\n" in assistant_response:
-         assistant_response = assistant_response.split("<|im_start|>assistant\n")[-1]
-     if "<|im_end|>" in assistant_response:
-         assistant_response = assistant_response.split("<|im_end|>")[0]
-     assistant_response = assistant_response.strip()
+     assistant_response = reply[0]['generated_text']
+     assistant_response = assistant_response.split("<|im_start|>assistant\n")[-1]
+     assistant_response = assistant_response.split("<|im_end|>")[0].strip()
 
      # Convert response to speech
      audio_path = await generate_speech(assistant_response)
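
Review note: moving from `InferenceClient.text_generation` (which returned a bare string) to a local `transformers` pipeline changes the return shape to a list of dicts whose `generated_text` includes the prompt, which is why the ChatML-marker splitting above is now unconditional. A hedged standalone sketch of that flow; the system and user strings are invented for illustration:

```python
from transformers import pipeline

# Local text-generation pipeline; device=-1 pins it to CPU for this sketch.
client = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct", device=-1)

# Qwen instruct models use ChatML-style turn markers.
prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nSay hello in five words.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

reply = client(prompt, max_new_tokens=400, do_sample=True,
               temperature=0.7, top_p=0.9, num_return_sequences=1)

# generated_text contains the prompt too, so slice out the assistant turn.
text = reply[0]["generated_text"]
text = text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
print(text)
```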
services/search.py CHANGED
@@ -1,85 +1,121 @@
- import logging
- from typing import List, Dict
-
- import requests
- from bs4 import BeautifulSoup
- from urllib3.exceptions import InsecureRequestWarning
-
- # Disable SSL warnings for requests
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
-
- logger = logging.getLogger(__name__)
-
- class WebSearcher:
-     def __init__(self):
-         self.headers = {
-             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
-         }
-
-     def extract_text(self, html_content: str) -> str:
-         soup = BeautifulSoup(html_content, 'html.parser')
-         # Remove unwanted elements
-         for element in soup(['script', 'style', 'nav', 'header', 'footer', 'iframe']):
-             element.decompose()
-         text = ' '.join(soup.stripped_strings)
-         return text[:8000]  # Limit text length
-
-     def search(self, query: str, max_results: int = 3) -> List[Dict]:
-         results = []
-         try:
-             with requests.Session() as session:
-                 # Google search parameters
-                 search_url = "https://www.google.com/search"
-                 params = {
-                     "q": query,
-                     "num": max_results,
-                     "hl": "en"
-                 }
-
-                 response = session.get(
-                     search_url,
-                     headers=self.headers,
-                     params=params,
-                     timeout=10,
-                     verify=False
-                 )
-                 response.raise_for_status()
-
-                 # Parse search results
-                 soup = BeautifulSoup(response.text, 'html.parser')
-                 search_results = soup.select('div.g')
-
-                 for result in search_results[:max_results]:
-                     link = result.find('a')
-                     if not link:
-                         continue
-
-                     url = link.get('href', '')
-                     if not url.startswith('http'):
-                         continue
-
-                     try:
-                         # Fetch webpage content
-                         page_response = session.get(
-                             url,
-                             headers=self.headers,
-                             timeout=5,
-                             verify=False
-                         )
-                         page_response.raise_for_status()
-
-                         content = self.extract_text(page_response.text)
-                         results.append({
-                             "url": url,
-                             "content": content
-                         })
-                         logger.info(f"Successfully fetched content from {url}")
-
-                     except Exception as e:
-                         logger.warning(f"Failed to fetch {url}: {str(e)}")
-                         continue
-
-         except Exception as e:
-             logger.error(f"Search failed: {str(e)}")
-
-         return results[:max_results]
+ # import logging
+ # from typing import List, Dict
+
+ # import requests
+ # from bs4 import BeautifulSoup
+ # from urllib3.exceptions import InsecureRequestWarning
+
+ # # Disable SSL warnings for requests
+ # requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+ # logger = logging.getLogger(__name__)
+
+ # class WebSearcher:
+ #     def __init__(self):
+ #         self.headers = {
+ #             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
+ #         }
+
+ #     def extract_text(self, html_content: str) -> str:
+ #         soup = BeautifulSoup(html_content, 'html.parser')
+ #         # Remove unwanted elements
+ #         for element in soup(['script', 'style', 'nav', 'header', 'footer', 'iframe']):
+ #             element.decompose()
+ #         text = ' '.join(soup.stripped_strings)
+ #         return text[:8000]  # Limit text length
+
+ #     def search(self, query: str, max_results: int = 3) -> List[Dict]:
+ #         results = []
+ #         try:
+ #             with requests.Session() as session:
+ #                 # Google search parameters
+ #                 search_url = "https://www.google.com/search"
+ #                 params = {
+ #                     "q": query,
+ #                     "num": max_results,
+ #                     "hl": "en"
+ #                 }
+
+ #                 response = session.get(
+ #                     search_url,
+ #                     headers=self.headers,
+ #                     params=params,
+ #                     timeout=10,
+ #                     verify=False
+ #                 )
+ #                 response.raise_for_status()
+
+ #                 # Parse search results
+ #                 soup = BeautifulSoup(response.text, 'html.parser')
+ #                 search_results = soup.select('div.g')
+
+ #                 for result in search_results[:max_results]:
+ #                     link = result.find('a')
+ #                     if not link:
+ #                         continue
+
+ #                     url = link.get('href', '')
+ #                     if not url.startswith('http'):
+ #                         continue
+
+ #                     try:
+ #                         # Fetch webpage content
+ #                         page_response = session.get(
+ #                             url,
+ #                             headers=self.headers,
+ #                             timeout=5,
+ #                             verify=False
+ #                         )
+ #                         page_response.raise_for_status()
+
+ #                         content = self.extract_text(page_response.text)
+ #                         results.append({
+ #                             "url": url,
+ #                             "content": content
+ #                         })
+ #                         logger.info(f"Successfully fetched content from {url}")
+
+ #                     except Exception as e:
+ #                         logger.warning(f"Failed to fetch {url}: {str(e)}")
+ #                         continue
+
+ #         except Exception as e:
+ #             logger.error(f"Search failed: {str(e)}")
+
+ #         return results[:max_results]
+
+
+
+
+ import logging
+ from typing import List, Dict
+ from transformers.agents import DuckDuckGoSearchTool
+
+ logger = logging.getLogger(__name__)
+
+ class WebSearcher:
+     def __init__(self):
+         self.search_tool = DuckDuckGoSearchTool()
+
+     def search(self, query: str) -> List[Dict]:
+         try:
+             # Execute search
+             search_results = self.search_tool(query)
+
+             # Convert list to string if necessary
+             if isinstance(search_results, list):
+                 search_results = ' '.join(str(result) for result in search_results)
+
+             results = [{
+                 "url": "duckduckgo_search",
+                 "content": str(search_results)  # Ensure string content
+             }]
+
+             return results
+
+         except Exception as e:
+             logger.error(f"Search error: {str(e)}")
+             return []
+
+ # Initialize searcher
+ searcher = WebSearcher()
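
Review note: the rewritten `WebSearcher` delegates to `DuckDuckGoSearchTool` (from `transformers.agents`, which itself relies on the `duckduckgo-search` package) and flattens the tool output into a single pseudo-result, so callers that previously iterated over per-URL entries now receive one blob under a sentinel URL. A usage sketch under that assumption; the query string is invented:

```python
from services.search import searcher

results = searcher.search("Qwen2.5 model sizes")
for r in results:                  # at most one entry in this design
    print(r["url"])                # always the sentinel "duckduckgo_search"
    print(r["content"][:200])      # flattened tool output as one string
```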
services/whisper.py CHANGED
@@ -1,19 +1,29 @@
  import os
  import tempfile
  import logging
- import requests
  from typing import Optional
 
+ import torch
+ import librosa
  import edge_tts
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
- from config.config import VOICE, FALLBACK_VOICES, token
+ from config.config import VOICE, FALLBACK_VOICES
 
 
  logger = logging.getLogger(__name__)
 
  # Whisper model for speech to text
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
- headers = {"Authorization": f"Bearer {token}"}
+ processor = WhisperProcessor.from_pretrained(
+     "openai/whisper-tiny",
+     local_files_only=False
+ )
+ model = WhisperForConditionalGeneration.from_pretrained(
+     "openai/whisper-tiny",
+     local_files_only=False,
+     low_cpu_mem_usage=True,
+     torch_dtype=torch.float32,
+ ).to("cpu")
 
  # Voice selection handling
  async def get_valid_voice() -> str:
@@ -49,20 +59,34 @@ async def generate_speech(text: str) -> Optional[str]:
 
  # Speech-to-text using Whisper
  async def transcribe(audio_file: str) -> str:
-     try:
-         with open(audio_file, "rb") as f:
-             data = f.read()
-
-         response = requests.post(API_URL, headers=headers, data=data)
-         result = response.json()
-
-         if "text" in result:
-             transcription = result["text"].strip()
-             logger.info(f"Transcribed text: {transcription}")
-             return transcription
-         else:
-             raise ValueError("No transcription in response")
-
-     except Exception as e:
-         logger.error(f"Transcription error: {str(e)}")
-         raise RuntimeError(f"Failed to transcribe audio: {str(e)}")
+     audio, sr = librosa.load(
+         audio_file,
+         sr=16000,
+         mono=True,
+         duration=30
+     )
+
+     inputs = processor(
+         audio,
+         sampling_rate=sr,
+         return_tensors="pt",
+         return_attention_mask=True
+     ).to(model.device)
+
+     with torch.no_grad():
+         generated_ids = model.generate(
+             input_features=inputs.input_features,
+             attention_mask=inputs.attention_mask,
+             language="en",
+             task="transcribe",
+             max_length=448,
+             temperature=0.0
+         )
+
+     transcription = processor.batch_decode(
+         generated_ids,
+         skip_special_tokens=True
+     )[0].strip()
+
+     logger.info(f"Transcribed text: {transcription}")
+     return transcription
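
Review note: transcription now runs fully locally instead of calling the HF Inference API: librosa loads the clip resampled to 16 kHz mono and capped at 30 s (Whisper's context window), the processor builds log-mel features, and `generate` decodes with English transcription forced. A condensed standalone sketch; `sample.wav` is a placeholder path:

```python
import torch
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

# 16 kHz mono, truncated to Whisper's 30-second window.
audio, sr = librosa.load("sample.wav", sr=16000, mono=True, duration=30)
inputs = processor(audio, sampling_rate=sr, return_tensors="pt")

with torch.no_grad():
    ids = model.generate(inputs.input_features, language="en", task="transcribe")

print(processor.batch_decode(ids, skip_special_tokens=True)[0].strip())
```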