pratham0011 committed
Commit 3ba1bc7 · verified · 1 Parent(s): 1b5933b

Upload 7 files

Files changed (5)
  1. __init__.py +0 -0
  2. config.py +25 -0
  3. qwen.py +95 -0
  4. search.py +85 -0
  5. whisper.py +68 -0
__init__.py ADDED
File without changes
config.py ADDED
@@ -0,0 +1,25 @@
+ import os
+ import logging
+ from dotenv import load_dotenv
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv()
+ token = os.getenv("hf_key")
+
+ # Set compute device (cpu/cuda)
+ device = "cpu"
+ logger.info(f"Device set to use {device}")
+
+ # AI Assistant Configuration
+ SYSTEM_PROMPT = """You are ConversAI, a helpful AI assistant who remembers conversation history. Keep responses clear, friendly and natural. Always refer to previous context when responding."""
+
+ # Text-to-Speech Voice Settings (primary/backup)
+ VOICE = "en-US-JennyNeural"
+ FALLBACK_VOICES = ["en-US-ChristopherNeural", "en-US-EricNeural"]
+
+ # Audio Output Configuration
+ OUTPUT_FORMAT = "audio-24khz-48kbit-mono-mp3"
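
Note: config.py reads the Hugging Face token from an environment variable named hf_key, so a minimal .env file next to the app would look like the following (the value here is a placeholder, not a real token):

hf_key=hf_xxxxxxxxxxxxxxxxxxxxxxxx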
qwen.py ADDED
@@ -0,0 +1,95 @@
+ import logging
+ from typing import List, Dict, Optional, Tuple
+
+ import torch
+ # from transformers import pipeline
+ from huggingface_hub import InferenceClient
+
+ from config.config import token, SYSTEM_PROMPT
+ from services.whisper import generate_speech, transcribe
+ from services.search import WebSearcher
+
+ logger = logging.getLogger(__name__)
+
+ searcher = WebSearcher()
+
+ # Qwen configuration (kwargs for a local transformers pipeline; unused while the hosted client below is active)
+ model_kwargs = {
+     "low_cpu_mem_usage": True,
+     "torch_dtype": torch.float32,
+     "use_cache": True
+ }
+ client = InferenceClient(
+     model="Qwen/Qwen2.5-0.5B-Instruct",
+     token=token
+     # trust_remote_code=True,
+     # device=device,
+     # model_kwargs=model_kwargs
+ )
+
+ async def respond(
+     audio: Optional[str] = None,
+     text: Optional[str] = None,
+     do_search: bool = False,
+     history: Optional[List[Dict]] = None
+ ) -> Tuple[Optional[str], str]:
+     try:
+         if text:
+             user_text = text.strip()
+         elif audio:
+             user_text = await transcribe(audio)
+         else:
+             return None, "No input provided"
+
+         # Build conversation context
+         messages = []
+         messages.append({"role": "system", "content": SYSTEM_PROMPT})
+
+         if history:
+             messages.extend(history)
+
+         # Format message history with Qwen's ChatML-style tags
+         prompt = ""
+         for msg in messages:
+             role = msg["role"]
+             content = msg["content"]
+             prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
+
+         # Add current user message and open the assistant turn
+         prompt += f"<|im_start|>user\n{user_text}<|im_end|>\n<|im_start|>assistant\n"
+
+         # Inject web-search context into the system prompt if enabled
+         if do_search:
+             results = searcher.search(user_text)
+             if results:
+                 search_context = "Based on search results:\n"
+                 for result in results:
+                     snippet = result["content"][:5000].strip()
+                     search_context += f"{snippet}\n"
+                 prompt = prompt.replace(SYSTEM_PROMPT, f"{SYSTEM_PROMPT}\n{search_context}")
+
+         # Generate response
+         reply = client.text_generation(
+             prompt,
+             max_new_tokens=300,
+             do_sample=True,
+             temperature=0.7,
+             top_p=0.9,
+             return_full_text=False
+         )
+
+         # Extract and clean assistant response
+         assistant_response = reply  # reply is already the generated text string
+         if "<|im_start|>assistant\n" in assistant_response:
+             assistant_response = assistant_response.split("<|im_start|>assistant\n")[-1]
+         if "<|im_end|>" in assistant_response:
+             assistant_response = assistant_response.split("<|im_end|>")[0]
+         assistant_response = assistant_response.strip()
+
+         # Convert response to speech
+         audio_path = await generate_speech(assistant_response)
+         return audio_path, assistant_response
+
+     except Exception as e:
+         logger.error(f"Error in respond: {str(e)}")
+         return None, "Sorry, I encountered an error"
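
For context, a minimal driver for respond() might look like the sketch below. It assumes the files live in config/ and services/ packages as the imports above suggest, and that a valid hf_key is set in .env; the history format mirrors the role/content dicts the function builds internally:

import asyncio
from services.qwen import respond

async def main():
    history = [
        {"role": "user", "content": "Hi, I'm Sam."},
        {"role": "assistant", "content": "Hi Sam! How can I help?"},
    ]
    # Returns (audio_path, reply_text); pass audio=... instead of text=... for voice input
    audio_path, reply = await respond(text="What's my name?", history=history)
    print(reply, "->", audio_path)

asyncio.run(main())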
search.py ADDED
@@ -0,0 +1,85 @@
+ import logging
+ from typing import List, Dict
+
+ import requests
+ from bs4 import BeautifulSoup
+ from urllib3.exceptions import InsecureRequestWarning
+
+ # Disable SSL warnings for requests
+ requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+ logger = logging.getLogger(__name__)
+
+ class WebSearcher:
+     def __init__(self):
+         self.headers = {
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
+         }
+
+     def extract_text(self, html_content: str) -> str:
+         soup = BeautifulSoup(html_content, 'html.parser')
+         # Remove unwanted elements
+         for element in soup(['script', 'style', 'nav', 'header', 'footer', 'iframe']):
+             element.decompose()
+         text = ' '.join(soup.stripped_strings)
+         return text[:8000]  # Limit text length
+
+     def search(self, query: str, max_results: int = 3) -> List[Dict]:
+         results = []
+         try:
+             with requests.Session() as session:
+                 # Google search parameters
+                 search_url = "https://www.google.com/search"
+                 params = {
+                     "q": query,
+                     "num": max_results,
+                     "hl": "en"
+                 }
+
+                 response = session.get(
+                     search_url,
+                     headers=self.headers,
+                     params=params,
+                     timeout=3,
+                     verify=False
+                 )
+                 response.raise_for_status()
+
+                 # Parse search results
+                 soup = BeautifulSoup(response.text, 'html.parser')
+                 search_results = soup.select('div.g')
+
+                 for result in search_results[:max_results]:
+                     link = result.find('a')
+                     if not link:
+                         continue
+
+                     url = link.get('href', '')
+                     if not url.startswith('http'):
+                         continue
+
+                     try:
+                         # Fetch webpage content
+                         page_response = session.get(
+                             url,
+                             headers=self.headers,
+                             timeout=5,
+                             verify=False
+                         )
+                         page_response.raise_for_status()
+
+                         content = self.extract_text(page_response.text)
+                         results.append({
+                             "url": url,
+                             "content": content
+                         })
+                         logger.info(f"Successfully fetched content from {url}")
+
+                     except Exception as e:
+                         logger.warning(f"Failed to fetch {url}: {str(e)}")
+                         continue
+
+         except Exception as e:
+             logger.error(f"Search failed: {str(e)}")
+
+         return results[:max_results]
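
As a standalone check, WebSearcher can be exercised directly; a quick sketch (the query string is arbitrary, and results depend on whether Google serves parseable div.g blocks to this User-Agent):

from services.search import WebSearcher

searcher = WebSearcher()
for r in searcher.search("python asyncio tutorial", max_results=2):
    print(r["url"])
    print(r["content"][:200], "...")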
whisper.py ADDED
@@ -0,0 +1,68 @@
+ import os
+ import tempfile
+ import logging
+ import requests
+ from typing import Optional
+
+ import edge_tts
+
+ from config.config import VOICE, FALLBACK_VOICES, token
+
+
+ logger = logging.getLogger(__name__)
+
+ # Whisper model for speech to text
+ API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
+ headers = {"Authorization": f"Bearer {token}"}
+
+ # Voice selection handling
+ async def get_valid_voice() -> str:
+     available_voices = await edge_tts.list_voices()
+     voice_names = [VOICE] + FALLBACK_VOICES
+
+     available_voice_names = {v["ShortName"] for v in available_voices}
+     for voice in voice_names:
+         if voice in available_voice_names:
+             return voice
+
+     raise RuntimeError("No valid voice found")
+
+ # Text-to-speech conversion using Edge TTS
+ async def generate_speech(text: str) -> Optional[str]:
+     if not text or not isinstance(text, str):
+         raise ValueError("Invalid text input")
+
+     voice = await get_valid_voice()
+     logger.info(f"Using voice: {voice}")
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+         tmp_path = tmp_file.name
+
+     communicate = edge_tts.Communicate(text, voice)
+     await communicate.save(tmp_path)
+
+     if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0:
+         raise RuntimeError("Speech file empty or not created")
+
+     logger.info(f"Speech generated successfully: {tmp_path}")
+     return tmp_path
+
+ # Speech-to-text using Whisper
+ async def transcribe(audio_file: str) -> str:
+     try:
+         with open(audio_file, "rb") as f:
+             data = f.read()
+
+         response = requests.post(API_URL, headers=headers, data=data)
+         result = response.json()
+
+         if "text" in result:
+             transcription = result["text"].strip()
+             logger.info(f"Transcribed text: {transcription}")
+             return transcription
+         else:
+             raise ValueError("No transcription in response")
+
+     except Exception as e:
+         logger.error(f"Transcription error: {str(e)}")
+         raise RuntimeError(f"Failed to transcribe audio: {str(e)}")
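
A quick round-trip sketch for the two functions above (requires network access and a valid hf_key, since transcribe calls the hosted Whisper endpoint; the sample text is arbitrary):

import asyncio
from services.whisper import generate_speech, transcribe

async def main():
    # TTS: synthesize an MP3 with Edge TTS, then STT it back through Whisper
    mp3_path = await generate_speech("Hello from ConversAI!")
    text = await transcribe(mp3_path)
    print(mp3_path, "->", text)

asyncio.run(main())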