pratham0011 committed on
Commit
ffe3553
·
verified ·
1 Parent(s): 2995035

Delete whisper.py

Browse files
Files changed (1) hide show
  1. whisper.py +0 -68
whisper.py DELETED
@@ -1,68 +0,0 @@
1
- import os
2
- import tempfile
3
- import logging
4
- import requests
5
- from typing import Optional
6
-
7
- import edge_tts
8
-
9
- from config.config import VOICE, FALLBACK_VOICES, token
10
-
11
-
12
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Hugging Face Inference API endpoint for the Whisper (tiny) speech-to-text model.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
# Authorization header built from the token imported from config.
headers = dict(Authorization=f"Bearer {token}")
17
-
18
- # Voice selection handling
19
async def get_valid_voice() -> str:
    """Pick the first configured voice that Edge TTS reports as available.

    Candidates are checked in order: ``VOICE`` first, then every entry of
    ``FALLBACK_VOICES``.

    Returns:
        The ``ShortName`` of the first candidate found in the list returned
        by ``edge_tts.list_voices()``.

    Raises:
        RuntimeError: If none of the candidates is available.
    """
    catalogue = await edge_tts.list_voices()
    candidates = [VOICE] + FALLBACK_VOICES

    # Set of short names gives O(1) membership tests for each candidate.
    offered = {entry["ShortName"] for entry in catalogue}
    for candidate in candidates:
        if candidate not in offered:
            continue
        return candidate

    raise RuntimeError("No valid voice found")
29
-
30
- # Text-to-speech conversion using Edge TTS
31
async def generate_speech(text: str) -> Optional[str]:
    """Synthesize *text* to an MP3 file with Edge TTS and return the file path.

    The audio is written to a named temporary file that is NOT deleted
    automatically on success; the caller owns the returned path and is
    responsible for removing it. If synthesis fails, is cancelled, or yields
    an empty file, the temporary file is removed before the error propagates
    so failed calls do not leave files behind.

    Args:
        text: Non-empty string to convert to speech.

    Returns:
        Path of the generated ``.mp3`` file.

    Raises:
        ValueError: If *text* is empty or not a string.
        RuntimeError: If no valid voice is found, or the output file is
            missing or empty after synthesis.
    """
    if not text or not isinstance(text, str):
        raise ValueError("Invalid text input")

    voice = await get_valid_voice()
    logger.info(f"Using voice: {voice}")

    # Only the path is needed here: the handle is closed immediately and
    # edge_tts writes to the path itself, hence delete=False.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(tmp_path)

        if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0:
            raise RuntimeError("Speech file empty or not created")
    except BaseException:
        # BaseException so that task cancellation also triggers cleanup.
        try:
            os.remove(tmp_path)
        except OSError:
            # Already gone or not removable; the original error matters more.
            pass
        raise

    logger.info(f"Speech generated successfully: {tmp_path}")
    return tmp_path
49
-
50
- # Speech-to-text using Whisper
51
async def transcribe(audio_file: str, timeout: float = 60.0) -> str:
    """Transcribe an audio file by posting its bytes to ``API_URL``.

    The HTTP request is made with the synchronous ``requests`` library, so it
    is run in the default executor to keep the event loop responsive while
    waiting for the response.

    Args:
        audio_file: Path of the audio file to upload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The transcribed text with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the file cannot be read, the request fails or times
            out, or the response carries no ``"text"`` field. The underlying
            exception is attached as ``__cause__``.
    """
    import asyncio
    from functools import partial

    try:
        with open(audio_file, "rb") as f:
            data = f.read()

        # requests.post blocks; run it off the event loop thread.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            partial(
                requests.post,
                API_URL,
                headers=headers,
                data=data,
                timeout=timeout,
            ),
        )
        result = response.json()

        if isinstance(result, dict) and "text" in result:
            transcription = result["text"].strip()
            logger.info(f"Transcribed text: {transcription}")
            return transcription

        # Surface the API's own error message when it provides one.
        detail = result.get("error") if isinstance(result, dict) else None
        if detail:
            raise ValueError(f"No transcription in response: {detail}")
        raise ValueError("No transcription in response")

    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        raise RuntimeError(f"Failed to transcribe audio: {str(e)}") from e