Spaces:
Runtime error
Runtime error
Commit
·
fe5b216
1
Parent(s):
2144d43
Resolving audio path for file and url
Browse files
- app.py +2 -0
- whisper_app.py +27 -20
app.py
CHANGED
@@ -61,8 +61,10 @@ def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',tempera
|
|
61 |
|
62 |
metadata = {"source": f"{wav_file}","duration":text_info['duration'],"language":text_info['language']}
|
63 |
document = [Document(page_content=text_info['text'], metadata=metadata)]
|
|
|
64 |
logger.info("Document",document)
|
65 |
logging.info("Loading General Text Embeddings (GTE) model{}".format('thenlper/gte-large'))
|
|
|
66 |
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large',model_kwargs={"device": device})
|
67 |
texts = process_documents(documents=document)
|
68 |
global vector_db
|
|
|
61 |
|
62 |
metadata = {"source": f"{wav_file}","duration":text_info['duration'],"language":text_info['language']}
|
63 |
document = [Document(page_content=text_info['text'], metadata=metadata)]
|
64 |
+
|
65 |
logger.info("Document",document)
|
66 |
logging.info("Loading General Text Embeddings (GTE) model{}".format('thenlper/gte-large'))
|
67 |
+
|
68 |
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large',model_kwargs={"device": device})
|
69 |
texts = process_documents(documents=document)
|
70 |
global vector_db
|
whisper_app.py
CHANGED
@@ -17,30 +17,37 @@ class WHISPERModel:
|
|
17 |
clip_audio = whisper.pad_or_trim(audio_data, length=SAMPLE_RATE * conv_duration)
|
18 |
result = self.model.transcribe(clip_audio)
|
19 |
return result['language']
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def speech_to_text(self, audio_path):
|
22 |
text_data = dict()
|
23 |
audio_duration = 0
|
24 |
conv_language = ""
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
audio =
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
raise("Unable to reach for URL {}".format(audio_path))
|
44 |
return text_data
|
45 |
|
46 |
|
|
|
17 |
clip_audio = whisper.pad_or_trim(audio_data, length=SAMPLE_RATE * conv_duration)
|
18 |
result = self.model.transcribe(clip_audio)
|
19 |
return result['language']
|
20 |
+
|
21 |
+
def read_audio(self,audio_path):
|
22 |
+
audio = None
|
23 |
+
try:
|
24 |
+
audio = whisper.load_audio(audio_path)
|
25 |
+
except IOError as err:
|
26 |
+
raise err
|
27 |
+
return audio
|
28 |
+
|
29 |
def speech_to_text(self, audio_path):
|
30 |
text_data = dict()
|
31 |
audio_duration = 0
|
32 |
conv_language = ""
|
33 |
+
if audio_path.startswith('http'):
|
34 |
+
r = requests.get(audio_path)
|
35 |
+
if r.status_code == 200:
|
36 |
+
audio = self.read_audio(audio_path)
|
37 |
+
else:
|
38 |
+
raise("Unable to reach for URL {}".format(audio_path))
|
39 |
+
if audio :
|
40 |
+
conv_language = self.get_info(audio)
|
41 |
+
if conv_language !='en':
|
42 |
+
res = self.model.transcribe(audio,task='translate')
|
43 |
+
if self.openai_flag:
|
44 |
+
res['text'] = self.translate_text(res['text'], orginal_text=conv_language, convert_to='English')
|
45 |
+
else:
|
46 |
+
res = self.model.transcribe(audio)
|
47 |
+
audio_duration = audio.shape[0] / SAMPLE_RATE
|
48 |
+
text_data['text'] = res['text']
|
49 |
+
text_data['duration'] = audio_duration
|
50 |
+
text_data['language'] = conv_language
|
|
|
51 |
return text_data
|
52 |
|
53 |
|