Spaces:
Sleeping
Sleeping
import subprocess | |
import speech_recognition as sr | |
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC, AutoModelForCTC | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
from utils import WHITESPACE_HANDLER | |
from transformers import pipeline | |
from settings import settings | |
from transformers import AutoProcessor, AutoModelForCTC | |
import torchaudio | |
import requests | |
async def create_wav(audio_file):
    """Convert *audio_file* to WAV with ffmpeg and return the new file's path.

    The output path is the input path with its final extension swapped for
    ``.wav`` (``clip.mp3`` -> ``clip.wav``); a path without an extension simply
    gets ``.wav`` appended.

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        Path of the WAV file written by ffmpeg.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    import os

    # Only the trailing extension is swapped. The previous str.replace()
    # approach rewrote every occurrence of the extension text in the path and
    # produced a doubled dot (e.g. "mp3s/a.mp3" -> ".wavs/a..wav").
    root, _ext = os.path.splitext(audio_file)
    wav_audio_path = root + ".wav"

    # NOTE: subprocess.run is synchronous, so the event loop is blocked for the
    # duration of the conversion.
    # check=True makes a failed conversion surface here instead of later as a
    # missing-file error when the caller tries to open the output.
    subprocess.run(['ffmpeg', '-i', audio_file, wav_audio_path], check=True)
    return wav_audio_path
async def speech2text(audio_file):
    """Transcribe *audio_file* through the remote speech-to-text service.

    Files that do not end in ``.wav`` are converted with ``create_wav`` first.
    The audio is then uploaded as multipart form data to ``settings.URL`` with
    ``settings.API`` sent as the ``Authorization`` header.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The value found at ``["result"]["text"]`` in the JSON response.

    Raises:
        KeyError: If the JSON response lacks ``result`` or ``text``.
    """
    if not audio_file.endswith(".wav"):
        # The path must be passed through; calling create_wav() with no
        # argument raised TypeError for every non-WAV input.
        audio_file = await create_wav(audio_file)

    # An earlier local path based on speech_recognition's recognize_google was
    # replaced by the remote API call below.
    url = settings.URL
    headers = {'Authorization': settings.API}

    # Keep the upload handle inside a context manager so it is closed even if
    # the request fails.
    with open(audio_file, 'rb') as audio_stream:
        files = {'file': (audio_file, audio_stream)}
        response = requests.post(url, headers=headers, files=files)

    aligned_transcript = response.json()['result']["text"]
    return aligned_transcript
async def summerizer(aligned_transcript):
    """Produce a summary of *aligned_transcript* with a seq2seq model.

    The tokenizer and model named by ``settings.SUMMARIZER_MODEL`` are loaded
    on every call. The transcript is passed through ``WHITESPACE_HANDLER``
    (presumably whitespace normalisation -- confirm in ``utils``), tokenized
    with padding/truncation to 512 tokens, and summarised with 4-beam search
    capped at 84 tokens.

    Args:
        aligned_transcript: Text to summarise.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    checkpoint = settings.SUMMARIZER_MODEL
    tok = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # Encode a single-item batch, padded/truncated to a fixed length.
    cleaned_text = WHITESPACE_HANDLER(aligned_transcript)
    encoded = tok(
        [cleaned_text],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    generated = seq2seq.generate(
        input_ids=encoded["input_ids"],
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )

    # Only one sequence was submitted, so the first output is the summary.
    best_sequence = generated[0]
    return tok.decode(
        best_sequence,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
async def STT_with_Summary(audio_file):
    """Transcribe *audio_file* and summarise the resulting transcript.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        A ``(transcript, summary)`` tuple.
    """
    transcript = await speech2text(audio_file)
    # Summarisation needs the finished transcript, so the two steps run
    # strictly one after the other.
    return transcript, await summerizer(transcript)